feat(思维导图助手):优化打包

develop
panyy 2026-07-03 13:56:00 +08:00
parent 59cbda30e2
commit 4b50b6bf4e
7 changed files with 232 additions and 32 deletions

View File

@ -38,10 +38,11 @@ RUN python3 -m pip install -U pip -i https://mirrors.aliyun.com/pypi/simple && \
python3 -m pip install \
"numpy==1.26.4" \
"opencv-python==4.11.0.86" \
"pydantic>=2.12,<3" \
"modelscope" \
"magic-pdf" \
-i https://mirrors.aliyun.com/pypi/simple && \
python3 -m pip install --force-reinstall --no-cache-dir "pydantic>=2.12,<3" -i https://mirrors.aliyun.com/pypi/simple && \
python3 -c "import pydantic; print('pydantic', pydantic.__version__); from pydantic import TypeAdapter; print('pydantic TypeAdapter ok')" && \
python3 -m pip cache purge
WORKDIR /app

View File

@ -18,10 +18,12 @@ RUN python3 -m pip install -U pip -i https://mirrors.aliyun.com/pypi/simple && \
numpy==1.26.4 \
opencv-python==4.11.0.86 \
-i https://mirrors.aliyun.com/pypi/simple && \
python3 -m pip install --force-reinstall --no-cache-dir "pydantic>=2.12,<3" -i https://mirrors.aliyun.com/pypi/simple && \
python3 -c "import pydantic; print('pydantic', pydantic.__version__); from pydantic import TypeAdapter; print('pydantic TypeAdapter ok')" && \
python3 -m pip cache purge
# Download models and update the configuration file
RUN /bin/bash -c "mineru-models-download -s modelscope -m all"
# Set the entry point to activate the virtual environment and run the command line tool
ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"]
ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"]

View File

@ -361,10 +361,22 @@ def get_infer_result(file_suffix_identifier: str, pdf_name: str, parse_dir: str)
api_router = APIRouter(prefix="/api")
# Optional per-request overrides for the mindmap LLM call.
# Every field defaults to None; the _optional_* helpers in this file then fall
# back to the matching MINDMAP_LLM_* environment variable or a built-in default.
# NOTE(review): these values arrive from the client and no range validation is
# visible on this model (the frontend form bounds them, e.g. timeout 30-3600) —
# confirm whether server-side bounds are needed.
class MindmapLlmConfig(BaseModel):
# Model service base URL (env: MINDMAP_LLM_BASE_URL); the caller strips a trailing "/".
base_url: Optional[str] = None
# API key (env: MINDMAP_LLM_API_KEY); the request-start log records only whether one is configured.
api_key: Optional[str] = None
# Model name (env: MINDMAP_LLM_MODEL, default "gemma-4-26B").
model: Optional[str] = None
# Request timeout (env: MINDMAP_LLM_TIMEOUT, default 180); the settings form labels it in seconds.
timeout: Optional[int] = None
# Sampling temperature placed in the request payload (env: MINDMAP_LLM_TEMPERATURE, default 0.2).
temperature: Optional[float] = None
# Context window size used for budgeting (env: MINDMAP_LLM_MAX_CONTEXT_TOKENS, default 32768).
max_context_tokens: Optional[int] = None
# Output token cap sent as "max_tokens" (env: MINDMAP_LLM_MAX_OUTPUT_TOKENS, default 4096).
max_output_tokens: Optional[int] = None
# Extra tokens subtracted from the input budget (env: MINDMAP_LLM_SAFETY_TOKENS, default 1024).
safety_tokens: Optional[int] = None
# Request body accepted by create_mindmap_task for a smart-organize job.
class MindmapOrganizeRequest(BaseModel):
# Source Markdown to be organized (required).
markdown: str
# Organize mode forwarded to the LLM call; defaults to "smart".
mode: str = "smart"
# Optional custom prompt; a blank or missing value falls back to DEFAULT_MINDMAP_ORGANIZE_PROMPT.
prompt: Optional[str] = None
# Optional per-request LLM overrides; None means environment/default settings are used.
llm_config: Optional[MindmapLlmConfig] = None
def _extract_json_object(text: Optional[str]) -> str:
@ -436,16 +448,34 @@ def _estimate_tokens(text: str) -> int:
return max(1, int(ascii_chars / 4) + int(non_ascii_chars * 1.5))
# Zero-argument variant: reads the output-token cap only from the
# MINDMAP_LLM_MAX_OUTPUT_TOKENS environment variable (default "4096").
# NOTE(review): a definition with the same name that also accepts llm_config
# appears later in this hunk; this one looks like the removed side of the
# diff — confirm before relying on it.
def _get_mindmap_max_output_tokens() -> int:
return int(os.getenv("MINDMAP_LLM_MAX_OUTPUT_TOKENS", "4096"))
def _optional_str(value: Optional[str], env_name: str, default: str = "") -> str:
    """Resolve a string setting: request value, then environment, then default.

    Args:
        value: Value supplied with the request; ``None`` or blank means unset.
        env_name: Environment variable consulted when ``value`` is unset.
        default: Result when neither the request nor the environment
            provides a non-blank value.

    Returns:
        The first non-blank candidate with surrounding whitespace removed,
        otherwise ``default`` unchanged.
    """
    configured = (value or "").strip()
    if configured:
        return configured
    # A variable exported as empty or whitespace (e.g. `FOO=` in an env file)
    # is treated as unset so the default still applies, mirroring how a blank
    # request value is handled above.
    from_env = (os.getenv(env_name) or "").strip()
    return from_env or default
def _get_mindmap_context_budget(prompt: str, reserve_output_tokens: Optional[int] = None) -> tuple[int, int]:
def _optional_int(value: Optional[int], env_name: str, default: int) -> int:
    """Resolve an integer setting: request value, then environment, then default.

    Args:
        value: Value supplied with the request; ``None`` means unset
            (``0`` is a real value and is honoured).
        env_name: Environment variable consulted when ``value`` is ``None``.
        default: Result when the variable is missing or blank.

    Returns:
        The resolved integer.

    Raises:
        ValueError: If the environment variable holds non-blank text that is
            not a valid integer; the message names the variable.
    """
    if value is not None:
        return int(value)
    # A blank variable (e.g. `FOO=` in an env file) counts as unset; passing
    # "" to int() would otherwise fail with an error that hides the cause.
    raw = (os.getenv(env_name) or "").strip()
    if not raw:
        return int(default)
    try:
        return int(raw)
    except ValueError as err:
        raise ValueError(
            f"Environment variable {env_name} must be an integer, got {raw!r}"
        ) from err
def _optional_float(value: Optional[float], env_name: str, default: float) -> float:
    """Resolve a float setting: request value, then environment, then default.

    Args:
        value: Value supplied with the request; ``None`` means unset
            (``0.0`` is a real value and is honoured).
        env_name: Environment variable consulted when ``value`` is ``None``.
        default: Result when the variable is missing or blank.

    Returns:
        The resolved float.

    Raises:
        ValueError: If the environment variable holds non-blank text that is
            not a valid number; the message names the variable.
    """
    if value is not None:
        return float(value)
    # A blank variable (e.g. `FOO=` in an env file) counts as unset; passing
    # "" to float() would otherwise fail with an error that hides the cause.
    raw = (os.getenv(env_name) or "").strip()
    if not raw:
        return float(default)
    try:
        return float(raw)
    except ValueError as err:
        raise ValueError(
            f"Environment variable {env_name} must be a number, got {raw!r}"
        ) from err
def _get_mindmap_max_output_tokens(llm_config: Optional[MindmapLlmConfig] = None) -> int:
    """Return the output-token cap for a mindmap LLM call.

    The per-request ``llm_config.max_output_tokens`` is used when a config is
    supplied; otherwise resolution is delegated to ``_optional_int`` with the
    MINDMAP_LLM_MAX_OUTPUT_TOKENS environment variable and a default of 4096.
    """
    if llm_config:
        requested = llm_config.max_output_tokens
    else:
        requested = None
    return _optional_int(requested, "MINDMAP_LLM_MAX_OUTPUT_TOKENS", 4096)
def _get_mindmap_context_budget(
prompt: str,
reserve_output_tokens: Optional[int] = None,
llm_config: Optional[MindmapLlmConfig] = None
) -> tuple[int, int]:
if reserve_output_tokens is None:
reserve_output_tokens = _get_mindmap_max_output_tokens()
max_context_tokens = int(os.getenv("MINDMAP_LLM_MAX_CONTEXT_TOKENS", "32768"))
reserve_output_tokens = _get_mindmap_max_output_tokens(llm_config)
max_context_tokens = _optional_int(llm_config.max_context_tokens if llm_config else None, "MINDMAP_LLM_MAX_CONTEXT_TOKENS", 32768)
prompt_tokens = _estimate_tokens(prompt)
safety_tokens = int(os.getenv("MINDMAP_LLM_SAFETY_TOKENS", "1024"))
safety_tokens = _optional_int(llm_config.safety_tokens if llm_config else None, "MINDMAP_LLM_SAFETY_TOKENS", 1024)
input_budget_tokens = max(2048, max_context_tokens - prompt_tokens - reserve_output_tokens - safety_tokens)
logger.info(
"Mindmap context budget max_context_tokens={} prompt_tokens={} reserve_output_tokens={} safety_tokens={} input_budget_tokens={}",
@ -527,12 +557,20 @@ def _chunk_markdown_by_headings(markdown: str, max_tokens: int) -> list[str]:
return chunks or [markdown]
def _call_mindmap_llm(markdown: str, mode: str = "smart", custom_prompt: Optional[str] = None, task_id: Optional[str] = None, request_role: str = "organize") -> str:
base_url = os.getenv("MINDMAP_LLM_BASE_URL", "").rstrip("/")
model = os.getenv("MINDMAP_LLM_MODEL", "gemma-4-26B")
api_key = os.getenv("MINDMAP_LLM_API_KEY", "")
timeout = int(os.getenv("MINDMAP_LLM_TIMEOUT", "180"))
max_output_tokens = _get_mindmap_max_output_tokens()
def _call_mindmap_llm(
markdown: str,
mode: str = "smart",
custom_prompt: Optional[str] = None,
task_id: Optional[str] = None,
request_role: str = "organize",
llm_config: Optional[MindmapLlmConfig] = None
) -> str:
base_url = _optional_str(llm_config.base_url if llm_config else None, "MINDMAP_LLM_BASE_URL").rstrip("/")
model = _optional_str(llm_config.model if llm_config else None, "MINDMAP_LLM_MODEL", "gemma-4-26B")
api_key = _optional_str(llm_config.api_key if llm_config else None, "MINDMAP_LLM_API_KEY")
timeout = _optional_int(llm_config.timeout if llm_config else None, "MINDMAP_LLM_TIMEOUT", 180)
max_output_tokens = _get_mindmap_max_output_tokens(llm_config)
temperature = _optional_float(llm_config.temperature if llm_config else None, "MINDMAP_LLM_TEMPERATURE", 0.2)
if not base_url:
raise RuntimeError("未配置智能整理模型服务,请设置 MINDMAP_LLM_BASE_URL")
@ -545,8 +583,9 @@ def _call_mindmap_llm(markdown: str, mode: str = "smart", custom_prompt: Optiona
{compact_markdown}
"""
logger.info(
"Mindmap LLM request start task_id={} role={} model={} base_url={} mode={} input_chars={} input_tokens_est={} prompt_chars={} max_tokens={}",
task_id or "-", request_role, model, base_url, mode, len(compact_markdown), _estimate_tokens(compact_markdown), len(prompt_template), max_output_tokens
"Mindmap LLM request start task_id={} role={} model={} base_url={} mode={} input_chars={} input_tokens_est={} prompt_chars={} max_tokens={} timeout={} temperature={} api_key_configured={}",
task_id or "-", request_role, model, base_url, mode, len(compact_markdown), _estimate_tokens(compact_markdown),
len(prompt_template), max_output_tokens, timeout, temperature, bool(api_key)
)
payload = {
@ -555,7 +594,7 @@ def _call_mindmap_llm(markdown: str, mode: str = "smart", custom_prompt: Optiona
{"role": "system", "content": "你擅长把长文档整理成结构清晰、层次合理的思维导图 Markdown并严格保留原文语言。"},
{"role": "user", "content": prompt},
],
"temperature": float(os.getenv("MINDMAP_LLM_TEMPERATURE", "0.2")),
"temperature": temperature,
"max_tokens": max_output_tokens,
}
data = json.dumps(payload, ensure_ascii=False).encode("utf-8")
@ -600,9 +639,15 @@ def _call_mindmap_llm(markdown: str, mode: str = "smart", custom_prompt: Optiona
return organized
def _organize_mindmap_markdown(markdown: str, mode: str, custom_prompt: Optional[str], task_id: str) -> str:
def _organize_mindmap_markdown(
markdown: str,
mode: str,
custom_prompt: Optional[str],
task_id: str,
llm_config: Optional[MindmapLlmConfig]
) -> str:
prompt_template = (custom_prompt or "").strip() or DEFAULT_MINDMAP_ORGANIZE_PROMPT
_, input_budget_tokens = _get_mindmap_context_budget(prompt_template)
_, input_budget_tokens = _get_mindmap_context_budget(prompt_template, llm_config=llm_config)
source_tokens = _estimate_tokens(markdown)
logger.info(
"Mindmap organize strategy task_id={} source_chars={} source_tokens_est={} input_budget_tokens={}",
@ -611,7 +656,7 @@ def _organize_mindmap_markdown(markdown: str, mode: str, custom_prompt: Optional
if source_tokens <= input_budget_tokens:
_update_task_progress(task_id, 35, "调用智能整理模型")
return _call_mindmap_llm(markdown, mode, prompt_template, task_id, "single")
return _call_mindmap_llm(markdown, mode, prompt_template, task_id, "single", llm_config)
chunks = _chunk_markdown_by_headings(markdown, input_budget_tokens)
logger.info("Mindmap large input split task_id={} chunks={}", task_id, len(chunks))
@ -623,14 +668,14 @@ def _organize_mindmap_markdown(markdown: str, mode: str, custom_prompt: Optional
"Mindmap chunk organize task_id={} chunk={}/{} chars={} tokens_est={}",
task_id, index, len(chunks), len(chunk), _estimate_tokens(chunk)
)
partial = _call_mindmap_llm(chunk, mode, prompt_template, task_id, f"chunk-{index}")
partial = _call_mindmap_llm(chunk, mode, prompt_template, task_id, f"chunk-{index}", llm_config)
partial_results.append(partial)
merged_input = "\n\n".join(
f"<!-- chunk {index} -->\n{partial}"
for index, partial in enumerate(partial_results, start=1)
)
_, merge_budget_tokens = _get_mindmap_context_budget(MINDMAP_MERGE_PROMPT)
_, merge_budget_tokens = _get_mindmap_context_budget(MINDMAP_MERGE_PROMPT, llm_config=llm_config)
merge_tokens = _estimate_tokens(merged_input)
if merge_tokens > merge_budget_tokens:
logger.warning(
@ -641,14 +686,20 @@ def _organize_mindmap_markdown(markdown: str, mode: str, custom_prompt: Optional
merged_round: list[str] = []
for index, chunk in enumerate(merge_chunks, start=1):
_update_task_progress(task_id, 78 + int(index / max(len(merge_chunks), 1) * 10), f"合并局部大纲 {index}/{len(merge_chunks)}")
merged_round.append(_call_mindmap_llm(chunk, mode, MINDMAP_MERGE_PROMPT, task_id, f"merge-round-{index}"))
merged_round.append(_call_mindmap_llm(chunk, mode, MINDMAP_MERGE_PROMPT, task_id, f"merge-round-{index}", llm_config))
merged_input = "\n\n".join(merged_round)
_update_task_progress(task_id, 90, "全局整理标题结构")
return _call_mindmap_llm(merged_input, mode, MINDMAP_MERGE_PROMPT, task_id, "merge")
return _call_mindmap_llm(merged_input, mode, MINDMAP_MERGE_PROMPT, task_id, "merge", llm_config)
async def _run_mindmap_organize_task(task_id: str, markdown: str, mode: str, prompt: Optional[str]):
async def _run_mindmap_organize_task(
task_id: str,
markdown: str,
mode: str,
prompt: Optional[str],
llm_config: Optional[MindmapLlmConfig]
):
try:
_store_task_progress(task_id, {
"progress": 10,
@ -658,7 +709,7 @@ async def _run_mindmap_organize_task(task_id: str, markdown: str, mode: str, pro
"file_names": "",
"result_md": None,
})
organized = await asyncio.to_thread(_organize_mindmap_markdown, markdown, mode, prompt, task_id)
organized = await asyncio.to_thread(_organize_mindmap_markdown, markdown, mode, prompt, task_id, llm_config)
state = _get_task_progress(task_id) or {}
state.update({
"progress": 100,
@ -722,10 +773,10 @@ async def create_mindmap_task(task_id: str, request: MindmapOrganizeRequest):
"result_md": None,
}
_store_task_progress(task_id, state)
asyncio.create_task(_run_mindmap_organize_task(task_id, markdown, request.mode, request.prompt))
asyncio.create_task(_run_mindmap_organize_task(task_id, markdown, request.mode, request.prompt, request.llm_config))
logger.info(
"Registered mindmap organize task pid={} task_id={} mode={} input_chars={} custom_prompt={}",
os.getpid(), task_id, request.mode, len(markdown), bool((request.prompt or "").strip())
"Registered mindmap organize task pid={} task_id={} mode={} input_chars={} custom_prompt={} request_llm_config={}",
os.getpid(), task_id, request.mode, len(markdown), bool((request.prompt or "").strip()), bool(request.llm_config)
)
return state

View File

@ -41,6 +41,17 @@ export interface MindmapOrganizeProgress extends ParseProgress {
result_md?: string | null
}
/**
 * Optional per-request LLM settings sent as `llm_config` when creating a
 * mindmap organize task. Field names use snake_case to match the request
 * body; every field may be omitted.
 */
export interface MindmapLlmConfig {
/** Base URL of the model service. */
base_url?: string
/** API key for the model service. */
api_key?: string
/** Model name. */
model?: string
/** Request timeout; the settings form labels this value in seconds. */
timeout?: number
/** Sampling temperature. */
temperature?: number
/** Maximum context size, in tokens. */
max_context_tokens?: number
/** Maximum output size, in tokens. */
max_output_tokens?: number
/** Tokens held back as a safety reserve. */
safety_tokens?: number
}
export const documentApi = {
/**
*
@ -102,11 +113,18 @@ export const documentApi = {
})
},
createMindmapTask(taskId: string, markdown: string, mode = 'smart', prompt?: string): Promise<MindmapOrganizeProgress> {
createMindmapTask(
taskId: string,
markdown: string,
mode = 'smart',
prompt?: string,
llmConfig?: MindmapLlmConfig
): Promise<MindmapOrganizeProgress> {
return request.post(`/api/mindmap_tasks/${encodeURIComponent(taskId)}`, {
markdown,
mode,
prompt
prompt,
llm_config: llmConfig
}).then(result => {
return result as unknown as MindmapOrganizeProgress
})

View File

@ -130,6 +130,99 @@
</div>
</el-form-item>
<el-form-item label="大模型服务地址" class="form-item">
<el-input
v-model="draftConfig.mindmapLlmBaseUrl"
placeholder="例如http://10.100.53.199:9527/v1"
class="input"
clearable
/>
<div class="form-item-description">
OpenAI 兼容接口地址后端会请求该地址下的 /chat/completions留空时使用服务端 MINDMAP_LLM_BASE_URL
</div>
</el-form-item>
<el-form-item label="大模型 API Key" class="form-item">
<el-input
v-model="draftConfig.mindmapLlmApiKey"
type="password"
placeholder="留空则使用服务端 MINDMAP_LLM_API_KEY"
class="input"
show-password
clearable
/>
<div class="form-item-description">
仅随智能整理请求发送给后端后端日志不会输出明文 Key
</div>
</el-form-item>
<el-form-item label="大模型名称" class="form-item">
<el-input
v-model="draftConfig.mindmapLlmModel"
placeholder="gemma-4-26B"
class="input"
clearable
/>
</el-form-item>
<div class="llm-grid">
<el-form-item label="请求超时(秒)" class="form-item">
<el-input-number
v-model="draftConfig.mindmapLlmTimeout"
:min="30"
:max="3600"
:step="30"
controls-position="right"
class="number-input"
/>
</el-form-item>
<el-form-item label="温度" class="form-item">
<el-input-number
v-model="draftConfig.mindmapLlmTemperature"
:min="0"
:max="2"
:step="0.1"
:precision="1"
controls-position="right"
class="number-input"
/>
</el-form-item>
<el-form-item label="最大上下文 Token" class="form-item">
<el-input-number
v-model="draftConfig.mindmapLlmMaxContextTokens"
:min="4096"
:max="262144"
:step="1024"
controls-position="right"
class="number-input"
/>
</el-form-item>
<el-form-item label="最大输出 Token" class="form-item">
<el-input-number
v-model="draftConfig.mindmapLlmMaxOutputTokens"
:min="512"
:max="32768"
:step="512"
controls-position="right"
class="number-input"
/>
</el-form-item>
<el-form-item label="安全预留 Token" class="form-item">
<el-input-number
v-model="draftConfig.mindmapLlmSafetyTokens"
:min="0"
:max="8192"
:step="256"
controls-position="right"
class="number-input"
/>
</el-form-item>
</div>
<el-form-item label="智能整理提示词" class="form-item">
<el-input
v-model="draftConfig.mindmapPrompt"
@ -323,6 +416,16 @@ const getBackendDescription = (backend: string) => {
line-height: 1.4;
}
/* Two equal-width columns for the numeric LLM settings; columns may shrink below their content width. */
.llm-grid {
  display: grid;
  grid-template-columns: minmax(0, 1fr) minmax(0, 1fr);
  row-gap: 0;
  column-gap: 12px;
}
/* Makes each number input fill the full width of its container. */
.number-input {
width: 100%;
}
.config-actions {
display: flex;
justify-content: flex-end;

View File

@ -15,6 +15,14 @@ export interface DocumentConfig {
serverUrl: string
exportResolution: number
mindmapPrompt: string
mindmapLlmBaseUrl: string
mindmapLlmApiKey: string
mindmapLlmModel: string
mindmapLlmTimeout: number
mindmapLlmTemperature: number
mindmapLlmMaxContextTokens: number
mindmapLlmMaxOutputTokens: number
mindmapLlmSafetyTokens: number
tableEnable: boolean
formulaEnable: boolean
language: string
@ -49,6 +57,14 @@ export const DEFAULT_DOCUMENT_CONFIG: DocumentConfig = {
serverUrl: 'http://localhost:30000',
exportResolution: 7680,
mindmapPrompt: DEFAULT_MINDMAP_PROMPT,
mindmapLlmBaseUrl: '',
mindmapLlmApiKey: '',
mindmapLlmModel: 'gemma-4-26B',
mindmapLlmTimeout: 180,
mindmapLlmTemperature: 0.2,
mindmapLlmMaxContextTokens: 32768,
mindmapLlmMaxOutputTokens: 4096,
mindmapLlmSafetyTokens: 1024,
tableEnable: true,
formulaEnable: true,
language: 'ch',

View File

@ -456,7 +456,16 @@ const startSmartOrganize = async () => {
smartOrganizeError.value = ''
try {
await documentApi.createMindmapTask(taskId, markdown, 'hybrid', config.mindmapPrompt)
await documentApi.createMindmapTask(taskId, markdown, 'hybrid', config.mindmapPrompt, {
base_url: config.mindmapLlmBaseUrl || undefined,
api_key: config.mindmapLlmApiKey || undefined,
model: config.mindmapLlmModel || undefined,
timeout: config.mindmapLlmTimeout,
temperature: config.mindmapLlmTemperature,
max_context_tokens: config.mindmapLlmMaxContextTokens,
max_output_tokens: config.mindmapLlmMaxOutputTokens,
safety_tokens: config.mindmapLlmSafetyTokens
})
startSmartOrganizePolling(taskId)
} catch (err) {
isSmartOrganizing.value = false