feat(思维导图助手):思维导图助手增加智能整理总结1

develop
panyy 2026-06-23 17:48:36 +08:00
parent ac2b198cd7
commit fd69d47eca
5 changed files with 71 additions and 22 deletions

View File

@ -364,6 +364,7 @@ api_router = APIRouter(prefix="/api")
class MindmapOrganizeRequest(BaseModel): class MindmapOrganizeRequest(BaseModel):
markdown: str markdown: str
mode: str = "smart" mode: str = "smart"
prompt: Optional[str] = None
def _extract_json_object(text: str) -> str: def _extract_json_object(text: str) -> str:
@ -374,7 +375,21 @@ def _extract_json_object(text: str) -> str:
return content.strip() return content.strip()
def _call_mindmap_llm(markdown: str, mode: str = "smart") -> str: DEFAULT_MINDMAP_ORGANIZE_PROMPT = """你是文档结构整理助手。请基于用户提供的 Markdown 生成适合思维导图展示的 Markdown。
要求
1. 保留原文标题结构不要重写或打乱主要标题层级
2. 将标题下的段落内容总结为要点合并相近段落避免逐段照抄
3. 保留原文语言英文内容输出英文中文内容输出中文多语言内容按原文语言分别保留
4. 不要编造原文没有的信息
5. 保留关键数字公式专有名词步骤和结论
6. 最大层级不超过 4
7. 每个父节点下最多 8 个子节点
8. 节点标题尽量简短正文说明使用短句列表
9. 只输出 Markdown不要输出解释代码块围栏或额外说明"""
def _call_mindmap_llm(markdown: str, mode: str = "smart", custom_prompt: Optional[str] = None, task_id: Optional[str] = None) -> str:
base_url = os.getenv("MINDMAP_LLM_BASE_URL", "").rstrip("/") base_url = os.getenv("MINDMAP_LLM_BASE_URL", "").rstrip("/")
model = os.getenv("MINDMAP_LLM_MODEL", "gemma-4-26B") model = os.getenv("MINDMAP_LLM_MODEL", "gemma-4-26B")
api_key = os.getenv("MINDMAP_LLM_API_KEY", "") api_key = os.getenv("MINDMAP_LLM_API_KEY", "")
@ -388,27 +403,21 @@ def _call_mindmap_llm(markdown: str, mode: str = "smart") -> str:
if len(compact_markdown) > max_chars: if len(compact_markdown) > max_chars:
compact_markdown = compact_markdown[:max_chars] + "\n\n...(后续内容已截断)" compact_markdown = compact_markdown[:max_chars] + "\n\n...(后续内容已截断)"
style_instruction = "保留原文标题结构,并将段落总结成要点。" if mode == "hybrid" else "重新整理文档结构,提炼主题并合并相近段落。" prompt_template = (custom_prompt or "").strip() or DEFAULT_MINDMAP_ORGANIZE_PROMPT
prompt = f"""你是文档结构整理助手。请基于用户提供的 Markdown 生成适合思维导图展示的 Markdown。 prompt = f"""{prompt_template}
要求
1. {style_instruction}
2. 不要逐段照抄原文要归纳合并总结
3. 不要编造原文没有的信息
4. 保留关键数字公式专有名词步骤和结论
5. 最大层级不超过 4
6. 每个父节点下最多 8 个子节点
7. 节点标题尽量简短正文说明使用短句列表
8. 只输出 Markdown不要输出解释代码块围栏或额外说明
原始 Markdown 原始 Markdown
{compact_markdown} {compact_markdown}
""" """
logger.info(
"Mindmap LLM request start task_id={} model={} base_url={} mode={} input_chars={} prompt_chars={}",
task_id or "-", model, base_url, mode, len(compact_markdown), len(prompt_template)
)
payload = { payload = {
"model": model, "model": model,
"messages": [ "messages": [
{"role": "system", "content": "你擅长把长文档整理成结构清晰、层次合理的中文思维导图 Markdown。"}, {"role": "system", "content": "你擅长把长文档整理成结构清晰、层次合理的思维导图 Markdown,并严格保留原文语言"},
{"role": "user", "content": prompt}, {"role": "user", "content": prompt},
], ],
"temperature": float(os.getenv("MINDMAP_LLM_TEMPERATURE", "0.2")), "temperature": float(os.getenv("MINDMAP_LLM_TEMPERATURE", "0.2")),
@ -434,10 +443,14 @@ def _call_mindmap_llm(markdown: str, mode: str = "smart") -> str:
organized = _extract_json_object(content) organized = _extract_json_object(content)
if not organized: if not organized:
raise RuntimeError("智能整理模型未返回有效内容") raise RuntimeError("智能整理模型未返回有效内容")
logger.info(
"Mindmap LLM request completed task_id={} output_chars={}",
task_id or "-", len(organized)
)
return organized return organized
async def _run_mindmap_organize_task(task_id: str, markdown: str, mode: str): async def _run_mindmap_organize_task(task_id: str, markdown: str, mode: str, prompt: Optional[str]):
try: try:
_store_task_progress(task_id, { _store_task_progress(task_id, {
"progress": 10, "progress": 10,
@ -448,7 +461,7 @@ async def _run_mindmap_organize_task(task_id: str, markdown: str, mode: str):
"result_md": None, "result_md": None,
}) })
_update_task_progress(task_id, 35, "调用智能整理模型") _update_task_progress(task_id, 35, "调用智能整理模型")
organized = await asyncio.to_thread(_call_mindmap_llm, markdown, mode) organized = await asyncio.to_thread(_call_mindmap_llm, markdown, mode, prompt, task_id)
state = _get_task_progress(task_id) or {} state = _get_task_progress(task_id) or {}
state.update({ state.update({
"progress": 100, "progress": 100,
@ -459,7 +472,7 @@ async def _run_mindmap_organize_task(task_id: str, markdown: str, mode: str):
}) })
_store_task_progress(task_id, state) _store_task_progress(task_id, state)
except Exception as exc: except Exception as exc:
logger.exception(exc) logger.exception(f"Mindmap organize task failed task_id={task_id}: {exc}")
state = _get_task_progress(task_id) or {} state = _get_task_progress(task_id) or {}
state.update({ state.update({
"progress": 100, "progress": 100,
@ -512,8 +525,11 @@ async def create_mindmap_task(task_id: str, request: MindmapOrganizeRequest):
"result_md": None, "result_md": None,
} }
_store_task_progress(task_id, state) _store_task_progress(task_id, state)
asyncio.create_task(_run_mindmap_organize_task(task_id, markdown, request.mode)) asyncio.create_task(_run_mindmap_organize_task(task_id, markdown, request.mode, request.prompt))
logger.info(f"Registered mindmap organize task pid={os.getpid()} task_id={task_id}") logger.info(
"Registered mindmap organize task pid={} task_id={} mode={} input_chars={} custom_prompt={}",
os.getpid(), task_id, request.mode, len(markdown), bool((request.prompt or "").strip())
)
return state return state

View File

@ -102,10 +102,11 @@ export const documentApi = {
}) })
}, },
createMindmapTask(taskId: string, markdown: string, mode = 'smart'): Promise<MindmapOrganizeProgress> { createMindmapTask(taskId: string, markdown: string, mode = 'smart', prompt?: string): Promise<MindmapOrganizeProgress> {
return request.post(`/api/mindmap_tasks/${encodeURIComponent(taskId)}`, { return request.post(`/api/mindmap_tasks/${encodeURIComponent(taskId)}`, {
markdown, markdown,
mode mode,
prompt
}).then(result => { }).then(result => {
return result as unknown as MindmapOrganizeProgress return result as unknown as MindmapOrganizeProgress
}) })

View File

@ -75,6 +75,23 @@
<div class="divider"></div> <div class="divider"></div>
<div class="section-title">思维导图智能整理</div>
<el-form-item label="智能整理提示词" class="form-item">
<el-input
v-model="draftConfig.mindmapPrompt"
type="textarea"
:rows="10"
resize="vertical"
class="input"
/>
<div class="form-item-description">
切换到智能整理时发送给后端大模型默认保留标题结构总结段落要点并保持原文语言
</div>
</el-form-item>
<div class="divider"></div>
<!-- 识别选项 --> <!-- 识别选项 -->
<div class="section-title">{{ $t('config.recognitionOptions') }}</div> <div class="section-title">{{ $t('config.recognitionOptions') }}</div>

View File

@ -14,6 +14,7 @@ export interface DocumentConfig {
backend: string backend: string
serverUrl: string serverUrl: string
exportResolution: number exportResolution: number
mindmapPrompt: string
tableEnable: boolean tableEnable: boolean
formulaEnable: boolean formulaEnable: boolean
language: string language: string
@ -29,11 +30,25 @@ export interface ProcessResult {
export const DOCUMENT_CONFIG_STORAGE_KEY = 'mineru.documentProcessor.config.v4' export const DOCUMENT_CONFIG_STORAGE_KEY = 'mineru.documentProcessor.config.v4'
export const DEFAULT_MINDMAP_PROMPT = `你是文档结构整理助手。请基于用户提供的 Markdown 生成适合思维导图展示的 Markdown。
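要求
1. 保留原文标题结构不要重写或打乱主要标题层级
2. 将标题下的段落内容总结为要点合并相近段落避免逐段照抄
3. 保留原文语言英文内容输出英文中文内容输出中文多语言内容按原文语言分别保留
4. 不要编造原文没有的信息
5. 保留关键数字公式专有名词步骤和结论
6. 最大层级不超过 4
7. 每个父节点下最多 8 个子节点
8. 节点标题尽量简短正文说明使用短句列表
9. 只输出 Markdown不要输出解释代码块围栏或额外说明`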
export const DEFAULT_DOCUMENT_CONFIG: DocumentConfig = { export const DEFAULT_DOCUMENT_CONFIG: DocumentConfig = {
maxPages: 1000, maxPages: 1000,
backend: 'hybrid-auto-engine', backend: 'hybrid-auto-engine',
serverUrl: 'http://localhost:30000', serverUrl: 'http://localhost:30000',
exportResolution: 7680, exportResolution: 7680,
mindmapPrompt: DEFAULT_MINDMAP_PROMPT,
tableEnable: true, tableEnable: true,
formulaEnable: true, formulaEnable: true,
language: 'ch', language: 'ch',

View File

@ -455,7 +455,7 @@ const startSmartOrganize = async () => {
smartOrganizeError.value = '' smartOrganizeError.value = ''
try { try {
await documentApi.createMindmapTask(taskId, markdown, 'smart') await documentApi.createMindmapTask(taskId, markdown, 'hybrid', config.mindmapPrompt)
startSmartOrganizePolling(taskId) startSmartOrganizePolling(taskId)
} catch (err) { } catch (err) {
isSmartOrganizing.value = false isSmartOrganizing.value = false