feat(思维导图助手):思维导图助手增加智能整理总结1
parent
ac2b198cd7
commit
fd69d47eca
|
|
@ -364,6 +364,7 @@ api_router = APIRouter(prefix="/api")
|
|||
class MindmapOrganizeRequest(BaseModel):
|
||||
markdown: str
|
||||
mode: str = "smart"
|
||||
prompt: Optional[str] = None
|
||||
|
||||
|
||||
def _extract_json_object(text: str) -> str:
|
||||
|
|
@ -374,7 +375,21 @@ def _extract_json_object(text: str) -> str:
|
|||
return content.strip()
|
||||
|
||||
|
||||
def _call_mindmap_llm(markdown: str, mode: str = "smart") -> str:
|
||||
DEFAULT_MINDMAP_ORGANIZE_PROMPT = """你是文档结构整理助手。请基于用户提供的 Markdown 生成适合思维导图展示的 Markdown。
|
||||
|
||||
要求:
|
||||
1. 保留原文标题结构,不要重写或打乱主要标题层级。
|
||||
2. 将标题下的段落内容总结为要点,合并相近段落,避免逐段照抄。
|
||||
3. 保留原文语言:英文内容输出英文,中文内容输出中文,多语言内容按原文语言分别保留。
|
||||
4. 不要编造原文没有的信息。
|
||||
5. 保留关键数字、公式、专有名词、步骤和结论。
|
||||
6. 最大层级不超过 4 层。
|
||||
7. 每个父节点下最多 8 个子节点。
|
||||
8. 节点标题尽量简短,正文说明使用短句列表。
|
||||
9. 只输出 Markdown,不要输出解释、代码块围栏或额外说明。"""
|
||||
|
||||
|
||||
def _call_mindmap_llm(markdown: str, mode: str = "smart", custom_prompt: Optional[str] = None, task_id: Optional[str] = None) -> str:
|
||||
base_url = os.getenv("MINDMAP_LLM_BASE_URL", "").rstrip("/")
|
||||
model = os.getenv("MINDMAP_LLM_MODEL", "gemma-4-26B")
|
||||
api_key = os.getenv("MINDMAP_LLM_API_KEY", "")
|
||||
|
|
@ -388,27 +403,21 @@ def _call_mindmap_llm(markdown: str, mode: str = "smart") -> str:
|
|||
if len(compact_markdown) > max_chars:
|
||||
compact_markdown = compact_markdown[:max_chars] + "\n\n...(后续内容已截断)"
|
||||
|
||||
style_instruction = "保留原文标题结构,并将段落总结成要点。" if mode == "hybrid" else "重新整理文档结构,提炼主题并合并相近段落。"
|
||||
prompt = f"""你是文档结构整理助手。请基于用户提供的 Markdown 生成适合思维导图展示的 Markdown。
|
||||
|
||||
要求:
|
||||
1. {style_instruction}
|
||||
2. 不要逐段照抄原文,要归纳、合并、总结。
|
||||
3. 不要编造原文没有的信息。
|
||||
4. 保留关键数字、公式、专有名词、步骤和结论。
|
||||
5. 最大层级不超过 4 层。
|
||||
6. 每个父节点下最多 8 个子节点。
|
||||
7. 节点标题尽量简短,正文说明使用短句列表。
|
||||
8. 只输出 Markdown,不要输出解释、代码块围栏或额外说明。
|
||||
prompt_template = (custom_prompt or "").strip() or DEFAULT_MINDMAP_ORGANIZE_PROMPT
|
||||
prompt = f"""{prompt_template}
|
||||
|
||||
原始 Markdown:
|
||||
{compact_markdown}
|
||||
"""
|
||||
logger.info(
|
||||
"Mindmap LLM request start task_id={} model={} base_url={} mode={} input_chars={} prompt_chars={}",
|
||||
task_id or "-", model, base_url, mode, len(compact_markdown), len(prompt_template)
|
||||
)
|
||||
|
||||
payload = {
|
||||
"model": model,
|
||||
"messages": [
|
||||
{"role": "system", "content": "你擅长把长文档整理成结构清晰、层次合理的中文思维导图 Markdown。"},
|
||||
{"role": "system", "content": "你擅长把长文档整理成结构清晰、层次合理的思维导图 Markdown,并严格保留原文语言。"},
|
||||
{"role": "user", "content": prompt},
|
||||
],
|
||||
"temperature": float(os.getenv("MINDMAP_LLM_TEMPERATURE", "0.2")),
|
||||
|
|
@ -434,10 +443,14 @@ def _call_mindmap_llm(markdown: str, mode: str = "smart") -> str:
|
|||
organized = _extract_json_object(content)
|
||||
if not organized:
|
||||
raise RuntimeError("智能整理模型未返回有效内容")
|
||||
logger.info(
|
||||
"Mindmap LLM request completed task_id={} output_chars={}",
|
||||
task_id or "-", len(organized)
|
||||
)
|
||||
return organized
|
||||
|
||||
|
||||
async def _run_mindmap_organize_task(task_id: str, markdown: str, mode: str):
|
||||
async def _run_mindmap_organize_task(task_id: str, markdown: str, mode: str, prompt: Optional[str]):
|
||||
try:
|
||||
_store_task_progress(task_id, {
|
||||
"progress": 10,
|
||||
|
|
@ -448,7 +461,7 @@ async def _run_mindmap_organize_task(task_id: str, markdown: str, mode: str):
|
|||
"result_md": None,
|
||||
})
|
||||
_update_task_progress(task_id, 35, "调用智能整理模型")
|
||||
organized = await asyncio.to_thread(_call_mindmap_llm, markdown, mode)
|
||||
organized = await asyncio.to_thread(_call_mindmap_llm, markdown, mode, prompt, task_id)
|
||||
state = _get_task_progress(task_id) or {}
|
||||
state.update({
|
||||
"progress": 100,
|
||||
|
|
@ -459,7 +472,7 @@ async def _run_mindmap_organize_task(task_id: str, markdown: str, mode: str):
|
|||
})
|
||||
_store_task_progress(task_id, state)
|
||||
except Exception as exc:
|
||||
logger.exception(exc)
|
||||
logger.exception(f"Mindmap organize task failed task_id={task_id}: {exc}")
|
||||
state = _get_task_progress(task_id) or {}
|
||||
state.update({
|
||||
"progress": 100,
|
||||
|
|
@ -512,8 +525,11 @@ async def create_mindmap_task(task_id: str, request: MindmapOrganizeRequest):
|
|||
"result_md": None,
|
||||
}
|
||||
_store_task_progress(task_id, state)
|
||||
asyncio.create_task(_run_mindmap_organize_task(task_id, markdown, request.mode))
|
||||
logger.info(f"Registered mindmap organize task pid={os.getpid()} task_id={task_id}")
|
||||
asyncio.create_task(_run_mindmap_organize_task(task_id, markdown, request.mode, request.prompt))
|
||||
logger.info(
|
||||
"Registered mindmap organize task pid={} task_id={} mode={} input_chars={} custom_prompt={}",
|
||||
os.getpid(), task_id, request.mode, len(markdown), bool((request.prompt or "").strip())
|
||||
)
|
||||
return state
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -102,10 +102,11 @@ export const documentApi = {
|
|||
})
|
||||
},
|
||||
|
||||
createMindmapTask(taskId: string, markdown: string, mode = 'smart'): Promise<MindmapOrganizeProgress> {
|
||||
createMindmapTask(taskId: string, markdown: string, mode = 'smart', prompt?: string): Promise<MindmapOrganizeProgress> {
|
||||
return request.post(`/api/mindmap_tasks/${encodeURIComponent(taskId)}`, {
|
||||
markdown,
|
||||
mode
|
||||
mode,
|
||||
prompt
|
||||
}).then(result => {
|
||||
return result as unknown as MindmapOrganizeProgress
|
||||
})
|
||||
|
|
|
|||
|
|
@ -75,6 +75,23 @@
|
|||
|
||||
<div class="divider"></div>
|
||||
|
||||
<div class="section-title">思维导图智能整理</div>
|
||||
|
||||
<el-form-item label="智能整理提示词" class="form-item">
|
||||
<el-input
|
||||
v-model="draftConfig.mindmapPrompt"
|
||||
type="textarea"
|
||||
:rows="10"
|
||||
resize="vertical"
|
||||
class="input"
|
||||
/>
|
||||
<div class="form-item-description">
|
||||
切换到“智能整理”时发送给后端大模型;默认保留标题结构、总结段落要点,并保持原文语言。
|
||||
</div>
|
||||
</el-form-item>
|
||||
|
||||
<div class="divider"></div>
|
||||
|
||||
<!-- 识别选项 -->
|
||||
<div class="section-title">{{ $t('config.recognitionOptions') }}</div>
|
||||
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ export interface DocumentConfig {
|
|||
backend: string
|
||||
serverUrl: string
|
||||
exportResolution: number
|
||||
mindmapPrompt: string
|
||||
tableEnable: boolean
|
||||
formulaEnable: boolean
|
||||
language: string
|
||||
|
|
@ -29,11 +30,25 @@ export interface ProcessResult {
|
|||
|
||||
export const DOCUMENT_CONFIG_STORAGE_KEY = 'mineru.documentProcessor.config.v4'
|
||||
|
||||
export const DEFAULT_MINDMAP_PROMPT = `你是文档结构整理助手。请基于用户提供的 Markdown 生成适合思维导图展示的 Markdown。
|
||||
|
||||
要求:
|
||||
1. 保留原文标题结构,不要重写或打乱主要标题层级。
|
||||
2. 将标题下的段落内容总结为要点,合并相近段落,避免逐段照抄。
|
||||
3. 保留原文语言:英文内容输出英文,中文内容输出中文,多语言内容按原文语言分别保留。
|
||||
4. 不要编造原文没有的信息。
|
||||
5. 保留关键数字、公式、专有名词、步骤和结论。
|
||||
6. 最大层级不超过 4 层。
|
||||
7. 每个父节点下最多 8 个子节点。
|
||||
8. 节点标题尽量简短,正文说明使用短句列表。
|
||||
9. 只输出 Markdown,不要输出解释、代码块围栏或额外说明。`
|
||||
|
||||
export const DEFAULT_DOCUMENT_CONFIG: DocumentConfig = {
|
||||
maxPages: 1000,
|
||||
backend: 'hybrid-auto-engine',
|
||||
serverUrl: 'http://localhost:30000',
|
||||
exportResolution: 7680,
|
||||
mindmapPrompt: DEFAULT_MINDMAP_PROMPT,
|
||||
tableEnable: true,
|
||||
formulaEnable: true,
|
||||
language: 'ch',
|
||||
|
|
|
|||
|
|
@ -455,7 +455,7 @@ const startSmartOrganize = async () => {
|
|||
smartOrganizeError.value = ''
|
||||
|
||||
try {
|
||||
await documentApi.createMindmapTask(taskId, markdown, 'smart')
|
||||
await documentApi.createMindmapTask(taskId, markdown, 'hybrid', config.mindmapPrompt)
|
||||
startSmartOrganizePolling(taskId)
|
||||
} catch (err) {
|
||||
isSmartOrganizing.value = false
|
||||
|
|
|
|||
Loading…
Reference in New Issue