feat(思维导图助手):思维导图助手增加智能整理总结1
parent
ac2b198cd7
commit
fd69d47eca
|
|
@ -364,6 +364,7 @@ api_router = APIRouter(prefix="/api")
|
||||||
class MindmapOrganizeRequest(BaseModel):
|
class MindmapOrganizeRequest(BaseModel):
|
||||||
markdown: str
|
markdown: str
|
||||||
mode: str = "smart"
|
mode: str = "smart"
|
||||||
|
prompt: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
def _extract_json_object(text: str) -> str:
|
def _extract_json_object(text: str) -> str:
|
||||||
|
|
@ -374,7 +375,21 @@ def _extract_json_object(text: str) -> str:
|
||||||
return content.strip()
|
return content.strip()
|
||||||
|
|
||||||
|
|
||||||
def _call_mindmap_llm(markdown: str, mode: str = "smart") -> str:
|
DEFAULT_MINDMAP_ORGANIZE_PROMPT = """你是文档结构整理助手。请基于用户提供的 Markdown 生成适合思维导图展示的 Markdown。
|
||||||
|
|
||||||
|
要求:
|
||||||
|
1. 保留原文标题结构,不要重写或打乱主要标题层级。
|
||||||
|
2. 将标题下的段落内容总结为要点,合并相近段落,避免逐段照抄。
|
||||||
|
3. 保留原文语言:英文内容输出英文,中文内容输出中文,多语言内容按原文语言分别保留。
|
||||||
|
4. 不要编造原文没有的信息。
|
||||||
|
5. 保留关键数字、公式、专有名词、步骤和结论。
|
||||||
|
6. 最大层级不超过 4 层。
|
||||||
|
7. 每个父节点下最多 8 个子节点。
|
||||||
|
8. 节点标题尽量简短,正文说明使用短句列表。
|
||||||
|
9. 只输出 Markdown,不要输出解释、代码块围栏或额外说明。"""
|
||||||
|
|
||||||
|
|
||||||
|
def _call_mindmap_llm(markdown: str, mode: str = "smart", custom_prompt: Optional[str] = None, task_id: Optional[str] = None) -> str:
|
||||||
base_url = os.getenv("MINDMAP_LLM_BASE_URL", "").rstrip("/")
|
base_url = os.getenv("MINDMAP_LLM_BASE_URL", "").rstrip("/")
|
||||||
model = os.getenv("MINDMAP_LLM_MODEL", "gemma-4-26B")
|
model = os.getenv("MINDMAP_LLM_MODEL", "gemma-4-26B")
|
||||||
api_key = os.getenv("MINDMAP_LLM_API_KEY", "")
|
api_key = os.getenv("MINDMAP_LLM_API_KEY", "")
|
||||||
|
|
@ -388,27 +403,21 @@ def _call_mindmap_llm(markdown: str, mode: str = "smart") -> str:
|
||||||
if len(compact_markdown) > max_chars:
|
if len(compact_markdown) > max_chars:
|
||||||
compact_markdown = compact_markdown[:max_chars] + "\n\n...(后续内容已截断)"
|
compact_markdown = compact_markdown[:max_chars] + "\n\n...(后续内容已截断)"
|
||||||
|
|
||||||
style_instruction = "保留原文标题结构,并将段落总结成要点。" if mode == "hybrid" else "重新整理文档结构,提炼主题并合并相近段落。"
|
prompt_template = (custom_prompt or "").strip() or DEFAULT_MINDMAP_ORGANIZE_PROMPT
|
||||||
prompt = f"""你是文档结构整理助手。请基于用户提供的 Markdown 生成适合思维导图展示的 Markdown。
|
prompt = f"""{prompt_template}
|
||||||
|
|
||||||
要求:
|
|
||||||
1. {style_instruction}
|
|
||||||
2. 不要逐段照抄原文,要归纳、合并、总结。
|
|
||||||
3. 不要编造原文没有的信息。
|
|
||||||
4. 保留关键数字、公式、专有名词、步骤和结论。
|
|
||||||
5. 最大层级不超过 4 层。
|
|
||||||
6. 每个父节点下最多 8 个子节点。
|
|
||||||
7. 节点标题尽量简短,正文说明使用短句列表。
|
|
||||||
8. 只输出 Markdown,不要输出解释、代码块围栏或额外说明。
|
|
||||||
|
|
||||||
原始 Markdown:
|
原始 Markdown:
|
||||||
{compact_markdown}
|
{compact_markdown}
|
||||||
"""
|
"""
|
||||||
|
logger.info(
|
||||||
|
"Mindmap LLM request start task_id={} model={} base_url={} mode={} input_chars={} prompt_chars={}",
|
||||||
|
task_id or "-", model, base_url, mode, len(compact_markdown), len(prompt_template)
|
||||||
|
)
|
||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
"model": model,
|
"model": model,
|
||||||
"messages": [
|
"messages": [
|
||||||
{"role": "system", "content": "你擅长把长文档整理成结构清晰、层次合理的中文思维导图 Markdown。"},
|
{"role": "system", "content": "你擅长把长文档整理成结构清晰、层次合理的思维导图 Markdown,并严格保留原文语言。"},
|
||||||
{"role": "user", "content": prompt},
|
{"role": "user", "content": prompt},
|
||||||
],
|
],
|
||||||
"temperature": float(os.getenv("MINDMAP_LLM_TEMPERATURE", "0.2")),
|
"temperature": float(os.getenv("MINDMAP_LLM_TEMPERATURE", "0.2")),
|
||||||
|
|
@ -434,10 +443,14 @@ def _call_mindmap_llm(markdown: str, mode: str = "smart") -> str:
|
||||||
organized = _extract_json_object(content)
|
organized = _extract_json_object(content)
|
||||||
if not organized:
|
if not organized:
|
||||||
raise RuntimeError("智能整理模型未返回有效内容")
|
raise RuntimeError("智能整理模型未返回有效内容")
|
||||||
|
logger.info(
|
||||||
|
"Mindmap LLM request completed task_id={} output_chars={}",
|
||||||
|
task_id or "-", len(organized)
|
||||||
|
)
|
||||||
return organized
|
return organized
|
||||||
|
|
||||||
|
|
||||||
async def _run_mindmap_organize_task(task_id: str, markdown: str, mode: str):
|
async def _run_mindmap_organize_task(task_id: str, markdown: str, mode: str, prompt: Optional[str]):
|
||||||
try:
|
try:
|
||||||
_store_task_progress(task_id, {
|
_store_task_progress(task_id, {
|
||||||
"progress": 10,
|
"progress": 10,
|
||||||
|
|
@ -448,7 +461,7 @@ async def _run_mindmap_organize_task(task_id: str, markdown: str, mode: str):
|
||||||
"result_md": None,
|
"result_md": None,
|
||||||
})
|
})
|
||||||
_update_task_progress(task_id, 35, "调用智能整理模型")
|
_update_task_progress(task_id, 35, "调用智能整理模型")
|
||||||
organized = await asyncio.to_thread(_call_mindmap_llm, markdown, mode)
|
organized = await asyncio.to_thread(_call_mindmap_llm, markdown, mode, prompt, task_id)
|
||||||
state = _get_task_progress(task_id) or {}
|
state = _get_task_progress(task_id) or {}
|
||||||
state.update({
|
state.update({
|
||||||
"progress": 100,
|
"progress": 100,
|
||||||
|
|
@ -459,7 +472,7 @@ async def _run_mindmap_organize_task(task_id: str, markdown: str, mode: str):
|
||||||
})
|
})
|
||||||
_store_task_progress(task_id, state)
|
_store_task_progress(task_id, state)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.exception(exc)
|
logger.exception(f"Mindmap organize task failed task_id={task_id}: {exc}")
|
||||||
state = _get_task_progress(task_id) or {}
|
state = _get_task_progress(task_id) or {}
|
||||||
state.update({
|
state.update({
|
||||||
"progress": 100,
|
"progress": 100,
|
||||||
|
|
@ -512,8 +525,11 @@ async def create_mindmap_task(task_id: str, request: MindmapOrganizeRequest):
|
||||||
"result_md": None,
|
"result_md": None,
|
||||||
}
|
}
|
||||||
_store_task_progress(task_id, state)
|
_store_task_progress(task_id, state)
|
||||||
asyncio.create_task(_run_mindmap_organize_task(task_id, markdown, request.mode))
|
asyncio.create_task(_run_mindmap_organize_task(task_id, markdown, request.mode, request.prompt))
|
||||||
logger.info(f"Registered mindmap organize task pid={os.getpid()} task_id={task_id}")
|
logger.info(
|
||||||
|
"Registered mindmap organize task pid={} task_id={} mode={} input_chars={} custom_prompt={}",
|
||||||
|
os.getpid(), task_id, request.mode, len(markdown), bool((request.prompt or "").strip())
|
||||||
|
)
|
||||||
return state
|
return state
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -102,10 +102,11 @@ export const documentApi = {
|
||||||
})
|
})
|
||||||
},
|
},
|
||||||
|
|
||||||
createMindmapTask(taskId: string, markdown: string, mode = 'smart'): Promise<MindmapOrganizeProgress> {
|
createMindmapTask(taskId: string, markdown: string, mode = 'smart', prompt?: string): Promise<MindmapOrganizeProgress> {
|
||||||
return request.post(`/api/mindmap_tasks/${encodeURIComponent(taskId)}`, {
|
return request.post(`/api/mindmap_tasks/${encodeURIComponent(taskId)}`, {
|
||||||
markdown,
|
markdown,
|
||||||
mode
|
mode,
|
||||||
|
prompt
|
||||||
}).then(result => {
|
}).then(result => {
|
||||||
return result as unknown as MindmapOrganizeProgress
|
return result as unknown as MindmapOrganizeProgress
|
||||||
})
|
})
|
||||||
|
|
|
||||||
|
|
@ -75,6 +75,23 @@
|
||||||
|
|
||||||
<div class="divider"></div>
|
<div class="divider"></div>
|
||||||
|
|
||||||
|
<div class="section-title">思维导图智能整理</div>
|
||||||
|
|
||||||
|
<el-form-item label="智能整理提示词" class="form-item">
|
||||||
|
<el-input
|
||||||
|
v-model="draftConfig.mindmapPrompt"
|
||||||
|
type="textarea"
|
||||||
|
:rows="10"
|
||||||
|
resize="vertical"
|
||||||
|
class="input"
|
||||||
|
/>
|
||||||
|
<div class="form-item-description">
|
||||||
|
切换到“智能整理”时发送给后端大模型;默认保留标题结构、总结段落要点,并保持原文语言。
|
||||||
|
</div>
|
||||||
|
</el-form-item>
|
||||||
|
|
||||||
|
<div class="divider"></div>
|
||||||
|
|
||||||
<!-- 识别选项 -->
|
<!-- 识别选项 -->
|
||||||
<div class="section-title">{{ $t('config.recognitionOptions') }}</div>
|
<div class="section-title">{{ $t('config.recognitionOptions') }}</div>
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,7 @@ export interface DocumentConfig {
|
||||||
backend: string
|
backend: string
|
||||||
serverUrl: string
|
serverUrl: string
|
||||||
exportResolution: number
|
exportResolution: number
|
||||||
|
mindmapPrompt: string
|
||||||
tableEnable: boolean
|
tableEnable: boolean
|
||||||
formulaEnable: boolean
|
formulaEnable: boolean
|
||||||
language: string
|
language: string
|
||||||
|
|
@ -29,11 +30,25 @@ export interface ProcessResult {
|
||||||
|
|
||||||
export const DOCUMENT_CONFIG_STORAGE_KEY = 'mineru.documentProcessor.config.v4'
|
export const DOCUMENT_CONFIG_STORAGE_KEY = 'mineru.documentProcessor.config.v4'
|
||||||
|
|
||||||
|
export const DEFAULT_MINDMAP_PROMPT = `你是文档结构整理助手。请基于用户提供的 Markdown 生成适合思维导图展示的 Markdown。
|
||||||
|
|
||||||
|
要求:
|
||||||
|
1. 保留原文标题结构,不要重写或打乱主要标题层级。
|
||||||
|
2. 将标题下的段落内容总结为要点,合并相近段落,避免逐段照抄。
|
||||||
|
3. 保留原文语言:英文内容输出英文,中文内容输出中文,多语言内容按原文语言分别保留。
|
||||||
|
4. 不要编造原文没有的信息。
|
||||||
|
5. 保留关键数字、公式、专有名词、步骤和结论。
|
||||||
|
6. 最大层级不超过 4 层。
|
||||||
|
7. 每个父节点下最多 8 个子节点。
|
||||||
|
8. 节点标题尽量简短,正文说明使用短句列表。
|
||||||
|
9. 只输出 Markdown,不要输出解释、代码块围栏或额外说明。`
|
||||||
|
|
||||||
export const DEFAULT_DOCUMENT_CONFIG: DocumentConfig = {
|
export const DEFAULT_DOCUMENT_CONFIG: DocumentConfig = {
|
||||||
maxPages: 1000,
|
maxPages: 1000,
|
||||||
backend: 'hybrid-auto-engine',
|
backend: 'hybrid-auto-engine',
|
||||||
serverUrl: 'http://localhost:30000',
|
serverUrl: 'http://localhost:30000',
|
||||||
exportResolution: 7680,
|
exportResolution: 7680,
|
||||||
|
mindmapPrompt: DEFAULT_MINDMAP_PROMPT,
|
||||||
tableEnable: true,
|
tableEnable: true,
|
||||||
formulaEnable: true,
|
formulaEnable: true,
|
||||||
language: 'ch',
|
language: 'ch',
|
||||||
|
|
|
||||||
|
|
@ -455,7 +455,7 @@ const startSmartOrganize = async () => {
|
||||||
smartOrganizeError.value = ''
|
smartOrganizeError.value = ''
|
||||||
|
|
||||||
try {
|
try {
|
||||||
await documentApi.createMindmapTask(taskId, markdown, 'smart')
|
await documentApi.createMindmapTask(taskId, markdown, 'hybrid', config.mindmapPrompt)
|
||||||
startSmartOrganizePolling(taskId)
|
startSmartOrganizePolling(taskId)
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
isSmartOrganizing.value = false
|
isSmartOrganizing.value = false
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue