Compare commits

...

3 Commits

Author SHA1 Message Date
Bifang b5649cc218 重构Neo4j保存数据结构 2026-06-15 13:13:20 +08:00
Bifang f857a90977 web更新 2026-06-15 09:17:21 +08:00
Bifang b7d4cc8782 测试版本1 2026-06-12 10:57:32 +08:00
17 changed files with 3071 additions and 1689 deletions

View File

@ -2,12 +2,18 @@ import json
import logging
import re
import sys
from typing import List, Optional
from enum import Enum
from typing import Any, List, Optional
from openai import OpenAI
from pydantic import BaseModel, Field
from meeting_memory.config import config
from meeting_memory.prompts import extract_entities as prompt_extract_entities
from meeting_memory.prompts import extract_facts as prompt_extract_facts
from meeting_memory.prompts import resolve_entities as prompt_dedupe_nodes
from meeting_memory.prompts import resolve_facts as prompt_dedupe_edges
from meeting_memory.prompts import summarize_entity as prompt_summarize
logger = logging.getLogger(__name__)
@ -17,52 +23,96 @@ client = OpenAI(
)
class EntityType(str, Enum):
    """Closed set of entity categories assigned to extracted entities.

    Members also subclass ``str``, so each one compares equal to its raw
    string value.
    """

    DEPARTMENT = 'Department'
    PROJECT = 'Project'
    METRIC = 'Metric'
    PERSON = 'Person'
    SYSTEM = 'System'
    DOCUMENT = 'Document'
    # NOTE(review): the only lowercase value in this enum - confirm intentional.
    PARTICIPANT = 'participant'
    # Fallback category for raw types that cannot be mapped.
    UNKNOWN = 'Unknown'
# Normalization map: legacy LLM output → canonical type
_ENTITY_TYPE_ALIASES = {
'组织': 'Department',
'organization': 'Department',
'部门': 'Department',
'指标': 'Metric',
'kpi': 'Metric',
'项目': 'Project',
}
def _canonical_entity_type(raw: str) -> str:
    """Map a raw entity-type string to a canonical ``EntityType`` value.

    Lookup order: the alias table (exact spelling first, then lowercased),
    then a case-insensitive match against the enum values.

    Args:
        raw: Type name as produced upstream; surrounding whitespace is ignored.

    Returns:
        The canonical type string, or ``EntityType.UNKNOWN.value`` when the
        input is not a string or matches nothing.
    """
    if not isinstance(raw, str):
        # Parsed JSON may carry null / numbers here; treat them as unknown
        # instead of failing on ``.strip()``.
        return EntityType.UNKNOWN.value

    normalized = raw.strip()
    folded = normalized.lower()

    # Alias keys such as 'kpi' and 'organization' are stored lowercase, so the
    # lowercased input must be tried too ('KPI', 'Organization').
    for key in (normalized, folded):
        if key in _ENTITY_TYPE_ALIASES:
            return _ENTITY_TYPE_ALIASES[key]

    for member in EntityType:
        if member.value.lower() == folded:
            return member.value
    return EntityType.UNKNOWN.value
def _neo4j_labels(entity_type: str) -> list[str]:
    """Return the label list for an entity of the given type.

    The generic ``'Entity'`` label is always present; the canonical type is
    added as a second label unless it resolves to the unknown type.
    """
    resolved = _canonical_entity_type(entity_type)
    if resolved == EntityType.UNKNOWN.value:
        return ['Entity']
    return ['Entity', resolved]
class Entity(BaseModel):
name: str
entity_type: str
description: str = ""
entity_type: str = EntityType.UNKNOWN.value
description: str = ''
class Relation(BaseModel):
subject: str
subject_type: str
predicate: str
object: str
object_type: str
description: str = ""
fact: str = ""
source_entity_name: str
target_entity_name: str
relation_type: str
fact: str = ''
valid_at: str = ''
invalid_at: str = ''
evidence: str = ''
qualifiers: List[str] = Field(default_factory=list)
evidence: str = ""
confidence: float = 0.0
valid_at: str = ""
invalid_at: str = ""
class ActionItem(BaseModel):
task: str
assignee: str = ""
deadline: str = ""
status: str = "待办"
priority: str = ""
assignee: str = ''
deadline: str = ''
status: str = '待办'
priority: str = ''
class Decision(BaseModel):
content: str
proposer: str = ""
status: str = "已决"
proposer: str = ''
status: str = '已决'
class MeetingMetric(BaseModel):
metric_name: str
value: str
target: str = ""
owner: str = ""
trend: str = ""
target: str = ''
owner: str = ''
trend: str = ''
unit: str = ''
class DepartmentInfo(BaseModel):
    """A department mentioned in a meeting, with the projects listed under it."""

    # Department name; the only required field.
    name: str
    # Optional free-text description, empty by default.
    description: str = ''
    # Project names attached to the department; each instance gets its own list.
    projects: List[str] = Field(default_factory=list)
class MeetingExtraction(BaseModel):
title: str
date: str = ""
date: str = ''
participants: List[str] = Field(default_factory=list)
agenda: List[str] = Field(default_factory=list)
entities: List[Entity] = Field(default_factory=list)
@ -70,15 +120,282 @@ class MeetingExtraction(BaseModel):
action_items: List[ActionItem] = Field(default_factory=list)
decisions: List[Decision] = Field(default_factory=list)
metrics: List[MeetingMetric] = Field(default_factory=list)
summary: str = ""
departments: List[DepartmentInfo] = Field(default_factory=list)
summary: str = ''
def _call_llm(
    messages: list[dict],
    response_model: type | None = None,
    stream: bool = False,
    max_tokens: int | None = None,
) -> Any:
    """Send a chat-completion request and return the generated text.

    Args:
        messages: Chat messages forwarded unchanged to the client.
        response_model: When not ``None``, the request asks for a JSON-object
            response format. The model class itself is not otherwise used.
        stream: When true, the reply is consumed incrementally and each piece
            is echoed to stdout as it arrives.
        max_tokens: Per-call token limit; a falsy value falls back to
            ``config.llm.max_tokens``.

    Returns:
        The full reply text (the concatenated pieces when streaming).

    Raises:
        ValueError: In non-streaming mode, when the reply content is ``None``.
    """
    request = {
        'model': config.llm.model,
        'messages': messages,
        'max_tokens': max_tokens or config.llm.max_tokens,
        'temperature': config.llm.temperature,
    }
    if response_model is not None:
        request['response_format'] = {'type': 'json_object'}

    if stream:
        request['stream'] = True
        events = client.chat.completions.create(**request)
        pieces: List[str] = []
        print('\n[LLM] 开始流式输出:')
        for event in events:
            # Events without choices or with an empty delta carry no text.
            if event.choices:
                piece = event.choices[0].delta.content
                if piece:
                    pieces.append(piece)
                    sys.stdout.write(piece)
                    sys.stdout.flush()
        print('\n[LLM] 输出结束')
        return ''.join(pieces)

    reply = client.chat.completions.create(**request)
    text = reply.choices[0].message.content
    if text is None:
        raise ValueError('LLM returned empty response')
    return text
def _try_parse_json(content: str) -> dict | list:
try:
return json.loads(content)
except json.JSONDecodeError:
logger.warning('JSON parsing failed; trying to repair extracted block')
match = re.search(r'\{.*\}|\[.*\]', content, re.DOTALL)
if match:
try:
return json.loads(match.group())
except json.JSONDecodeError as exc:
logger.error('Repaired JSON still failed to parse: %s', exc)
raise
def _normalize_string(name: str) -> str:
return re.sub(r'[\s]+', ' ', name.strip().lower())
def _format_episodes_for_context(episodes: list[dict] | None) -> str:
if not episodes:
return ''
return '\n'.join(
f'[Episode {i}] {ep.get("content", "")}'
for i, ep in enumerate(episodes)
)
# ===== Step 1: 实体节点抽取 =====
def extract_entities_from_text(
    text: str,
    previous_episodes: list[dict] | None = None,
    entity_types: list[dict] | None = None,
    stream: bool = False,
) -> list[dict]:
    """Ask the LLM for the entities mentioned in ``text``.

    Args:
        text: Episode content to analyse.
        previous_episodes: Earlier episodes passed to the prompt as context.
        entity_types: Entity type definitions passed to the prompt.
        stream: Forwarded to ``_call_llm``.

    Returns:
        A list of dicts with keys ``name``, ``entity_type``, ``description``
        and ``evidence``. Items without a usable string name are dropped.
        An empty list is returned when the reply cannot be parsed or has an
        unexpected shape.
    """
    context = {
        'episode_content': text,
        'previous_episodes': previous_episodes or [],
        'entity_types': entity_types or [],
    }
    messages = prompt_extract_entities(context)
    content = _call_llm(messages, stream=stream)
    try:
        data = _try_parse_json(content)
    except Exception as exc:
        logger.error('Failed to parse entity extraction result: %s', exc)
        return []

    # The reply may be a bare list or an object wrapping the list.
    if isinstance(data, dict):
        data = data.get('entities', data.get('extracted_entities', []))
    if not isinstance(data, list):
        return []

    result = []
    for item in data:
        if not isinstance(item, dict):
            continue
        name = item.get('name')
        # A null or non-string name would crash ``.strip()``; skip it like a
        # blank name.
        if not isinstance(name, str) or not name.strip():
            continue
        result.append({
            'name': name.strip(),
            # ``or`` also covers an explicit JSON null, not only a missing key.
            'entity_type': item.get('entity_type') or 'Entity',
            'description': item.get('description') or '',
            'evidence': item.get('evidence') or '',
        })
    return result
# ===== Step 2: 实体去重 =====
def resolve_entities_against_graph(
    extracted: list[dict],
    existing: list[dict],
    episode_content: str = '',
) -> list[dict]:
    """Use the LLM to match newly extracted entities against existing ones.

    Each resolution in the reply refers to an extracted entity by its list
    index (``id``) and may name an existing candidate via
    ``duplicate_candidate_id``. Matched entities get a ``_resolved_to`` key
    holding the candidate and are left out of the result.

    Args:
        extracted: Newly extracted entity dicts. They are modified in place
            (``name`` may be updated, ``_resolved_to`` may be added).
        existing: Candidate dicts, each carrying a ``candidate_id``.
        episode_content: Source text passed to the prompt.

    Returns:
        The extracted entities not resolved to an existing candidate.
        ``extracted`` itself is returned when there are no candidates or the
        reply cannot be parsed.
    """
    # NOTE(review): entities resolved to an existing candidate are dropped from
    # the return value; a caller that still needs them must read
    # ``_resolved_to`` on the input dicts - confirm this is intended.
    if not existing:
        return extracted

    def as_index(value) -> int | None:
        """Coerce an id from the reply to int; None when it is not usable."""
        if isinstance(value, bool):
            return None
        try:
            return int(value)
        except (TypeError, ValueError):
            return None

    context = {
        'extracted_entities': extracted,
        'existing_entities': existing,
        'episode_content': episode_content,
    }
    messages = prompt_dedupe_nodes(context)
    content = _call_llm(messages)
    try:
        data = _try_parse_json(content)
    except Exception as exc:
        logger.warning('LLM dedup failed, keeping all extracted: %s', exc)
        return extracted

    if isinstance(data, dict):
        data = data.get('entity_resolutions', data.get('resolutions', []))

    extracted_by_id = dict(enumerate(extracted))
    existing_by_id = {c.get('candidate_id'): c for c in existing}

    for resolution in (data if isinstance(data, list) else []):
        if not isinstance(resolution, dict):
            continue
        # Ids come from the model and may be null or strings; comparing them
        # raw (``dup_id >= 0``) would raise TypeError.
        rid = as_index(resolution.get('id'))
        if rid is None or rid not in extracted_by_id:
            continue
        entity = extracted_by_id[rid]
        dup_id = as_index(resolution.get('duplicate_candidate_id'))
        if dup_id is not None and dup_id >= 0 and dup_id in existing_by_id:
            entity['_resolved_to'] = existing_by_id[dup_id]
        # Only take a replacement name that is a real, non-blank string.
        new_name = resolution.get('name')
        if isinstance(new_name, str) and new_name.strip():
            entity['name'] = new_name

    return [e for e in extracted_by_id.values() if '_resolved_to' not in e]
# ===== Step 3: 事实关系抽取 =====
def extract_facts_from_text(
text: str,
entities: list[dict],
reference_time: str = '',
previous_episodes: list[dict] | None = None,
stream: bool = False,
) -> list[dict]:
if len(entities) < 2:
return []
context = {
'episode_content': text,
'entities': entities,
'reference_time': reference_time,
'previous_episodes': previous_episodes or [],
}
messages = prompt_extract_facts(context)
content = _call_llm(messages, stream=stream)
try:
data = _try_parse_json(content)
except Exception as exc:
logger.error('Failed to parse fact extraction result: %s', exc)
return []
if isinstance(data, dict):
data = data.get('edges', data.get('facts', data.get('relations', [])))
if not isinstance(data, list):
return []
entity_names = {_normalize_string(e.get('name', '')) for e in entities}
result = []
for item in data:
if not isinstance(item, dict):
continue
src = _normalize_string(item.get('source_entity_name', ''))
tgt = _normalize_string(item.get('target_entity_name', ''))
if src not in entity_names or tgt not in entity_names:
continue
if src == tgt:
continue
result.append({
'source_entity_name': item['source_entity_name'],
'target_entity_name': item['target_entity_name'],
'relation_type': item.get('relation_type', '关联'),
'fact': item.get('fact', ''),
'valid_at': item.get('valid_at', ''),
'invalid_at': item.get('invalid_at', ''),
'evidence': item.get('evidence', ''),
'qualifiers': item.get('qualifiers', []),
'confidence': item.get('confidence', 0.0),
})
return result
# ===== Step 4: 事实去重/矛盾检测 =====
def resolve_facts_against_graph(
    new_fact: dict,
    existing_facts: list[dict],
    invalidation_candidates: list[dict],
) -> dict:
    """Ask the LLM whether ``new_fact`` duplicates or contradicts known facts.

    Args:
        new_fact: Fact dict; its ``fact`` text is sent to the prompt.
        existing_facts: Known facts to compare against.
        invalidation_candidates: Facts passed to the prompt as candidates
            for contradiction.

    Returns:
        A dict that always has the keys ``is_duplicate``, ``is_contradicted``,
        ``resolved`` (the unchanged ``new_fact``), ``duplicate_facts`` and
        ``contradicted_facts``. When there is nothing to compare against, or
        the reply is unusable, the fact is reported as new.
    """

    def verdict(duplicates: list, contradictions: list) -> dict:
        """Build the result dict so every return path has the same shape."""
        return {
            'is_duplicate': len(duplicates) > 0,
            'is_contradicted': len(contradictions) > 0,
            'resolved': new_fact,
            'duplicate_facts': duplicates,
            'contradicted_facts': contradictions,
        }

    def as_list(value) -> list:
        """Treat a null or otherwise malformed value as 'no matches'."""
        return value if isinstance(value, list) else []

    if not existing_facts:
        return verdict([], [])

    context = {
        'new_fact': new_fact.get('fact', ''),
        'existing_facts': existing_facts,
        'invalidation_candidates': invalidation_candidates,
    }
    messages = prompt_dedupe_edges(context)
    content = _call_llm(messages)
    try:
        data = _try_parse_json(content)
    except Exception as exc:
        logger.warning('Fact dedup failed, treating as new: %s', exc)
        return verdict([], [])

    if not isinstance(data, dict):
        return verdict([], [])

    return verdict(
        as_list(data.get('duplicate_facts')),
        as_list(data.get('contradicted_facts')),
    )
# ===== Step 5: 实体摘要 =====
def extract_entity_summary(
    entity_name: str,
    episodes: list[str],
    existing_summary: str = '',
    previous_episodes: list[dict] | None = None,
) -> str:
    """Ask the LLM for a summary of one entity.

    Args:
        entity_name: Name of the entity to summarise.
        episodes: Episode texts passed to the prompt.
        existing_summary: Current summary passed to the prompt.
        previous_episodes: Earlier episodes passed to the prompt as context.

    Returns:
        The ``summary`` string from the reply, or an empty string when the
        reply cannot be parsed, is not an object, or carries no string
        summary.
    """
    context = {
        'entity_name': entity_name,
        'episodes': episodes,
        'existing_summary': existing_summary,
        'previous_episodes': previous_episodes or [],
    }
    messages = prompt_summarize(context)
    content = _call_llm(messages, max_tokens=1024)
    try:
        data = _try_parse_json(content)
    except Exception:
        logger.warning('Failed to parse summary, using empty')
        return ''

    if not isinstance(data, dict):
        return ''
    summary = data.get('summary')
    # A JSON null or a non-string value must not escape a ``-> str`` function.
    return summary if isinstance(summary, str) else ''
# ===== 统一入口(兼容原有接口) =====
EXTRACTION_SYSTEM_PROMPT = """
你是一个专业的会议知识抽取助手你的任务是从中文会议记录中抽取结构化事实尤其要抽出更细粒度更有语义深度的关系
输出要求
1. 只输出一个 JSON 对象不要输出解释文字
2. 关系抽取不要停留在部门汇报了工作这种浅层描述要尽可能向下细化到
2. 关系抽取不要停留在"部门汇报了工作"这种浅层描述要尽可能向下细化到
- 责任归属
- 目标值 / 当前值 / 趋势
- 约束条件
@ -99,51 +416,9 @@ EXTRACTION_SYSTEM_PROMPT = """
"""
def _call_llm(system: str, user: str, stream: bool = False) -> str:
if not stream:
response = client.chat.completions.create(
model=config.llm.model,
messages=[
{"role": "system", "content": system},
{"role": "user", "content": user},
],
max_tokens=config.llm.max_tokens,
temperature=config.llm.temperature,
)
content = response.choices[0].message.content
if content is None:
raise ValueError("LLM returned empty response")
return content
response = client.chat.completions.create(
model=config.llm.model,
messages=[
{"role": "system", "content": system},
{"role": "user", "content": user},
],
max_tokens=config.llm.max_tokens,
temperature=config.llm.temperature,
stream=True,
)
chunks: List[str] = []
print("\n[LLM] 开始抽取,流式输出中:")
for event in response:
if not event.choices:
continue
delta = event.choices[0].delta.content
if not delta:
continue
chunks.append(delta)
sys.stdout.write(delta)
sys.stdout.flush()
print("\n[LLM] 抽取输出结束")
return "".join(chunks)
def extract_meeting_info(text: str, stream: bool = False) -> MeetingExtraction:
user_prompt = f"""
请从下面会议记录中提取结构化信息并重点做深层关系抽取
请从下面会议记录中提取结构化信息并重点做"深层关系抽取""层次结构识别"
输出 JSON 字段
- title
@ -151,28 +426,32 @@ def extract_meeting_info(text: str, stream: bool = False) -> MeetingExtraction:
- participants
- agenda
- entities: name, entity_type, description
- entity_type 请使用: Department部门Project项目Metric指标Person人物System系统Document文档
- relations:
- subject
- subject_type
- predicate
- object
- object_type
- description
- fact
- qualifiers
- evidence
- confidence
- valid_at
- invalid_at
- source_entity_name: 源实体名称
- target_entity_name: 目标实体名称
- relation_type: 关系类型 HAS_PROJECTHAS_METRIC负责汇报目标值推进依赖
- fact: 一句自然语言事实描述
- valid_at可选
- invalid_at可选
- evidence: 原文证据
- qualifiers: 限定条件列表
- confidence: 0~1
- action_items: task, assignee, deadline, status, priority
- decisions: content, proposer, status
- metrics: metric_name, value, target, owner, trend
- metrics: metric_name, value, target, owner, trend, unit
- departments: [{{"name": "部门名称", "description": "", "projects": ["项目名1", "项目名2"]}}]
- summary
层次关系规则
1. Department 管辖 Project relation_type HAS_PROJECT
2. Project 拥有 Metric relation_type HAS_METRIC
3. 其他事实关系负责汇报目标值等直接用 relation_type 表达
关系抽取规则
1. 不要只抽汇报了工作这种会议动作要尽量继续下钻出具体事实
2. 如果一句话里同时包含主体 + 指标 + 当前值 + 目标值 + 负责人 + 趋势应拆成多条关系或在 qualifiers 中保留这些细节
3. 对于要求部署负责依赖影响约束目标风险类信息优先保留
1. 不要只抽"汇报了工作"这种会议动作要尽量继续下钻出具体事实
2. 如果一句话里同时包含"主体 + 指标 + 当前值 + 目标值 + 负责人 + 趋势"应拆成多条关系或在 qualifiers 中保留这些细节
3. 对于"要求、部署、负责、依赖、影响、约束、目标、风险"类信息优先保留
4. fact 必须是一句完整自然可检索的事实描述
5. qualifiers 用于补充数值范围状态条件截止时间优先级等信息
6. evidence 用原文中的关键词短句不要太长
@ -181,54 +460,43 @@ def extract_meeting_info(text: str, stream: bool = False) -> MeetingExtraction:
会议记录如下
{text}
"""
content = _call_llm(EXTRACTION_SYSTEM_PROMPT, user_prompt, stream=stream)
content = _call_llm([
{'role': 'system', 'content': EXTRACTION_SYSTEM_PROMPT},
{'role': 'user', 'content': user_prompt},
], stream=stream)
data = _try_parse_json(content)
data = _normalize_meeting_data(data)
return MeetingExtraction(**data)
def _try_parse_json(content: str) -> dict:
try:
return json.loads(content)
except json.JSONDecodeError:
logger.warning("JSON parsing failed; trying to repair extracted block")
match = re.search(r"\{.*\}", content, re.DOTALL)
if match:
try:
return json.loads(match.group())
except json.JSONDecodeError as exc:
logger.error("Repaired JSON still failed to parse: %s", exc)
raise
def _normalize_meeting_data(data: dict) -> dict:
if not isinstance(data, dict):
return {}
return {
"title": _as_str(data.get("title")),
"date": _as_str(data.get("date")),
"participants": _as_str_list(data.get("participants")),
"agenda": _as_str_list(data.get("agenda")),
"entities": _normalize_entities(data.get("entities")),
"relations": _normalize_relations(data.get("relations")),
"action_items": _normalize_action_items(data.get("action_items")),
"decisions": _normalize_decisions(data.get("decisions")),
"metrics": _normalize_metrics(data.get("metrics")),
"summary": _as_str(data.get("summary")),
'title': _as_str(data.get('title')),
'date': _as_str(data.get('date')),
'participants': _as_str_list(data.get('participants')),
'agenda': _as_str_list(data.get('agenda')),
'entities': _normalize_entities(data.get('entities')),
'relations': _normalize_relations(data.get('relations')),
'action_items': _normalize_action_items(data.get('action_items')),
'decisions': _normalize_decisions(data.get('decisions')),
'metrics': _normalize_metrics(data.get('metrics')),
'departments': _normalize_departments(data.get('departments')),
'summary': _as_str(data.get('summary')),
}
def _as_str(value) -> str:
if value is None:
return ""
return ''
if isinstance(value, str):
return value
return str(value)
def _as_float(value) -> float:
if value is None or value == "":
if value is None or value == '':
return 0.0
try:
numeric = float(value)
@ -244,7 +512,7 @@ def _as_str_list(value) -> List[str]:
key_text = _as_str(key)
value_text = _as_str(item)
if key_text and value_text:
items.append(f"{key_text}: {value_text}")
items.append(f'{key_text}: {value_text}')
elif key_text:
items.append(key_text)
elif value_text:
@ -262,50 +530,38 @@ def _normalize_entities(value) -> List[dict]:
for entity in value:
if not isinstance(entity, dict):
continue
items.append(
{
"name": _as_str(entity.get("name")),
"entity_type": _as_str(entity.get("entity_type")),
"description": _as_str(entity.get("description")),
}
)
items.append({
'name': _as_str(entity.get('name')),
'entity_type': _as_str(entity.get('entity_type')),
'description': _as_str(entity.get('description')),
})
return items
def _normalize_relations(value) -> List[dict]:
if not isinstance(value, list):
return []
items = []
for relation in value:
if not isinstance(relation, dict):
continue
subject = _as_str(relation.get("subject"))
predicate = _as_str(relation.get("predicate"))
obj = _as_str(relation.get("object"))
description = _as_str(relation.get("description"))
fact = _as_str(relation.get("fact"))
if not fact and subject and predicate and obj:
fact = f"{subject} {predicate} {obj}"
items.append(
{
"subject": subject,
"subject_type": _as_str(relation.get("subject_type")),
"predicate": predicate,
"object": obj,
"object_type": _as_str(relation.get("object_type")),
"description": description,
"fact": fact,
"qualifiers": _as_str_list(relation.get("qualifiers")),
"evidence": _as_str(relation.get("evidence")),
"confidence": _as_float(relation.get("confidence")),
"valid_at": _as_str(relation.get("valid_at")),
"invalid_at": _as_str(relation.get("invalid_at")),
}
)
source = _as_str(relation.get('source_entity_name') or relation.get('subject', ''))
target = _as_str(relation.get('target_entity_name') or relation.get('object', ''))
rtype = _as_str(relation.get('relation_type') or relation.get('predicate', ''))
fact = _as_str(relation.get('fact'))
if not fact and source and rtype and target:
fact = f'{source} {rtype} {target}'
items.append({
'source_entity_name': source,
'target_entity_name': target,
'relation_type': rtype,
'fact': fact,
'qualifiers': _as_str_list(relation.get('qualifiers')),
'evidence': _as_str(relation.get('evidence')),
'confidence': _as_float(relation.get('confidence')),
'valid_at': _as_str(relation.get('valid_at')),
'invalid_at': _as_str(relation.get('invalid_at')),
})
return items
@ -316,15 +572,13 @@ def _normalize_action_items(value) -> List[dict]:
for action in value:
if not isinstance(action, dict):
continue
items.append(
{
"task": _as_str(action.get("task")),
"assignee": _as_str(action.get("assignee")),
"deadline": _as_str(action.get("deadline")),
"status": _as_str(action.get("status")) or "待办",
"priority": _as_str(action.get("priority")) or "",
}
)
items.append({
'task': _as_str(action.get('task')),
'assignee': _as_str(action.get('assignee')),
'deadline': _as_str(action.get('deadline')),
'status': _as_str(action.get('status')) or '待办',
'priority': _as_str(action.get('priority')) or '',
})
return items
@ -335,13 +589,11 @@ def _normalize_decisions(value) -> List[dict]:
for decision in value:
if not isinstance(decision, dict):
continue
items.append(
{
"content": _as_str(decision.get("content")),
"proposer": _as_str(decision.get("proposer")),
"status": _as_str(decision.get("status")) or "已决",
}
)
items.append({
'content': _as_str(decision.get('content')),
'proposer': _as_str(decision.get('proposer')),
'status': _as_str(decision.get('status')) or '已决',
})
return items
@ -352,13 +604,30 @@ def _normalize_metrics(value) -> List[dict]:
for metric in value:
if not isinstance(metric, dict):
continue
items.append(
{
"metric_name": _as_str(metric.get("metric_name")),
"value": _as_str(metric.get("value")),
"target": _as_str(metric.get("target")),
"owner": _as_str(metric.get("owner")),
"trend": _as_str(metric.get("trend")),
}
)
items.append({
'metric_name': _as_str(metric.get('metric_name')),
'value': _as_str(metric.get('value')),
'target': _as_str(metric.get('target')),
'owner': _as_str(metric.get('owner')),
'trend': _as_str(metric.get('trend')),
'unit': _as_str(metric.get('unit')),
})
return items
def _normalize_departments(value) -> List[dict]:
if not isinstance(value, list):
return []
items = []
for dept in value:
if not isinstance(dept, dict):
continue
name = _as_str(dept.get('name'))
if not name:
continue
items.append({
'name': name,
'description': _as_str(dept.get('description')),
'projects': _as_str_list(dept.get('projects')),
})
return items

File diff suppressed because it is too large Load Diff

View File

@ -1,9 +1,18 @@
import hashlib
import logging
from typing import Callable, Optional
from typing import Callable, List, Optional
from meeting_memory.config import config
from meeting_memory.extractor import MeetingExtraction, extract_meeting_info
from meeting_memory.extractor import (
MeetingExtraction,
extract_entities_from_text,
extract_facts_from_text,
extract_meeting_info as monolithic_extract,
)
from meeting_memory.extractor import (
resolve_entities_against_graph,
resolve_facts_against_graph,
)
from meeting_memory.graph_store import graph_store
from meeting_memory.meeting_state import MeetingStateStore
from meeting_memory.raw_store import raw_meeting_store
@ -15,8 +24,9 @@ ProgressCallback = Callable[[int, int, str], None]
class MeetingProcessor:
def process_meeting_file(self, filepath: str, force: bool = False) -> Optional[str]:
with open(filepath, "r", encoding="utf-8") as file_obj:
with open(filepath, 'r', encoding='utf-8') as file_obj:
text = file_obj.read()
return self.process_meeting_text(text, force=force)
@ -26,147 +36,313 @@ class MeetingProcessor:
force: bool = False,
interactive: bool = True,
progress_callback: Optional[ProgressCallback] = None,
use_multistep_extraction: bool = True,
) -> Optional[str]:
def report(step: int, message: str) -> None:
def report(step: int, total: int, message: str) -> None:
if progress_callback:
progress_callback(step, 7, message)
print(f"[{step}/7] {message}")
progress_callback(step, total, message)
print(f'[{step}/{total}] {message}')
report(1, "计算内容哈希")
if use_multistep_extraction:
return self._process_multistep(text, force, interactive, report)
else:
return self._process_monolithic(text, force, interactive, report)
def _process_monolithic(
self, text: str, force: bool, interactive: bool,
report: Callable,
) -> Optional[str]:
total_steps = 7
report(1, total_steps, '计算内容哈希')
content_hash = self._compute_content_hash(text)
if not force and state_store.has_content_hash(content_hash):
logger.info("Duplicate content hash skipped: %s", content_hash[:12])
logger.info('Duplicate content hash skipped: %s', content_hash[:12])
return None
if not force:
report(2, "Neo4j 语义相似去重检索")
report(2, total_steps, 'Neo4j 语义相似去重检索')
similar = graph_store.find_similar_episode(text, threshold=0.92)
if similar:
meta = similar["metadata"]
meta = similar['metadata']
if not interactive:
logger.info(
"Skipped similar meeting in non-interactive mode: %s",
meta.get("title", ""),
)
logger.info('Skipped similar meeting: %s', meta.get('title', ''))
return None
print(
f"\n发现相似会议:{meta.get('title', '')} ({meta.get('date', '')}) "
f"相似度 {similar['score']:.2%}"
)
print(f'\n发现相似会议:{meta.get("title", "")} ({meta.get("date", "")}) 相似度 {similar["score"]:.2%}')
while True:
choice = input("选择 [s]跳过 / [o]覆盖(默认 s").strip().lower() or "s"
if choice == "s":
logger.info("Skipped similar meeting: %s", meta.get("title", ""))
choice = input('选择 [s]跳过 / [o]覆盖(默认 s').strip().lower() or 's'
if choice == 's':
logger.info('Skipped similar meeting: %s', meta.get('title', ''))
return None
if choice == "o":
if choice == 'o':
force = True
break
print("请输入 s 或 o。")
print('请输入 s 或 o。')
else:
report(2, "跳过语义去重,按覆盖模式继续")
report(2, total_steps, '跳过语义去重,按覆盖模式继续')
report(3, "调用大模型抽取结构化信息")
meeting_data = self._extract(text)
report(3, total_steps, '调用大模型抽取结构化信息(单步模式)')
meeting_data = self._extract_monolithic(text)
if not meeting_data:
logger.error("Failed to extract meeting information")
logger.error('Failed to extract meeting information')
return None
data_dict = meeting_data.model_dump()
data_dict["_content_hash"] = content_hash
data_dict["_graph_meeting_id"] = graph_store.meeting_id(data_dict)
return self._finish_pipeline(data_dict, content_hash, text, force, interactive, report, total_steps)
report(4, "检查标题和日期重复")
def _process_multistep(
self, text: str, force: bool, interactive: bool,
report: Callable,
) -> Optional[str]:
total_steps = 10
report(1, total_steps, '计算内容哈希')
content_hash = self._compute_content_hash(text)
if not force and state_store.has_content_hash(content_hash):
logger.info('Duplicate content hash skipped: %s', content_hash[:12])
return None
if not force:
report(2, total_steps, 'Neo4j 语义相似去重检索')
similar = graph_store.find_similar_episode(text, threshold=0.92)
if similar:
meta = similar['metadata']
if not interactive:
logger.info('Skipped similar meeting: %s', meta.get('title', ''))
return None
print(f'\n发现相似会议:{meta.get("title", "")} ({meta.get("date", "")}) 相似度 {similar["score"]:.2%}')
while True:
choice = input('选择 [s]跳过 / [o]覆盖(默认 s').strip().lower() or 's'
if choice == 's':
logger.info('Skipped similar meeting: %s', meta.get('title', ''))
return None
if choice == 'o':
force = True
break
print('请输入 s 或 o。')
else:
report(2, total_steps, '跳过语义去重,按覆盖模式继续')
# Step 3: 提取标题、日期、参与人等元信息
report(3, total_steps, '抽取会议元信息(标题、日期、参与者等)')
meta_info = self._extract_monolithic(text, stream=interactive)
if not meta_info:
logger.error('Failed to extract meeting metadata')
return None
data_dict = meta_info.model_dump()
data_dict['_content_hash'] = content_hash
data_dict['_graph_meeting_id'] = graph_store.meeting_id(data_dict)
data_dict['_original_text'] = text
# Step 4: 抽取实体节点LLM 调用 1
report(4, total_steps, '第 1 步实体抽取:识别会议中提及的实体')
use_stream = interactive
previous_episodes = self._get_previous_episodes_context(data_dict)
extracted_entities = extract_entities_from_text(
text, previous_episodes=previous_episodes, stream=use_stream
)
logger.info('Extracted %d entities from meeting', len(extracted_entities))
if not extracted_entities:
logger.warning('No entities extracted, aborting')
return None
# Step 5: 实体去重(与已有图谱对比 + LLM 裁决)
report(5, total_steps, '实体去重:与图谱中已有实体对比')
resolved_entities = self._dedup_entities(extracted_entities, text)
data_dict['entities'] = resolved_entities
logger.info('After dedup: %d entities remain', len(resolved_entities))
# Step 6: 抽取事实关系LLM 调用 2
report(6, total_steps, '事实抽取:提取实体间的结构化关系')
reference_time = data_dict.get('date', '')
extracted_facts = extract_facts_from_text(
text, resolved_entities,
reference_time=reference_time,
previous_episodes=previous_episodes,
stream=use_stream,
)
logger.info('Extracted %d facts from meeting', len(extracted_facts))
# Step 7: 事实去重与矛盾检测
report(7, total_steps, '事实解析:去重与矛盾检测')
resolved_facts = self._dedup_facts(extracted_facts, data_dict)
data_dict['relations'] = resolved_facts
logger.info('After dedup: %d facts remain', len(resolved_facts))
# Step 8: 检查标题和日期重复
report(8, total_steps, '检查标题和日期重复')
should_skip = self._handle_duplicate(data_dict, force=force, interactive=interactive)
if should_skip:
return None
meeting_title = data_dict.get("title", "")
meeting_date = data_dict.get("date", "")
meeting_title = data_dict.get('title', '')
meeting_date = data_dict.get('date', '')
report(5, "归档原始会议文本")
# Step 9: 归档 + 合并行动项/指标
report(9, total_steps, '归档和状态合并')
raw_path = raw_meeting_store.save(text, title=meeting_title, date=meeting_date)
data_dict["_original_text"] = text
data_dict["_original_text_path"] = raw_path
data_dict['_original_text_path'] = raw_path
meeting_filename = f"{graph_store.meeting_id(data_dict)}.md"
report(6, "合并行动项和指标状态")
data_dict["action_items"] = state_store.merge_action_items(
data_dict.get("action_items", []),
meeting_title,
meeting_date,
meeting_filename,
meeting_filename = f'{graph_store.meeting_id(data_dict)}.md'
data_dict['action_items'] = state_store.merge_action_items(
data_dict.get('action_items', []),
meeting_title, meeting_date, meeting_filename,
)
data_dict["metrics"] = state_store.merge_metrics(
data_dict.get("metrics", []),
meeting_title,
meeting_date,
meeting_filename,
data_dict['metrics'] = state_store.merge_metrics(
data_dict.get('metrics', []),
meeting_title, meeting_date, meeting_filename,
)
state_store.add_content_hash(content_hash, meeting_title, meeting_date, meeting_filename)
state_store.save()
report(7, "写入 Neo4j 图谱和检索数据")
# Step 10: 写入 Neo4j
report(10, total_steps, '写入 Neo4j 图谱')
graph_store.upsert_meeting_subgraph(data_dict)
logger.info("Meeting processed: %s", meeting_title)
logger.info('Meeting processed (multi-step): %s', meeting_title)
return raw_path
def _get_previous_episodes_context(self, data_dict: dict) -> list:
    """Collect recent episodes of the same meeting series as prompt context.

    Looks up the series for the meeting title in the state store, then
    queries the graph for the last three processed titles of that series.

    Args:
        data_dict: Meeting data; only ``title`` is read.

    Returns:
        A list of ``{'content': ..., 'timestamp': ...}`` dicts, empty when
        the series is unknown or has no processed titles.
    """
    series_info = state_store.get_series_info(data_dict.get('title', ''))
    if not series_info:
        return []
    processed = series_info.get('processed_titles', [])
    if not processed:
        return []

    rows = graph_store.run_query('''
        MATCH (m:Meeting)
        WHERE m.title IN $titles
        OPTIONAL MATCH (m)-[:HAS_EPISODE]->(ep:Episode)
        RETURN m.title AS title, m.date AS date, ep.summary AS summary, ep.content AS content
        ORDER BY m.date DESC
        LIMIT 3
    ''', titles=processed[-3:])

    # Assumes rows are dict-like and keyed by the RETURN aliases - TODO confirm.
    # With OPTIONAL MATCH the 'content' key can be present but null, so a
    # ``get`` default would never fall back to the summary; use ``or``.
    return [
        {
            'content': row.get('content') or row.get('summary') or '',
            'timestamp': row.get('date') or '',
        }
        for row in rows
    ]
def _dedup_entities(self, extracted: list, text: str) -> list:
    """Drop extracted entities that match entities already in the graph.

    Best-effort: on any failure, or when the graph holds no entities, the
    extracted list is returned unchanged.
    """
    try:
        known = graph_store.get_entities_map()
        if not known:
            return extracted

        # Candidates are numbered by position so the resolver can refer to them.
        candidates = []
        for position, node in enumerate(known.values()):
            candidates.append({
                'candidate_id': position,
                'name': node['name'],
                'entity_type': node.get('entity_type', ''),
                'summary': node.get('summary', '') or node.get('description', ''),
            })
        return resolve_entities_against_graph(extracted, candidates, episode_content=text)
    except Exception as exc:
        logger.warning('Entity dedup failed, keeping all extracted: %s', exc)
        return extracted
def _dedup_facts(self, facts: list, data_dict: dict) -> list:
resolved = []
for fact in facts:
try:
source = fact.get('source_entity_name', '')
target = fact.get('target_entity_name', '')
existing = graph_store.get_facts_between(source, target)
if not existing:
resolved.append(fact)
continue
result = resolve_facts_against_graph(fact, existing, [])
if isinstance(result, dict) and result.get('is_duplicate'):
logger.debug('Skipped duplicate fact: %s', fact.get('fact', ''))
continue
resolved.append(fact)
except Exception as exc:
logger.warning('Fact dedup failed, keeping: %s', exc)
resolved.append(fact)
return resolved
def _finish_pipeline(
    self, data_dict: dict, content_hash: str, text: str,
    force: bool, interactive: bool, report: Callable, total_steps: int,
) -> Optional[str]:
    """Run steps 4-7 of the pipeline on already-extracted meeting data.

    Steps: duplicate check by title/date, archiving the raw text, merging
    action items and metrics into the state store, and writing the meeting
    subgraph. ``data_dict`` is updated in place along the way.

    Returns:
        The path returned by the raw-text store, or ``None`` when the
        duplicate check decides to skip the meeting.
    """
    data_dict['_content_hash'] = content_hash
    data_dict['_graph_meeting_id'] = graph_store.meeting_id(data_dict)

    report(4, total_steps, '检查标题和日期重复')
    if self._handle_duplicate(data_dict, force=force, interactive=interactive):
        return None

    title = data_dict.get('title', '')
    date = data_dict.get('date', '')

    report(5, total_steps, '归档原始会议文本')
    raw_path = raw_meeting_store.save(text, title=title, date=date)
    data_dict['_original_text'] = text
    data_dict['_original_text_path'] = raw_path

    filename = f'{graph_store.meeting_id(data_dict)}.md'
    report(6, total_steps, '合并行动项和指标状态')
    merged_actions = state_store.merge_action_items(
        data_dict.get('action_items', []), title, date, filename,
    )
    data_dict['action_items'] = merged_actions
    merged_metrics = state_store.merge_metrics(
        data_dict.get('metrics', []), title, date, filename,
    )
    data_dict['metrics'] = merged_metrics
    state_store.add_content_hash(content_hash, title, date, filename)
    state_store.save()

    report(7, total_steps, '写入 Neo4j 图谱和检索数据')
    graph_store.upsert_meeting_subgraph(data_dict)
    logger.info('Meeting processed: %s', title)
    return raw_path
def _handle_duplicate(self, data_dict: dict, force: bool, interactive: bool = True) -> bool:
    """Decide whether a meeting that already exists in the graph is skipped.

    The meeting is looked up by title and date via ``graph_store.get_meeting``.

    Args:
        data_dict: Extracted meeting data; ``title`` and ``date`` are read.
        force: When true, an existing meeting is removed and processing
            continues without asking.
        interactive: When false (and ``force`` is false), an existing meeting
            is skipped without prompting. When true, the user is asked on
            stdin to skip (``s``, the default) or overwrite (``o``).

    Returns:
        ``True`` if the caller should skip this meeting, ``False`` if it
        should go on (no duplicate, or the old copy was removed).
    """
    # Each statement appears exactly once here; running the double- and
    # single-quoted copies together would log, print and prompt twice.
    title = data_dict.get('title', '')
    date = data_dict.get('date', '')
    existing = graph_store.get_meeting(title, date)
    if not existing:
        return False

    if force:
        logger.info('Duplicate meeting found; overwriting in force mode: %s', title)
        self._remove_old(data_dict, existing)
        return False

    if not interactive:
        logger.info('Skipped duplicate meeting in non-interactive mode: %s', title)
        return True

    print(f'\n发现重复会议:{title} ({date})')
    while True:
        # Empty input falls back to the default choice "s" (skip).
        choice = input('选择 [s]跳过 / [o]覆盖(默认 s').strip().lower() or 's'
        if choice == 's':
            logger.info('Skipped duplicate meeting: %s', title)
            return True
        if choice == 'o':
            self._remove_old(data_dict, existing)
            return False
        print('请输入 s 或 o。')
def _remove_old(self, data_dict: dict, existing: Optional[dict] = None) -> None:
    """Remove the stored artifacts of a meeting that is about to be replaced.

    Deletes the meeting subgraph identified by ``graph_store.meeting_id`` and
    removes the related content hashes from the state store: the hash of the
    incoming text (``data_dict['_content_hash']``) and, if it differs, the
    hash recorded on the existing meeting (``existing['content_hash']``).

    Args:
        data_dict: Data of the incoming meeting.
        existing: The previously stored meeting record, if one was found.
    """
    # Each step runs once; the duplicated copies would log the removal twice.
    meeting_id = graph_store.meeting_id(data_dict)
    graph_store.remove_meeting_subgraph(meeting_id)

    new_hash = data_dict.get('_content_hash', '')
    if new_hash:
        state_store.remove_content_hash(new_hash)

    if existing:
        old_hash = existing.get('content_hash', '')
        # Skip the second removal when both hashes are the same value.
        if old_hash and old_hash != new_hash:
            state_store.remove_content_hash(old_hash)

    logger.info('Removed old meeting artifacts: %s', data_dict.get('title', ''))
def _compute_content_hash(self, text: str) -> str:
normalized = text.strip().replace("\r\n", "\n")
return hashlib.sha256(normalized.encode("utf-8")).hexdigest()
normalized = text.strip().replace('\r\n', '\n')
return hashlib.sha256(normalized.encode('utf-8')).hexdigest()
def _extract(self, text: str) -> Optional[MeetingExtraction]:
def _extract_monolithic(self, text: str, *, stream: bool = True) -> Optional[MeetingExtraction]:
try:
return extract_meeting_info(text, stream=True)
return monolithic_extract(text, stream=stream)
except Exception as exc:
logger.error("LLM extraction failed: %s", exc)
logger.error('LLM extraction failed: %s', exc)
return None
def query(self, question: str, top_k: int = 3) -> str:
@ -174,10 +350,10 @@ class MeetingProcessor:
def stats(self) -> dict:
    """Return a snapshot of store statistics and configured storage paths.

    Keys: ``graph`` (``graph_store.get_stats()``), ``state``
    (``state_store.get_stats()``), ``raw_dir`` and ``state_path`` (read from
    ``config``).
    """
    # Each key is listed once; repeating the keys in the literal would call
    # both get_stats() functions twice and throw the first results away.
    return {
        'graph': graph_store.get_stats(),
        'state': state_store.get_stats(),
        'raw_dir': config.storage.raw_dir,
        'state_path': config.state_path,
    }

View File

@ -0,0 +1,5 @@
from .extract_nodes import extract_entities
from .extract_edges import extract_facts
from .dedupe_nodes import resolve_entities
from .dedupe_edges import resolve_facts
from .summarize_nodes import summarize_entity

View File

@ -0,0 +1,49 @@
from typing import Any
def resolve_facts(context: dict[str, Any]) -> list[dict]:
    """Build the chat messages for fact de-duplication / contradiction checks.

    Args:
        context: Reads ``existing_facts`` and ``invalidation_candidates``
            (sequences of dicts whose ``fact`` entry is rendered) and
            ``new_fact`` (inserted into the prompt as-is).

    Returns:
        A two-element message list: a system message and a user message.
        In the user message the existing facts are numbered from 0 and the
        invalidation candidates continue the numbering after them.
    """
    known = context.get('existing_facts', [])
    new_fact = context.get('new_fact', '')
    stale_candidates = context.get('invalidation_candidates', [])

    existing_lines = [
        f' [idx={pos}] {item.get("fact", "")}' for pos, item in enumerate(known)
    ]
    invalidation_lines = []
    for pos, item in enumerate(stale_candidates):
        # Candidate indices continue where the existing facts stop.
        invalidation_lines.append(f' [idx={pos + len(known)}] {item.get("fact", "")}')

    existing_text = '\n'.join(existing_lines)
    invalidation_text = '\n'.join(invalidation_lines)

    user_prompt = f"""
<已有事实>
{existing_text}
</已有事实>
<事实失效候选>
{invalidation_text}
</事实失效候选>
<新事实>
{new_fact}
</新事实>
注意idx 编号是连续的已有事实从 0 开始失效候选紧随其后
任务
1. **重复检测**如果<新事实><已有事实>中的某条描述的是完全相同的客观事实返回该 idx
2. **矛盾检测**如果<新事实><已有事实><失效候选>中的某条相互矛盾如状态已更新数值已变更返回该 idx
返回格式
{{"duplicate_facts": [idx列表], "contradicted_facts": [idx列表]}}
如果没有重复或矛盾返回空列表
示例
- 新事实"张三负责宽带运维项目" vs 已有"张三负责宽带运维" 重复相同事实
- 新事实"宽带用户数当前值 8500" vs 已有"宽带用户数目标值 10000" 不重复不矛盾数值维度不同
- 新事实"宽带用户数当前值 9000" vs 已有"宽带用户数 8000" 矛盾同一指标数值更新
"""
    system_message = {'role': 'system', 'content': '你是事实去重和矛盾检测助手。判断新事实与已有事实的关系。'}
    user_message = {'role': 'user', 'content': user_prompt}
    return [system_message, user_message]

View File

@ -0,0 +1,49 @@
from typing import Any
def resolve_entities(context: dict[str, Any]) -> list[dict]:
    """Build the chat messages for entity de-duplication against the graph.

    Args:
        context: Reads ``extracted_entities`` (dicts with ``name``,
            ``entity_type``, ``description``), ``existing_entities`` (dicts
            with ``candidate_id``, ``name``, ``entity_type``, ``summary``)
            and ``episode_content`` (inserted into the prompt as-is).

    Returns:
        A two-element message list: a system message and a user message.
        Each existing entity's summary is truncated to 100 characters; a
        candidate without ``candidate_id`` is labelled with its list position.
    """
    extracted = context.get('extracted_entities', [])
    existing = context.get('existing_entities', [])
    episode_content = context.get('episode_content', '')

    extracted_text = '\n'.join(
        f' [{i}] {e.get("name", "")}{e.get("entity_type", "未知")}{e.get("description", "")}'
        for i, e in enumerate(extracted)
    )
    # `summary` may be present but None; `.get("summary", "")` would then
    # return None and slicing it would raise TypeError, so fall back to "".
    existing_text = '\n'.join(
        f' [candidate_id={c.get("candidate_id", i)}] {c.get("name", "")}{c.get("entity_type", "未知")}{(c.get("summary") or "")[:100]}'
        for i, c in enumerate(existing)
    )

    user_prompt = f"""
<当前会议内容>
{episode_content}
</当前会议内容>
<新抽取的实体>
{extracted_text}
</新抽取的实体>
<图谱中已有的实体>
{existing_text}
</图谱中已有的实体>
任务判断<新抽取的实体>中的每一个是否与<图谱中已有的实体>中的某个是同一个真实世界对象
判断标准
- **是重复**两个名称指向同一个真实世界的人组织地点项目指标等
- **不是重复**名称相似但指向不同实体如两个同名但不同的人同名的不同项目
对每个新抽取的实体返回
- id: 对应新抽取实体列表中的序号
- name: 实体的最佳名称优先使用已有实体中的更完整名称
- duplicate_candidate_id: 匹配到的已有实体的 candidate_id如果无匹配则填 -1
返回格式 JSON 数组[{{"id": 0, "name": "张三", "duplicate_candidate_id": -1}}, ...]
必须为新抽取的每个实体返回一条记录id 0 开始连续编号
"""
    return [
        {'role': 'system', 'content': '你是实体去重助手。判断两个实体是否指向同一个真实世界对象。'},
        {'role': 'user', 'content': user_prompt},
    ]

View File

@ -0,0 +1,66 @@
from typing import Any
def extract_facts(context: dict[str, Any]) -> list[dict]:
    """Build the chat messages for extracting fact relations between entities.

    Args:
        context: Reads ``previous_episodes`` (JSON-dumped into a history
            section when non-empty), ``episode_content``, ``entities`` (dicts
            with ``name`` and ``entity_type``) and ``reference_time``.

    Returns:
        A two-element message list: a system message and a user message
        containing the meeting text, the numbered entity list, the reference
        time and the extraction rules.
    """
    import json

    history = context.get('previous_episodes', [])
    current = context.get('episode_content', '')
    known_entities = context.get('entities', [])
    reference_time = context.get('reference_time', '')

    # The history section is omitted entirely when there is no history.
    previous_section = (
        f'\n<历史上下文>\n{json.dumps(history, ensure_ascii=False)}\n</历史上下文>\n'
        if history
        else ''
    )

    entity_lines = []
    for position, entity in enumerate(known_entities):
        entity_lines.append(
            f' [{position}] {entity.get("name", "")}{entity.get("entity_type", "未知")}'
        )
    entities_text = '\n'.join(entity_lines)

    user_prompt = f"""
{previous_section}
<当前会议内容>
{current}
</当前会议内容>
<已抽取实体>
{entities_text}
</已抽取实体>
<参考时间>
{reference_time}
</参考时间>
抽取规则
1. <当前会议内容>中抽取上述<已抽取实体>之间的**事实关系**
2. 每条关系必须涉及两个**不同**的实体
3. 返回 JSON 数组格式
[{{
"source_entity_name": "源实体名称(必须来自上方的实体列表)",
"target_entity_name": "目标实体名称(必须来自上方的实体列表)",
"relation_type": "关系类型,如 负责、汇报、隶属于、参与、目标值、截止于、影响、依赖于",
"fact": "一句自然语言的事实描述,保留原文中所有具体细节(数值、时间、地点等)",
"valid_at": "该事实开始成立的时间ISO 8601格式如 2025-04-30T00:00:00Z不明确则留空",
"invalid_at": "该事实不再成立的时间,不明确则留空",
"evidence": "原文中的关键证据短句",
"qualifiers": ["限定条件列表,如数值、范围、状态、截止时间等"],
"confidence": 置信度0到1之间
}}]
4. relation_type 避免使用"关联""涉及"等空泛词优先使用具体谓词
负责汇报目标值当前值低于高于要求督导推进支撑依赖计划完成截止于参与隶属于分管协调审批
5. 层次关系结构隶属使用以下固定 relation_type
HAS_PROJECT: 部门管辖项目Department -> Project
HAS_METRIC: 项目拥有指标Project -> Metric
PART_OF: 实体属于某个上级实体
6. 同一对实体之间可能既有层次关系HAS_PROJECT也有事实关系负责汇报需要分别抽取
7. fact 必须是一句完整的自然语言事实保留所有具体信息人名数值产品名地点等
8. 如果根据上下文可以判断事实的开始/结束时间填入 valid_at / invalid_at
"""
    system_message = {
        'role': 'system',
        'content': '你是一个专业的事实关系抽取专家。从会议记录中抽取实体间的结构化事实关系。',
    }
    return [system_message, {'role': 'user', 'content': user_prompt}]

View File

@ -0,0 +1,56 @@
from typing import Any
# System-role message sent by extract_entities(). It names the entity
# categories to extract (Department, Project, Metric, Person, System,
# Document) and tells the model what to leave out.
SYSTEM_PROMPT = (
    '你是会议纪要实体抽取专家。'
    '从会议记录中抽取明确的实体节点包括部门Department、项目Project、指标Metric、人物Person、系统System、文档Document等。'
    '不要抽取抽象概念、情感、时间日期或泛泛的名词。'
)
def extract_entities(context: dict[str, Any]) -> list[dict]:
    """Build the chat messages for extracting entity nodes from a meeting.

    Args:
        context: Reads ``previous_episodes`` (JSON-dumped into a history
            section when non-empty), ``episode_content`` and ``entity_types``
            (dicts with ``type`` and ``description`` keys; a fallback line is
            used when the list is empty).

    Returns:
        A two-element message list: the module-level ``SYSTEM_PROMPT`` as the
        system message and a user message with the meeting text, the entity
        type list and the extraction rules.
    """
    import json

    history = context.get('previous_episodes', [])
    current = context.get('episode_content', '')
    declared_types = context.get('entity_types', [])

    if declared_types:
        type_lines = []
        for spec in declared_types:
            type_lines.append(f' - {spec["type"]}: {spec["description"]}')
        entity_types_section = '\n'.join(type_lines)
    else:
        entity_types_section = ' - 未限定类型,请根据上下文自行判断'

    # The history section is omitted entirely when there is no history.
    previous_section = (
        f'\n<历史上下文>\n{json.dumps(history, ensure_ascii=False)}\n</历史上下文>\n'
        if history
        else ''
    )

    user_prompt = f"""
{previous_section}
<当前会议内容>
{current}
</当前会议内容>
<实体类型>
{entity_types_section}
</实体类型>
抽取规则
1. 只抽取当前会议内容中**明确提及**的实体
2. 每个实体必须是有唯一标识的具体事物人名组织名地名项目名指标名称等
3. 不要抽取代词抽象概念增长改善风险时间日期
4. 如果同一实体在不同来源中以不同名称出现如简称/全称保留最完整的形式
5. 必须返回 JSON 数组格式[{{"name": "实体名称", "entity_type": "类型", "description": "描述", "evidence": "原文证据"}}]
6. description 写一段对该实体的简要描述20字以内
7. evidence 从原文中摘录提及该实体的关键短句
注意实体类型建议使用 Department部门Project项目Metric指标Person人物System系统Document文档请确保
- 部门Department会议中提到的具体部门名称"技术部""市场部"
- 项目Project部门负责的具体项目名称
- 指标Metric项目中提到的具体量化指标"响应时间""完成率"
"""
    system_message = {'role': 'system', 'content': SYSTEM_PROMPT}
    return [system_message, {'role': 'user', 'content': user_prompt}]

View File

@ -0,0 +1,41 @@
from typing import Any
def summarize_entity(context: dict[str, Any]) -> list[dict]:
    """Build the chat messages for summarizing a single entity.

    Args:
        context: Reads ``entity_name``, ``existing_summary`` (rendered in its
            own section when non-empty), ``episodes`` (a list is joined with
            ``---`` separator lines, any other value is inserted as-is) and
            ``previous_episodes`` (JSON-dumped into a history section when
            non-empty).

    Returns:
        A two-element message list: a system message and a user message.
    """
    import json

    entity_name = context.get('entity_name', '')
    prior_summary = context.get('existing_summary', '')
    episodes = context.get('episodes', [])
    history = context.get('previous_episodes', [])

    # Both optional sections disappear completely when their source is empty.
    existing_section = (
        f'\n<已有摘要>\n{prior_summary}\n</已有摘要>\n' if prior_summary else ''
    )
    previous_section = (
        f'\n<历史内容>\n{json.dumps(history, ensure_ascii=False)}\n</历史内容>\n'
        if history
        else ''
    )

    if isinstance(episodes, list):
        episodes_text = '\n---\n'.join(episodes)
    else:
        episodes_text = episodes

    user_prompt = f"""
{previous_section}
<当前内容>
{episodes_text}
</当前内容>
{existing_section}
为实体 **{entity_name}** 生成一段信息密集的摘要
规则
1. 只使用<当前内容><已有摘要>中的事实不要推测
2. 保留所有实质性的人名角色地点日期数值
3. 用第三人称直接陈述事实
4. 不要使用"提及了""讨论了""指出"等元语言动词直接陈述事实
5. 如果会议对已有信息做了更新采用更新的说法
6. 摘要不超过 500
7. 返回 JSON{{"summary": "摘要内容"}}
"""
    system_message = {'role': 'system', 'content': '你是实体摘要助手。根据会议内容为实体生成信息密集的摘要。'}
    return [system_message, {'role': 'user', 'content': user_prompt}]

View File

@ -20,6 +20,7 @@ from meeting_memory.meeting_processor import meeting_processor, state_store
logger = logging.getLogger(__name__)
STATIC_DIR = Path(__file__).resolve().parent / "static"
STATIC_V2_DIR = Path(__file__).resolve().parent / "static_v2"
RAW_DIR = Path(config.storage.raw_dir)
IMPORT_JOBS = {}
IMPORT_JOBS_LOCK = threading.Lock()
@ -29,8 +30,22 @@ class GraphDemoHandler(SimpleHTTPRequestHandler):
def __init__(self, *args, **kwargs):
super().__init__(*args, directory=str(STATIC_DIR), **kwargs)
# ── Route: serve /static_v2/* from the v2 directory ──
def translate_path(self, path):
    """Map a request path to a filesystem path, serving /static_v2/* from STATIC_V2_DIR.

    Paths that do not start with ``/static_v2/`` are delegated to the base
    class unchanged. For ``/static_v2/`` paths the remainder is
    percent-decoded, normalized, and joined onto ``STATIC_V2_DIR`` one
    component at a time. Components that are empty, ``.``/``..``, or that
    carry a directory part are dropped, so a request such as
    ``/static_v2/../../secret`` cannot resolve outside ``STATIC_V2_DIR``.

    Args:
        path: The raw request path, possibly with a query string or fragment.

    Returns:
        The filesystem path to serve, as a string.
    """
    import os
    import posixpath
    from urllib.parse import unquote

    parsed = urlparse(path)
    raw = parsed.path
    prefix = "/static_v2/"
    if not raw.startswith(prefix):
        return super().translate_path(path)

    # Decode before filtering so encoded dot segments (%2e%2e) are caught too.
    rel = posixpath.normpath(unquote(raw[len(prefix):]))
    target = STATIC_V2_DIR
    for part in rel.split("/"):
        # Keep only plain file/directory names, as the stdlib handler does.
        if not part or part in (os.curdir, os.pardir) or os.path.dirname(part):
            continue
        target = target / part
    return str(target)
def do_GET(self):
parsed = urlparse(self.path)
# API endpoints
if parsed.path == "/api/dashboard":
self._handle_dashboard()
return
@ -55,10 +70,16 @@ class GraphDemoHandler(SimpleHTTPRequestHandler):
if parsed.path == "/api/import-status":
self._handle_import_status(parsed.query)
return
# Page routing — serve v2 HTML as default
if parsed.path in ("/", "/index.html"):
self.path = "/index.html"
self.path = "/static_v2/index.html"
elif parsed.path == "/graph":
self.path = "/graph.html"
self.path = "/static_v2/graph.html"
elif parsed.path == "/graph.html":
self.path = "/static_v2/graph.html"
# JS files (/app.js, /graph.js) resolve to STATIC_DIR via default translate_path
super().do_GET()
def do_POST(self):
@ -311,9 +332,9 @@ def _serialize_meeting(path: Path, include_content: bool = False):
lines = raw_text.splitlines()
for line in lines[:12]:
if line.startswith('title: "'):
title = line[len('title: "') : -1]
title = line[len('title: "'):-1]
elif line.startswith('date: "'):
date = line[len('date: "') : -1]
date = line[len('date: "'):-1]
content_start = 0
for idx, line in enumerate(lines):
@ -397,4 +418,4 @@ if __name__ == "__main__":
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
datefmt="%H:%M:%S",
)
run_demo_server()
run_demo_server()

View File

@ -85,7 +85,7 @@ function renderStats(graph = {}, state = {}) {
{ label: "Neo4j", value: graph.enabled ? "在线" : "离线", icon: "⬡", color: graph.enabled ? "#34c759" : "#b3261e" },
{ label: "会议", value: graph.meetings ?? 0, icon: "📋", color: "#4a90d9" },
{ label: "实体", value: graph.entities ?? 0, icon: "◆", color: "#53c2da" },
{ label: "关系", value: graph.facts ?? 0, icon: "↗", color: "#ff9500" },
{ label: "关系", value: graph.relations ?? 0, icon: "↗", color: "#ff9500" },
{ label: "行动项", value: state.action_items_tracked ?? 0, icon: "☐", color: "#7f8bff" },
{ label: "指标", value: state.metrics_tracked ?? 0, icon: "📊", color: "#af52de" },
];

View File

@ -4,8 +4,7 @@ const graphNodeLimit = document.getElementById("graphNodeLimit");
const graphEdgeLimit = document.getElementById("graphEdgeLimit");
const graphSvg = document.getElementById("graphSvg");
const graphMeta = document.getElementById("graphMeta");
const graphDetail = document.getElementById("graphDetail");
const relatedSearch = document.getElementById("relatedSearch");
const detailPanel = document.getElementById("detailPanel");
const graphTypeFilter = document.getElementById("graphTypeFilter");
let selectedEntityTypes = null;
@ -76,35 +75,35 @@ async function loadGraphKinds() {
}
function renderInspector(content) {
graphDetail.innerHTML = content;
detailPanel.innerHTML = content;
}
async function loadRelated(query) {
if (!query) {
relatedSearch.innerHTML = "";
return;
}
if (!query) return;
const response = await fetch(`/api/search?q=${encodeURIComponent(query)}&limit=4`);
const payload = await response.json();
const results = payload.results || [];
if (!results.length) {
relatedSearch.innerHTML = empty("没有更多相关检索结果");
detailPanel.insertAdjacentHTML("beforeend", `
<div class="detail-section">
<p class="eyebrow">Related</p>
<div class="empty-state">没有更多相关检索结果</div>
</div>
`);
return;
}
relatedSearch.innerHTML = `
<div class="panel-head">
<div>
<p class="eyebrow">Related</p>
<h3>相关检索</h3>
</div>
detailPanel.insertAdjacentHTML("beforeend", `
<div class="detail-section">
<p class="eyebrow">Related</p>
<h3>相关检索</h3>
${results.map((item) => `
<article class="result-card">
<strong>${h(item.title || item.kind || "结果")}</strong>
<p>${h(item.text || "")}</p>
</article>
`).join("")}
</div>
${results.map((item) => `
<article class="result-card">
<strong>${h(item.title || item.kind || "结果")}</strong>
<p>${h(item.text || "")}</p>
</article>
`).join("")}
`;
`);
}
function renderGraph(payload) {
@ -125,7 +124,6 @@ function renderGraph(payload) {
if (!nodes.length) {
graphSvg.innerHTML = "";
renderInspector(empty("当前没有可显示的图谱数据"));
relatedSearch.innerHTML = "";
return;
}
@ -201,7 +199,6 @@ function renderGraph(payload) {
text.setAttribute("y", r + 16);
text.setAttribute("text-anchor", "middle");
text.setAttribute("font-size", "11");
text.setAttribute("fill", "#22264d");
text.setAttribute("data-type", "node-label");
text.textContent = truncate(node.label, TRUNCATE_LENGTH);
g.appendChild(text);
@ -392,36 +389,47 @@ function renderGraph(payload) {
${node.date ? `<span class="chip">${h(node.date)}</span>` : ""}
<span class="chip">关系 ${h(related.length)}</span>
</div>`;
} else if (kind === "fact") {
body = `
<p>${h(node.fact || node.description || "暂无描述")}</p>
<div class="chip-row">
${node.date ? `<span class="chip">${h(node.date)}</span>` : ""}
<span class="chip">关系 ${h(related.length)}</span>
</div>`;
} else {
const isMetric = (node.entity_type || "").toLowerCase() === "metric";
body = `
<p>${h(node.description || "暂无描述")}</p>
<div class="chip-row">
${node.entity_type ? `<span class="chip">${h(node.entity_type)}</span>` : ""}
${node.date ? `<span class="chip">${h(node.date)}</span>` : ""}
<span class="chip">关系 ${h(related.length)}</span>
</div>`;
</div>
${isMetric ? `
<div class="metric-fields">
${node.current_value ? `<p><strong>当前值:</strong>${h(node.current_value)}</p>` : ""}
${node.target ? `<p><strong>目标值:</strong>${h(node.target)}</p>` : ""}
${node.unit ? `<p><strong>单位:</strong>${h(node.unit)}</p>` : ""}
${node.trend ? `<p><strong>趋势:</strong>${h(node.trend)}</p>` : ""}
</div>` : ""}`;
}
renderInspector(`
<div class="detail-card">
<div class="detail-section">
<p class="eyebrow">${h(node.kind)}</p>
<h3>${h(node.label)}</h3>
${body}
</div>
${related.map((edge) => `
<article class="result-card">
<strong>${h(edge.source)} ${h(edge.target)}</strong>
<p>${h(edge.fact || edge.description || edge.predicate || "")}</p>
</article>
`).join("")}
<div class="detail-section">
<p class="eyebrow">Relations</p>
${related.length ? related.map((edge) => `
<article class="result-card">
<strong>${h(edge.source)} ${h(edge.target)}</strong>
<p>${h(edge.fact || edge.description || edge.predicate || "")}</p>
</article>
`).join("") : `<div class="empty-state">没有关联关系</div>`}
</div>
`);
loadRelated(node.label).catch(() => relatedSearch.innerHTML = empty("相关检索加载失败"));
loadRelated(node.label).catch(() => {
detailPanel.insertAdjacentHTML("beforeend", `
<div class="detail-section">
<p class="eyebrow">Related</p>
<div class="empty-state">相关检索加载失败</div>
</div>
`);
});
});
});
@ -432,7 +440,7 @@ function renderGraph(payload) {
line?.classList.add("active");
const edge = edges.find((item) => item.id === el.dataset.edgeId);
renderInspector(`
<div class="detail-card">
<div class="detail-section">
<p class="eyebrow">Edge</p>
<h3>${h(edge.source)} ${h(edge.target)}</h3>
<p>${h(edge.fact || edge.description || "暂无补充描述")}</p>
@ -444,7 +452,14 @@ function renderGraph(payload) {
</div>
</div>
`);
loadRelated(`${edge.source} ${edge.predicate} ${edge.target}`).catch(() => relatedSearch.innerHTML = empty("相关检索加载失败"));
loadRelated(`${edge.source} ${edge.predicate} ${edge.target}`).catch(() => {
detailPanel.insertAdjacentHTML("beforeend", `
<div class="detail-section">
<p class="eyebrow">Related</p>
<div class="empty-state">相关检索加载失败</div>
</div>
`);
});
});
});
@ -514,4 +529,4 @@ graphForm?.addEventListener("submit", (event) => {
});
loadGraphKinds().catch(() => {});
fetchGraph().catch((error) => renderInspector(empty(`图谱加载失败: ${error}`)));
fetchGraph().catch((error) => renderInspector(empty(`图谱加载失败: ${error}`)));

View File

@ -1,977 +0,0 @@
:root {
--primary: #5d67f5;
--primary-2: #7f8bff;
--primary-soft: #edf1ff;
--accent: #53c2da;
--bg: #f5f7ff;
--bg-2: #fbfcff;
--panel: rgba(255, 255, 255, 0.9);
--panel-strong: rgba(255, 255, 255, 0.96);
--border: rgba(212, 221, 247, 0.95);
--text: #22264d;
--muted: #68709d;
--danger: #b3261e;
--success: #11693c;
--shadow: 0 12px 28px rgba(73, 81, 141, 0.08);
--shadow-sm: 0 6px 16px rgba(73, 81, 141, 0.06);
--radius-xl: 20px;
--radius-lg: 16px;
--radius-md: 12px;
--radius-sm: 10px;
}
* { box-sizing: border-box; }
html, body {
margin: 0;
min-height: 100%;
}
body {
font-family: "Segoe UI", "PingFang SC", "Microsoft YaHei", sans-serif;
font-size: 13px;
color: var(--text);
background:
radial-gradient(circle at 10% 10%, rgba(126, 186, 255, 0.16), transparent 24%),
radial-gradient(circle at 88% 14%, rgba(132, 121, 255, 0.12), transparent 22%),
linear-gradient(135deg, #f8faff 0%, var(--bg) 55%, var(--bg-2) 100%);
}
a { color: inherit; text-decoration: none; }
button, input, textarea { font: inherit; }
.shell {
display: grid;
grid-template-columns: 220px minmax(0, 1fr);
gap: 14px;
min-height: 100vh;
padding: 14px;
}
.sidebar, .panel, .detail-modal::backdrop {
backdrop-filter: blur(12px);
}
.sidebar {
display: flex;
flex-direction: column;
gap: 10px;
padding: 14px;
border: 1px solid var(--border);
border-radius: 22px;
background: linear-gradient(180deg, rgba(236, 243, 255, 0.92), rgba(255, 255, 255, 0.8));
box-shadow: var(--shadow);
}
.brand {
display: flex;
gap: 10px;
align-items: center;
}
.brand-mark {
width: 40px;
height: 40px;
display: grid;
place-items: center;
border-radius: 14px;
color: #fff;
font-size: 17px;
font-weight: 800;
background: linear-gradient(135deg, var(--primary), var(--primary-2));
}
.brand-kicker, .eyebrow {
margin: 0 0 3px;
color: var(--primary);
font-size: 10px;
font-weight: 700;
letter-spacing: 0.08em;
text-transform: uppercase;
}
.brand h1, .panel h3, .dialog-head h3 {
margin: 0;
}
.brand h1 { font-size: 18px; }
.nav {
display: grid;
gap: 6px;
}
.nav-link {
padding: 10px 12px;
border: 1px solid transparent;
border-radius: var(--radius-md);
color: var(--muted);
font-size: 13px;
font-weight: 700;
transition: 0.2s ease;
}
.nav-link:hover, .nav-link.active {
color: var(--primary);
border-color: rgba(109, 123, 255, 0.16);
background: rgba(255, 255, 255, 0.78);
}
.side-card, .panel {
border: 1px solid var(--border);
border-radius: var(--radius-xl);
background: var(--panel);
box-shadow: var(--shadow-sm);
}
.panel { padding: 14px; }
.panel-head {
display: flex;
justify-content: space-between;
align-items: start;
gap: 10px;
margin-bottom: 10px;
}
.panel h3 { font-size: 17px; }
.sidebar-shortcuts {
display: flex;
flex-wrap: wrap;
gap: 6px;
padding: 10px;
margin-top: auto;
}
.pill-link, .chip {
display: inline-flex;
align-items: center;
min-height: 24px;
padding: 0 9px;
border-radius: 999px;
font-size: 11px;
font-weight: 700;
}
.pill-link {
background: rgba(255, 255, 255, 0.9);
border: 1px solid var(--border);
}
.chip {
background: var(--primary-soft);
color: var(--primary);
}
.chip.status-done, .chip.status-completed { background: #edfdf4; color: var(--success); }
.chip.status-pending, .chip.status-todo { background: #fff8e7; color: #b8860b; }
.chip.status-in_progress, .chip.status-active { background: #e8f4fd; color: #4a90d9; }
.chip.status-blocked { background: #fff4f2; color: var(--danger); }
.main {
display: flex;
flex-direction: column;
gap: 12px;
min-height: 0;
}
.main-toolbar {
display: flex;
justify-content: space-between;
align-items: center;
gap: 16px;
padding: 16px 18px;
border: 1px solid var(--border);
border-radius: 22px;
background:
radial-gradient(circle at top right, rgba(134, 144, 255, 0.12), transparent 28%),
linear-gradient(180deg, rgba(255, 255, 255, 0.94), rgba(244, 248, 255, 0.96));
box-shadow: var(--shadow);
}
.main-toolbar h2 {
margin: 0;
font-size: 22px;
}
.main-toolbar-actions {
display: flex;
gap: 8px;
}
.btn, .icon-btn {
border: none;
cursor: pointer;
transition: 0.2s ease;
}
.btn {
display: inline-flex;
align-items: center;
justify-content: center;
min-height: 36px;
padding: 0 14px;
border-radius: 11px;
font-size: 12px;
font-weight: 700;
color: #fff;
background: linear-gradient(135deg, var(--primary), var(--primary-2));
box-shadow: 0 8px 18px rgba(93, 103, 245, 0.18);
}
.btn:hover, .icon-btn:hover { transform: translateY(-1px); }
.btn:disabled {
opacity: 0.68;
cursor: not-allowed;
transform: none;
}
.btn.ghost {
color: var(--primary);
background: rgba(255, 255, 255, 0.94);
box-shadow: none;
border: 1px solid var(--border);
}
.stats-grid, .content-grid, .workspace-grid {
display: grid;
gap: 12px;
}
.stats-grid { grid-template-columns: repeat(4, minmax(0, 1fr)); }
.highlight-card {
padding: 0;
border: 1px solid var(--border);
border-radius: var(--radius-lg);
background: var(--panel-strong);
box-shadow: var(--shadow-sm);
overflow: hidden;
}
.highlight-card .hc-bar {
height: 4px;
background: var(--card-accent);
}
.highlight-card .eyebrow {
padding: 12px 14px 0;
}
.highlight-card strong {
display: block;
margin: 4px 0 2px;
padding: 0 14px;
font-size: 26px;
color: var(--card-accent);
}
.highlight-card p:last-child {
padding: 0 14px 14px;
margin: 0;
color: var(--muted);
}
.dashboard-grid {
grid-template-columns: minmax(330px, 1.1fr) minmax(340px, 1fr) minmax(220px, 0.72fr);
align-items: start;
}
.search-box, .import-form, .import-fieldset {
display: grid;
gap: 8px;
}
.import-fieldset {
margin: 0;
padding: 0;
border: 0;
min-width: 0;
}
.import-fieldset:disabled { opacity: 0.6; }
.search-box input, .graph-controls input, textarea, input[type="file"] {
width: 100%;
min-height: 38px;
padding: 9px 12px;
border: 1px solid var(--border);
border-radius: 11px;
background: rgba(255, 255, 255, 0.94);
color: var(--text);
}
textarea {
min-height: 138px;
resize: vertical;
}
.field-label {
font-size: 11px;
font-weight: 700;
color: var(--muted);
}
.check-row {
display: flex;
align-items: center;
gap: 8px;
font-size: 12px;
color: var(--muted);
}
.status-box {
margin-top: 10px;
padding: 10px 12px;
border-radius: 12px;
border: 1px solid var(--border);
background: rgba(255, 255, 255, 0.76);
font-size: 12px;
color: var(--muted);
}
.status-box[data-kind="error"] {
color: var(--danger);
background: #fff4f2;
}
.status-box[data-kind="success"] {
color: var(--success);
background: #edfdf4;
}
.progress-list, .search-results, .mini-stats, .card-list, .list-stack, .related-search {
display: grid;
gap: 8px;
}
.progress-item, .mini-stat, .card, .list-item, .result-card, .detail-card {
padding: 12px;
border: 1px solid var(--border);
border-radius: 14px;
background: rgba(255, 255, 255, 0.88);
}
.progress-item {
display: grid;
grid-template-columns: 24px 1fr;
gap: 8px;
align-items: start;
}
.progress-index {
width: 24px;
height: 24px;
display: grid;
place-items: center;
border-radius: 999px;
background: var(--primary-soft);
color: var(--primary);
font-size: 11px;
font-weight: 700;
}
.mini-stat {
display: flex;
align-items: center;
gap: 10px;
padding: 10px 12px;
}
.ms-icon {
width: 32px;
height: 32px;
display: grid;
place-items: center;
border-radius: 10px;
font-size: 15px;
background: color-mix(in srgb, var(--stat-color) 14%, transparent);
color: var(--stat-color);
flex-shrink: 0;
}
.ms-body strong {
display: block;
font-size: 16px;
line-height: 1.2;
}
.ms-body p {
margin: 0;
font-size: 11px;
color: var(--muted);
}
.mini-stat strong, .card h4, .list-item strong, .result-card strong {
display: block;
margin-bottom: 4px;
}
.card { cursor: pointer; }
.card:hover, .result-card:hover, .list-item:hover {
border-color: rgba(120, 132, 255, 0.34);
}
.content-grid { grid-template-columns: repeat(2, minmax(0, 1fr)); }
/* ── Meeting card ── */
.meeting-card {
display: flex;
gap: 10px;
padding: 12px;
border: 1px solid var(--border);
border-radius: 14px;
background: rgba(255, 255, 255, 0.88);
cursor: pointer;
transition: 0.2s ease;
}
.meeting-card:hover {
border-color: rgba(120, 132, 255, 0.34);
}
.mc-date {
flex-shrink: 0;
width: 44px;
height: 44px;
display: grid;
place-items: center;
border-radius: 10px;
background: var(--primary-soft);
color: var(--primary);
font-size: 11px;
font-weight: 700;
text-align: center;
line-height: 1.2;
}
.mc-body h4 {
margin: 0 0 4px;
font-size: 13px;
}
.mc-body p {
margin: 0;
font-size: 12px;
color: var(--muted);
display: -webkit-box;
-webkit-line-clamp: 2;
-webkit-box-orient: vertical;
overflow: hidden;
}
/* ── List item with priority dot ── */
.list-item {
display: flex;
gap: 10px;
padding: 12px;
border: 1px solid var(--border);
border-radius: 14px;
background: rgba(255, 255, 255, 0.88);
}
.li-priority {
flex-shrink: 0;
width: 4px;
border-radius: 2px;
background: var(--pri-color);
}
.li-body {
flex: 1;
min-width: 0;
}
.li-body strong {
display: block;
margin-bottom: 2px;
}
.li-body p {
margin: 0 0 6px;
font-size: 12px;
color: var(--muted);
}
/* ── Metric card ── */
.metric-card {
padding: 12px;
border: 1px solid var(--border);
border-radius: 14px;
background: rgba(255, 255, 255, 0.88);
}
.mc-head {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 2px;
}
.mc-head strong {
display: block;
}
.mc-value {
font-size: 16px;
font-weight: 700;
color: var(--primary);
}
.metric-card p {
margin: 0 0 8px;
font-size: 12px;
color: var(--muted);
}
.mc-bar-track {
height: 4px;
border-radius: 2px;
background: rgba(212, 221, 247, 0.5);
margin-bottom: 8px;
overflow: hidden;
}
.mc-bar-fill {
height: 100%;
border-radius: 2px;
background: linear-gradient(90deg, var(--primary), var(--primary-2));
transition: width 0.4s ease;
}
/* ── Series card ── */
.series-card {
display: flex;
gap: 10px;
align-items: center;
padding: 12px;
border: 1px solid var(--border);
border-radius: 14px;
background: rgba(255, 255, 255, 0.88);
}
.sc-count {
flex-shrink: 0;
width: 36px;
height: 36px;
display: grid;
place-items: center;
border-radius: 10px;
font-size: 14px;
font-weight: 700;
background: var(--primary-soft);
color: var(--primary);
}
.sc-body strong {
display: block;
margin-bottom: 2px;
}
.sc-body p {
margin: 0;
font-size: 12px;
color: var(--muted);
}
/* ── Unified Import / Search panel ── */
.unified-panel {
display: flex;
flex-direction: column;
}
.unified-tabs {
display: flex;
gap: 4px;
margin-bottom: 12px;
padding: 3px;
border-radius: 11px;
background: rgba(212, 221, 247, 0.3);
}
.unified-tab {
flex: 1;
padding: 7px 12px;
border: none;
border-radius: 8px;
font-size: 12px;
font-weight: 700;
cursor: pointer;
background: transparent;
color: var(--muted);
transition: 0.2s ease;
}
.unified-tab.active {
background: #fff;
color: var(--primary);
box-shadow: 0 2px 6px rgba(73, 81, 141, 0.1);
}
.unified-tab:hover:not(.active) {
color: var(--text);
}
.unified-pane.hidden {
display: none;
}
/* ── Result card with kind badge ── */
.result-card {
position: relative;
}
.rc-kind {
display: inline-block;
padding: 1px 7px;
border-radius: 4px;
font-size: 10px;
font-weight: 700;
text-transform: uppercase;
background: var(--primary-soft);
color: var(--primary);
margin-bottom: 4px;
}
.empty-state {
padding: 16px 14px;
text-align: center;
border: 1px dashed var(--border);
border-radius: 14px;
color: var(--muted);
}
.detail-modal {
width: min(820px, calc(100vw - 24px));
border: 1px solid var(--border);
border-radius: 20px;
padding: 0;
background: rgba(255, 255, 255, 0.97);
box-shadow: var(--shadow);
}
.detail-modal::backdrop {
background: rgba(37, 44, 78, 0.28);
}
.dialog-head {
display: flex;
justify-content: space-between;
gap: 10px;
padding: 16px 16px 6px;
}
.dialog-meta { padding: 0 16px 6px; color: var(--muted); }
.dialog-content {
margin: 0;
padding: 0 16px 16px;
white-space: pre-wrap;
font-family: "Consolas", "Courier New", monospace;
max-height: 60vh;
overflow: auto;
color: var(--muted);
}
.icon-btn {
width: 30px;
height: 30px;
border-radius: 10px;
background: rgba(242, 245, 255, 0.92);
color: var(--primary);
font-size: 20px;
}
/* ── Graph page ── */
.graph-shell {
height: 100vh;
overflow: hidden;
gap: 10px;
padding: 10px;
}
.graph-shell .sidebar {
flex-shrink: 0;
}
.graph-shell .main {
gap: 8px;
}
.graph-shell .graph-layout {
gap: 8px;
}
.graph-shell .graph-layout .panel {
padding: 10px;
}
.graph-layout {
display: grid;
grid-template-columns: 1fr 300px;
gap: 12px;
flex: 1;
min-height: 0;
}
.graph-stage-panel {
display: flex;
flex-direction: column;
padding: 0;
overflow: hidden;
}
.graph-stage {
flex: 1;
min-height: 0;
position: relative;
background:
linear-gradient(180deg, rgba(251, 253, 255, 0.96), rgba(241, 246, 255, 0.94)),
radial-gradient(circle at center, rgba(133, 196, 255, 0.08), transparent 36%);
}
#graphSvg {
width: 100%;
height: 100%;
display: block;
}
.detail-panel {
display: flex;
flex-direction: column;
gap: 8px;
overflow: hidden;
}
.detail-panel .detail-card,
.detail-panel .related-search {
overflow-y: auto;
}
.detail-card {
flex-shrink: 0;
word-break: break-all;
}
.detail-card strong {
word-break: break-word;
}
.related-search {
flex-shrink: 0;
}
.related-search .result-card {
word-break: break-all;
}
/* ── Graph toolbar ── */
.graph-toolbar { padding: 8px 12px; }
.graph-controls {
display: flex;
gap: 6px;
align-items: center;
}
.graph-controls .search-input {
flex: 1;
min-height: 30px;
padding: 6px 10px;
}
.graph-controls label.field-label {
display: flex;
align-items: center;
gap: 2px;
white-space: nowrap;
font-size: 10px;
}
.graph-controls label.field-label input {
width: 44px;
min-height: 26px;
padding: 4px 6px;
}
.graph-controls .btn {
min-height: 30px;
padding: 0 12px;
font-size: 11px;
}
.graph-toolbar-row {
display: flex;
justify-content: space-between;
align-items: center;
flex-wrap: wrap;
gap: 6px;
margin-top: 6px;
}
.graph-actions {
display: flex;
align-items: center;
gap: 8px;
font-size: 11px;
color: var(--muted);
}
.graph-type-filter {
display: flex;
flex-wrap: wrap;
align-items: center;
gap: 4px 10px;
}
.graph-type-filter label {
display: inline-flex;
align-items: center;
gap: 3px;
font-size: 11px;
color: var(--muted);
cursor: pointer;
user-select: none;
}
.graph-type-filter label input {
margin: 0;
accent-color: var(--primary);
}
.graph-meta { font-size: 11px; color: var(--muted); }
/* ── Graph nodes & edges ── */
.graph-node { cursor: pointer; }
.graph-node circle {
stroke: rgba(255, 255, 255, 0.85);
stroke-width: 2;
transition: filter 0.15s;
}
.graph-node--meeting circle { fill: #4a90d9; }
.graph-node--episode circle { fill: #34c759; }
.graph-node--entity circle { fill: var(--accent); }
.graph-node--fact circle { fill: #ff9500; }
.graph-node:hover circle { filter: brightness(1.2); }
.graph-node text {
font-size: 11px;
fill: var(--text);
pointer-events: none;
user-select: none;
}
.graph-edge {
stroke: rgba(120, 136, 194, 0.42);
stroke-width: 1.6;
cursor: pointer;
transition: stroke 0.15s, stroke-width 0.15s;
}
.edge-wrap:hover .graph-edge {
stroke: rgba(120, 136, 194, 0.7);
stroke-width: 2;
}
.graph-edge.active {
stroke: var(--primary);
stroke-width: 2.4;
}
.edge-wrap text {
pointer-events: none;
user-select: none;
}
/* ── Legend ── */
.legend { font-size: 11px; color: var(--muted); }
.legend-dot {
display: inline-block;
width: 9px;
height: 9px;
border-radius: 50%;
margin-right: 6px;
}
.legend-dot.meeting { background: #4a90d9; }
.legend-dot.episode { background: #34c759; }
.legend-dot.entity { background: var(--accent); }
.legend-dot.fact { background: #ff9500; }
.graph-shell .sidebar {
gap: 8px;
padding: 10px;
}
.graph-shell .sidebar .legend {
display: flex;
flex-direction: column;
gap: 3px;
font-size: 11px;
padding: 0 4px;
}
.graph-shell .sidebar .legend .eyebrow {
margin-bottom: 4px;
}
/* ── Graph controls overlay ── */
.zoom-reset-btn, .pause-btn {
font-size: 11px;
min-height: 28px;
padding: 0 10px;
}
.zoom-hint {
font-size: 11px;
color: var(--muted);
padding: 4px 0;
}
/* ── Responsive ── */

/* Up to 1240px wide: every grid container collapses to a single column. */
@media (max-width: 1240px) {
  .shell,
  .graph-shell,
  .dashboard-grid,
  .content-grid,
  .graph-layout,
  .stats-grid {
    grid-template-columns: 1fr;
  }

  /* Give the sidebar a later position in the layout order. */
  .sidebar {
    order: 2;
  }

  /* Let the graph page grow with its content and scroll. */
  .graph-shell {
    height: auto;
    overflow: auto;
  }
}

/* Up to 720px wide: tighter spacing and wrapping controls. */
@media (max-width: 720px) {
  .shell,
  .graph-shell {
    padding: 10px;
    gap: 10px;
  }

  .sidebar,
  .panel {
    border-radius: 18px;
  }

  .search-box {
    grid-template-columns: 1fr;
  }

  .graph-stage {
    min-height: 250px;
  }

  .graph-controls {
    flex-wrap: wrap;
  }

  .graph-controls .search-input {
    min-width: 100%;
  }
}

View File

@ -3,11 +3,12 @@
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Neo4j Graph Explorer</title>
<link rel="stylesheet" href="/styles.css">
<title>图谱浏览 — Meeting Memory</title>
<link rel="stylesheet" href="/static_v2/styles.css">
</head>
<body>
<div class="shell graph-shell">
<!-- ====== Sidebar ====== -->
<aside class="sidebar">
<div class="brand">
<div class="brand-mark">G</div>
@ -18,25 +19,33 @@
</div>
<nav class="nav">
<a class="nav-link" href="/index.html">总览面板</a>
<a class="nav-link active" href="/graph.html">图谱浏览</a>
<a class="nav-link" href="/">总览面板</a>
<a class="nav-link active" href="/graph">图谱浏览</a>
</nav>
<div class="legend">
<p class="eyebrow" style="margin-bottom:6px">图例</p>
<p class="eyebrow">图例</p>
<span><i class="legend-dot meeting"></i>会议</span>
<span><i class="legend-dot episode"></i>片段</span>
<span><i class="legend-dot entity"></i>实体</span>
<span><i class="legend-dot fact"></i>事实</span>
<span><i class="legend-dot edge"></i>关系</span>
</div>
</aside>
<!-- ====== Main ====== -->
<main class="main">
<div class="graph-toolbar panel">
<!-- Graph Toolbar -->
<div class="panel graph-toolbar">
<form class="graph-controls" id="graphSearchForm">
<input id="graphQueryInput" type="text" placeholder="搜索节点名称或关键词…" class="search-input">
<label class="field-label">节点 <input id="graphNodeLimit" type="number" min="10" max="200" step="10" value="60"></label>
<label class="field-label">关系 <input id="graphEdgeLimit" type="number" min="10" max="300" step="10" value="120"></label>
<label class="field-label">
节点
<input id="graphNodeLimit" type="number" min="10" max="200" step="10" value="60">
</label>
<label class="field-label">
关系
<input id="graphEdgeLimit" type="number" min="10" max="300" step="10" value="120">
</label>
<button class="btn" type="submit">更新</button>
</form>
<div class="graph-toolbar-row">
@ -47,18 +56,18 @@
</div>
</div>
<!-- Graph Layout -->
<div class="graph-layout">
<!-- Graph Stage -->
<div class="panel graph-stage-panel">
<div class="graph-stage" id="graphStage">
<svg id="graphSvg" viewBox="0 0 960 640" preserveAspectRatio="xMidYMid meet"></svg>
</div>
</div>
<div class="panel detail-panel">
<div class="detail-card" id="graphDetail">
<div class="empty-state">点击节点或关系查看详情</div>
</div>
<div class="related-search" id="relatedSearch"></div>
<!-- Detail Panel -->
<div class="panel detail-panel" id="detailPanel">
<div class="empty-state">点击节点或关系查看详情</div>
</div>
</div>
</main>
@ -66,4 +75,4 @@
<script src="/graph.js"></script>
</body>
</html>
</html>

View File

@ -3,11 +3,12 @@
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Meeting Memory Console</title>
<link rel="stylesheet" href="/styles.css">
<title>会议记忆中枢 — Meeting Memory</title>
<link rel="stylesheet" href="/static_v2/styles.css">
</head>
<body>
<div class="shell">
<!-- ====== Sidebar ====== -->
<aside class="sidebar">
<div class="brand">
<div class="brand-mark">M</div>
@ -18,18 +19,20 @@
</div>
<nav class="nav">
<a class="nav-link active" href="/index.html">总览面板</a>
<a class="nav-link" href="/graph.html">图谱浏览</a>
<a class="nav-link active" href="/">总览面板</a>
<a class="nav-link" href="/graph">图谱浏览</a>
</nav>
<div class="side-card sidebar-shortcuts">
<div class="sidebar-shortcuts">
<a class="pill-link" href="#import-panel">导入会议</a>
<a class="pill-link" href="#search-panel">知识检索</a>
<a class="pill-link" href="/graph.html">图谱页</a>
<a class="pill-link" href="/graph">图谱页</a>
</div>
</aside>
<!-- ====== Main ====== -->
<main class="main">
<!-- Toolbar -->
<div class="main-toolbar">
<div>
<p class="eyebrow">Dashboard</p>
@ -40,8 +43,10 @@
</div>
</div>
<!-- Highlight Cards -->
<section class="stats-grid" id="highlightGrid"></section>
<!-- Unified Panel: Import / Search / Stats -->
<section class="panel unified-panel">
<div class="unified-tabs">
<button class="unified-tab active" data-tab="import">导入</button>
@ -49,6 +54,7 @@
<button class="unified-tab" data-tab="stats">统计</button>
</div>
<!-- Import Pane -->
<div class="unified-pane" id="unifiedImport">
<form class="import-form" id="importForm">
<fieldset id="importFieldset" class="import-fieldset">
@ -73,6 +79,7 @@
</div>
</div>
<!-- Search Pane -->
<div class="unified-pane hidden" id="unifiedSearch">
<form class="search-box" id="searchForm">
<input id="searchInput" type="text" placeholder="搜索会议主题、负责人、指标、关系事实...">
@ -83,11 +90,13 @@
</div>
</div>
<!-- Stats Pane -->
<div class="unified-pane hidden" id="unifiedStats">
<div class="mini-stats" id="statsList"></div>
</div>
</section>
<!-- Content Grid -->
<div class="content-grid">
<section class="panel" id="meeting-list">
<div class="panel-head">
@ -124,6 +133,7 @@
</main>
</div>
<!-- Meeting Detail Dialog -->
<dialog class="detail-modal" id="meetingDialog">
<div class="dialog-head">
<div>
@ -138,4 +148,4 @@
<script src="/app.js"></script>
</body>
</html>
</html>

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,186 @@
"""
Migration script: v1 (flat Entity + Fact nodes) → v2 (composite labels + direct edges).

Steps:
1. Add composite Neo4j labels to existing Entity nodes based on entity_type
2. Convert Fact nodes to RELATES_TO edges between Entity nodes
3. Log post-migration statistics and counts so the result can be verified

The steps run in this order only when the file is executed as a script.
"""
import logging
import sys
from pathlib import Path
# Put the directory two levels above this file on the import path so the
# `meeting_memory` import below resolves when the script is run directly.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
# NOTE(review): `_canonical_entity_type` does not appear to be used in this
# script — confirm before removing.
from meeting_memory.graph_store import graph_store, _canonical_entity_type, _EntityType
# Configure root logging at INFO with timestamp and level in each line.
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s')
logger = logging.getLogger('migrate')
def get_type_label_map() -> dict[str, str]:
    """Build the lookup from canonical ``entity_type`` value to Neo4j label.

    Returns:
        A dict keyed by the ``.value`` of each migratable ``_EntityType``
        member, mapped to the label that matching ``Entity`` nodes receive.
    """
    members_with_labels = (
        (_EntityType.DEPARTMENT, 'Department'),
        (_EntityType.PROJECT, 'Project'),
        (_EntityType.METRIC, 'Metric'),
        (_EntityType.PERSON, 'Person'),
        (_EntityType.SYSTEM, 'System'),
        (_EntityType.DOCUMENT, 'Document'),
    )
    return {member.value: label for member, label in members_with_labels}
def _count_entities_of_type(entity_type: str) -> int:
    """Return how many ``Entity`` nodes carry exactly this ``entity_type``."""
    rows = graph_store.run_query(
        'MATCH (e:Entity {entity_type: $etype}) RETURN count(e) AS cnt',
        etype=entity_type,
    )
    return rows[0]['cnt'] if rows else 0


def _redirect_alias(alias: str, canonical: str, label: str) -> int:
    """Rewrite one legacy ``entity_type`` spelling to its canonical value.

    Every ``Entity`` whose ``entity_type`` equals ``alias`` gets
    ``entity_type = canonical`` and the additional Neo4j label ``label``.

    Returns:
        The number of entities that matched ``alias`` (0 means nothing was
        written).
    """
    count = _count_entities_of_type(alias)
    if count == 0:
        return 0
    # Labels cannot be passed as Cypher parameters, hence the f-string; the
    # label always comes from the fixed alias table in the caller.
    graph_store.run_query(
        f'MATCH (e:Entity {{entity_type: $etype}}) '
        f'SET e.entity_type = $canonical, e:{label}',
        etype=alias, canonical=canonical,
    )
    logger.info(' Redirected %d entities from entity_type=%s -> %s', count, alias, label)
    return count


def step1_add_composite_labels():
    """Add composite labels (e.g., :Department) to existing Entity nodes.

    First labels every entity whose ``entity_type`` is already canonical,
    then folds legacy spellings into their canonical type and label. The
    alias table covers the Department and Metric spellings handled before,
    plus lower-case ``organization`` and ``项目`` (Project), so that it agrees
    with the project's entity-type alias normalization.
    """
    total = 0
    for canonical_type, label in get_type_label_map().items():
        count = _count_entities_of_type(canonical_type)
        if count == 0:
            logger.info(' No Entity with entity_type=%s to migrate', canonical_type)
            continue
        # Label comes from the fixed map above, never from user input.
        graph_store.run_query(
            f'MATCH (e:Entity) WHERE e.entity_type = $etype SET e:{label}',
            etype=canonical_type,
        )
        logger.info(' Added :%s label to %d Entity nodes', label, count)
        total += count

    # (legacy spellings, canonical enum member, Neo4j label)
    alias_groups = (
        (('组织', 'Organization', 'organization', '部门'), _EntityType.DEPARTMENT, 'Department'),
        (('指标', 'kpi', 'KPI'), _EntityType.METRIC, 'Metric'),
        (('项目',), _EntityType.PROJECT, 'Project'),
    )
    for aliases, member, label in alias_groups:
        for alias in aliases:
            total += _redirect_alias(alias, member.value, label)

    logger.info('Step 1 done: %d entities got composite labels', total)
def step2_convert_facts_to_edges():
    """Convert existing Fact nodes to RELATES_TO edges, then remove Fact nodes.

    Reads every ``(Entity)-[:FACT_SOURCE]->(Fact)-[:FACT_TARGET]->(Entity)``
    path, merges a ``RELATES_TO`` edge between the two entities (matched by
    name) carrying the fact's properties, and finally deletes all ``Fact``
    nodes together with their relationships.

    Rows without a source or target name cannot be converted; they are
    counted and reported with a warning because their Fact nodes are still
    removed by the final delete.
    """
    # Guard against a falsy result the same way step 1 does.
    facts = graph_store.run_query('''
        MATCH (s:Entity)-[:FACT_SOURCE]->(f:Fact)-[:FACT_TARGET]->(t:Entity)
        RETURN s.name AS source, t.name AS target,
               f.predicate AS relation_type,
               f.fact AS fact,
               f.qualifiers AS qualifiers,
               f.evidence AS evidence,
               f.confidence AS confidence,
               f.valid_at AS valid_at,
               f.invalid_at AS invalid_at,
               f.meeting_id AS meeting_id,
               f.meeting_date AS meeting_date,
               f.fact_embedding AS fact_embedding
    ''') or []
    logger.info('Found %d Fact nodes to convert', len(facts))

    converted = 0
    skipped = 0
    for f in facts:
        source = f.get('source', '')
        target = f.get('target', '')
        # Facts without a predicate fall back to the generic relation name.
        rtype = f.get('relation_type', '') or '关联'
        if not source or not target:
            skipped += 1
            continue
        fact_embedding = f.get('fact_embedding') or []
        # NOTE(review): MERGE keys on (source, target, name), so several facts
        # with the same predicate between the same pair collapse into one
        # edge whose properties come from the last row — confirm intended.
        graph_store.run_query('''
            MATCH (s:Entity {name: $source})
            MATCH (t:Entity {name: $target})
            MERGE (s)-[r:RELATES_TO {name: $rtype}]->(t)
            SET r.fact = $fact,
                r.evidence = $evidence,
                r.qualifiers = $qualifiers,
                r.confidence = $confidence,
                r.valid_at = $valid_at,
                r.invalid_at = $invalid_at,
                r.meeting_id = $meeting_id,
                r.meeting_date = $meeting_date,
                r.updated_at = datetime()
        ''',
            source=source,
            target=target,
            rtype=rtype,
            fact=f.get('fact', ''),
            evidence=f.get('evidence', ''),
            qualifiers=f.get('qualifiers', []),
            confidence=f.get('confidence', 0.0),
            valid_at=f.get('valid_at', ''),
            invalid_at=f.get('invalid_at', ''),
            meeting_id=f.get('meeting_id', ''),
            meeting_date=f.get('meeting_date', ''),
        )
        # The embedding is written separately and only when one exists, so an
        # edge never gets an empty embedding property.
        if fact_embedding:
            graph_store.run_query('''
                MATCH (s:Entity {name: $source})-[r:RELATES_TO {name: $rtype}]->(t:Entity {name: $target})
                SET r.fact_embedding = $embedding
            ''', source=source, target=target, rtype=rtype, embedding=fact_embedding)
        converted += 1

    if skipped:
        logger.warning(
            ' Skipped %d facts with a missing source or target name; '
            'their Fact nodes are removed without a replacement edge',
            skipped,
        )

    # Remove every Fact node along with all of its relationships.
    graph_store.run_query('MATCH (f:Fact) DETACH DELETE f')
    logger.info('Step 2 done: converted %d facts to edges, removed Fact nodes', converted)
def verify():
    """Log summary statistics describing the graph after migration.

    Reports the store's stats, per-type entity counts, per-kind graph
    counts, the node count for each composite label that has at least one
    node, and the number of RELATES_TO edges. Nothing is asserted or
    returned.
    """
    logger.info('Final stats: %s', graph_store.get_stats())

    type_counts = [
        (entry['entity_type'], entry['count'])
        for entry in graph_store.get_entity_types()
    ]
    logger.info('Entity types: %s', type_counts)

    kind_counts = [
        (entry['kind'], entry['count'])
        for entry in graph_store.get_graph_kinds()
    ]
    logger.info('Graph kinds: %s', kind_counts)

    # Per-label node counts; labels with no nodes are not reported.
    composite_labels = ('Department', 'Project', 'Metric', 'Person', 'System', 'Document')
    for label in composite_labels:
        result = graph_store.run_query(f'MATCH (n:{label}) RETURN count(n) AS cnt')
        node_total = result[0]['cnt'] if result else 0
        if not node_total:
            continue
        logger.info(' :%s nodes: %d', label, node_total)

    edge_rows = graph_store.run_query('MATCH ()-[r:RELATES_TO]->() RETURN count(r) AS cnt')
    edge_total = edge_rows[0]['cnt'] if edge_rows else 0
    logger.info(' RELATES_TO edges: %d', edge_total)
if __name__ == '__main__':
    # Script entry point: run the migration stages in order, or exit with
    # status 1 when the graph store reports it is not enabled.
    if graph_store.enabled:
        logger.info('Starting v1→v2 migration...')
        for stage in (step1_add_composite_labels, step2_convert_facts_to_edges, verify):
            stage()
        logger.info('Migration complete')
    else:
        logger.error('Neo4j is not available')
        sys.exit(1)