重构Neo4j保存数据结构

web更新
测试版本1
2026-06-15 13:13:20 +08:00 · 2026-06-15 09:17:21 +08:00 · 2026-06-12 10:57:32 +08:00
17 changed files with 3071 additions and 1689 deletions
--- a/meeting_memory/extractor.py
+++ b/meeting_memory/extractor.py
@ -2,12 +2,18 @@ import json
 import logging
 import re
 import sys
-from typing import List, Optional
+from enum import Enum
+from typing import Any, List, Optional

 from openai import OpenAI
 from pydantic import BaseModel, Field

 from meeting_memory.config import config
+from meeting_memory.prompts import extract_entities as prompt_extract_entities
+from meeting_memory.prompts import extract_facts as prompt_extract_facts
+from meeting_memory.prompts import resolve_entities as prompt_dedupe_nodes
+from meeting_memory.prompts import resolve_facts as prompt_dedupe_edges
+from meeting_memory.prompts import summarize_entity as prompt_summarize

 logger = logging.getLogger(__name__)

@ -17,52 +23,96 @@ client = OpenAI(
 )


+class EntityType(str, Enum):
+    """Closed set of entity type labels recognised by this module.
+
+    Members mix in ``str``, so each member compares equal to its string value.
+    ``UNKNOWN`` is the fallback returned by ``_canonical_entity_type`` and is
+    the only value ``_neo4j_labels`` does not emit as a label.
+    """
+    DEPARTMENT = 'Department'
+    PROJECT = 'Project'
+    METRIC = 'Metric'
+    PERSON = 'Person'
+    SYSTEM = 'System'
+    DOCUMENT = 'Document'
+    # NOTE(review): lowercase value, unlike the capitalised siblings — confirm this is intentional.
+    PARTICIPANT = 'participant'
+    UNKNOWN = 'Unknown'
+
+
+# Normalization map: legacy LLM output → canonical type.
+# Consulted by `_canonical_entity_type` before it falls back to matching the
+# `EntityType` values; every target below is itself an `EntityType` value.
+_ENTITY_TYPE_ALIASES = {
+    '组织': 'Department',
+    'organization': 'Department',
+    '部门': 'Department',
+    '指标': 'Metric',
+    'kpi': 'Metric',
+    '项目': 'Project',
+}
+
+
+def _canonical_entity_type(raw: str) -> str:
+    """Map a raw entity-type string to a canonical ``EntityType`` value.
+
+    Resolution order: the alias table (exact key first, then the lower-cased
+    key), followed by a case-insensitive match against the ``EntityType``
+    values.
+
+    Args:
+        raw: Entity type as produced upstream. Surrounding whitespace is
+            ignored. Non-string input (for example ``None``) is treated as
+            unrecognised instead of raising.
+
+    Returns:
+        The canonical type string, or ``EntityType.UNKNOWN.value`` when
+        nothing matches.
+    """
+    if not isinstance(raw, str):
+        return EntityType.UNKNOWN.value
+    normalized = raw.strip()
+    lowered = normalized.lower()
+    # The Latin alias keys are stored lower-case ('kpi', 'organization'), so
+    # the lookup must also try the lower-cased form; otherwise 'KPI' or
+    # 'Organization' would fall through to Unknown.
+    for key in (normalized, lowered):
+        if key in _ENTITY_TYPE_ALIASES:
+            return _ENTITY_TYPE_ALIASES[key]
+    for member in EntityType:
+        if member.value.lower() == lowered:
+            return member.value
+    return EntityType.UNKNOWN.value
+
+
+def _neo4j_labels(entity_type: str) -> list[str]:
+    """Return the label list for an entity type.
+
+    The list always starts with ``'Entity'``; the canonical type (as resolved
+    by ``_canonical_entity_type``) is appended unless it is Unknown.
+    """
+    canonical = _canonical_entity_type(entity_type)
+    labels = ['Entity']
+    if canonical != EntityType.UNKNOWN.value:
+        labels.append(canonical)
+    return labels
+
+
 class Entity(BaseModel):
    name: str
-    entity_type: str
-    description: str = ""
+    entity_type: str = EntityType.UNKNOWN.value
+    description: str = ''


 class Relation(BaseModel):
-    subject: str
-    subject_type: str
-    predicate: str
-    object: str
-    object_type: str
-    description: str = ""
-    fact: str = ""
+    source_entity_name: str
+    target_entity_name: str
+    relation_type: str
+    fact: str = ''
+    valid_at: str = ''
+    invalid_at: str = ''
+    evidence: str = ''
    qualifiers: List[str] = Field(default_factory=list)
-    evidence: str = ""
    confidence: float = 0.0
-    valid_at: str = ""
-    invalid_at: str = ""


 class ActionItem(BaseModel):
    task: str
-    assignee: str = ""
-    deadline: str = ""
-    status: str = "待办"
-    priority: str = "中"
+    assignee: str = ''
+    deadline: str = ''
+    status: str = '待办'
+    priority: str = '中'


 class Decision(BaseModel):
    content: str
-    proposer: str = ""
-    status: str = "已决"
+    proposer: str = ''
+    status: str = '已决'


 class MeetingMetric(BaseModel):
    metric_name: str
    value: str
-    target: str = ""
-    owner: str = ""
-    trend: str = ""
+    target: str = ''
+    owner: str = ''
+    trend: str = ''
+    unit: str = ''
+
+
+class DepartmentInfo(BaseModel):
+    """One department entry of a meeting extraction, with the projects listed under it."""
+    name: str
+    description: str = ''
+    # Plain strings; `_normalize_departments` builds this list via `_as_str_list`.
+    projects: List[str] = Field(default_factory=list)


 class MeetingExtraction(BaseModel):
    title: str
-    date: str = ""
+    date: str = ''
    participants: List[str] = Field(default_factory=list)
    agenda: List[str] = Field(default_factory=list)
    entities: List[Entity] = Field(default_factory=list)
@ -70,15 +120,282 @@ class MeetingExtraction(BaseModel):
    action_items: List[ActionItem] = Field(default_factory=list)
    decisions: List[Decision] = Field(default_factory=list)
    metrics: List[MeetingMetric] = Field(default_factory=list)
-    summary: str = ""
+    departments: List[DepartmentInfo] = Field(default_factory=list)
+    summary: str = ''


+def _call_llm(
+    messages: list[dict],
+    response_model: type | None = None,
+    stream: bool = False,
+    max_tokens: int | None = None,
+) -> Any:
+    """Send a chat-completion request and return the reply text.
+
+    Args:
+        messages: Chat messages passed through to the client unchanged.
+        response_model: Used only as a flag: when not ``None`` the request
+            asks for ``json_object`` output. The class itself is not applied
+            to the reply.
+        stream: When true, the reply is streamed, echoed to stdout chunk by
+            chunk, and the concatenated text is returned.
+        max_tokens: Per-call limit; a falsy value (``None`` or ``0``) falls
+            back to ``config.llm.max_tokens``.
+
+    Returns:
+        The reply content as a string.
+
+    Raises:
+        ValueError: In non-streaming mode, when the reply content is ``None``.
+    """
+    kwargs = {
+        'model': config.llm.model,
+        'messages': messages,
+        'max_tokens': max_tokens or config.llm.max_tokens,
+        'temperature': config.llm.temperature,
+    }
+    if response_model is not None:
+        kwargs['response_format'] = {'type': 'json_object'}
+
+    if not stream:
+        response = client.chat.completions.create(**kwargs)
+        content = response.choices[0].message.content
+        if content is None:
+            raise ValueError('LLM returned empty response')
+        return content
+
+    # Streaming path: the flag is set once, here, right before the request.
+    kwargs['stream'] = True
+    response = client.chat.completions.create(**kwargs)
+    chunks: List[str] = []
+    print('\n[LLM] 开始流式输出：')
+    for event in response:
+        # Some events carry no choices or an empty delta; skip them.
+        if not event.choices:
+            continue
+        delta = event.choices[0].delta.content
+        if not delta:
+            continue
+        chunks.append(delta)
+        sys.stdout.write(delta)
+        sys.stdout.flush()
+    print('\n[LLM] 输出结束')
+    return ''.join(chunks)
+
+
+def _try_parse_json(content: str) -> dict | list:
+    """Parse ``content`` as JSON, retrying on an embedded object/array span.
+
+    Raises:
+        json.JSONDecodeError: The error from the first parse attempt, when no
+            brace/bracket span is found or that span also fails to parse.
+    """
+    try:
+        return json.loads(content)
+    except json.JSONDecodeError:
+        logger.warning('JSON parsing failed; trying to repair extracted block')
+        # Greedy match with DOTALL: from an opening '{' to the last '}' (or
+        # '[' to the last ']'), spanning newlines.
+        match = re.search(r'\{.*\}|\[.*\]', content, re.DOTALL)
+        if match:
+            try:
+                return json.loads(match.group())
+            except json.JSONDecodeError as exc:
+                logger.error('Repaired JSON still failed to parse: %s', exc)
+        # Bare raise: propagate the decode error from the first attempt.
+        raise
+
+
+def _normalize_string(name: str) -> str:
+    """Trim, lower-case, and collapse each whitespace run in ``name`` to one space."""
+    return re.sub(r'[\s]+', ' ', name.strip().lower())
+
+
+def _format_episodes_for_context(episodes: list[dict] | None) -> str:
+    """Render episodes as newline-joined ``[Episode i] <content>`` lines.
+
+    Numbering starts at 0; a missing ``content`` key renders as an empty
+    string. Returns ``''`` when ``episodes`` is ``None`` or empty.
+    """
+    if not episodes:
+        return ''
+    return '\n'.join(
+        f'[Episode {i}] {ep.get("content", "")}'
+        for i, ep in enumerate(episodes)
+    )
+
+
+# ===== Step 1: 实体节点抽取 =====
+
+def extract_entities_from_text(
+    text: str,
+    previous_episodes: list[dict] | None = None,
+    entity_types: list[dict] | None = None,
+    stream: bool = False,
+) -> list[dict]:
+    """Ask the LLM for the entities mentioned in ``text``.
+
+    Args:
+        text: Episode content to extract from.
+        previous_episodes: Earlier episodes passed to the prompt as context.
+        entity_types: Entity type descriptors passed to the prompt.
+        stream: Forwarded to ``_call_llm``.
+
+    Returns:
+        A list of dicts with keys ``name``, ``entity_type``, ``description``
+        and ``evidence``. Items that are not dicts or lack a non-blank string
+        name are dropped. An unparseable or unexpectedly shaped reply yields
+        ``[]``.
+    """
+    context = {
+        'episode_content': text,
+        'previous_episodes': previous_episodes or [],
+        'entity_types': entity_types or [],
+    }
+    messages = prompt_extract_entities(context)
+    content = _call_llm(messages, stream=stream)
+    try:
+        data = _try_parse_json(content)
+    except Exception as exc:
+        logger.error('Failed to parse entity extraction result: %s', exc)
+        return []
+    # The reply may be a bare list or an object wrapping the list.
+    if isinstance(data, dict):
+        data = data.get('entities', data.get('extracted_entities', []))
+    if not isinstance(data, list):
+        return []
+    result = []
+    for item in data:
+        if not isinstance(item, dict):
+            continue
+        name = item.get('name')
+        # The LLM may emit null or a non-string name; skip such items rather
+        # than crashing on .strip().
+        if not isinstance(name, str) or not name.strip():
+            continue
+        entity_type = item.get('entity_type')
+        result.append({
+            'name': name.strip(),
+            # An explicit null is treated like a missing key.
+            'entity_type': 'Entity' if entity_type is None else entity_type,
+            'description': item.get('description') or '',
+            'evidence': item.get('evidence') or '',
+        })
+    return result
+
+
+# ===== Step 2: 实体去重 =====
+
+def resolve_entities_against_graph(
+    extracted: list[dict],
+    existing: list[dict],
+    episode_content: str = '',
+) -> list[dict]:
+    """Drop extracted entities that the LLM marks as duplicates of existing ones.
+
+    Args:
+        extracted: Newly extracted entity dicts; the LLM refers to them by
+            their position in this list.
+        existing: Candidate entities, looked up by their ``candidate_id`` key.
+        episode_content: Source text passed to the prompt as context.
+
+    Returns:
+        The entities from ``extracted`` that were not resolved to a candidate.
+        ``extracted`` is returned as-is when ``existing`` is empty or the LLM
+        reply cannot be parsed.
+
+    Side effects:
+        Each entity judged a duplicate is mutated in place: ``_resolved_to``
+        is set to the matching candidate and ``name`` is replaced by the
+        resolution's ``name`` when one is given. Callers holding the original
+        dicts can read the mapping from there.
+    """
+    if not existing:
+        return extracted
+
+    def _as_index(value: Any) -> int | None:
+        """Coerce an LLM-supplied id to int; None when it is not an integral number."""
+        if isinstance(value, float) and not value.is_integer():
+            return None
+        try:
+            return int(value)
+        except (TypeError, ValueError):
+            return None
+
+    context = {
+        'extracted_entities': extracted,
+        'existing_entities': existing,
+        'episode_content': episode_content,
+    }
+    messages = prompt_dedupe_nodes(context)
+    content = _call_llm(messages)
+    try:
+        data = _try_parse_json(content)
+    except Exception as exc:
+        logger.warning('LLM dedup failed, keeping all extracted: %s', exc)
+        return extracted
+
+    if isinstance(data, dict):
+        data = data.get('entity_resolutions', data.get('resolutions', []))
+    if not isinstance(data, list):
+        data = []
+
+    extracted_by_id = dict(enumerate(extracted))
+    # NOTE(review): assumes every candidate carries an integer 'candidate_id' —
+    # confirm against the caller that builds `existing`.
+    existing_by_id = {c.get('candidate_id'): c for c in existing}
+
+    for resolution in data:
+        if not isinstance(resolution, dict):
+            continue
+        # Ids come from LLM output and may be null or numeric strings; coerce
+        # them instead of letting the `>= 0` comparison raise TypeError.
+        rid = _as_index(resolution.get('id'))
+        dup_id = _as_index(resolution.get('duplicate_candidate_id', -1))
+        if rid is None or rid not in extracted_by_id:
+            continue
+        if dup_id is None or dup_id < 0 or dup_id not in existing_by_id:
+            continue
+        entity = extracted_by_id[rid]
+        entity['_resolved_to'] = existing_by_id[dup_id]
+        entity['name'] = resolution.get('name', entity['name'])
+
+    return [e for e in extracted_by_id.values() if '_resolved_to' not in e]
+
+
+# ===== Step 3: 事实关系抽取 =====
+
+def extract_facts_from_text(
+    text: str,
+    entities: list[dict],
+    reference_time: str = '',
+    previous_episodes: list[dict] | None = None,
+    stream: bool = False,
+) -> list[dict]:
+    """Ask the LLM for fact edges between the given entities.
+
+    Args:
+        text: Episode content to extract from.
+        entities: Entity dicts; only their ``name`` is used for validation.
+        reference_time: Passed to the prompt unchanged.
+        previous_episodes: Earlier episodes passed to the prompt as context.
+        stream: Forwarded to ``_call_llm``.
+
+    Returns:
+        A list of edge dicts. An edge is kept only when both endpoint names
+        match a known entity (after ``_normalize_string``) and the endpoints
+        differ. Fewer than two entities, or an unparseable reply, yields
+        ``[]``.
+    """
+    if len(entities) < 2:
+        return []
+
+    def _norm(value: Any) -> str:
+        """Normalize a name; null or non-string values become '' instead of raising."""
+        return _normalize_string(value) if isinstance(value, str) else ''
+
+    context = {
+        'episode_content': text,
+        'entities': entities,
+        'reference_time': reference_time,
+        'previous_episodes': previous_episodes or [],
+    }
+    messages = prompt_extract_facts(context)
+    content = _call_llm(messages, stream=stream)
+    try:
+        data = _try_parse_json(content)
+    except Exception as exc:
+        logger.error('Failed to parse fact extraction result: %s', exc)
+        return []
+
+    # The reply may be a bare list or an object wrapping the list.
+    if isinstance(data, dict):
+        data = data.get('edges', data.get('facts', data.get('relations', [])))
+
+    if not isinstance(data, list):
+        return []
+
+    entity_names = {_norm(e.get('name')) for e in entities}
+    # A blank name must never validate an edge endpoint.
+    entity_names.discard('')
+    result = []
+    for item in data:
+        if not isinstance(item, dict):
+            continue
+        src = _norm(item.get('source_entity_name'))
+        tgt = _norm(item.get('target_entity_name'))
+        if src not in entity_names or tgt not in entity_names:
+            continue
+        if src == tgt:
+            continue
+        result.append({
+            'source_entity_name': item['source_entity_name'],
+            'target_entity_name': item['target_entity_name'],
+            # `or` also covers explicit nulls, which .get() defaults miss.
+            'relation_type': item.get('relation_type') or '关联',
+            'fact': item.get('fact') or '',
+            'valid_at': item.get('valid_at') or '',
+            'invalid_at': item.get('invalid_at') or '',
+            'evidence': item.get('evidence') or '',
+            'qualifiers': item.get('qualifiers') or [],
+            'confidence': item.get('confidence') or 0.0,
+        })
+    return result
+
+
+# ===== Step 4: 事实去重/矛盾检测 =====
+
+def resolve_facts_against_graph(
+    new_fact: dict,
+    existing_facts: list[dict],
+    invalidation_candidates: list[dict],
+) -> dict:
+    """Classify ``new_fact`` as duplicate and/or contradicted via the LLM.
+
+    Args:
+        new_fact: The candidate fact; only its ``fact`` text is sent to the
+            prompt, and the dict itself is returned under ``resolved``.
+        existing_facts: Facts the new one may duplicate.
+        invalidation_candidates: Facts the new one may contradict.
+
+    Returns:
+        A dict with ``is_duplicate``, ``is_contradicted``, ``resolved``,
+        ``duplicate_facts`` and ``contradicted_facts``. When there is nothing
+        to compare against, or the reply is unusable, the fact is reported as
+        new with both lists empty.
+    """
+    def _as_new() -> dict:
+        """Result for a fact that is neither a duplicate nor contradicted."""
+        return {
+            'is_duplicate': False,
+            'is_contradicted': False,
+            'resolved': new_fact,
+            'duplicate_facts': [],
+            'contradicted_facts': [],
+        }
+
+    def _as_list(value: Any) -> list:
+        """Treat null or non-list LLM output as an empty list."""
+        return value if isinstance(value, list) else []
+
+    # Skip the LLM only when there is nothing at all to compare against.
+    # Invalidation candidates alone still need the contradiction check.
+    if not existing_facts and not invalidation_candidates:
+        return _as_new()
+
+    context = {
+        'new_fact': new_fact.get('fact', ''),
+        'existing_facts': existing_facts,
+        'invalidation_candidates': invalidation_candidates,
+    }
+    messages = prompt_dedupe_edges(context)
+    content = _call_llm(messages)
+    try:
+        data = _try_parse_json(content)
+    except Exception as exc:
+        logger.warning('Fact dedup failed, treating as new: %s', exc)
+        return _as_new()
+
+    if not isinstance(data, dict):
+        return _as_new()
+
+    duplicate_facts = _as_list(data.get('duplicate_facts'))
+    contradicted_facts = _as_list(data.get('contradicted_facts'))
+    return {
+        'is_duplicate': len(duplicate_facts) > 0,
+        'is_contradicted': len(contradicted_facts) > 0,
+        'resolved': new_fact,
+        'duplicate_facts': duplicate_facts,
+        'contradicted_facts': contradicted_facts,
+    }
+
+
+# ===== Step 5: 实体摘要 =====
+
+def extract_entity_summary(
+    entity_name: str,
+    episodes: list[str],
+    existing_summary: str = '',
+    previous_episodes: list[dict] | None = None,
+) -> str:
+    """Ask the LLM for a summary of one entity.
+
+    Args:
+        entity_name: Name of the entity to summarise.
+        episodes: Episode texts passed to the prompt.
+        existing_summary: Current summary, passed to the prompt.
+        previous_episodes: Earlier episodes passed to the prompt as context.
+
+    Returns:
+        The ``summary`` string from the reply, or ``''`` when the reply cannot
+        be parsed, is not a JSON object, or carries no string summary.
+    """
+    context = {
+        'entity_name': entity_name,
+        'episodes': episodes,
+        'existing_summary': existing_summary,
+        'previous_episodes': previous_episodes or [],
+    }
+    messages = prompt_summarize(context)
+    content = _call_llm(messages, max_tokens=1024)
+    try:
+        data = _try_parse_json(content)
+    except Exception:
+        # exc_info keeps the cause in the log; the message text is unchanged.
+        logger.warning('Failed to parse summary, using empty', exc_info=True)
+        return ''
+    if not isinstance(data, dict):
+        return ''
+    summary = data.get('summary')
+    # A null or non-string summary must not leak through the `-> str` contract.
+    return summary if isinstance(summary, str) else ''
+
+
+# ===== 统一入口（兼容原有接口） =====
+
 EXTRACTION_SYSTEM_PROMPT = """
 你是一个专业的会议知识抽取助手。你的任务是从中文会议记录中抽取结构化事实，尤其要抽出更细粒度、更有语义深度的关系。

 输出要求：
 1. 只输出一个 JSON 对象，不要输出解释文字。
-2. 关系抽取不要停留在“部门汇报了工作”这种浅层描述，要尽可能向下细化到：
+2. 关系抽取不要停留在"部门汇报了工作"这种浅层描述，要尽可能向下细化到：
   - 责任归属
   - 目标值 / 当前值 / 趋势
   - 约束条件
@ -99,51 +416,9 @@ EXTRACTION_SYSTEM_PROMPT = """
 """


-def _call_llm(system: str, user: str, stream: bool = False) -> str:
-    if not stream:
-        response = client.chat.completions.create(
-            model=config.llm.model,
-            messages=[
-                {"role": "system", "content": system},
-                {"role": "user", "content": user},
-            ],
-            max_tokens=config.llm.max_tokens,
-            temperature=config.llm.temperature,
-        )
-        content = response.choices[0].message.content
-        if content is None:
-            raise ValueError("LLM returned empty response")
-        return content
-
-    response = client.chat.completions.create(
-        model=config.llm.model,
-        messages=[
-            {"role": "system", "content": system},
-            {"role": "user", "content": user},
-        ],
-        max_tokens=config.llm.max_tokens,
-        temperature=config.llm.temperature,
-        stream=True,
-    )
-
-    chunks: List[str] = []
-    print("\n[LLM] 开始抽取，流式输出中：")
-    for event in response:
-        if not event.choices:
-            continue
-        delta = event.choices[0].delta.content
-        if not delta:
-            continue
-        chunks.append(delta)
-        sys.stdout.write(delta)
-        sys.stdout.flush()
-    print("\n[LLM] 抽取输出结束")
-    return "".join(chunks)
-
-
 def extract_meeting_info(text: str, stream: bool = False) -> MeetingExtraction:
    user_prompt = f"""
-请从下面会议记录中提取结构化信息，并重点做“深层关系抽取”。
+请从下面会议记录中提取结构化信息，并重点做"深层关系抽取"和"层次结构识别"。

 输出 JSON 字段：
 - title
@ -151,28 +426,32 @@ def extract_meeting_info(text: str, stream: bool = False) -> MeetingExtraction:
 - participants
 - agenda
 - entities: name, entity_type, description
+  - entity_type 请使用: Department（部门）、Project（项目）、Metric（指标）、Person（人物）、System（系统）、Document（文档）
 - relations:
-  - subject
-  - subject_type
-  - predicate
-  - object
-  - object_type
-  - description
-  - fact
-  - qualifiers
-  - evidence
-  - confidence
-  - valid_at
-  - invalid_at
+  - source_entity_name: 源实体名称
+  - target_entity_name: 目标实体名称
+  - relation_type: 关系类型（如 HAS_PROJECT、HAS_METRIC、负责、汇报、目标值、推进、依赖）
+  - fact: 一句自然语言事实描述
+  - valid_at（可选）
+  - invalid_at（可选）
+  - evidence: 原文证据
+  - qualifiers: 限定条件列表
+  - confidence: 0~1
 - action_items: task, assignee, deadline, status, priority
 - decisions: content, proposer, status
- metrics: metric_name, value, target, owner, trend
+- metrics: metric_name, value, target, owner, trend, unit
+- departments: [{{"name": "部门名称", "description": "", "projects": ["项目名1", "项目名2"]}}]
 - summary

+层次关系规则：
+1. Department 管辖 Project → relation_type 用 HAS_PROJECT
+2. Project 拥有 Metric → relation_type 用 HAS_METRIC
+3. 其他事实关系（负责、汇报、目标值等）直接用 relation_type 表达
+
 关系抽取规则：
-1. 不要只抽“汇报了工作”这种会议动作，要尽量继续下钻出具体事实。
-2. 如果一句话里同时包含“主体 + 指标 + 当前值 + 目标值 + 负责人 + 趋势”，应拆成多条关系或在 qualifiers 中保留这些细节。
-3. 对于“要求、部署、负责、依赖、影响、约束、目标、风险”类信息优先保留。
+1. 不要只抽"汇报了工作"这种会议动作，要尽量继续下钻出具体事实。
+2. 如果一句话里同时包含"主体 + 指标 + 当前值 + 目标值 + 负责人 + 趋势"，应拆成多条关系或在 qualifiers 中保留这些细节。
+3. 对于"要求、部署、负责、依赖、影响、约束、目标、风险"类信息优先保留。
 4. fact 必须是一句完整、自然、可检索的事实描述。
 5. qualifiers 用于补充数值、范围、状态、条件、截止时间、优先级等信息。
 6. evidence 用原文中的关键词短句，不要太长。
@ -181,54 +460,43 @@ def extract_meeting_info(text: str, stream: bool = False) -> MeetingExtraction:
 会议记录如下：
 {text}
 """
-    content = _call_llm(EXTRACTION_SYSTEM_PROMPT, user_prompt, stream=stream)
+    content = _call_llm([
+        {'role': 'system', 'content': EXTRACTION_SYSTEM_PROMPT},
+        {'role': 'user', 'content': user_prompt},
+    ], stream=stream)
    data = _try_parse_json(content)
    data = _normalize_meeting_data(data)
    return MeetingExtraction(**data)


-def _try_parse_json(content: str) -> dict:
-    try:
-        return json.loads(content)
-    except json.JSONDecodeError:
-        logger.warning("JSON parsing failed; trying to repair extracted block")
-        match = re.search(r"\{.*\}", content, re.DOTALL)
-        if match:
-            try:
-                return json.loads(match.group())
-            except json.JSONDecodeError as exc:
-                logger.error("Repaired JSON still failed to parse: %s", exc)
-        raise
-
-
 def _normalize_meeting_data(data: dict) -> dict:
    if not isinstance(data, dict):
        return {}
-
    return {
-        "title": _as_str(data.get("title")),
-        "date": _as_str(data.get("date")),
-        "participants": _as_str_list(data.get("participants")),
-        "agenda": _as_str_list(data.get("agenda")),
-        "entities": _normalize_entities(data.get("entities")),
-        "relations": _normalize_relations(data.get("relations")),
-        "action_items": _normalize_action_items(data.get("action_items")),
-        "decisions": _normalize_decisions(data.get("decisions")),
-        "metrics": _normalize_metrics(data.get("metrics")),
-        "summary": _as_str(data.get("summary")),
+        'title': _as_str(data.get('title')),
+        'date': _as_str(data.get('date')),
+        'participants': _as_str_list(data.get('participants')),
+        'agenda': _as_str_list(data.get('agenda')),
+        'entities': _normalize_entities(data.get('entities')),
+        'relations': _normalize_relations(data.get('relations')),
+        'action_items': _normalize_action_items(data.get('action_items')),
+        'decisions': _normalize_decisions(data.get('decisions')),
+        'metrics': _normalize_metrics(data.get('metrics')),
+        'departments': _normalize_departments(data.get('departments')),
+        'summary': _as_str(data.get('summary')),
    }


 def _as_str(value) -> str:
    if value is None:
-        return ""
+        return ''
    if isinstance(value, str):
        return value
    return str(value)


 def _as_float(value) -> float:
-    if value is None or value == "":
+    if value is None or value == '':
        return 0.0
    try:
        numeric = float(value)
@ -244,7 +512,7 @@ def _as_str_list(value) -> List[str]:
            key_text = _as_str(key)
            value_text = _as_str(item)
            if key_text and value_text:
-                items.append(f"{key_text}: {value_text}")
+                items.append(f'{key_text}: {value_text}')
            elif key_text:
                items.append(key_text)
            elif value_text:
@ -262,50 +530,38 @@ def _normalize_entities(value) -> List[dict]:
    for entity in value:
        if not isinstance(entity, dict):
            continue
-        items.append(
-            {
-                "name": _as_str(entity.get("name")),
-                "entity_type": _as_str(entity.get("entity_type")),
-                "description": _as_str(entity.get("description")),
-            }
-        )
+        items.append({
+            'name': _as_str(entity.get('name')),
+            'entity_type': _as_str(entity.get('entity_type')),
+            'description': _as_str(entity.get('description')),
+        })
    return items


 def _normalize_relations(value) -> List[dict]:
    if not isinstance(value, list):
        return []
-
    items = []
    for relation in value:
        if not isinstance(relation, dict):
            continue
-
-        subject = _as_str(relation.get("subject"))
-        predicate = _as_str(relation.get("predicate"))
-        obj = _as_str(relation.get("object"))
-        description = _as_str(relation.get("description"))
-        fact = _as_str(relation.get("fact"))
-
-        if not fact and subject and predicate and obj:
-            fact = f"{subject} {predicate} {obj}"
-
-        items.append(
-            {
-                "subject": subject,
-                "subject_type": _as_str(relation.get("subject_type")),
-                "predicate": predicate,
-                "object": obj,
-                "object_type": _as_str(relation.get("object_type")),
-                "description": description,
-                "fact": fact,
-                "qualifiers": _as_str_list(relation.get("qualifiers")),
-                "evidence": _as_str(relation.get("evidence")),
-                "confidence": _as_float(relation.get("confidence")),
-                "valid_at": _as_str(relation.get("valid_at")),
-                "invalid_at": _as_str(relation.get("invalid_at")),
-            }
-        )
+        source = _as_str(relation.get('source_entity_name') or relation.get('subject', ''))
+        target = _as_str(relation.get('target_entity_name') or relation.get('object', ''))
+        rtype = _as_str(relation.get('relation_type') or relation.get('predicate', ''))
+        fact = _as_str(relation.get('fact'))
+        if not fact and source and rtype and target:
+            fact = f'{source} {rtype} {target}'
+        items.append({
+            'source_entity_name': source,
+            'target_entity_name': target,
+            'relation_type': rtype,
+            'fact': fact,
+            'qualifiers': _as_str_list(relation.get('qualifiers')),
+            'evidence': _as_str(relation.get('evidence')),
+            'confidence': _as_float(relation.get('confidence')),
+            'valid_at': _as_str(relation.get('valid_at')),
+            'invalid_at': _as_str(relation.get('invalid_at')),
+        })
    return items


@ -316,15 +572,13 @@ def _normalize_action_items(value) -> List[dict]:
    for action in value:
        if not isinstance(action, dict):
            continue
-        items.append(
-            {
-                "task": _as_str(action.get("task")),
-                "assignee": _as_str(action.get("assignee")),
-                "deadline": _as_str(action.get("deadline")),
-                "status": _as_str(action.get("status")) or "待办",
-                "priority": _as_str(action.get("priority")) or "中",
-            }
-        )
+        items.append({
+            'task': _as_str(action.get('task')),
+            'assignee': _as_str(action.get('assignee')),
+            'deadline': _as_str(action.get('deadline')),
+            'status': _as_str(action.get('status')) or '待办',
+            'priority': _as_str(action.get('priority')) or '中',
+        })
    return items


@ -335,13 +589,11 @@ def _normalize_decisions(value) -> List[dict]:
    for decision in value:
        if not isinstance(decision, dict):
            continue
-        items.append(
-            {
-                "content": _as_str(decision.get("content")),
-                "proposer": _as_str(decision.get("proposer")),
-                "status": _as_str(decision.get("status")) or "已决",
-            }
-        )
+        items.append({
+            'content': _as_str(decision.get('content')),
+            'proposer': _as_str(decision.get('proposer')),
+            'status': _as_str(decision.get('status')) or '已决',
+        })
    return items


@ -352,13 +604,30 @@ def _normalize_metrics(value) -> List[dict]:
    for metric in value:
        if not isinstance(metric, dict):
            continue
-        items.append(
-            {
-                "metric_name": _as_str(metric.get("metric_name")),
-                "value": _as_str(metric.get("value")),
-                "target": _as_str(metric.get("target")),
-                "owner": _as_str(metric.get("owner")),
-                "trend": _as_str(metric.get("trend")),
-            }
-        )
+        items.append({
+            'metric_name': _as_str(metric.get('metric_name')),
+            'value': _as_str(metric.get('value')),
+            'target': _as_str(metric.get('target')),
+            'owner': _as_str(metric.get('owner')),
+            'trend': _as_str(metric.get('trend')),
+            'unit': _as_str(metric.get('unit')),
+        })
+    return items
+
+
+def _normalize_departments(value) -> List[dict]:
+    """Coerce the raw ``departments`` value into a list of clean dicts.
+
+    Non-list input yields ``[]``. Entries that are not dicts, or whose name is
+    empty after ``_as_str``, are skipped. Each kept entry has the keys
+    ``name``, ``description`` and ``projects``.
+    """
+    if not isinstance(value, list):
+        return []
+    items = []
+    for dept in value:
+        if not isinstance(dept, dict):
+            continue
+        name = _as_str(dept.get('name'))
+        # A department without a name is dropped.
+        if not name:
+            continue
+        items.append({
+            'name': name,
+            'description': _as_str(dept.get('description')),
+            'projects': _as_str_list(dept.get('projects')),
+        })
+    return items
--- a/meeting_memory/graph_store.py
+++ b/meeting_memory/graph_store.py
--- a/meeting_memory/meeting_processor.py
+++ b/meeting_memory/meeting_processor.py
@ -1,9 +1,18 @@
 import hashlib
 import logging
-from typing import Callable, Optional
+from typing import Callable, List, Optional

 from meeting_memory.config import config
-from meeting_memory.extractor import MeetingExtraction, extract_meeting_info
+from meeting_memory.extractor import (
+    MeetingExtraction,
+    extract_entities_from_text,
+    extract_facts_from_text,
+    extract_meeting_info as monolithic_extract,
+)
+from meeting_memory.extractor import (
+    resolve_entities_against_graph,
+    resolve_facts_against_graph,
+)
 from meeting_memory.graph_store import graph_store
 from meeting_memory.meeting_state import MeetingStateStore
 from meeting_memory.raw_store import raw_meeting_store
@ -15,8 +24,9 @@ ProgressCallback = Callable[[int, int, str], None]


 class MeetingProcessor:
+
    def process_meeting_file(self, filepath: str, force: bool = False) -> Optional[str]:
-        with open(filepath, "r", encoding="utf-8") as file_obj:
+        with open(filepath, 'r', encoding='utf-8') as file_obj:
            text = file_obj.read()
        return self.process_meeting_text(text, force=force)

@ -26,147 +36,313 @@ class MeetingProcessor:
        force: bool = False,
        interactive: bool = True,
        progress_callback: Optional[ProgressCallback] = None,
+        use_multistep_extraction: bool = True,
    ) -> Optional[str]:
-        def report(step: int, message: str) -> None:
+        def report(step: int, total: int, message: str) -> None:
            if progress_callback:
-                progress_callback(step, 7, message)
-            print(f"[{step}/7] {message}")
+                progress_callback(step, total, message)
+            print(f'[{step}/{total}] {message}')

-        report(1, "计算内容哈希")
+        if use_multistep_extraction:
+            return self._process_multistep(text, force, interactive, report)
+        else:
+            return self._process_monolithic(text, force, interactive, report)
+
+    def _process_monolithic(
+        self, text: str, force: bool, interactive: bool,
+        report: Callable,
+    ) -> Optional[str]:
+        total_steps = 7
+        report(1, total_steps, '计算内容哈希')
        content_hash = self._compute_content_hash(text)

        if not force and state_store.has_content_hash(content_hash):
-            logger.info("Duplicate content hash skipped: %s", content_hash[:12])
+            logger.info('Duplicate content hash skipped: %s', content_hash[:12])
            return None

        if not force:
-            report(2, "Neo4j 语义相似去重检索")
+            report(2, total_steps, 'Neo4j 语义相似去重检索')
            similar = graph_store.find_similar_episode(text, threshold=0.92)
            if similar:
-                meta = similar["metadata"]
+                meta = similar['metadata']
                if not interactive:
-                    logger.info(
-                        "Skipped similar meeting in non-interactive mode: %s",
-                        meta.get("title", ""),
-                    )
+                    logger.info('Skipped similar meeting: %s', meta.get('title', ''))
                    return None
-
-                print(
-                    f"\n发现相似会议：{meta.get('title', '')} ({meta.get('date', '')}) "
-                    f"相似度 {similar['score']:.2%}"
-                )
+                print(f'\n发现相似会议：{meta.get("title", "")} ({meta.get("date", "")}) 相似度 {similar["score"]:.2%}')
                while True:
-                    choice = input("选择 [s]跳过 / [o]覆盖（默认 s）：").strip().lower() or "s"
-                    if choice == "s":
-                        logger.info("Skipped similar meeting: %s", meta.get("title", ""))
+                    choice = input('选择 [s]跳过 / [o]覆盖（默认 s）：').strip().lower() or 's'
+                    if choice == 's':
+                        logger.info('Skipped similar meeting: %s', meta.get('title', ''))
                        return None
-                    if choice == "o":
+                    if choice == 'o':
                        force = True
                        break
-                    print("请输入 s 或 o。")
+                    print('请输入 s 或 o。')
        else:
-            report(2, "跳过语义去重，按覆盖模式继续")
+            report(2, total_steps, '跳过语义去重，按覆盖模式继续')

-        report(3, "调用大模型抽取结构化信息")
-        meeting_data = self._extract(text)
+        report(3, total_steps, '调用大模型抽取结构化信息（单步模式）')
+        meeting_data = self._extract_monolithic(text)
        if not meeting_data:
-            logger.error("Failed to extract meeting information")
+            logger.error('Failed to extract meeting information')
            return None

        data_dict = meeting_data.model_dump()
-        data_dict["_content_hash"] = content_hash
-        data_dict["_graph_meeting_id"] = graph_store.meeting_id(data_dict)
+        return self._finish_pipeline(data_dict, content_hash, text, force, interactive, report, total_steps)

-        report(4, "检查标题和日期重复")
+    def _process_multistep(
+        self, text: str, force: bool, interactive: bool,
+        report: Callable,
+    ) -> Optional[str]:
+        total_steps = 10
+        report(1, total_steps, '计算内容哈希')
+        content_hash = self._compute_content_hash(text)
+
+        if not force and state_store.has_content_hash(content_hash):
+            logger.info('Duplicate content hash skipped: %s', content_hash[:12])
+            return None
+
+        if not force:
+            report(2, total_steps, 'Neo4j 语义相似去重检索')
+            similar = graph_store.find_similar_episode(text, threshold=0.92)
+            if similar:
+                meta = similar['metadata']
+                if not interactive:
+                    logger.info('Skipped similar meeting: %s', meta.get('title', ''))
+                    return None
+                print(f'\n发现相似会议：{meta.get("title", "")} ({meta.get("date", "")}) 相似度 {similar["score"]:.2%}')
+                while True:
+                    choice = input('选择 [s]跳过 / [o]覆盖（默认 s）：').strip().lower() or 's'
+                    if choice == 's':
+                        logger.info('Skipped similar meeting: %s', meta.get('title', ''))
+                        return None
+                    if choice == 'o':
+                        force = True
+                        break
+                    print('请输入 s 或 o。')
+        else:
+            report(2, total_steps, '跳过语义去重，按覆盖模式继续')
+
+        # Step 3: 提取标题、日期、参与人等元信息
+        report(3, total_steps, '抽取会议元信息（标题、日期、参与者等）')
+        meta_info = self._extract_monolithic(text, stream=interactive)
+        if not meta_info:
+            logger.error('Failed to extract meeting metadata')
+            return None
+        data_dict = meta_info.model_dump()
+        data_dict['_content_hash'] = content_hash
+        data_dict['_graph_meeting_id'] = graph_store.meeting_id(data_dict)
+        data_dict['_original_text'] = text
+
+        # Step 4: 抽取实体节点（LLM 调用 1）
+        report(4, total_steps, '第 1 步实体抽取：识别会议中提及的实体')
+        use_stream = interactive
+        previous_episodes = self._get_previous_episodes_context(data_dict)
+        extracted_entities = extract_entities_from_text(
+            text, previous_episodes=previous_episodes, stream=use_stream
+        )
+        logger.info('Extracted %d entities from meeting', len(extracted_entities))
+        if not extracted_entities:
+            logger.warning('No entities extracted, aborting')
+            return None
+
+        # Step 5: 实体去重（与已有图谱对比 + LLM 裁决）
+        report(5, total_steps, '实体去重：与图谱中已有实体对比')
+        resolved_entities = self._dedup_entities(extracted_entities, text)
+        data_dict['entities'] = resolved_entities
+        logger.info('After dedup: %d entities remain', len(resolved_entities))
+
+        # Step 6: 抽取事实关系（LLM 调用 2）
+        report(6, total_steps, '事实抽取：提取实体间的结构化关系')
+        reference_time = data_dict.get('date', '')
+        extracted_facts = extract_facts_from_text(
+            text, resolved_entities,
+            reference_time=reference_time,
+            previous_episodes=previous_episodes,
+            stream=use_stream,
+        )
+        logger.info('Extracted %d facts from meeting', len(extracted_facts))
+
+        # Step 7: 事实去重与矛盾检测
+        report(7, total_steps, '事实解析：去重与矛盾检测')
+        resolved_facts = self._dedup_facts(extracted_facts, data_dict)
+        data_dict['relations'] = resolved_facts
+        logger.info('After dedup: %d facts remain', len(resolved_facts))
+
+        # Step 8: 检查标题和日期重复
+        report(8, total_steps, '检查标题和日期重复')
        should_skip = self._handle_duplicate(data_dict, force=force, interactive=interactive)
        if should_skip:
            return None

-        meeting_title = data_dict.get("title", "")
-        meeting_date = data_dict.get("date", "")
+        meeting_title = data_dict.get('title', '')
+        meeting_date = data_dict.get('date', '')

-        report(5, "归档原始会议文本")
+        # Step 9: 归档 + 合并行动项/指标
+        report(9, total_steps, '归档和状态合并')
        raw_path = raw_meeting_store.save(text, title=meeting_title, date=meeting_date)
-        data_dict["_original_text"] = text
-        data_dict["_original_text_path"] = raw_path
+        data_dict['_original_text_path'] = raw_path

-        meeting_filename = f"{graph_store.meeting_id(data_dict)}.md"
-
-        report(6, "合并行动项和指标状态")
-        data_dict["action_items"] = state_store.merge_action_items(
-            data_dict.get("action_items", []),
-            meeting_title,
-            meeting_date,
-            meeting_filename,
+        meeting_filename = f'{graph_store.meeting_id(data_dict)}.md'
+        data_dict['action_items'] = state_store.merge_action_items(
+            data_dict.get('action_items', []),
+            meeting_title, meeting_date, meeting_filename,
        )
-        data_dict["metrics"] = state_store.merge_metrics(
-            data_dict.get("metrics", []),
-            meeting_title,
-            meeting_date,
-            meeting_filename,
+        data_dict['metrics'] = state_store.merge_metrics(
+            data_dict.get('metrics', []),
+            meeting_title, meeting_date, meeting_filename,
        )
-
        state_store.add_content_hash(content_hash, meeting_title, meeting_date, meeting_filename)
        state_store.save()

-        report(7, "写入 Neo4j 图谱和检索数据")
+        # Step 10: 写入 Neo4j
+        report(10, total_steps, '写入 Neo4j 图谱')
        graph_store.upsert_meeting_subgraph(data_dict)

-        logger.info("Meeting processed: %s", meeting_title)
+        logger.info('Meeting processed (multi-step): %s', meeting_title)
+        return raw_path
+
+    def _get_previous_episodes_context(self, data_dict: dict) -> list:
+        """Collect context from earlier meetings of the same series.
+
+        Looks up the series entry for the meeting title in ``state_store`` and,
+        when it lists processed titles, queries the graph for the episodes of
+        the last three of those titles.
+
+        Args:
+            data_dict: Extracted meeting data; only ``title`` is read here.
+
+        Returns:
+            A list of ``{'content': ..., 'timestamp': ...}`` dicts (the query
+            is capped with ``LIMIT 3``), or ``[]`` when the series is unknown,
+            has no processed titles, or the query yields nothing.
+        """
+        meeting_title = data_dict.get('title', '')
+        series_info = state_store.get_series_info(meeting_title)
+        if not series_info:
+            return []
+        processed = series_info.get('processed_titles', [])
+        if not processed:
+            return []
+        rows = graph_store.run_query('''
+            MATCH (m:Meeting)
+            WHERE m.title IN $titles
+            OPTIONAL MATCH (m)-[:HAS_EPISODE]->(ep:Episode)
+            RETURN m.title AS title, m.date AS date, ep.summary AS summary, ep.content AS content
+            ORDER BY m.date DESC
+            LIMIT 3
+        ''', titles=processed[-3:])
+        # The RETURN clause always yields a ``content`` column, and OPTIONAL
+        # MATCH leaves it null when a meeting has no episode. A key-missing
+        # default would therefore never fire, so fall back on falsy values.
+        # NOTE(review): assumes rows are dict-like records - confirm against
+        # graph_store.run_query.
+        return [
+            {
+                'content': row.get('content') or row.get('summary') or '',
+                'timestamp': row.get('date') or '',
+            }
+            for row in rows or []
+        ]
+
+    def _dedup_entities(self, extracted: list, text: str) -> list:
+        """Resolve freshly extracted entities against entities already in the graph.
+
+        Args:
+            extracted: Entities extracted from the current meeting.
+            text: Meeting text, forwarded as ``episode_content``.
+
+        Returns:
+            The result of ``resolve_entities_against_graph``. ``extracted`` is
+            returned unchanged when the graph holds no entities or when
+            resolution raises (best effort: the failure is logged and all
+            extracted entities are kept).
+        """
+        try:
+            existing = graph_store.get_entities_map()
+            if not existing:
+                return extracted
+            existing_list = [
+                {
+                    'candidate_id': i,
+                    'name': v['name'],
+                    # Stored properties may be present but null; coalesce to ''
+                    # so downstream string handling (e.g. slicing the summary
+                    # for the prompt) cannot raise and disable dedup entirely.
+                    'entity_type': v.get('entity_type') or '',
+                    'summary': v.get('summary') or v.get('description') or '',
+                }
+                for i, v in enumerate(existing.values())
+            ]
+            return resolve_entities_against_graph(extracted, existing_list, episode_content=text)
+        except Exception as exc:
+            logger.warning('Entity dedup failed, keeping all extracted: %s', exc)
+            return extracted
+
+    def _dedup_facts(self, facts: list, data_dict: dict) -> list:
+        """Drop extracted facts that the graph already holds as duplicates.
+
+        Each fact is compared with the facts stored between its source and
+        target entity. A fact is kept when nothing is stored for that pair,
+        when the resolver does not flag it as a duplicate, or when the check
+        itself raises (the failure is logged and the fact is kept).
+
+        Args:
+            facts: Fact dicts extracted from the current meeting.
+            data_dict: Meeting data; not read by this method.
+
+        Returns:
+            The facts that survive deduplication, in their original order.
+        """
+        kept = []
+        for candidate in facts:
+            flagged_duplicate = False
+            try:
+                src_name = candidate.get('source_entity_name', '')
+                dst_name = candidate.get('target_entity_name', '')
+                known = graph_store.get_facts_between(src_name, dst_name)
+                if known:
+                    verdict = resolve_facts_against_graph(candidate, known, [])
+                    if isinstance(verdict, dict) and verdict.get('is_duplicate'):
+                        logger.debug('Skipped duplicate fact: %s', candidate.get('fact', ''))
+                        flagged_duplicate = True
+            except Exception as exc:
+                logger.warning('Fact dedup failed, keeping: %s', exc)
+                flagged_duplicate = False
+            if not flagged_duplicate:
+                kept.append(candidate)
+        return kept
+
+    def _finish_pipeline(
+        self, data_dict: dict, content_hash: str, text: str,
+        force: bool, interactive: bool, report: Callable, total_steps: int,
+    ) -> Optional[str]:
+        """Run the tail of the import: duplicate check, archive, state merge, graph write.
+
+        Progress is reported as steps 4 to 7 (hard-coded here) out of
+        ``total_steps``.
+
+        Args:
+            data_dict: Extracted meeting data; mutated in place with the
+                underscore-prefixed bookkeeping keys and the merged
+                ``action_items`` / ``metrics``.
+            content_hash: Hash of the meeting text, stored on the meeting and
+                recorded in ``state_store``.
+            text: Original meeting text to archive.
+            force: Forwarded to ``_handle_duplicate``.
+            interactive: Forwarded to ``_handle_duplicate``.
+            report: Progress callback called as ``report(step, total, message)``.
+            total_steps: Total step count passed through to ``report``.
+
+        Returns:
+            The value returned by ``raw_meeting_store.save``, or ``None`` when
+            ``_handle_duplicate`` decides the meeting should be skipped.
+        """
+        data_dict['_content_hash'] = content_hash
+        data_dict['_graph_meeting_id'] = graph_store.meeting_id(data_dict)
+
+        report(4, total_steps, '检查标题和日期重复')
+        should_skip = self._handle_duplicate(data_dict, force=force, interactive=interactive)
+        if should_skip:
+            return None
+
+        meeting_title = data_dict.get('title', '')
+        meeting_date = data_dict.get('date', '')
+
+        report(5, total_steps, '归档原始会议文本')
+        raw_path = raw_meeting_store.save(text, title=meeting_title, date=meeting_date)
+        data_dict['_original_text'] = text
+        data_dict['_original_text_path'] = raw_path
+
+        meeting_filename = f'{graph_store.meeting_id(data_dict)}.md'
+
+        report(6, total_steps, '合并行动项和指标状态')
+        data_dict['action_items'] = state_store.merge_action_items(
+            data_dict.get('action_items', []), meeting_title, meeting_date, meeting_filename,
+        )
+        data_dict['metrics'] = state_store.merge_metrics(
+            data_dict.get('metrics', []), meeting_title, meeting_date, meeting_filename,
+        )
+        # NOTE(review): the content hash is recorded and state is saved before
+        # the graph write below. If the upsert raises, the hash presumably
+        # stays recorded and a retry without force would be skipped as a
+        # duplicate - confirm whether that ordering is intended.
+        state_store.add_content_hash(content_hash, meeting_title, meeting_date, meeting_filename)
+        state_store.save()
+
+        report(7, total_steps, '写入 Neo4j 图谱和检索数据')
+        graph_store.upsert_meeting_subgraph(data_dict)
+
+        logger.info('Meeting processed: %s', meeting_title)
        return raw_path

    def _handle_duplicate(self, data_dict: dict, force: bool, interactive: bool = True) -> bool:
-        title = data_dict.get("title", "")
-        date = data_dict.get("date", "")
+        title = data_dict.get('title', '')
+        date = data_dict.get('date', '')
        existing = graph_store.get_meeting(title, date)
-
        if not existing:
            return False
-
        if force:
-            logger.info("Duplicate meeting found; overwriting in force mode: %s", title)
+            logger.info('Duplicate meeting found; overwriting in force mode: %s', title)
            self._remove_old(data_dict, existing)
            return False
-
        if not interactive:
-            logger.info("Skipped duplicate meeting in non-interactive mode: %s", title)
+            logger.info('Skipped duplicate meeting in non-interactive mode: %s', title)
            return True
-
-        print(f"\n发现重复会议：{title} ({date})")
+        print(f'\n发现重复会议：{title} ({date})')
        while True:
-            choice = input("选择 [s]跳过 / [o]覆盖（默认 s）：").strip().lower() or "s"
-            if choice == "s":
-                logger.info("Skipped duplicate meeting: %s", title)
+            choice = input('选择 [s]跳过 / [o]覆盖（默认 s）：').strip().lower() or 's'
+            if choice == 's':
+                logger.info('Skipped duplicate meeting: %s', title)
                return True
-            if choice == "o":
+            if choice == 'o':
                self._remove_old(data_dict, existing)
                return False
-            print("请输入 s 或 o。")
+            print('请输入 s 或 o。')

    def _remove_old(self, data_dict: dict, existing: Optional[dict] = None) -> None:
        meeting_id = graph_store.meeting_id(data_dict)
        graph_store.remove_meeting_subgraph(meeting_id)
-
-        new_hash = data_dict.get("_content_hash", "")
+        new_hash = data_dict.get('_content_hash', '')
        if new_hash:
            state_store.remove_content_hash(new_hash)
-
        if existing:
-            old_hash = existing.get("content_hash", "")
+            old_hash = existing.get('content_hash', '')
            if old_hash and old_hash != new_hash:
                state_store.remove_content_hash(old_hash)
-
-        logger.info("Removed old meeting artifacts: %s", data_dict.get("title", ""))
+        logger.info('Removed old meeting artifacts: %s', data_dict.get('title', ''))

    def _compute_content_hash(self, text: str) -> str:
-        normalized = text.strip().replace("\r\n", "\n")
-        return hashlib.sha256(normalized.encode("utf-8")).hexdigest()
+        normalized = text.strip().replace('\r\n', '\n')
+        return hashlib.sha256(normalized.encode('utf-8')).hexdigest()

-    def _extract(self, text: str) -> Optional[MeetingExtraction]:
+    def _extract_monolithic(self, text: str, *, stream: bool = True) -> Optional[MeetingExtraction]:
        try:
-            return extract_meeting_info(text, stream=True)
+            return monolithic_extract(text, stream=stream)
        except Exception as exc:
-            logger.error("LLM extraction failed: %s", exc)
+            logger.error('LLM extraction failed: %s', exc)
            return None

    def query(self, question: str, top_k: int = 3) -> str:
@ -174,10 +350,10 @@ class MeetingProcessor:

    def stats(self) -> dict:
        return {
-            "graph": graph_store.get_stats(),
-            "state": state_store.get_stats(),
-            "raw_dir": config.storage.raw_dir,
-            "state_path": config.state_path,
+            'graph': graph_store.get_stats(),
+            'state': state_store.get_stats(),
+            'raw_dir': config.storage.raw_dir,
+            'state_path': config.state_path,
        }


--- a/meeting_memory/prompts/init.py
+++ b/meeting_memory/prompts/init.py
@ -0,0 +1,5 @@
+from .extract_nodes import extract_entities
+from .extract_edges import extract_facts
+from .dedupe_nodes import resolve_entities
+from .dedupe_edges import resolve_facts
+from .summarize_nodes import summarize_entity
--- a/meeting_memory/prompts/dedupe_edges.py
+++ b/meeting_memory/prompts/dedupe_edges.py
@ -0,0 +1,49 @@
+from typing import Any
+
+
+def resolve_facts(context: dict[str, Any]) -> list[dict]:
+    """Build the chat messages for duplicate / contradiction checks on one fact.
+
+    Args:
+        context: Reads ``existing_facts`` and ``invalidation_candidates``
+            (sequences of dicts whose ``fact`` value is rendered) and
+            ``new_fact`` (interpolated into the prompt as-is).
+
+    Returns:
+        A two-element message list: the system instruction, then the user
+        prompt listing every fact with a running ``idx``.
+    """
+    known_facts = context.get('existing_facts', [])
+    candidate_fact = context.get('new_fact', '')
+    stale_candidates = context.get('invalidation_candidates', [])
+
+    known_lines = []
+    for idx, item in enumerate(known_facts):
+        known_lines.append(f'  [idx={idx}] {item.get("fact", "")}')
+    existing_text = '\n'.join(known_lines)
+
+    # Invalidation candidates continue the numbering where the known facts end.
+    stale_lines = []
+    for idx, item in enumerate(stale_candidates, start=len(known_facts)):
+        stale_lines.append(f'  [idx={idx}] {item.get("fact", "")}')
+    invalidation_text = '\n'.join(stale_lines)
+
+    user_prompt = f"""
+<已有事实>
+{existing_text}
+</已有事实>
+
+<事实失效候选>
+{invalidation_text}
+</事实失效候选>
+
+<新事实>
+{candidate_fact}
+</新事实>
+
+注意：idx 编号是连续的——已有事实从 0 开始，失效候选紧随其后。
+
+任务：
+1. **重复检测**：如果<新事实>与<已有事实>中的某条描述的是完全相同的客观事实，返回该 idx。
+2. **矛盾检测**：如果<新事实>与<已有事实>或<失效候选>中的某条相互矛盾（如状态已更新、数值已变更），返回该 idx。
+
+返回格式：
+{{"duplicate_facts": [idx列表], "contradicted_facts": [idx列表]}}
+如果没有重复或矛盾，返回空列表。
+
+示例：
+- 新事实："张三负责宽带运维项目" vs 已有："张三负责宽带运维" → 重复（相同事实）
+- 新事实："宽带用户数当前值 8500" vs 已有："宽带用户数目标值 10000" → 不重复，不矛盾（数值维度不同）
+- 新事实："宽带用户数当前值 9000" vs 已有："宽带用户数 8000" → 矛盾（同一指标数值更新）
+"""
+    system_message = {'role': 'system', 'content': '你是事实去重和矛盾检测助手。判断新事实与已有事实的关系。'}
+    user_message = {'role': 'user', 'content': user_prompt}
+    return [system_message, user_message]
--- a/meeting_memory/prompts/dedupe_nodes.py
+++ b/meeting_memory/prompts/dedupe_nodes.py
@ -0,0 +1,49 @@
+from typing import Any
+
+
+def resolve_entities(context: dict[str, Any]) -> list[dict]:
+    """Build the chat messages asking the model to match new entities to known ones.
+
+    Args:
+        context: Reads ``extracted_entities`` (dicts with ``name``,
+            ``entity_type``, ``description``), ``existing_entities`` (dicts
+            with ``candidate_id``, ``name``, ``entity_type``, ``summary``) and
+            ``episode_content`` (the meeting text).
+
+    Returns:
+        A two-element message list: the system instruction, then the user
+        prompt listing both entity sets.
+    """
+    extracted = context.get('extracted_entities', [])
+    existing = context.get('existing_entities', [])
+    episode_content = context.get('episode_content', '')
+
+    extracted_text = '\n'.join(
+        f'  [{i}] {e.get("name", "")}（{e.get("entity_type", "未知")}）：{e.get("description", "")}'
+        for i, e in enumerate(extracted)
+    )
+
+    # A candidate's summary may be present but None (a null graph property);
+    # coalesce before slicing so one such candidate cannot raise TypeError.
+    existing_text = '\n'.join(
+        f'  [candidate_id={c.get("candidate_id", i)}] {c.get("name", "")}（{c.get("entity_type", "未知")}）：{str(c.get("summary") or "")[:100]}'
+        for i, c in enumerate(existing)
+    )
+
+    user_prompt = f"""
+<当前会议内容>
+{episode_content}
+</当前会议内容>
+
+<新抽取的实体>
+{extracted_text}
+</新抽取的实体>
+
+<图谱中已有的实体>
+{existing_text}
+</图谱中已有的实体>
+
+任务：判断<新抽取的实体>中的每一个是否与<图谱中已有的实体>中的某个是同一个真实世界对象。
+
+判断标准：
+- **是重复**：两个名称指向同一个真实世界的人、组织、地点、项目、指标等。
+- **不是重复**：名称相似但指向不同实体（如两个同名但不同的人、同名的不同项目）。
+
+对每个新抽取的实体，返回：
+  - id: 对应新抽取实体列表中的序号
+  - name: 实体的最佳名称（优先使用已有实体中的更完整名称）
+  - duplicate_candidate_id: 匹配到的已有实体的 candidate_id，如果无匹配则填 -1
+
+返回格式 JSON 数组：[{{"id": 0, "name": "张三", "duplicate_candidate_id": -1}}, ...]
+必须为新抽取的每个实体返回一条记录。id 从 0 开始连续编号。
+"""
+    return [
+        {'role': 'system', 'content': '你是实体去重助手。判断两个实体是否指向同一个真实世界对象。'},
+        {'role': 'user', 'content': user_prompt},
+    ]
--- a/meeting_memory/prompts/extract_edges.py
+++ b/meeting_memory/prompts/extract_edges.py
@ -0,0 +1,66 @@
+from typing import Any
+
+
+def extract_facts(context: dict[str, Any]) -> list[dict]:
+    """Build the chat messages asking the model to extract facts between entities.
+
+    Args:
+        context: Reads ``previous_episodes`` (JSON-serialised into a history
+            section when truthy), ``episode_content`` (current meeting text),
+            ``entities`` (dicts with ``name`` / ``entity_type``) and
+            ``reference_time``.
+
+    Returns:
+        A two-element message list: the system instruction, then the user
+        prompt with the numbered entity list and the extraction rules.
+    """
+    import json
+
+    history = context.get('previous_episodes', [])
+    meeting_text = context.get('episode_content', '')
+    known_entities = context.get('entities', [])
+    reference_time = context.get('reference_time', '')
+
+    # The history section is left out entirely when there is no earlier context.
+    history_section = (
+        f'\n<历史上下文>\n{json.dumps(history, ensure_ascii=False)}\n</历史上下文>\n'
+        if history else ''
+    )
+
+    entity_lines = []
+    for position, entity in enumerate(known_entities):
+        entity_lines.append(f'  [{position}] {entity.get("name", "")}（{entity.get("entity_type", "未知")}）')
+    entities_text = '\n'.join(entity_lines)
+
+    user_prompt = f"""
+{history_section}
+<当前会议内容>
+{meeting_text}
+</当前会议内容>
+
+<已抽取实体>
+{entities_text}
+</已抽取实体>
+
+<参考时间>
+{reference_time}
+</参考时间>
+
+抽取规则：
+1. 从<当前会议内容>中抽取上述<已抽取实体>之间的**事实关系**。
+2. 每条关系必须涉及两个**不同**的实体。
+3. 返回 JSON 数组，格式：
+   [{{
+     "source_entity_name": "源实体名称（必须来自上方的实体列表）",
+     "target_entity_name": "目标实体名称（必须来自上方的实体列表）",
+     "relation_type": "关系类型，如 负责、汇报、隶属于、参与、目标值、截止于、影响、依赖于",
+     "fact": "一句自然语言的事实描述，保留原文中所有具体细节（数值、时间、地点等）",
+     "valid_at": "该事实开始成立的时间（ISO 8601格式，如 2025-04-30T00:00:00Z），不明确则留空",
+     "invalid_at": "该事实不再成立的时间，不明确则留空",
+     "evidence": "原文中的关键证据短句",
+     "qualifiers": ["限定条件列表，如数值、范围、状态、截止时间等"],
+     "confidence": 置信度0到1之间
+   }}]
+
+4. relation_type 避免使用"关联""涉及"等空泛词，优先使用具体谓词：
+    负责、汇报、目标值、当前值、低于、高于、要求、督导、推进、支撑、依赖、计划、完成、截止于、参与、隶属于、分管、协调、审批
+
+5. 层次关系（结构隶属）使用以下固定 relation_type：
+    HAS_PROJECT:  部门管辖项目（Department -> Project）
+    HAS_METRIC:   项目拥有指标（Project -> Metric）
+    PART_OF:      实体属于某个上级实体
+
+6. 同一对实体之间可能既有层次关系（HAS_PROJECT）也有事实关系（负责、汇报），需要分别抽取。
+
+7. fact 必须是一句完整的自然语言事实，保留所有具体信息（人名、数值、产品名、地点等）。
+
+8. 如果根据上下文可以判断事实的开始/结束时间，填入 valid_at / invalid_at。
+"""
+    system_message = {'role': 'system', 'content': '你是一个专业的事实关系抽取专家。从会议记录中抽取实体间的结构化事实关系。'}
+    return [system_message, {'role': 'user', 'content': user_prompt}]
--- a/meeting_memory/prompts/extract_nodes.py
+++ b/meeting_memory/prompts/extract_nodes.py
@ -0,0 +1,56 @@
+from typing import Any
+
+
+# System instruction shared by every entity-extraction request.
+SYSTEM_PROMPT = (
+    '你是会议纪要实体抽取专家。'
+    '从会议记录中抽取明确的实体节点，包括部门（Department）、项目（Project）、指标（Metric）、人物（Person）、系统（System）、文档（Document）等。'
+    '不要抽取抽象概念、情感、时间日期或泛泛的名词。'
+)
+
+
+def extract_entities(context: dict[str, Any]) -> list[dict]:
+    """Build the chat messages asking the model to extract entity nodes.
+
+    Args:
+        context: Reads ``previous_episodes`` (JSON-serialised into a history
+            section when truthy), ``episode_content`` (current meeting text)
+            and ``entity_types`` (dicts that must carry ``type`` and
+            ``description``; a placeholder line is used when empty).
+
+    Returns:
+        A two-element message list: ``SYSTEM_PROMPT`` as the system message,
+        then the user prompt with the extraction rules.
+    """
+    import json
+
+    history = context.get('previous_episodes', [])
+    meeting_text = context.get('episode_content', '')
+    allowed_types = context.get('entity_types', [])
+
+    if not allowed_types:
+        entity_types_section = '  - 未限定类型，请根据上下文自行判断'
+    else:
+        type_lines = [
+            f'  - {spec["type"]}: {spec["description"]}' for spec in allowed_types
+        ]
+        entity_types_section = '\n'.join(type_lines)
+
+    # The history section is left out entirely when there is no earlier context.
+    history_section = (
+        f'\n<历史上下文>\n{json.dumps(history, ensure_ascii=False)}\n</历史上下文>\n'
+        if history else ''
+    )
+
+    user_prompt = f"""
+{history_section}
+<当前会议内容>
+{meeting_text}
+</当前会议内容>
+
+<实体类型>
+{entity_types_section}
+</实体类型>
+
+抽取规则：
+1. 只抽取当前会议内容中**明确提及**的实体。
+2. 每个实体必须是有唯一标识的具体事物——人名、组织名、地名、项目名、指标名称等。
+3. 不要抽取：代词（他、她、它、这、那）、抽象概念（增长、改善、风险）、时间日期。
+4. 如果同一实体在不同来源中以不同名称出现（如简称/全称），保留最完整的形式。
+5. 必须返回 JSON 数组，格式：[{{"name": "实体名称", "entity_type": "类型", "description": "描述", "evidence": "原文证据"}}]
+6. description 写一段对该实体的简要描述（20字以内）。
+7. evidence 从原文中摘录提及该实体的关键短句。
+
+注意：实体类型建议使用 Department（部门）、Project（项目）、Metric（指标）、Person（人物）、System（系统）、Document（文档）等。请确保：
+- 部门（Department）：会议中提到的具体部门名称，如"技术部"、"市场部"。
+- 项目（Project）：部门负责的具体项目名称。
+- 指标（Metric）：项目中提到的具体量化指标，如"响应时间"、"完成率"。
+"""
+    system_message = {'role': 'system', 'content': SYSTEM_PROMPT}
+    return [system_message, {'role': 'user', 'content': user_prompt}]
--- a/meeting_memory/prompts/summarize_nodes.py
+++ b/meeting_memory/prompts/summarize_nodes.py
@ -0,0 +1,41 @@
+from typing import Any
+
+
+def summarize_entity(context: dict[str, Any]) -> list[dict]:
+    """Build the chat messages asking the model for an entity summary.
+
+    Args:
+        context: Reads ``entity_name``, ``existing_summary`` (adds a section
+            when truthy), ``episodes`` (a list is joined with ``---``
+            separators; any other value is interpolated as-is) and
+            ``previous_episodes`` (JSON-serialised when truthy).
+
+    Returns:
+        A two-element message list: the system instruction, then the user
+        prompt with the content sections and the summary rules.
+    """
+    import json
+
+    entity_name = context.get('entity_name', '')
+    prior_summary = context.get('existing_summary', '')
+    episodes = context.get('episodes', [])
+    history = context.get('previous_episodes', [])
+
+    # Both optional sections collapse to an empty string when they have no data.
+    existing_section = f'\n<已有摘要>\n{prior_summary}\n</已有摘要>\n' if prior_summary else ''
+    history_section = (
+        f'\n<历史内容>\n{json.dumps(history, ensure_ascii=False)}\n</历史内容>\n'
+        if history else ''
+    )
+
+    if isinstance(episodes, list):
+        episodes_text = '\n---\n'.join(episodes)
+    else:
+        episodes_text = episodes
+
+    user_prompt = f"""
+{history_section}
+<当前内容>
+{episodes_text}
+</当前内容>
+{existing_section}
+为实体 **{entity_name}** 生成一段信息密集的摘要。
+
+规则：
+1. 只使用<当前内容>和<已有摘要>中的事实。不要推测。
+2. 保留所有实质性的人名、角色、地点、日期、数值。
+3. 用第三人称直接陈述事实。
+4. 不要使用"提及了""讨论了""指出"等元语言动词。直接陈述事实。
+5. 如果会议对已有信息做了更新，采用更新的说法。
+6. 摘要不超过 500 字。
+7. 返回 JSON：{{"summary": "摘要内容"}}
+"""
+    system_message = {'role': 'system', 'content': '你是实体摘要助手。根据会议内容为实体生成信息密集的摘要。'}
+    return [system_message, {'role': 'user', 'content': user_prompt}]
--- a/meeting_memory/web_demo/server.py
+++ b/meeting_memory/web_demo/server.py
@ -20,6 +20,7 @@ from meeting_memory.meeting_processor import meeting_processor, state_store
 logger = logging.getLogger(__name__)

 STATIC_DIR = Path(__file__).resolve().parent / "static"
+STATIC_V2_DIR = Path(__file__).resolve().parent / "static_v2"
 RAW_DIR = Path(config.storage.raw_dir)
 IMPORT_JOBS = {}
 IMPORT_JOBS_LOCK = threading.Lock()
@ -29,8 +30,22 @@ class GraphDemoHandler(SimpleHTTPRequestHandler):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, directory=str(STATIC_DIR), **kwargs)

+    # ── Route: serve /static_v2/* from the v2 directory ──
+    def translate_path(self, path):
+        """Map a request path to a filesystem path, serving /static_v2/* from STATIC_V2_DIR.
+
+        Paths under ``/static_v2/`` are resolved below ``STATIC_V2_DIR``; every
+        other path goes through the base-class implementation.
+
+        The remainder after the prefix comes straight from the client, so it is
+        percent-decoded and stripped of ``.``/``..`` and of components carrying
+        a drive or directory part before being joined. This is the same
+        filtering the standard ``SimpleHTTPRequestHandler.translate_path``
+        applies, and it keeps requests such as ``/static_v2/../../secret``
+        from escaping the static directory.
+
+        Args:
+            path: Raw request path, possibly with query string or fragment.
+
+        Returns:
+            The filesystem path to serve, as a string.
+        """
+        # Local imports keep this block independent of the module's import list.
+        import os
+        import posixpath
+        from urllib.parse import unquote
+
+        parsed = urlparse(path)
+        raw = parsed.path
+
+        # Serve /static_v2/* from static_v2 directory
+        prefix = "/static_v2/"
+        if raw.startswith(prefix):
+            rel = posixpath.normpath(unquote(raw[len(prefix):]))
+            target = STATIC_V2_DIR
+            for part in filter(None, rel.split("/")):
+                # Drop anything that could climb out of, or re-root, the directory.
+                if os.path.dirname(part) or part in (os.curdir, os.pardir):
+                    continue
+                target = target / part
+            return str(target)
+
+        return super().translate_path(path)
+
    def do_GET(self):
        parsed = urlparse(self.path)
+
+        # API endpoints
        if parsed.path == "/api/dashboard":
            self._handle_dashboard()
            return
@ -55,10 +70,16 @@ class GraphDemoHandler(SimpleHTTPRequestHandler):
        if parsed.path == "/api/import-status":
            self._handle_import_status(parsed.query)
            return
+
+        # Page routing — serve v2 HTML as default
        if parsed.path in ("/", "/index.html"):
-            self.path = "/index.html"
+            self.path = "/static_v2/index.html"
        elif parsed.path == "/graph":
-            self.path = "/graph.html"
+            self.path = "/static_v2/graph.html"
+        elif parsed.path == "/graph.html":
+            self.path = "/static_v2/graph.html"
+
+        # JS files (/app.js, /graph.js) resolve to STATIC_DIR via default translate_path
        super().do_GET()

    def do_POST(self):
@ -311,9 +332,9 @@ def _serialize_meeting(path: Path, include_content: bool = False):
    lines = raw_text.splitlines()
    for line in lines[:12]:
        if line.startswith('title: "'):
-            title = line[len('title: "') : -1]
+            title = line[len('title: "'):-1]
        elif line.startswith('date: "'):
-            date = line[len('date: "') : -1]
+            date = line[len('date: "'):-1]

    content_start = 0
    for idx, line in enumerate(lines):
@ -397,4 +418,4 @@ if __name__ == "__main__":
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        datefmt="%H:%M:%S",
    )
-    run_demo_server()
+    run_demo_server()
--- a/meeting_memory/web_demo/static/app.js
+++ b/meeting_memory/web_demo/static/app.js
@ -85,7 +85,7 @@ function renderStats(graph = {}, state = {}) {
    { label: "Neo4j", value: graph.enabled ? "在线" : "离线", icon: "⬡", color: graph.enabled ? "#34c759" : "#b3261e" },
    { label: "会议", value: graph.meetings ?? 0, icon: "📋", color: "#4a90d9" },
    { label: "实体", value: graph.entities ?? 0, icon: "◆", color: "#53c2da" },
-    { label: "关系", value: graph.facts ?? 0, icon: "↗", color: "#ff9500" },
+    { label: "关系", value: graph.relations ?? 0, icon: "↗", color: "#ff9500" },
    { label: "行动项", value: state.action_items_tracked ?? 0, icon: "☐", color: "#7f8bff" },
    { label: "指标", value: state.metrics_tracked ?? 0, icon: "📊", color: "#af52de" },
  ];
--- a/meeting_memory/web_demo/static/graph.js
+++ b/meeting_memory/web_demo/static/graph.js
@ -4,8 +4,7 @@ const graphNodeLimit = document.getElementById("graphNodeLimit");
 const graphEdgeLimit = document.getElementById("graphEdgeLimit");
 const graphSvg = document.getElementById("graphSvg");
 const graphMeta = document.getElementById("graphMeta");
-const graphDetail = document.getElementById("graphDetail");
-const relatedSearch = document.getElementById("relatedSearch");
+const detailPanel = document.getElementById("detailPanel");
 const graphTypeFilter = document.getElementById("graphTypeFilter");

 let selectedEntityTypes = null;
@ -76,35 +75,35 @@ async function loadGraphKinds() {
 }

 function renderInspector(content) {
-  graphDetail.innerHTML = content;
+  detailPanel.innerHTML = content;
 }

 async function loadRelated(query) {
-  if (!query) {
-    relatedSearch.innerHTML = "";
-    return;
-  }
+  if (!query) return;
  const response = await fetch(`/api/search?q=${encodeURIComponent(query)}&limit=4`);
  const payload = await response.json();
  const results = payload.results || [];
  if (!results.length) {
-    relatedSearch.innerHTML = empty("没有更多相关检索结果");
+    detailPanel.insertAdjacentHTML("beforeend", `
+      <div class="detail-section">
+        <p class="eyebrow">Related</p>
+        <div class="empty-state">没有更多相关检索结果</div>
+      </div>
+    `);
    return;
  }
-  relatedSearch.innerHTML = `
-    <div class="panel-head">
-      <div>
-        <p class="eyebrow">Related</p>
-        <h3>相关检索</h3>
-      </div>
+  detailPanel.insertAdjacentHTML("beforeend", `
+    <div class="detail-section">
+      <p class="eyebrow">Related</p>
+      <h3>相关检索</h3>
+      ${results.map((item) => `
+        <article class="result-card">
+          <strong>${h(item.title || item.kind || "结果")}</strong>
+          <p>${h(item.text || "")}</p>
+        </article>
+      `).join("")}
    </div>
-    ${results.map((item) => `
-      <article class="result-card">
-        <strong>${h(item.title || item.kind || "结果")}</strong>
-        <p>${h(item.text || "")}</p>
-      </article>
-    `).join("")}
-  `;
+  `);
 }

 function renderGraph(payload) {
@ -125,7 +124,6 @@ function renderGraph(payload) {
  if (!nodes.length) {
    graphSvg.innerHTML = "";
    renderInspector(empty("当前没有可显示的图谱数据"));
-    relatedSearch.innerHTML = "";
    return;
  }

@ -201,7 +199,6 @@ function renderGraph(payload) {
    text.setAttribute("y", r + 16);
    text.setAttribute("text-anchor", "middle");
    text.setAttribute("font-size", "11");
-    text.setAttribute("fill", "#22264d");
    text.setAttribute("data-type", "node-label");
    text.textContent = truncate(node.label, TRUNCATE_LENGTH);
    g.appendChild(text);
@ -392,36 +389,47 @@ function renderGraph(payload) {
            ${node.date ? `<span class="chip">${h(node.date)}</span>` : ""}
            <span class="chip">关系 ${h(related.length)}</span>
          </div>`;
-      } else if (kind === "fact") {
-        body = `
-          <p>${h(node.fact || node.description || "暂无描述")}</p>
-          <div class="chip-row">
-            ${node.date ? `<span class="chip">${h(node.date)}</span>` : ""}
-            <span class="chip">关系 ${h(related.length)}</span>
-          </div>`;
      } else {
+        const isMetric = (node.entity_type || "").toLowerCase() === "metric";
        body = `
          <p>${h(node.description || "暂无描述")}</p>
          <div class="chip-row">
            ${node.entity_type ? `<span class="chip">${h(node.entity_type)}</span>` : ""}
            ${node.date ? `<span class="chip">${h(node.date)}</span>` : ""}
            <span class="chip">关系 ${h(related.length)}</span>
-          </div>`;
+          </div>
+          ${isMetric ? `
+          <div class="metric-fields">
+            ${node.current_value ? `<p><strong>当前值：</strong>${h(node.current_value)}</p>` : ""}
+            ${node.target ? `<p><strong>目标值：</strong>${h(node.target)}</p>` : ""}
+            ${node.unit ? `<p><strong>单位：</strong>${h(node.unit)}</p>` : ""}
+            ${node.trend ? `<p><strong>趋势：</strong>${h(node.trend)}</p>` : ""}
+          </div>` : ""}`;
      }
      renderInspector(`
-        <div class="detail-card">
+        <div class="detail-section">
          <p class="eyebrow">${h(node.kind)}</p>
          <h3>${h(node.label)}</h3>
          ${body}
        </div>
-        ${related.map((edge) => `
-          <article class="result-card">
-            <strong>${h(edge.source)} → ${h(edge.target)}</strong>
-            <p>${h(edge.fact || edge.description || edge.predicate || "")}</p>
-          </article>
-        `).join("")}
+        <div class="detail-section">
+          <p class="eyebrow">Relations</p>
+          ${related.length ? related.map((edge) => `
+            <article class="result-card">
+              <strong>${h(edge.source)} → ${h(edge.target)}</strong>
+              <p>${h(edge.fact || edge.description || edge.predicate || "")}</p>
+            </article>
+          `).join("") : `<div class="empty-state">没有关联关系</div>`}
+        </div>
      `);
-      loadRelated(node.label).catch(() => relatedSearch.innerHTML = empty("相关检索加载失败"));
+      loadRelated(node.label).catch(() => {
+        detailPanel.insertAdjacentHTML("beforeend", `
+          <div class="detail-section">
+            <p class="eyebrow">Related</p>
+            <div class="empty-state">相关检索加载失败</div>
+          </div>
+        `);
+      });
    });
  });

@ -432,7 +440,7 @@ function renderGraph(payload) {
      line?.classList.add("active");
      const edge = edges.find((item) => item.id === el.dataset.edgeId);
      renderInspector(`
-        <div class="detail-card">
+        <div class="detail-section">
          <p class="eyebrow">Edge</p>
          <h3>${h(edge.source)} → ${h(edge.target)}</h3>
          <p>${h(edge.fact || edge.description || "暂无补充描述")}</p>
@ -444,7 +452,14 @@ function renderGraph(payload) {
          </div>
        </div>
      `);
-      loadRelated(`${edge.source} ${edge.predicate} ${edge.target}`).catch(() => relatedSearch.innerHTML = empty("相关检索加载失败"));
+      loadRelated(`${edge.source} ${edge.predicate} ${edge.target}`).catch(() => {
+        detailPanel.insertAdjacentHTML("beforeend", `
+          <div class="detail-section">
+            <p class="eyebrow">Related</p>
+            <div class="empty-state">相关检索加载失败</div>
+          </div>
+        `);
+      });
    });
  });

@ -514,4 +529,4 @@ graphForm?.addEventListener("submit", (event) => {
 });

 loadGraphKinds().catch(() => {});
-fetchGraph().catch((error) => renderInspector(empty(`图谱加载失败: ${error}`)));
+fetchGraph().catch((error) => renderInspector(empty(`图谱加载失败: ${error}`)));
--- a/meeting_memory/web_demo/static/styles.css
+++ b/meeting_memory/web_demo/static/styles.css
@ -1,977 +0,0 @@
-:root {
-  --primary: #5d67f5;
-  --primary-2: #7f8bff;
-  --primary-soft: #edf1ff;
-  --accent: #53c2da;
-  --bg: #f5f7ff;
-  --bg-2: #fbfcff;
-  --panel: rgba(255, 255, 255, 0.9);
-  --panel-strong: rgba(255, 255, 255, 0.96);
-  --border: rgba(212, 221, 247, 0.95);
-  --text: #22264d;
-  --muted: #68709d;
-  --danger: #b3261e;
-  --success: #11693c;
-  --shadow: 0 12px 28px rgba(73, 81, 141, 0.08);
-  --shadow-sm: 0 6px 16px rgba(73, 81, 141, 0.06);
-  --radius-xl: 20px;
-  --radius-lg: 16px;
-  --radius-md: 12px;
-  --radius-sm: 10px;
-}
-
-* { box-sizing: border-box; }
-
-html, body {
-  margin: 0;
-  min-height: 100%;
-}
-
-body {
-  font-family: "Segoe UI", "PingFang SC", "Microsoft YaHei", sans-serif;
-  font-size: 13px;
-  color: var(--text);
-  background:
-    radial-gradient(circle at 10% 10%, rgba(126, 186, 255, 0.16), transparent 24%),
-    radial-gradient(circle at 88% 14%, rgba(132, 121, 255, 0.12), transparent 22%),
-    linear-gradient(135deg, #f8faff 0%, var(--bg) 55%, var(--bg-2) 100%);
-}
-
-a { color: inherit; text-decoration: none; }
-
-button, input, textarea { font: inherit; }
-
-.shell {
-  display: grid;
-  grid-template-columns: 220px minmax(0, 1fr);
-  gap: 14px;
-  min-height: 100vh;
-  padding: 14px;
-}
-
-.sidebar, .panel, .detail-modal::backdrop {
-  backdrop-filter: blur(12px);
-}
-
-.sidebar {
-  display: flex;
-  flex-direction: column;
-  gap: 10px;
-  padding: 14px;
-  border: 1px solid var(--border);
-  border-radius: 22px;
-  background: linear-gradient(180deg, rgba(236, 243, 255, 0.92), rgba(255, 255, 255, 0.8));
-  box-shadow: var(--shadow);
-}
-
-.brand {
-  display: flex;
-  gap: 10px;
-  align-items: center;
-}
-
-.brand-mark {
-  width: 40px;
-  height: 40px;
-  display: grid;
-  place-items: center;
-  border-radius: 14px;
-  color: #fff;
-  font-size: 17px;
-  font-weight: 800;
-  background: linear-gradient(135deg, var(--primary), var(--primary-2));
-}
-
-.brand-kicker, .eyebrow {
-  margin: 0 0 3px;
-  color: var(--primary);
-  font-size: 10px;
-  font-weight: 700;
-  letter-spacing: 0.08em;
-  text-transform: uppercase;
-}
-
-.brand h1, .panel h3, .dialog-head h3 {
-  margin: 0;
-}
-
-.brand h1 { font-size: 18px; }
-
-.nav {
-  display: grid;
-  gap: 6px;
-}
-
-.nav-link {
-  padding: 10px 12px;
-  border: 1px solid transparent;
-  border-radius: var(--radius-md);
-  color: var(--muted);
-  font-size: 13px;
-  font-weight: 700;
-  transition: 0.2s ease;
-}
-
-.nav-link:hover, .nav-link.active {
-  color: var(--primary);
-  border-color: rgba(109, 123, 255, 0.16);
-  background: rgba(255, 255, 255, 0.78);
-}
-
-.side-card, .panel {
-  border: 1px solid var(--border);
-  border-radius: var(--radius-xl);
-  background: var(--panel);
-  box-shadow: var(--shadow-sm);
-}
-
-.panel { padding: 14px; }
-
-.panel-head {
-  display: flex;
-  justify-content: space-between;
-  align-items: start;
-  gap: 10px;
-  margin-bottom: 10px;
-}
-
-.panel h3 { font-size: 17px; }
-
-.sidebar-shortcuts {
-  display: flex;
-  flex-wrap: wrap;
-  gap: 6px;
-  padding: 10px;
-  margin-top: auto;
-}
-
-.pill-link, .chip {
-  display: inline-flex;
-  align-items: center;
-  min-height: 24px;
-  padding: 0 9px;
-  border-radius: 999px;
-  font-size: 11px;
-  font-weight: 700;
-}
-
-.pill-link {
-  background: rgba(255, 255, 255, 0.9);
-  border: 1px solid var(--border);
-}
-
-.chip {
-  background: var(--primary-soft);
-  color: var(--primary);
-}
-
-.chip.status-done, .chip.status-completed { background: #edfdf4; color: var(--success); }
-.chip.status-pending, .chip.status-todo { background: #fff8e7; color: #b8860b; }
-.chip.status-in_progress, .chip.status-active { background: #e8f4fd; color: #4a90d9; }
-.chip.status-blocked { background: #fff4f2; color: var(--danger); }
-
-.main {
-  display: flex;
-  flex-direction: column;
-  gap: 12px;
-  min-height: 0;
-}
-
-.main-toolbar {
-  display: flex;
-  justify-content: space-between;
-  align-items: center;
-  gap: 16px;
-  padding: 16px 18px;
-  border: 1px solid var(--border);
-  border-radius: 22px;
-  background:
-    radial-gradient(circle at top right, rgba(134, 144, 255, 0.12), transparent 28%),
-    linear-gradient(180deg, rgba(255, 255, 255, 0.94), rgba(244, 248, 255, 0.96));
-  box-shadow: var(--shadow);
-}
-
-.main-toolbar h2 {
-  margin: 0;
-  font-size: 22px;
-}
-
-.main-toolbar-actions {
-  display: flex;
-  gap: 8px;
-}
-
-.btn, .icon-btn {
-  border: none;
-  cursor: pointer;
-  transition: 0.2s ease;
-}
-
-.btn {
-  display: inline-flex;
-  align-items: center;
-  justify-content: center;
-  min-height: 36px;
-  padding: 0 14px;
-  border-radius: 11px;
-  font-size: 12px;
-  font-weight: 700;
-  color: #fff;
-  background: linear-gradient(135deg, var(--primary), var(--primary-2));
-  box-shadow: 0 8px 18px rgba(93, 103, 245, 0.18);
-}
-
-.btn:hover, .icon-btn:hover { transform: translateY(-1px); }
-
-.btn:disabled {
-  opacity: 0.68;
-  cursor: not-allowed;
-  transform: none;
-}
-
-.btn.ghost {
-  color: var(--primary);
-  background: rgba(255, 255, 255, 0.94);
-  box-shadow: none;
-  border: 1px solid var(--border);
-}
-
-.stats-grid, .content-grid, .workspace-grid {
-  display: grid;
-  gap: 12px;
-}
-
-.stats-grid { grid-template-columns: repeat(4, minmax(0, 1fr)); }
-
-.highlight-card {
-  padding: 0;
-  border: 1px solid var(--border);
-  border-radius: var(--radius-lg);
-  background: var(--panel-strong);
-  box-shadow: var(--shadow-sm);
-  overflow: hidden;
-}
-
-.highlight-card .hc-bar {
-  height: 4px;
-  background: var(--card-accent);
-}
-
-.highlight-card .eyebrow {
-  padding: 12px 14px 0;
-}
-
-.highlight-card strong {
-  display: block;
-  margin: 4px 0 2px;
-  padding: 0 14px;
-  font-size: 26px;
-  color: var(--card-accent);
-}
-
-.highlight-card p:last-child {
-  padding: 0 14px 14px;
-  margin: 0;
-  color: var(--muted);
-}
-
-.dashboard-grid {
-  grid-template-columns: minmax(330px, 1.1fr) minmax(340px, 1fr) minmax(220px, 0.72fr);
-  align-items: start;
-}
-
-.search-box, .import-form, .import-fieldset {
-  display: grid;
-  gap: 8px;
-}
-
-.import-fieldset {
-  margin: 0;
-  padding: 0;
-  border: 0;
-  min-width: 0;
-}
-
-.import-fieldset:disabled { opacity: 0.6; }
-
-.search-box input, .graph-controls input, textarea, input[type="file"] {
-  width: 100%;
-  min-height: 38px;
-  padding: 9px 12px;
-  border: 1px solid var(--border);
-  border-radius: 11px;
-  background: rgba(255, 255, 255, 0.94);
-  color: var(--text);
-}
-
-textarea {
-  min-height: 138px;
-  resize: vertical;
-}
-
-.field-label {
-  font-size: 11px;
-  font-weight: 700;
-  color: var(--muted);
-}
-
-.check-row {
-  display: flex;
-  align-items: center;
-  gap: 8px;
-  font-size: 12px;
-  color: var(--muted);
-}
-
-.status-box {
-  margin-top: 10px;
-  padding: 10px 12px;
-  border-radius: 12px;
-  border: 1px solid var(--border);
-  background: rgba(255, 255, 255, 0.76);
-  font-size: 12px;
-  color: var(--muted);
-}
-
-.status-box[data-kind="error"] {
-  color: var(--danger);
-  background: #fff4f2;
-}
-
-.status-box[data-kind="success"] {
-  color: var(--success);
-  background: #edfdf4;
-}
-
-.progress-list, .search-results, .mini-stats, .card-list, .list-stack, .related-search {
-  display: grid;
-  gap: 8px;
-}
-
-.progress-item, .mini-stat, .card, .list-item, .result-card, .detail-card {
-  padding: 12px;
-  border: 1px solid var(--border);
-  border-radius: 14px;
-  background: rgba(255, 255, 255, 0.88);
-}
-
-.progress-item {
-  display: grid;
-  grid-template-columns: 24px 1fr;
-  gap: 8px;
-  align-items: start;
-}
-
-.progress-index {
-  width: 24px;
-  height: 24px;
-  display: grid;
-  place-items: center;
-  border-radius: 999px;
-  background: var(--primary-soft);
-  color: var(--primary);
-  font-size: 11px;
-  font-weight: 700;
-}
-
-.mini-stat {
-  display: flex;
-  align-items: center;
-  gap: 10px;
-  padding: 10px 12px;
-}
-
-.ms-icon {
-  width: 32px;
-  height: 32px;
-  display: grid;
-  place-items: center;
-  border-radius: 10px;
-  font-size: 15px;
-  background: color-mix(in srgb, var(--stat-color) 14%, transparent);
-  color: var(--stat-color);
-  flex-shrink: 0;
-}
-
-.ms-body strong {
-  display: block;
-  font-size: 16px;
-  line-height: 1.2;
-}
-
-.ms-body p {
-  margin: 0;
-  font-size: 11px;
-  color: var(--muted);
-}
-
-.mini-stat strong, .card h4, .list-item strong, .result-card strong {
-  display: block;
-  margin-bottom: 4px;
-}
-
-.card { cursor: pointer; }
-
-.card:hover, .result-card:hover, .list-item:hover {
-  border-color: rgba(120, 132, 255, 0.34);
-}
-
-.content-grid { grid-template-columns: repeat(2, minmax(0, 1fr)); }
-
-/* ── Meeting card ── */
-
-.meeting-card {
-  display: flex;
-  gap: 10px;
-  padding: 12px;
-  border: 1px solid var(--border);
-  border-radius: 14px;
-  background: rgba(255, 255, 255, 0.88);
-  cursor: pointer;
-  transition: 0.2s ease;
-}
-
-.meeting-card:hover {
-  border-color: rgba(120, 132, 255, 0.34);
-}
-
-.mc-date {
-  flex-shrink: 0;
-  width: 44px;
-  height: 44px;
-  display: grid;
-  place-items: center;
-  border-radius: 10px;
-  background: var(--primary-soft);
-  color: var(--primary);
-  font-size: 11px;
-  font-weight: 700;
-  text-align: center;
-  line-height: 1.2;
-}
-
-.mc-body h4 {
-  margin: 0 0 4px;
-  font-size: 13px;
-}
-
-.mc-body p {
-  margin: 0;
-  font-size: 12px;
-  color: var(--muted);
-  display: -webkit-box;
-  -webkit-line-clamp: 2;
-  -webkit-box-orient: vertical;
-  overflow: hidden;
-}
-
-/* ── List item with priority dot ── */
-
-.list-item {
-  display: flex;
-  gap: 10px;
-  padding: 12px;
-  border: 1px solid var(--border);
-  border-radius: 14px;
-  background: rgba(255, 255, 255, 0.88);
-}
-
-.li-priority {
-  flex-shrink: 0;
-  width: 4px;
-  border-radius: 2px;
-  background: var(--pri-color);
-}
-
-.li-body {
-  flex: 1;
-  min-width: 0;
-}
-
-.li-body strong {
-  display: block;
-  margin-bottom: 2px;
-}
-
-.li-body p {
-  margin: 0 0 6px;
-  font-size: 12px;
-  color: var(--muted);
-}
-
-/* ── Metric card ── */
-
-.metric-card {
-  padding: 12px;
-  border: 1px solid var(--border);
-  border-radius: 14px;
-  background: rgba(255, 255, 255, 0.88);
-}
-
-.mc-head {
-  display: flex;
-  justify-content: space-between;
-  align-items: center;
-  margin-bottom: 2px;
-}
-
-.mc-head strong {
-  display: block;
-}
-
-.mc-value {
-  font-size: 16px;
-  font-weight: 700;
-  color: var(--primary);
-}
-
-.metric-card p {
-  margin: 0 0 8px;
-  font-size: 12px;
-  color: var(--muted);
-}
-
-.mc-bar-track {
-  height: 4px;
-  border-radius: 2px;
-  background: rgba(212, 221, 247, 0.5);
-  margin-bottom: 8px;
-  overflow: hidden;
-}
-
-.mc-bar-fill {
-  height: 100%;
-  border-radius: 2px;
-  background: linear-gradient(90deg, var(--primary), var(--primary-2));
-  transition: width 0.4s ease;
-}
-
-/* ── Series card ── */
-
-.series-card {
-  display: flex;
-  gap: 10px;
-  align-items: center;
-  padding: 12px;
-  border: 1px solid var(--border);
-  border-radius: 14px;
-  background: rgba(255, 255, 255, 0.88);
-}
-
-.sc-count {
-  flex-shrink: 0;
-  width: 36px;
-  height: 36px;
-  display: grid;
-  place-items: center;
-  border-radius: 10px;
-  font-size: 14px;
-  font-weight: 700;
-  background: var(--primary-soft);
-  color: var(--primary);
-}
-
-.sc-body strong {
-  display: block;
-  margin-bottom: 2px;
-}
-
-.sc-body p {
-  margin: 0;
-  font-size: 12px;
-  color: var(--muted);
-}
-
-/* ── Unified Import / Search panel ── */
-
-.unified-panel {
-  display: flex;
-  flex-direction: column;
-}
-
-.unified-tabs {
-  display: flex;
-  gap: 4px;
-  margin-bottom: 12px;
-  padding: 3px;
-  border-radius: 11px;
-  background: rgba(212, 221, 247, 0.3);
-}
-
-.unified-tab {
-  flex: 1;
-  padding: 7px 12px;
-  border: none;
-  border-radius: 8px;
-  font-size: 12px;
-  font-weight: 700;
-  cursor: pointer;
-  background: transparent;
-  color: var(--muted);
-  transition: 0.2s ease;
-}
-
-.unified-tab.active {
-  background: #fff;
-  color: var(--primary);
-  box-shadow: 0 2px 6px rgba(73, 81, 141, 0.1);
-}
-
-.unified-tab:hover:not(.active) {
-  color: var(--text);
-}
-
-.unified-pane.hidden {
-  display: none;
-}
-
-/* ── Result card with kind badge ── */
-
-.result-card {
-  position: relative;
-}
-
-.rc-kind {
-  display: inline-block;
-  padding: 1px 7px;
-  border-radius: 4px;
-  font-size: 10px;
-  font-weight: 700;
-  text-transform: uppercase;
-  background: var(--primary-soft);
-  color: var(--primary);
-  margin-bottom: 4px;
-}
-
-.empty-state {
-  padding: 16px 14px;
-  text-align: center;
-  border: 1px dashed var(--border);
-  border-radius: 14px;
-  color: var(--muted);
-}
-
-.detail-modal {
-  width: min(820px, calc(100vw - 24px));
-  border: 1px solid var(--border);
-  border-radius: 20px;
-  padding: 0;
-  background: rgba(255, 255, 255, 0.97);
-  box-shadow: var(--shadow);
-}
-
-.detail-modal::backdrop {
-  background: rgba(37, 44, 78, 0.28);
-}
-
-.dialog-head {
-  display: flex;
-  justify-content: space-between;
-  gap: 10px;
-  padding: 16px 16px 6px;
-}
-
-.dialog-meta { padding: 0 16px 6px; color: var(--muted); }
-
-.dialog-content {
-  margin: 0;
-  padding: 0 16px 16px;
-  white-space: pre-wrap;
-  font-family: "Consolas", "Courier New", monospace;
-  max-height: 60vh;
-  overflow: auto;
-  color: var(--muted);
-}
-
-.icon-btn {
-  width: 30px;
-  height: 30px;
-  border-radius: 10px;
-  background: rgba(242, 245, 255, 0.92);
-  color: var(--primary);
-  font-size: 20px;
-}
-
-/* ── Graph page ── */
-
-.graph-shell {
-  height: 100vh;
-  overflow: hidden;
-  gap: 10px;
-  padding: 10px;
-}
-
-.graph-shell .sidebar {
-  flex-shrink: 0;
-}
-
-.graph-shell .main {
-  gap: 8px;
-}
-
-.graph-shell .graph-layout {
-  gap: 8px;
-}
-
-.graph-shell .graph-layout .panel {
-  padding: 10px;
-}
-
-.graph-layout {
-  display: grid;
-  grid-template-columns: 1fr 300px;
-  gap: 12px;
-  flex: 1;
-  min-height: 0;
-}
-
-.graph-stage-panel {
-  display: flex;
-  flex-direction: column;
-  padding: 0;
-  overflow: hidden;
-}
-
-.graph-stage {
-  flex: 1;
-  min-height: 0;
-  position: relative;
-  background:
-    linear-gradient(180deg, rgba(251, 253, 255, 0.96), rgba(241, 246, 255, 0.94)),
-    radial-gradient(circle at center, rgba(133, 196, 255, 0.08), transparent 36%);
-}
-
-#graphSvg {
-  width: 100%;
-  height: 100%;
-  display: block;
-}
-
-.detail-panel {
-  display: flex;
-  flex-direction: column;
-  gap: 8px;
-  overflow: hidden;
-}
-
-.detail-panel .detail-card,
-.detail-panel .related-search {
-  overflow-y: auto;
-}
-
-.detail-card {
-  flex-shrink: 0;
-  word-break: break-all;
-}
-
-.detail-card strong {
-  word-break: break-word;
-}
-
-.related-search {
-  flex-shrink: 0;
-}
-
-.related-search .result-card {
-  word-break: break-all;
-}
-
-/* ── Graph toolbar ── */
-
-.graph-toolbar { padding: 8px 12px; }
-
-.graph-controls {
-  display: flex;
-  gap: 6px;
-  align-items: center;
-}
-
-.graph-controls .search-input {
-  flex: 1;
-  min-height: 30px;
-  padding: 6px 10px;
-}
-
-.graph-controls label.field-label {
-  display: flex;
-  align-items: center;
-  gap: 2px;
-  white-space: nowrap;
-  font-size: 10px;
-}
-
-.graph-controls label.field-label input {
-  width: 44px;
-  min-height: 26px;
-  padding: 4px 6px;
-}
-
-.graph-controls .btn {
-  min-height: 30px;
-  padding: 0 12px;
-  font-size: 11px;
-}
-
-.graph-toolbar-row {
-  display: flex;
-  justify-content: space-between;
-  align-items: center;
-  flex-wrap: wrap;
-  gap: 6px;
-  margin-top: 6px;
-}
-
-.graph-actions {
-  display: flex;
-  align-items: center;
-  gap: 8px;
-  font-size: 11px;
-  color: var(--muted);
-}
-
-.graph-type-filter {
-  display: flex;
-  flex-wrap: wrap;
-  align-items: center;
-  gap: 4px 10px;
-}
-
-.graph-type-filter label {
-  display: inline-flex;
-  align-items: center;
-  gap: 3px;
-  font-size: 11px;
-  color: var(--muted);
-  cursor: pointer;
-  user-select: none;
-}
-
-.graph-type-filter label input {
-  margin: 0;
-  accent-color: var(--primary);
-}
-
-.graph-meta { font-size: 11px; color: var(--muted); }
-
-/* ── Graph nodes & edges ── */
-
-.graph-node { cursor: pointer; }
-
-.graph-node circle {
-  stroke: rgba(255, 255, 255, 0.85);
-  stroke-width: 2;
-  transition: filter 0.15s;
-}
-
-.graph-node--meeting circle { fill: #4a90d9; }
-.graph-node--episode circle { fill: #34c759; }
-.graph-node--entity  circle { fill: var(--accent); }
-.graph-node--fact    circle { fill: #ff9500; }
-
-.graph-node:hover circle { filter: brightness(1.2); }
-
-.graph-node text {
-  font-size: 11px;
-  fill: var(--text);
-  pointer-events: none;
-  user-select: none;
-}
-
-.graph-edge {
-  stroke: rgba(120, 136, 194, 0.42);
-  stroke-width: 1.6;
-  cursor: pointer;
-  transition: stroke 0.15s, stroke-width 0.15s;
-}
-
-.edge-wrap:hover .graph-edge {
-  stroke: rgba(120, 136, 194, 0.7);
-  stroke-width: 2;
-}
-
-.graph-edge.active {
-  stroke: var(--primary);
-  stroke-width: 2.4;
-}
-
-.edge-wrap text {
-  pointer-events: none;
-  user-select: none;
-}
-
-/* ── Legend ── */
-
-.legend { font-size: 11px; color: var(--muted); }
-
-.legend-dot {
-  display: inline-block;
-  width: 9px;
-  height: 9px;
-  border-radius: 50%;
-  margin-right: 6px;
-}
-
-.legend-dot.meeting { background: #4a90d9; }
-.legend-dot.episode { background: #34c759; }
-.legend-dot.entity  { background: var(--accent); }
-.legend-dot.fact    { background: #ff9500; }
-
-.graph-shell .sidebar {
-  gap: 8px;
-  padding: 10px;
-}
-
-.graph-shell .sidebar .legend {
-  display: flex;
-  flex-direction: column;
-  gap: 3px;
-  font-size: 11px;
-  padding: 0 4px;
-}
-
-.graph-shell .sidebar .legend .eyebrow {
-  margin-bottom: 4px;
-}
-
-/* ── Graph controls overlay ── */
-
-.zoom-reset-btn, .pause-btn {
-  font-size: 11px;
-  min-height: 28px;
-  padding: 0 10px;
-}
-
-.zoom-hint {
-  font-size: 11px;
-  color: var(--muted);
-  padding: 4px 0;
-}
-
-/* ── Responsive ── */
-
-@media (max-width: 1240px) {
-  .shell, .graph-shell, .dashboard-grid, .content-grid, .graph-layout, .stats-grid {
-    grid-template-columns: 1fr;
-  }
-
-  .sidebar { order: 2; }
-
-  .graph-shell { height: auto; overflow: auto; }
-}
-
-@media (max-width: 720px) {
-  .shell, .graph-shell {
-    padding: 10px;
-    gap: 10px;
-  }
-
-  .sidebar, .panel { border-radius: 18px; }
-
-  .search-box { grid-template-columns: 1fr; }
-
-  .graph-stage { min-height: 250px; }
-
-  .graph-controls { flex-wrap: wrap; }
-
-  .graph-controls .search-input { min-width: 100%; }
-}
--- a/meeting_memory/web_demo/static_v2/graph.html
+++ b/meeting_memory/web_demo/static_v2/graph.html
@ -3,11 +3,12 @@
 <head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
-  <title>Neo4j Graph Explorer</title>
-  <link rel="stylesheet" href="/styles.css">
+  <title>图谱浏览 — Meeting Memory</title>
+  <link rel="stylesheet" href="/static_v2/styles.css">
 </head>
 <body>
  <div class="shell graph-shell">
+    <!-- ====== Sidebar ====== -->
    <aside class="sidebar">
      <div class="brand">
        <div class="brand-mark">G</div>
@ -18,25 +19,33 @@
      </div>

      <nav class="nav">
-        <a class="nav-link" href="/index.html">总览面板</a>
-        <a class="nav-link active" href="/graph.html">图谱浏览</a>
+        <a class="nav-link" href="/">总览面板</a>
+        <a class="nav-link active" href="/graph">图谱浏览</a>
      </nav>

      <div class="legend">
-        <p class="eyebrow" style="margin-bottom:6px">图例</p>
+        <p class="eyebrow">图例</p>
        <span><i class="legend-dot meeting"></i>会议</span>
        <span><i class="legend-dot episode"></i>片段</span>
        <span><i class="legend-dot entity"></i>实体</span>
-        <span><i class="legend-dot fact"></i>事实</span>
+        <span><i class="legend-dot edge"></i>关系</span>
      </div>
    </aside>

+    <!-- ====== Main ====== -->
    <main class="main">
-      <div class="graph-toolbar panel">
+      <!-- Graph Toolbar -->
+      <div class="panel graph-toolbar">
        <form class="graph-controls" id="graphSearchForm">
          <input id="graphQueryInput" type="text" placeholder="搜索节点名称或关键词…" class="search-input">
-          <label class="field-label">节点 <input id="graphNodeLimit" type="number" min="10" max="200" step="10" value="60"></label>
-          <label class="field-label">关系 <input id="graphEdgeLimit" type="number" min="10" max="300" step="10" value="120"></label>
+          <label class="field-label">
+            节点
+            <input id="graphNodeLimit" type="number" min="10" max="200" step="10" value="60">
+          </label>
+          <label class="field-label">
+            关系
+            <input id="graphEdgeLimit" type="number" min="10" max="300" step="10" value="120">
+          </label>
          <button class="btn" type="submit">更新</button>
        </form>
        <div class="graph-toolbar-row">
@ -47,18 +56,18 @@
        </div>
      </div>

+      <!-- Graph Layout -->
      <div class="graph-layout">
+        <!-- Graph Stage -->
        <div class="panel graph-stage-panel">
          <div class="graph-stage" id="graphStage">
            <svg id="graphSvg" viewBox="0 0 960 640" preserveAspectRatio="xMidYMid meet"></svg>
          </div>
        </div>

-        <div class="panel detail-panel">
-          <div class="detail-card" id="graphDetail">
-            <div class="empty-state">点击节点或关系查看详情</div>
-          </div>
-          <div class="related-search" id="relatedSearch"></div>
+        <!-- Detail Panel -->
+        <div class="panel detail-panel" id="detailPanel">
+          <div class="empty-state">点击节点或关系查看详情</div>
        </div>
      </div>
    </main>
@ -66,4 +75,4 @@

  <script src="/graph.js"></script>
 </body>
-</html>
+</html>
--- a/meeting_memory/web_demo/static_v2/index.html
+++ b/meeting_memory/web_demo/static_v2/index.html
@ -3,11 +3,12 @@
 <head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
-  <title>Meeting Memory Console</title>
-  <link rel="stylesheet" href="/styles.css">
+  <title>会议记忆中枢 — Meeting Memory</title>
+  <link rel="stylesheet" href="/static_v2/styles.css">
 </head>
 <body>
  <div class="shell">
+    <!-- ====== Sidebar ====== -->
    <aside class="sidebar">
      <div class="brand">
        <div class="brand-mark">M</div>
@ -18,18 +19,20 @@
      </div>

      <nav class="nav">
-        <a class="nav-link active" href="/index.html">总览面板</a>
-        <a class="nav-link" href="/graph.html">图谱浏览</a>
+        <a class="nav-link active" href="/">总览面板</a>
+        <a class="nav-link" href="/graph">图谱浏览</a>
      </nav>

-      <div class="side-card sidebar-shortcuts">
+      <div class="sidebar-shortcuts">
        <a class="pill-link" href="#import-panel">导入会议</a>
        <a class="pill-link" href="#search-panel">知识检索</a>
-        <a class="pill-link" href="/graph.html">图谱页</a>
+        <a class="pill-link" href="/graph">图谱页</a>
      </div>
    </aside>

+    <!-- ====== Main ====== -->
    <main class="main">
+      <!-- Toolbar -->
      <div class="main-toolbar">
        <div>
          <p class="eyebrow">Dashboard</p>
@ -40,8 +43,10 @@
        </div>
      </div>

+      <!-- Highlight Cards -->
      <section class="stats-grid" id="highlightGrid"></section>

+      <!-- Unified Panel: Import / Search / Stats -->
      <section class="panel unified-panel">
        <div class="unified-tabs">
          <button class="unified-tab active" data-tab="import">导入</button>
@ -49,6 +54,7 @@
          <button class="unified-tab" data-tab="stats">统计</button>
        </div>

+        <!-- Import Pane -->
        <div class="unified-pane" id="unifiedImport">
          <form class="import-form" id="importForm">
            <fieldset id="importFieldset" class="import-fieldset">
@ -73,6 +79,7 @@
          </div>
        </div>

+        <!-- Search Pane -->
        <div class="unified-pane hidden" id="unifiedSearch">
          <form class="search-box" id="searchForm">
            <input id="searchInput" type="text" placeholder="搜索会议主题、负责人、指标、关系事实...">
@ -83,11 +90,13 @@
          </div>
        </div>

+        <!-- Stats Pane -->
        <div class="unified-pane hidden" id="unifiedStats">
          <div class="mini-stats" id="statsList"></div>
        </div>
      </section>

+      <!-- Content Grid -->
      <div class="content-grid">
        <section class="panel" id="meeting-list">
          <div class="panel-head">
@ -124,6 +133,7 @@
    </main>
  </div>

+  <!-- Meeting Detail Dialog -->
  <dialog class="detail-modal" id="meetingDialog">
    <div class="dialog-head">
      <div>
@ -138,4 +148,4 @@

  <script src="/app.js"></script>
 </body>
-</html>
+</html>
--- a/meeting_memory/web_demo/static_v2/styles.css
+++ b/meeting_memory/web_demo/static_v2/styles.css
--- a/scripts/migrate_v1_to_v2.py
+++ b/scripts/migrate_v1_to_v2.py
@ -0,0 +1,186 @@
+"""
+Migration script: v1 (flat Entity + Fact nodes) → v2 (composite labels + direct edges)
+
+Steps:
+1. Add composite Neo4j labels to existing Entity nodes based on entity_type
+2. Convert Fact nodes to RELATES_TO edges between Entity nodes
+3. Log post-migration counts (stats, entity types, labels, edges) for manual verification
+"""
+import logging
+import sys
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+
+from meeting_memory.graph_store import graph_store, _canonical_entity_type, _EntityType
+
+logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s')
+logger = logging.getLogger('migrate')
+
+
+def get_type_label_map() -> dict[str, str]:
+    """Return the canonical entity_type -> Neo4j label lookup used by the migration.
+
+    Only the six entity types listed here receive a composite label; any other
+    entity_type is absent from the mapping.
+    """
+    # (enum member, label) pairs; the key of the result is the member's value.
+    labelled_members = (
+        (_EntityType.DEPARTMENT, 'Department'),
+        (_EntityType.PROJECT, 'Project'),
+        (_EntityType.METRIC, 'Metric'),
+        (_EntityType.PERSON, 'Person'),
+        (_EntityType.SYSTEM, 'System'),
+        (_EntityType.DOCUMENT, 'Document'),
+    )
+    return {member.value: label for member, label in labelled_members}
+
+
+def _relabel_entities(raw_type: str, canonical_type: str, label: str) -> int:
+    """Label every Entity whose entity_type equals *raw_type*, ignoring case.
+
+    Matching nodes get ``entity_type`` rewritten to *canonical_type* and the
+    extra Neo4j label *label*.
+
+    Args:
+        raw_type: entity_type spelling to look for (canonical value or alias).
+        canonical_type: value written back to ``e.entity_type``.
+        label: Neo4j label to add. It is interpolated into the query text
+            because Cypher cannot parameterize labels, so it must only ever
+            come from the fixed mapping in this script, never from user data.
+
+    Returns:
+        Number of Entity nodes that matched (0 when nothing was changed).
+    """
+    rows = graph_store.run_query(
+        'MATCH (e:Entity) WHERE toLower(e.entity_type) = toLower($etype) '
+        'RETURN count(e) AS cnt',
+        etype=raw_type,
+    )
+    count = rows[0]['cnt'] if rows else 0
+    if count == 0:
+        return 0
+    graph_store.run_query(
+        'MATCH (e:Entity) WHERE toLower(e.entity_type) = toLower($etype) '
+        f'SET e.entity_type = $canonical, e:{label}',
+        etype=raw_type,
+        canonical=canonical_type,
+    )
+    return count
+
+
+def step1_add_composite_labels():
+    """Add composite labels (e.g., :Department) to existing Entity nodes.
+
+    Two passes are made:
+
+    1. Entities whose entity_type is already a canonical value get the
+       matching label.
+    2. Entities that still carry a legacy alias (e.g. '组织', 'kpi') have their
+       entity_type rewritten to the canonical value and get the label.
+
+    Matching is case-insensitive so that spellings such as 'organization' or
+    'KPI' are migrated as well. The step is safe to re-run: setting a label or
+    property that is already present is a no-op in Neo4j.
+    """
+    type_label_map = get_type_label_map()
+    total = 0
+    for canonical_type, label in type_label_map.items():
+        count = _relabel_entities(canonical_type, canonical_type, label)
+        if count == 0:
+            logger.info('  No Entity with entity_type=%s to migrate', canonical_type)
+            continue
+        logger.info('  Added :%s label to %d Entity nodes', label, count)
+        total += count
+
+    # Legacy spellings -> canonical entity_type. Keys are compared
+    # case-insensitively, so one spelling per alias is enough.
+    legacy_aliases = {
+        '组织': _EntityType.DEPARTMENT.value,
+        'organization': _EntityType.DEPARTMENT.value,
+        '部门': _EntityType.DEPARTMENT.value,
+        '指标': _EntityType.METRIC.value,
+        'kpi': _EntityType.METRIC.value,
+        '项目': _EntityType.PROJECT.value,
+    }
+    for alias, canonical_type in legacy_aliases.items():
+        label = type_label_map[canonical_type]
+        count = _relabel_entities(alias, canonical_type, label)
+        if count == 0:
+            continue
+        logger.info(
+            '  Redirected %d entities from entity_type=%s -> %s', count, alias, label
+        )
+        total += count
+
+    logger.info('Step 1 done: %d entities got composite labels', total)
+
+
+def step2_convert_facts_to_edges():
+    """Convert v1 Fact nodes to RELATES_TO edges, then remove the converted Fact nodes.
+
+    Every ``(:Entity)-[:FACT_SOURCE]->(:Fact)-[:FACT_TARGET]->(:Entity)`` path
+    is turned into a direct ``RELATES_TO`` edge carrying the Fact's properties.
+    Rows whose source or target entity has no name cannot be converted and are
+    skipped.
+
+    Only Fact nodes that were convertible are deleted afterwards. Fact nodes
+    that were skipped, or that lack a FACT_SOURCE/FACT_TARGET link, are left in
+    place and reported with a warning instead of being silently dropped.
+    """
+    facts = graph_store.run_query('''
+        MATCH (s:Entity)-[:FACT_SOURCE]->(f:Fact)-[:FACT_TARGET]->(t:Entity)
+        RETURN s.name AS source, t.name AS target,
+               f.predicate AS relation_type,
+               f.fact AS fact,
+               f.qualifiers AS qualifiers,
+               f.evidence AS evidence,
+               f.confidence AS confidence,
+               f.valid_at AS valid_at,
+               f.invalid_at AS invalid_at,
+               f.meeting_id AS meeting_id,
+               f.meeting_date AS meeting_date,
+               f.fact_embedding AS fact_embedding
+    ''') or []
+    logger.info('Found %d Fact nodes to convert', len(facts))
+
+    converted = 0
+    skipped = 0
+    for f in facts:
+        source = f.get('source', '')
+        target = f.get('target', '')
+        # Facts without a predicate fall back to the generic relation name.
+        rtype = f.get('relation_type', '') or '关联'
+        if not source or not target:
+            skipped += 1
+            continue
+        fact_embedding = f.get('fact_embedding') or []
+        # NOTE(review): endpoints are matched by name, so this assumes Entity
+        # names are unique; and MERGE keys the edge on (source, target, name),
+        # so several facts with the same predicate between the same pair end
+        # up on one edge (last write wins). Confirm both are intended for v2.
+        graph_store.run_query('''
+            MATCH (s:Entity {name: $source})
+            MATCH (t:Entity {name: $target})
+            MERGE (s)-[r:RELATES_TO {name: $rtype}]->(t)
+            SET r.fact = $fact,
+                r.evidence = $evidence,
+                r.qualifiers = $qualifiers,
+                r.confidence = $confidence,
+                r.valid_at = $valid_at,
+                r.invalid_at = $invalid_at,
+                r.meeting_id = $meeting_id,
+                r.meeting_date = $meeting_date,
+                r.updated_at = datetime()
+        ''',
+            source=source,
+            target=target,
+            rtype=rtype,
+            fact=f.get('fact', ''),
+            evidence=f.get('evidence', ''),
+            qualifiers=f.get('qualifiers', []),
+            confidence=f.get('confidence', 0.0),
+            valid_at=f.get('valid_at', ''),
+            invalid_at=f.get('invalid_at', ''),
+            meeting_id=f.get('meeting_id', ''),
+            meeting_date=f.get('meeting_date', ''),
+        )
+        # Written separately so that a Fact without an embedding does not
+        # clear an embedding already stored on a merged edge.
+        if fact_embedding:
+            graph_store.run_query('''
+                MATCH (s:Entity {name: $source})-[r:RELATES_TO {name: $rtype}]->(t:Entity {name: $target})
+                SET r.fact_embedding = $embedding
+            ''', source=source, target=target, rtype=rtype, embedding=fact_embedding)
+        converted += 1
+
+    # Remove only the Fact nodes that the loop above was able to convert: the
+    # WHERE clause mirrors the "empty source/target name" skip condition.
+    # DETACH DELETE drops the node together with its FACT_SOURCE/FACT_TARGET
+    # relationships.
+    graph_store.run_query('''
+        MATCH (s:Entity)-[:FACT_SOURCE]->(f:Fact)-[:FACT_TARGET]->(t:Entity)
+        WHERE coalesce(s.name, '') <> '' AND coalesce(t.name, '') <> ''
+        WITH DISTINCT f
+        DETACH DELETE f
+    ''')
+
+    leftover_rows = graph_store.run_query('MATCH (f:Fact) RETURN count(f) AS cnt')
+    leftover = leftover_rows[0]['cnt'] if leftover_rows else 0
+    if skipped or leftover:
+        logger.warning(
+            'Step 2: skipped %d facts without source/target name; '
+            '%d Fact nodes were left in place for manual review',
+            skipped, leftover,
+        )
+    logger.info('Step 2 done: converted %d facts to edges, removed Fact nodes', converted)
+
+
+def verify():
+    """Log post-migration figures so the result can be checked by hand.
+
+    Reports the store's overall stats, the per-entity-type and per-kind
+    counts, the number of nodes carrying each composite label (labels with no
+    nodes are omitted), and the total number of RELATES_TO edges. Nothing is
+    asserted; the function only logs.
+    """
+    logger.info('Final stats: %s', graph_store.get_stats())
+
+    type_counts = [
+        (row['entity_type'], row['count']) for row in graph_store.get_entity_types()
+    ]
+    logger.info('Entity types: %s', type_counts)
+
+    kind_counts = [
+        (row['kind'], row['count']) for row in graph_store.get_graph_kinds()
+    ]
+    logger.info('Graph kinds: %s', kind_counts)
+
+    # Per-label node counts; labels come from this fixed tuple, so building
+    # the query text with an f-string is safe.
+    composite_labels = ('Department', 'Project', 'Metric', 'Person', 'System', 'Document')
+    for label in composite_labels:
+        result = graph_store.run_query(f'MATCH (n:{label}) RETURN count(n) AS cnt')
+        labelled = result[0]['cnt'] if result else 0
+        if not labelled:
+            continue
+        logger.info('  :%s nodes: %d', label, labelled)
+
+    edge_rows = graph_store.run_query('MATCH ()-[r:RELATES_TO]->() RETURN count(r) AS cnt')
+    edge_total = edge_rows[0]['cnt'] if edge_rows else 0
+    logger.info('  RELATES_TO edges: %d', edge_total)
+
+
+if __name__ == '__main__':
+    # Script entry point: run the migration stages in order, or exit with
+    # status 1 when the graph store reports that it is not enabled.
+    if graph_store.enabled:
+        logger.info('Starting v1→v2 migration...')
+        for stage in (step1_add_composite_labels, step2_convert_facts_to_edges, verify):
+            stage()
+        logger.info('Migration complete')
+    else:
+        logger.error('Neo4j is not available')
+        sys.exit(1)
Author	SHA1	Message	Date
Bifang	b5649cc218	重构Neo4j保存数据结构	2026-06-15 13:13:20 +08:00
Bifang	f857a90977	web更新	2026-06-15 09:17:21 +08:00
Bifang	b7d4cc8782	测试版本1	2026-06-12 10:57:32 +08:00