feat(思维导图助手):思维导图助手过程1
parent
75835051b2
commit
59e2554238
|
|
@ -83,7 +83,8 @@ def _update_task_progress(task_id: Optional[str], progress: int, stage: str):
|
|||
return
|
||||
state = _get_task_progress(task_id)
|
||||
if state is not None:
|
||||
state["progress"] = min(progress, 100)
|
||||
current = int(state.get("progress", 0) or 0)
|
||||
state["progress"] = min(max(progress, current), 100)
|
||||
state["stage"] = stage
|
||||
_store_task_progress(task_id, state)
|
||||
|
||||
|
|
@ -162,7 +163,11 @@ class _StderrProgressCapture:
|
|||
|
||||
# tqdm 进度条模式:名称: 百分比|...| 当前/总数
|
||||
_PATTERNS = [
|
||||
(re.compile(r'Two Step Extraction:\s*(\d+)%.*?(\d+)/(\d+)'), 'extract'),
|
||||
(re.compile(r'Layout Preparation:\s*(\d+)%.*?(\d+)/(\d+)'), 'layout_prepare'),
|
||||
(re.compile(r'Layout Output Parsing:\s*(\d+)%.*?(\d+)/(\d+)'), 'layout_parse'),
|
||||
(re.compile(r'Extract Preparation:\s*(\d+)%.*?(\d+)/(\d+)'), 'extract_prepare'),
|
||||
(re.compile(r'Post Processing:\s*(\d+)%.*?(\d+)/(\d+)'), 'post_process'),
|
||||
(re.compile(r'Two Step Extraction:\s*(\d+)%.*?(\d+)/(\d+)'), 'vlm_predict'),
|
||||
(re.compile(r'MFD Predict:\s*(\d+)%.*?(\d+)/(\d+)'), 'mfd'),
|
||||
(re.compile(r'MFR Predict:\s*(\d+)%.*?(\d+)/(\d+)'), 'mfr'),
|
||||
(re.compile(r'OCR-det:\s*(\d+)%.*?(\d+)/(\d+)'), 'ocr_det'),
|
||||
|
|
@ -170,22 +175,35 @@ class _StderrProgressCapture:
|
|||
(re.compile(r'Loading safetensors.*?:\s*(\d+)%'), 'load_model'),
|
||||
(re.compile(r'Capturing CUDA graphs.*?:\s*(\d+)%'), 'cuda_graph'),
|
||||
]
|
||||
_GENERIC_PREDICT_PATTERN = re.compile(r'^Predict:\s*(\d+)%.*?(\d+)/(\d+)')
|
||||
|
||||
# 各阶段的进度映射范围 [start%, end%]
|
||||
_RANGES = {
|
||||
'load_model': (12, 18),
|
||||
'cuda_graph': (33, 37),
|
||||
'extract': (42, 65),
|
||||
'mfd': (75, 80),
|
||||
'mfr': (80, 87),
|
||||
'ocr_det': (87, 92),
|
||||
'ocr_rec': (92, 96),
|
||||
'layout_prepare': (42, 45),
|
||||
'layout_predict': (45, 68),
|
||||
'layout_parse': (68, 70),
|
||||
'extract_prepare': (70, 72),
|
||||
'extract_predict': (72, 88),
|
||||
'vlm_predict': (45, 88),
|
||||
'post_process': (88, 90),
|
||||
'mfd': (90, 92),
|
||||
'mfr': (92, 94),
|
||||
'ocr_det': (94, 96),
|
||||
'ocr_rec': (96, 97),
|
||||
}
|
||||
|
||||
_STAGE_LABELS = {
|
||||
'load_model': '加载模型权重',
|
||||
'cuda_graph': '捕获CUDA计算图',
|
||||
'extract': 'VLM文档分析',
|
||||
'layout_prepare': '准备版面分析',
|
||||
'layout_predict': '版面分析',
|
||||
'layout_parse': '解析版面结果',
|
||||
'extract_prepare': '准备内容抽取',
|
||||
'extract_predict': '内容抽取',
|
||||
'vlm_predict': 'VLM文档分析',
|
||||
'post_process': '后处理',
|
||||
'mfd': '数学公式检测',
|
||||
'mfr': '数学公式识别',
|
||||
'ocr_det': '文字区域检测',
|
||||
|
|
@ -195,55 +213,83 @@ class _StderrProgressCapture:
|
|||
def __init__(self, task_id: str):
|
||||
self.task_id = task_id
|
||||
self._active = False
|
||||
self._thread: Optional[threading.Thread] = None
|
||||
self._orig_stderr = None
|
||||
self._buf = ""
|
||||
self._last_anchor = ""
|
||||
|
||||
def start(self):
    """Begin capturing by installing this object as ``sys.stderr``.

    The current ``sys.stderr`` is remembered so that output can still be
    forwarded to it and so that it can be restored later.

    No reader thread is started: reading from ``sys.stderr`` cannot observe
    what other code writes to it (the stream is opened for writing), so all
    parsing happens in ``write()``. ``self._thread`` is left untouched.

    Calling ``start()`` while this object is already installed only
    re-enables parsing; it does not record the object as its own original
    stream, which would make forwarding recurse.
    """
    if sys.stderr is self:
        self._active = True
        return
    self._orig_stderr = sys.stderr
    self._active = True
    sys.stderr = self
|
||||
|
||||
def stop(self):
    """Stop capturing and put the original ``sys.stderr`` back.

    Steps, in order: mark the capture inactive, wait up to two seconds for
    a recorded reader thread that is still alive, parse whatever text is
    left in the line buffer, then restore ``sys.stderr``.

    The restore runs in a ``finally`` block so that an exception raised
    while parsing the trailing text cannot leave this object installed as
    ``sys.stderr``. The exception itself still propagates to the caller.
    """
    self._active = False

    worker = self._thread
    if worker and worker.is_alive():
        worker.join(timeout=2)
    self._thread = None

    try:
        # A final progress line may not have been terminated by \r or \n.
        tail = self._buf.strip()
        if tail:
            self._parse_line(tail)
    finally:
        self._buf = ""
        # Only undo the redirection if this object is still the one installed.
        if self._orig_stderr is not None and sys.stderr is self:
            sys.stderr = self._orig_stderr
|
||||
|
||||
def _reader_loop(self):
|
||||
buf = ""
|
||||
orig = self._orig_stderr
|
||||
while self._active:
|
||||
try:
|
||||
ch = orig.read(1)
|
||||
if not ch:
|
||||
break
|
||||
buf += ch
|
||||
# tqdm 用 \r 更新同一行,\n 表示新行
|
||||
if ch == '\r' or ch == '\n':
|
||||
if buf.strip():
|
||||
self._parse_line(buf.strip())
|
||||
buf = ""
|
||||
except Exception:
|
||||
break
|
||||
def write(self, text):
    """Forward ``text`` to the original stream and scan it for progress lines.

    The text is always passed through to the recorded original stream when
    there is one. While the capture is active, characters are appended to
    the line buffer; each time a ``\\r`` or ``\\n`` arrives, the buffered
    line is stripped, handed to ``_parse_line`` if non-blank, and the buffer
    is cleared. Unterminated text stays buffered for the next call.

    Returns:
        ``len(text)``.
    """
    passthrough = self._orig_stderr
    if passthrough is not None:
        passthrough.write(text)

    size = len(text)
    if self._active:
        for char in text:
            self._buf += char
            # tqdm redraws the same line with \r; \n starts a new line.
            if char in ('\r', '\n'):
                pending = self._buf.strip()
                if pending:
                    self._parse_line(pending)
                self._buf = ""
    return size
|
||||
|
||||
def flush(self):
    """Flush the recorded original stream; do nothing if there is none."""
    target = self._orig_stderr
    if target is None:
        return
    target.flush()
|
||||
|
||||
def isatty(self):
    """Report whether the recorded original stream is a terminal.

    Returns:
        ``False`` when no (truthy) original stream is recorded, otherwise
        the stream's own ``isatty()`` answer coerced to ``bool``.
    """
    stream = self._orig_stderr
    if not stream:
        return False
    return bool(stream.isatty())
|
||||
|
||||
def fileno(self):
    """Return the file descriptor of the recorded original stream.

    Raises:
        OSError: If no original stream has been recorded.
    """
    stream = self._orig_stderr
    if stream is None:
        raise OSError("stderr is not available")
    return stream.fileno()
|
||||
|
||||
def __getattr__(self, name):
    """Delegate attributes this object lacks to the original stream.

    Python calls this only after normal attribute lookup has failed.

    Raises:
        AttributeError: If no original stream is recorded, or the stream
            does not have ``name`` either.
    """
    # Being asked for "_orig_stderr" here means it has not been assigned on
    # this instance. Answer directly: reading self._orig_stderr below would
    # otherwise re-enter this method and recurse until RecursionError.
    if name == "_orig_stderr":
        raise AttributeError(name)
    if self._orig_stderr is not None:
        return getattr(self._orig_stderr, name)
    raise AttributeError(name)
|
||||
|
||||
def _parse_line(self, line: str):
|
||||
if "Layout Preparation:" in line:
|
||||
self._last_anchor = "layout"
|
||||
elif "Extract Preparation:" in line:
|
||||
self._last_anchor = "extract"
|
||||
|
||||
generic_predict = self._GENERIC_PREDICT_PATTERN.search(line)
|
||||
if generic_predict:
|
||||
stage = "extract_predict" if self._last_anchor == "extract" else "layout_predict"
|
||||
self._update_from_match(generic_predict, stage)
|
||||
return
|
||||
|
||||
for pattern, stage in self._PATTERNS:
|
||||
m = pattern.search(line)
|
||||
if m:
|
||||
pct = int(m.group(1))
|
||||
lo, hi = self._RANGES.get(stage, (0, 100))
|
||||
mapped = lo + int((hi - lo) * pct / 100)
|
||||
label = self._STAGE_LABELS.get(stage, stage)
|
||||
if stage == 'extract' and len(m.groups()) >= 3:
|
||||
cur, total = m.group(2), m.group(3)
|
||||
label = f"VLM文档分析 ({cur}/{total}页)"
|
||||
elif stage in ('mfd', 'mfr', 'ocr_det', 'ocr_rec') and len(m.groups()) >= 3:
|
||||
cur, total = m.group(2), m.group(3)
|
||||
label = f"{label} ({cur}/{total})"
|
||||
_update_task_progress(self.task_id, mapped, label)
|
||||
self._update_from_match(m, stage)
|
||||
break
|
||||
|
||||
def _update_from_match(self, match, stage: str):
    """Turn a matched progress line into one overall progress update.

    The percentage in group 1 is scaled into the window that ``_RANGES``
    holds for ``stage`` (0-100 when the stage is unknown). The label comes
    from ``_STAGE_LABELS`` (the stage name itself when unknown). If the
    match also carries current/total counters (groups 2 and 3), they are
    appended to the label, with a page unit for the two page-based stages.

    Args:
        match: Regex match whose group 1 is the percentage.
        stage: Stage key used for the window and label lookups.
    """
    percent = int(match.group(1))
    start_pct, end_pct = self._RANGES.get(stage, (0, 100))

    label = self._STAGE_LABELS.get(stage, stage)
    if len(match.groups()) >= 3:
        current, total = match.group(2), match.group(3)
        if stage in ("layout_predict", "vlm_predict"):
            unit = "页"
        else:
            unit = ""
        label = f"{label} ({current}/{total}{unit})"

    overall = start_pct + int((end_pct - start_pct) * percent / 100)
    _update_task_progress(self.task_id, overall, label)
|
||||
|
||||
|
||||
async def limit_concurrency():
|
||||
if _request_semaphore is not None:
|
||||
|
|
|
|||
Loading…
Reference in New Issue