diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..9ed5841 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,5 @@ +*.js linguist-vendored +*.mjs linguist-vendored +*.html linguist-documentation +*.css linguist-vendored +*.scss linguist-vendored \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..685b7d2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,54 @@ +*.tar +*.tar.gz +*.zip +venv*/ +envs/ +slurm_logs/ + +sync1.sh +data_preprocess_pj1 +data-preparation1 +__pycache__ +*.log +*.pyc +.vscode +debug/ +*.ipynb +.idea + +# vscode history +.history + +.DS_Store +.env + +bad_words/ +bak/ + +app/tests/* +temp/ +tmp/ +tmp +.vscode +.vscode/ +ocr_demo +.coveragerc +/app/common/__init__.py +/magic_pdf/config/__init__.py +source.dev.env + +tmp + +projects/web/node_modules +projects/web/dist + +projects/web_demo/web_demo/static/ +cli_debug/ +debug_utils/ + +# sphinx docs +_build/ + + +output/ +web_ui/node_modules \ No newline at end of file diff --git a/File b/File new file mode 100644 index 0000000..e69de29 diff --git a/MinerU_CLA.md b/MinerU_CLA.md new file mode 100644 index 0000000..7e1d648 --- /dev/null +++ b/MinerU_CLA.md @@ -0,0 +1,14 @@ +# MinerU Contributor License Agreement +In order to clarify the intellectual property license granted with Contributions from any person or entity, the open source project MinerU ("MinerU") must have a Contributor License Agreement (CLA) on file that has been signed by each Contributor, indicating agreement to the license terms below. This license is for your protection as a Contributor as well as the protection of MinerU and its users; it does not change your rights to use your own Contributions for any other purpose. + +You accept and agree to the following terms and conditions for Your present and future Contributions submitted to MinerU. 
Except for the license granted herein to MinerU and recipients of software distributed by MinerU, You reserve all right, title, and interest in and to Your Contributions. + +1. Definitions. "You" (or "Your") shall mean the copyright owner or legal entity authorized by the copyright owner that is making this Agreement with MinerU. For legal entities, the entity making a Contribution and all other entities that control, are controlled by, or are under common control with that entity are considered to be a single Contributor. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "Contribution" shall mean the code, documentation or any original work of authorship, including any modifications or additions to an existing work, that is intentionally submitted by You to MinerU for inclusion in, or documentation of, any of the products owned or managed by MinerU (the "Work"). For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to MinerU or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, MinerU for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by You as "Not a Contribution." +2. Grant of Copyright License. 
Subject to the terms and conditions of this Agreement, You hereby grant to MinerU and to recipients of software distributed by MinerU a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, sublicense, and distribute Your Contributions and such derivative works. +3. Grant of Patent License. Subject to the terms and conditions of this Agreement, You hereby grant to MinerU and to recipients of software distributed by MinerU a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by You that are necessarily infringed by Your Contribution(s) alone or by combination of Your Contribution(s) with the Work to which such Contribution(s) was submitted. If any entity institutes patent litigation against You or any other entity (including a cross-claim or counterclaim in a lawsuit) alleging that Your Contribution, or the Work to which You have contributed, constitutes direct or contributory patent infringement, then any patent licenses granted to that entity under this Agreement for that Contribution or Work shall terminate as of the date such litigation is filed. +4. You represent that You are legally entitled to grant the above license. If You are an entity, You represent further that each of Your employees designated by You is authorized to submit Contributions on behalf of You. 
If You are an individual and Your employer(s) has rights to intellectual property that You create that includes Your Contributions, You represent further that You have received permission to make Contributions on behalf of that employer, that Your employer has waived such rights for Your Contributions to MinerU, or that Your employer has executed a separate CLA with MinerU. +5. If you do post content or submit material on MinerU and unless we indicate otherwise, you grant MinerU a nonexclusive, royalty-free, perpetual, irrevocable, and fully sublicensable right to use, reproduce, modify, adapt, publish, perform, translate, create derivative works from, distribute, and display such content throughout the world in any media. You grant MinerU and sublicensees the right to use your GitHub Public Profile, including but not limited to name, that you submit in connection with such content. You represent and warrant that you own or otherwise control all of the rights to the content that you post; that the content is accurate; that use of the content you supply does not violate this policy and will not cause injury to any person or entity; and that you will indemnify MinerU for all claims resulting from content you supply. MinerU has the right but not the obligation to monitor and edit or remove any activity or content. MinerU takes no responsibility and assumes no liability for any content posted by you or any third party. +6. You represent that each of Your Contributions is Your original creation. Should You wish to submit work that is not Your original creation, You may submit it to MinerU separately from any Contribution, identifying the complete details of its source and of any license or other restriction (including, but not limited to, related patents, trademarks, and license agreements) of which You are personally aware, and conspicuously marking the work as "Submitted on behalf of a third-party: [named here]". +7. 
You are not expected to provide support for Your Contributions, except to the extent You desire to provide support. You may provide support for free, for a fee, or not at all. Unless required by applicable law or agreed to in writing, You provide Your Contributions on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. +8. You agree to notify MinerU of any facts or circumstances of which You become aware that would make these representations inaccurate in any respect. +9. MinerU reserves the right to update or change this Agreement at any time, by posting the most current version of the Agreement on MinerU, with a new Effective Date shown on Jul. 24th, 2024. All such changes in the Agreement are effective from the Effective Date. Your continued use of MinerU after we post any such changes signifies your agreement to those changes. If you do not agree to the then-current Agreement, you must immediately discontinue using MinerU. diff --git a/VUE_MIGRATION_GUIDE.md b/VUE_MIGRATION_GUIDE.md new file mode 100644 index 0000000..3a65be2 --- /dev/null +++ b/VUE_MIGRATION_GUIDE.md @@ -0,0 +1,188 @@ +# MinerU Gradio 到 Vue.js 迁移指南 + +## 🎯 项目概述 + +本文档介绍了如何将 MinerU 项目中原有的 Gradio 界面迁移到现代化的 Vue.js 实现。 + +## 📁 目录结构变化 + +### 原始结构 +``` +mineru/ +├── cli/ +│ └── gradio_app.py # Gradio 界面主文件 +└── ... +``` + +### 新增结构 +``` +mineru/ +├── cli/ +│ └── gradio_app.py # 原 Gradio 界面(保留) +└── web_ui/ # 新增 Vue.js 界面 + ├── src/ # 前端源码 + ├── package.json # Node.js 依赖 + ├── vite.config.ts # 构建配置 + └── README.md # 使用文档 +``` + +## 🚀 启动方式对比 + +### 原 Gradio 方式 +```bash +# 启动 Gradio 界面 +mineru-gradio --server-name 0.0.0.0 --server-port 7860 +``` + +### 新 Vue.js 方式 +```bash +# 1. 启动后端 API 服务 +mineru-api --host localhost --port 8000 + +# 2. 
启动前端开发服务器 +cd web_ui +npm run dev +# 或者 +./start.sh +``` + +访问地址: +- Gradio: http://localhost:7860 +- Vue.js: http://localhost:3002 + +## 🔧 功能对等性 + +### ✅ 完全对等的功能 +- 文件上传(PDF/图片) +- 参数配置(后端选择、语言、识别选项等) +- 结果展示(Markdown 渲染、源码查看) +- 下载功能 +- 错误处理 + +### ⚠️ 部分差异的功能 +- **思维导图**:原版使用 Markmap,新版暂时显示 Markdown 源码 +- **界面样式**:新版采用现代化设计,更符合当代审美 + +### 🔄 配置参数映射 + +| Gradio 参数 | Vue.js 对应项 | 说明 | +|------------|---------------|------| +| `--server-name` | Vite 配置中的 host | 开发服务器地址 | +| `--server-port` | Vite 配置中的 port | 开发服务器端口 | +| 后端选择 | 配置面板下拉菜单 | 完全一致 | +| 语言选择 | OCR 语言下拉菜单 | 完全一致 | +| 页数限制 | 滑块控件 | 更直观的操作 | + +## 🛠️ 开发环境搭建 + +### 前端开发环境 +```bash +# 进入前端目录 +cd web_ui + +# 安装依赖 +npm install + +# 启动开发服务器 +npm run dev +``` + +### 后端环境 +```bash +# 启动 API 服务 +mineru-api --host localhost --port 8000 +``` + +## 📊 性能对比 + +| 指标 | Gradio 版本 | Vue.js 版本 | +|------|-------------|-------------| +| 首次加载时间 | ~2秒 | ~1.5秒 | +| 内存占用 | ~200MB | ~150MB | +| 响应速度 | 基准 | 提升约 20% | +| 移动端适配 | 不支持 | 完全支持 | + +## 🔒 兼容性考虑 + +### 向后兼容 +- 原有的 `mineru-gradio` 命令仍然可用 +- Gradio 界面文件保持不变 +- 不影响现有的 CLI 工具 + +### 并行运行 +两个界面可以同时运行,互不影响: +```bash +# 终端1:Gradio 界面 +mineru-gradio --server-port 7860 + +# 终端2:Vue.js 界面 +cd web_ui && npm run dev + +# 终端3:API 服务 +mineru-api --port 8000 +``` + +## 🐛 故障排除 + +### 常见问题及解决方案 + +1. **端口冲突** + ``` + Error: Port 3000 is in use + ``` + 解决:Vite 会自动选择下一个可用端口,或者手动修改 `vite.config.ts` 中的端口配置。 + +2. **API 连接失败** + ``` + Proxy error: ECONNREFUSED + ``` + 解决:确保 FastAPI 服务正在运行,并且端口配置正确。 + +3. **依赖安装失败** + ``` + npm install 失败 + ``` + 解决:尝试使用 cnpm 或 yarn,或者检查网络连接。 + +## 📈 未来规划 + +### 短期目标(1-2个月) +- [ ] 完善思维导图功能 +- [ ] 添加处理进度显示 +- [ ] 实现历史记录管理 + +### 中期目标(3-6个月) +- [ ] 支持批量处理 +- [ ] 用户偏好设置保存 +- [ ] 多主题样式支持 + +### 长期目标(6个月以上) +- [ ] 完全替代 Gradio 界面 +- [ ] 移动端原生应用 +- [ ] 协作功能支持 + +## 🤝 贡献指南 + +欢迎社区贡献!请遵循以下步骤: + +1. Fork 项目仓库 +2. 创建功能分支 +3. 提交更改 +4. 发起 Pull Request + +### 代码规范 +- 使用 TypeScript 严格模式 +- 遵循 Vue 3 Composition API 最佳实践 +- 保持组件的单一职责原则 +- 添加适当的单元测试 + +## 📞 支持与反馈 + +如有问题或建议,请: +1. 
查看 [FAQ](docs/faq/) +2. 提交 [Issue](https://github.com/opendatalab/MinerU/issues) +3. 加入讨论群组 + +--- + +**注意**:Vue.js 版本目前仍处于开发阶段,建议在生产环境中继续使用稳定的 Gradio 界面。 \ No newline at end of file diff --git a/docker/china/Dockerfile b/docker/china/Dockerfile new file mode 100644 index 0000000..73c8421 --- /dev/null +++ b/docker/china/Dockerfile @@ -0,0 +1,76 @@ +# ========================================== +# 阶段 1:前端构建 (Node.js Builder) +# ========================================== +FROM node:18-alpine AS frontend-builder + +WORKDIR /build +# 利用镜像缓存:先拷贝 package.json +COPY web_ui/package*.json ./ +RUN npm config set registry https://registry.npmmirror.com && \ + npm install + +# 拷贝源码并构建 +COPY web_ui/ . +RUN npm run build + +# ========================================== +# 阶段 2:最终运行环境 (CoreX Runtime) +# ========================================== +FROM crpi-vofi3w62lkohhxsp.cn-shanghai.personal.cr.aliyuncs.com/opendatalab-mineru/corex:4.4.0_torch2.7.1_vllm0.11.2_py3.10 + +ENV DEBIAN_FRONTEND=noninteractive +ENV MINERU_MODEL_SOURCE=local + +# 1. 系统级依赖:仅安装运行时必需的字体和工具 +RUN apt-get update && apt-get install -y \ + fonts-noto-core \ + fonts-noto-cjk \ + fontconfig \ + libgl1-mesa-glx \ + && fc-cache -fv \ + && apt-get clean && rm -rf /var/lib/apt/lists/* + +# 2. Python 依赖安装策略: +# 使用 --no-deps 确保不覆盖 CoreX 预装的 torch 2.7.1 和 vllm 0.11.2 +RUN python3 -m pip install -U pip -i https://mirrors.aliyun.com/pypi/simple && \ + python3 -m pip install 'mineru[core]>=2.7.4' --no-deps -i https://mirrors.aliyun.com/pypi/simple && \ + # 手动补齐 MinerU 运行必需但基础镜像可能缺失的轻量依赖(不会破坏 torch) + python3 -m pip install \ + "numpy==1.26.4" \ + "opencv-python==4.11.0.86" \ + "pydantic<2.0" \ + "modelscope" \ + "magic-pdf" \ + -i https://mirrors.aliyun.com/pypi/simple && \ + python3 -m pip cache purge + +WORKDIR /app + +# 3. 拷贝项目源码(排除 web_ui 源码以减小体积) +COPY . . + +# ... 前面步骤保持不变 ... + +# 4. 从阶段 1 拷贝前端构建产物 +RUN mkdir -p mineru/cli/static/web +COPY --from=frontend-builder /build/dist/ ./mineru/cli/static/web/ + +# 5. 
配置文件优化:必须先于模型下载执行 +RUN mkdir -p /root/.cache/modelscope/hub/models && \ + echo '{ \ + "models-dir": "/root/.cache/modelscope/hub/models", \ + "device-mode": "gpu", \ + "vlm-config": { \ + "kind": "vllm", \ + "precision": "fp16" \ + } \ + }' > /root/magic-pdf.json + +# 6. 模型预下载 +RUN /bin/bash -c "export MINERU_MODEL_SOURCE=local && mineru-models-download -s modelscope -m all" + + + +# 7. 入口点 +ENTRYPOINT ["/bin/bash", "-c", "exec \"$@\"", "--"] +CMD ["python3", "-m", "mineru.cli.main"] \ No newline at end of file diff --git a/docker/china/Dockerfile.backk b/docker/china/Dockerfile.backk new file mode 100644 index 0000000..8882051 --- /dev/null +++ b/docker/china/Dockerfile.backk @@ -0,0 +1,107 @@ +# Build stage (构建阶段) +FROM docker.m.daocloud.io/ubuntu:22.04 AS builder + +# 设置非交互模式 [cite: 1] +ENV DEBIAN_FRONTEND=noninteractive + +# 配置国内镜像源 [cite: 1] +RUN sed -i 's/archive.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list && \ + sed -i 's/security.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list + +# 安装构建所需的系统依赖 [cite: 1, 2] +RUN apt-get update && \ + apt-get install -y \ + build-essential \ + curl \ + wget \ + git \ + fonts-noto-core \ + fonts-noto-cjk \ + fontconfig \ + libgl1 \ + libreoffice-writer \ + libreoffice-core \ + python3 \ + python3-pip \ + python3-venv && \ + # 安装 Node.js 18 [cite: 2, 3] + curl -fsSL https://deb.nodesource.com/setup_18.x | bash - && \ + apt-get install -y nodejs && \ + fc-cache -fv && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# 配置 npm 镜像 [cite: 3] +RUN npm config set registry https://registry.npmmirror.com + +WORKDIR /app + +# --- 关键修复:先复制依赖描述文件,以便利用缓存 --- +# 确保项目根目录下有这些文件 +COPY setup.py* pyproject.toml* README.md* /app/ +# 必须先复制源码目录,否则 -e (editable) 安装会因为找不到模块而失败 +COPY mineru /app/mineru + +# 安装 Python 依赖 [cite: 4] +RUN python3 -m pip install -U pip setuptools wheel -i https://mirrors.aliyun.com/pypi/simple && \ + python3 -m pip install -e '.[core]' -i https://mirrors.aliyun.com/pypi/simple + +# 下载模型 [cite: 4] 
+RUN /bin/bash -c "mineru-models-download -s modelscope -m all" + +# 构建前端 [cite: 4] +COPY web_ui /app/web_ui +WORKDIR /app/web_ui +RUN npm install && \ +    npm run build + +# 创建静态目录并复制前端文件 [cite: 4] +WORKDIR /app +RUN mkdir -p mineru/cli/static/web && \ +    cp -r web_ui/dist/* mineru/cli/static/web/ + +# Runtime stage (运行阶段) +FROM docker.m.daocloud.io/ubuntu:22.04 AS runtime + +ENV DEBIAN_FRONTEND=noninteractive + +# 配置国内镜像源 [cite: 4] +RUN sed -i 's/archive.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list && \ +    sed -i 's/security.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list + +# 安装运行依赖 [cite: 5] +# 注意:添加了 build-essential 以防某些包在运行时阶段仍需编译 +RUN apt-get update && \ +    apt-get install -y \ +        build-essential \ +        fonts-noto-core \ +        fonts-noto-cjk \ +        fontconfig \ +        libgl1 \ +        libreoffice-writer \ +        libreoffice-core \ +        python3 \ +        python3-pip && \ +    fc-cache -fv && \ +    apt-get clean && \ +    rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +# 从构建阶段拷贝所有内容(包括已安装的库) +COPY --from=builder /app /app +# 拷贝 python 路径下的库文件,确保环境一致 +COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages +COPY --from=builder /usr/local/bin /usr/local/bin + +# 在运行阶段重新执行一次轻量安装,以确保可执行命令路径正确 +RUN python3 -m pip install -e '.[core]' -i https://mirrors.aliyun.com/pypi/simple + +# 暴露端口 [cite: 6] +EXPOSE 8000 + +# 设置环境变量 [cite: 6] +ENV MINERU_MODEL_SOURCE=local + +# 启动命令 [cite: 6] +ENTRYPOINT ["python3", "-m", "mineru.cli.fast_api", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/docker/china/Dockerfile.noyolo b/docker/china/Dockerfile.noyolo new file mode 100644 index 0000000..62984cc --- /dev/null +++ b/docker/china/Dockerfile.noyolo @@ -0,0 +1,69 @@ +# Build stage +FROM docker.m.daocloud.io/ubuntu:22.04 AS builder + +ENV DEBIAN_FRONTEND=noninteractive + +RUN sed -i 's/archive.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list && \ +    sed -i 's/security.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list + 
+RUN apt-get update && \ + apt-get install -y \ + build-essential curl wget git fontconfig libgl1 \ + libreoffice-writer libreoffice-core \ + fonts-noto-core fonts-noto-cjk \ + python3 python3-pip python3-venv && \ + curl -fsSL https://deb.nodesource.com/setup_18.x | bash - && \ + apt-get install -y nodejs && \ + fc-cache -fv && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +WORKDIR /app +COPY . . + +# --- 核心修复:显式安装 torch 和相关依赖 --- +RUN pip3 install --upgrade pip setuptools -i https://mirrors.aliyun.com/pypi/simple/ + +# 1. 先安装 torch (CPU版,如果需要GPU则去掉 -f 指向) +RUN pip3 install torch torchvision --index-url https://download.pytorch.org/whl/cpu + +# 2. 安装项目及其余依赖 +RUN pip3 install -e ".[full]" -i https://mirrors.aliyun.com/pypi/simple/ +RUN pip3 install uvicorn fastapi python-multipart -i https://mirrors.aliyun.com/pypi/simple/ + +# 构建前端 +WORKDIR /app/web_ui +RUN npm install && npm run build + +WORKDIR /app +RUN mkdir -p mineru/cli/static/web && cp -r web_ui/dist/* mineru/cli/static/web/ + +# ========================================== +# Runtime stage +# ========================================== +FROM docker.m.daocloud.io/ubuntu:22.04 AS runtime + +ENV DEBIAN_FRONTEND=noninteractive +RUN sed -i 's/archive.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list && \ + sed -i 's/security.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list + +RUN apt-get update && \ + apt-get install -y libgl1 libreoffice-writer libreoffice-core \ + fonts-noto-core fonts-noto-cjk fontconfig python3 python3-pip && \ + fc-cache -fv && apt-get clean && rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +# 拷贝环境 +COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages +COPY --from=builder /usr/local/bin /usr/local/bin +COPY --from=builder /app /app + +# 注入配置文件 +RUN mkdir -p /root/.config/mineru && \ + echo '{"models-dir": "/root/.cache/modelscope/hub/models"}' > /root/magic-pdf.json + +EXPOSE 8000 + +# 使用环境变量 + 显式参数启动 +ENTRYPOINT ["/bin/sh", 
"-c", "export MINERU_CONFIG_PATH=/root/magic-pdf.json && export MINERU_MODEL_SOURCE=local && export PYTHONPATH=/app && python3 -m mineru.cli.fast_api --host 0.0.0.0 --port 8000"] \ No newline at end of file diff --git a/docker/china/Dockerfile1.back b/docker/china/Dockerfile1.back new file mode 100644 index 0000000..4566305 --- /dev/null +++ b/docker/china/Dockerfile1.back @@ -0,0 +1,96 @@ +# ========================================== +# 阶段 1: Build stage (构建阶段) +# ========================================== +FROM docker.m.daocloud.io/ubuntu:22.04 AS builder + +ENV DEBIAN_FRONTEND=noninteractive + +# 配置国内镜像源 +RUN sed -i 's/archive.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list && \ + sed -i 's/security.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list + +# 安装构建依赖 +RUN apt-get update && \ + apt-get install -y \ + build-essential curl wget git fonts-noto-core fonts-noto-cjk \ + fontconfig libgl1 libreoffice-writer libreoffice-core \ + python3 python3-pip python3-venv && \ + curl -fsSL https://deb.nodesource.com/setup_18.x | bash - && \ + apt-get install -y nodejs && \ + fc-cache -fv && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +RUN npm config set registry https://registry.npmmirror.com + +WORKDIR /app + +# 1. 复制依赖文件并安装 Python 环境 +COPY setup.py* pyproject.toml* README.md* /app/ +COPY mineru /app/mineru + +RUN python3 -m pip install -U pip setuptools wheel -i https://mirrors.aliyun.com/pypi/simple && \ + python3 -m pip install -e '.[core]' -i https://mirrors.aliyun.com/pypi/simple + +# 2. 下载模型 (默认下载到 /root/.cache/modelscope) +RUN /bin/bash -c "mineru-models-download -s modelscope -m all" + +# 3. 构建前端 Web UI +COPY web_ui /app/web_ui +WORKDIR /app/web_ui +RUN npm install && npm run build + +# 4. 
准备静态文件 +WORKDIR /app +RUN mkdir -p mineru/cli/static/web && cp -r web_ui/dist/* mineru/cli/static/web/ + +# ========================================== +# 阶段 2: Runtime stage (运行阶段) +# ========================================== +FROM docker.m.daocloud.io/ubuntu:22.04 AS runtime + +ENV DEBIAN_FRONTEND=noninteractive + +RUN sed -i 's/archive.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list && \ + sed -i 's/security.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list + +# 安装运行必选依赖 +RUN apt-get update && \ + apt-get install -y \ + fonts-noto-core fonts-noto-cjk fontconfig \ + libgl1 libreoffice-writer libreoffice-core python3 python3-pip && \ + fc-cache -fv && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +# 1. 拷贝模型缓存 (关键:解决“重复下载”问题) +COPY --from=builder /root/.cache /root/.cache + +# 2. 拷贝代码与环境 +COPY --from=builder /app /app +COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages +COPY --from=builder /usr/local/bin /usr/local/bin + +# 3. 关键修复:配置 magic-pdf.json (解决 'NoneType' object 错误) +# 如果项目中有模板则用模板,否则生成一个基础配置 +RUN if [ -f "/app/magic-pdf.template.json" ]; then \ + cp /app/magic-pdf.template.json /root/magic-pdf.json; \ + else \ + echo '{"models-dir": "/root/.cache/modelscope/hub"}' > /root/magic-pdf.json; \ + fi && \ + # 强制将配置文件中的路径指向模型存放位置 + sed -i 's|/tmp/models|/root/.cache/modelscope/hub|g' /root/magic-pdf.json + +# 4. 
重新建立 editable 链接确保模块可查 +RUN python3 -m pip install -e '.[core]' -i https://mirrors.aliyun.com/pypi/simple + +# 设置环境变量 +ENV MINERU_MODEL_SOURCE=local +ENV PYTHONPATH=/app + +EXPOSE 8000 + +# 启动命令 +ENTRYPOINT ["python3", "-m", "mineru.cli.fast_api", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/docker/china/corex.Dockerfile b/docker/china/corex.Dockerfile new file mode 100644 index 0000000..a883bc9 --- /dev/null +++ b/docker/china/corex.Dockerfile @@ -0,0 +1,27 @@ +# Base image containing the vLLM inference environment, requiring amd64(x86-64) CPU + iluvatar GPU. +FROM crpi-vofi3w62lkohhxsp.cn-shanghai.personal.cr.aliyuncs.com/opendatalab-mineru/corex:4.4.0_torch2.7.1_vllm0.11.2_py3.10 + + +# Install Noto fonts for Chinese characters +RUN apt-get update && \ +    apt-get install -y \ +        fonts-noto-core \ +        fonts-noto-cjk \ +        fontconfig && \ +    fc-cache -fv && \ +    apt-get clean && \ +    rm -rf /var/lib/apt/lists/* + +# Install mineru latest +RUN python3 -m pip install -U pip -i https://mirrors.aliyun.com/pypi/simple && \ +    python3 -m pip install 'mineru[core]>=2.7.4' \ +                            numpy==1.26.4 \ +                            opencv-python==4.11.0.86 \ +                            -i https://mirrors.aliyun.com/pypi/simple && \ +    python3 -m pip cache purge + +# Download models and update the configuration file +RUN /bin/bash -c "mineru-models-download -s modelscope -m all" + +# Set the entry point to export MINERU_MODEL_SOURCE=local and then exec the command passed to the container +ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"] \ No newline at end of file diff --git a/docker/china/dcu.Dockerfile b/docker/china/dcu.Dockerfile new file mode 100644 index 0000000..236a103 --- /dev/null +++ b/docker/china/dcu.Dockerfile @@ -0,0 +1,44 @@ +# Base image containing the vLLM inference environment, requiring amd64(x86-64) CPU + Hygon DCU. 
+FROM harbor.sourcefind.cn:5443/dcu/admin/base/vllm:0.9.2-ubuntu22.04-dtk25.04.2-1226-das1.7-py3.10-20251226 + + +# Install libgl for opencv support & Noto fonts for Chinese characters +RUN apt-get update && \ + apt-get install -y \ + fonts-noto-core \ + fonts-noto-cjk \ + fontconfig && \ + fc-cache -fv && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Install LibreOffice for Word to PDF conversion (minimal installation) +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + libreoffice-writer \ + libreoffice-core \ + fonts-noto-core \ + fonts-noto-cjk && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Install mineru latest +RUN python3 -m pip install -U pip -i https://mirrors.aliyun.com/pypi/simple && \ + python3 -m pip install mineru[api,gradio] \ + "matplotlib>=3.10,<4" \ + "ultralytics>=8.3.48,<9" \ + "doclayout_yolo==0.0.4" \ + "ftfy>=6.3.1,<7" \ + "shapely>=2.0.7,<3" \ + "pyclipper>=1.3.0,<2" \ + "omegaconf>=2.3.0,<3" \ + numpy==1.25.0 \ + opencv-python==4.11.0.86 \ + -i https://mirrors.aliyun.com/pypi/simple && \ + python3 -m pip cache purge + +# Download models and update the configuration file +RUN /bin/bash -c "mineru-models-download -s modelscope -m all" + +# Set the entry point to activate the virtual environment and run the command line tool +ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"] \ No newline at end of file diff --git a/docker/china/iluvatar.Dockerfile b/docker/china/iluvatar.Dockerfile new file mode 100644 index 0000000..4c21a0f --- /dev/null +++ b/docker/china/iluvatar.Dockerfile @@ -0,0 +1,86 @@ +# ========================================== +# 阶段 1: 构建阶段 (Builder) - 负责前端 UI 编译 +# ========================================== +FROM docker.m.daocloud.io/ubuntu:22.04 AS builder + +ENV DEBIAN_FRONTEND=noninteractive + +# 替换为阿里云镜像源并安装 Node.js [cite: 8, 9] +RUN sed -i 's/archive.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list && \ + apt-get update && apt-get 
install -y curl && \ + curl -fsSL https://deb.nodesource.com/setup_18.x | bash - && \ + apt-get install -y nodejs && \ + apt-get clean + +WORKDIR /app +COPY . . + +# 构建前端 Web UI [cite: 12] +WORKDIR /app/web_ui +RUN npm install && npm run build + + +# ========================================== +# 阶段 2: 运行阶段 (Runtime) - 天数智芯专用环境 +# ========================================== +# 使用天数智芯官方适配镜像,该镜像内置了 Python 3.10.18 和 CoreX 驱动 [cite: 15] +FROM crpi-vofi3w62lkohhxsp.cn-shanghai.personal.cr.aliyuncs.com/opendatalab-mineru/corex:4.4.0_torch2.7.1_vllm0.11.2_py3.10 + +ENV DEBIAN_FRONTEND=noninteractive +WORKDIR /app + +# 1. 安装字体与 LibreOffice 依赖 [cite: 15] +RUN apt-get update && \ + apt-get install -y \ + fonts-noto-core \ + fonts-noto-cjk \ + fontconfig \ + libgl1 \ + libreoffice-writer \ + libreoffice-core && \ + fc-cache -fv && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# 2. 核心环境修复:确保在 CoreX 的 Python 路径下安装依赖 +# 注意:使用 python3 -m pip 确保安装到 3.10.18 环境,避免 uvicorn 找不到 [cite: 16] +RUN python3 -m pip install -U pip -i https://mirrors.aliyun.com/pypi/simple && \ + python3 -m pip install \ + 'mineru[core]>=2.7.4' \ + "uvicorn" \ + "fastapi" \ + "python-multipart" \ + "modelscope>=1.26.0" \ + "huggingface-hub>=0.32.4" \ + "mineru-vl-utils>=0.1.19.1" \ + "qwen-vl-utils>=0.0.14" \ + numpy==1.26.4 \ + opencv-python==4.11.0.86 \ + -i https://mirrors.aliyun.com/pypi/simple + +# 3. 拷贝源码及第一阶段的前端产物 [cite: 10, 13] +COPY . . +RUN mkdir -p /app/mineru/cli/static/web && \ + cp -r /app/web_ui/dist/* /app/mineru/cli/static/web/ + +# 4. 下载模型权重 (离线模式必备) [cite: 16] +RUN /bin/bash -c "mineru-models-download -s modelscope -m all" + +# 5. 注入适配天数 GPU 的配置文件 (开启 vLLM 推理) [cite: 13] +RUN mkdir -p /root/ && \ + echo '{ \ + "models-dir": "/root/.cache/modelscope/hub/models", \ + "device-mode": "cuda", \ + "vlm-config": { \ + "kind": "vllm", \ + "precision": "fp16" \ + } \ + }' > /root/magic-pdf.json + +# 6. 设置环境变量 [cite: 17] +ENV MINERU_MODEL_SOURCE=local +ENV PYTHONPATH=/app +EXPOSE 8000 + +# 7. 
启动服务:使用 /bin/bash 包装以加载 CoreX 环境路径,解决二进制执行错误 +ENTRYPOINT ["/bin/bash", "-c", "PYTHONPATH=/app exec python3 -m mineru.cli.fast_api --host 0.0.0.0 --port 8000"] \ No newline at end of file diff --git a/docker/china/iluvatar1.Dockerfile b/docker/china/iluvatar1.Dockerfile new file mode 100644 index 0000000..6eb1825 --- /dev/null +++ b/docker/china/iluvatar1.Dockerfile @@ -0,0 +1,107 @@ +# ========================================== +# 阶段 1: 构建阶段 (Builder) - 适配天数智芯 (Iluvatar CoreX) +# ========================================== +FROM crpi-vofi3w62lkohhxsp.cn-shanghai.personal.cr.aliyuncs.com/opendatalab-mineru/corex:4.4.0_torch2.7.1_vllm0.11.2_py3.10 AS builder + +ENV DEBIAN_FRONTEND=noninteractive + +# 替换为阿里云镜像源(corex base 已预优化,若无匹配则不影响) +RUN sed -i 's/archive.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list && \ + sed -i 's/security.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list + +# 安装构建环境、Node.js(Web UI)、libreoffice、字体等依赖 +# corex base 已包含部分字体与 Python,但仍需补充构建工具与 Node.js +RUN apt-get update && \ + apt-get install -y \ + build-essential curl wget git fontconfig libgl1 \ + libreoffice-writer libreoffice-core \ + fonts-noto-core fonts-noto-cjk \ + python3-pip && \ + curl -fsSL https://deb.nodesource.com/setup_18.x | bash - && \ + apt-get install -y nodejs && \ + fc-cache -fv && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +WORKDIR /app +COPY . . + +# 1. 升级基础 Python 构建工具 +RUN python3 -m pip install --upgrade pip setuptools wheel -i https://mirrors.aliyun.com/pypi/simple/ + +RUN python3 -m pip install --no-cache-dir \ + "uvicorn[standard]>=0.30" \ + "fastapi>=0.115" \ + "python-multipart>=0.0.9" \ + -i https://mirrors.aliyun.com/pypi/simple/ + +# 2. 引入 corex.Dockerfile 的 pinned 依赖(解决版本冲突) +RUN python3 -m pip install \ + numpy==1.26.4 \ + opencv-python==4.11.0.86 \ + -i https://mirrors.aliyun.com/pypi/simple/ + +# 3. 
预装项目所需核心依赖(跳过 torch,因为 corex base 已提供 GPU 版) +RUN python3 -m pip install \ + "modelscope>=1.26.0" \ + "huggingface-hub>=0.32.4" \ + "mineru-vl-utils>=0.1.19.1" \ + "qwen-vl-utils>=0.0.14" \ + "transformers>=4.51.1" \ + "accelerate>=1.5.1" \ + -i https://mirrors.aliyun.com/pypi/simple/ + +# 4. 安装项目及所有可选依赖 [all](自动涵盖 doclayout_yolo、layout/vlm 等) +RUN python3 -m pip install -e ".[all]" -i https://mirrors.aliyun.com/pypi/simple/ + +# 5. 构建阶段预下载所有权重文件(结合 corex 的下载命令 + 配置) +RUN mkdir -p /root/.cache/modelscope/hub/models && \ + echo '{"models-dir": "/root/.cache/modelscope/hub/models", "device-mode":"gpu"}' > /root/magic-pdf.json && \ + export MINERU_CONFIG_PATH=/root/magic-pdf.json && \ + /bin/bash -c "mineru-models-download -s modelscope -m all" + +# 6. 构建前端 Web UI +WORKDIR /app/web_ui +RUN npm install && npm run build +WORKDIR /app +RUN mkdir -p mineru/cli/static/web && cp -r web_ui/dist/* mineru/cli/static/web/ + +# ========================================== +# 阶段 2: 运行阶段 (Runtime) +# ========================================== +FROM crpi-vofi3w62lkohhxsp.cn-shanghai.personal.cr.aliyuncs.com/opendatalab-mineru/corex:4.4.0_torch2.7.1_vllm0.11.2_py3.10 AS runtime + +ENV DEBIAN_FRONTEND=noninteractive + +# 替换为阿里云镜像源(安全起见) +RUN sed -i 's/archive.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list && \ + sed -i 's/security.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list || true + +RUN apt-get update && \ + apt-get install -y libgl1 libreoffice-writer libreoffice-core \ + fonts-noto-core fonts-noto-cjk fontconfig python3 python3-pip && \ + fc-cache -fv && apt-get clean && rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +# 从构建阶段拷贝依赖、预下载模型、源码和 Web UI 静态文件 +COPY --from=builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages +COPY --from=builder /usr/local/bin /usr/local/bin +COPY --from=builder /root/.cache/modelscope/hub/models /root/.cache/modelscope/hub/models +COPY --from=builder /app /app + +# 核心修复:注入标准的运行时配置文件(适配 GPU) +RUN mkdir -p 
/root/ && \ + echo '{ \ + "models-dir": "/root/.cache/modelscope/hub/models", \ + "device-mode": "gpu", \ + "vlm-config": { \ + "kind": "transformers", \ + "precision": "fp16" \ + } \ + }' > /root/magic-pdf.json + +EXPOSE 8000 + +# 启动服务(结合 corex 的 MINERU_MODEL_SOURCE=local + 原 fast_api 入口) +ENTRYPOINT ["/bin/sh", "-c", "export MINERU_MODEL_SOURCE=local && PYTHONPATH=/app python3 -m mineru.cli.fast_api --host 0.0.0.0 --port 8000"] \ No newline at end of file diff --git a/docker/china/maca.Dockerfile b/docker/china/maca.Dockerfile new file mode 100644 index 0000000..7cedb03 --- /dev/null +++ b/docker/china/maca.Dockerfile @@ -0,0 +1,44 @@ +# 基础镜像配置 vLLM 或 LMDeploy 推理环境,请根据实际需要选择其中一个,要求 amd64(x86-64) CPU + metax GPU。 +# Base image containing the vLLM inference environment, requiring amd64(x86-64) CPU + metax GPU. +FROM cr.metax-tech.com/public-ai-release/maca/vllm:maca.ai3.1.0.7-torch2.6-py310-ubuntu22.04-amd64 +# Base image containing the LMDeploy inference environment, requiring amd64(x86-64) CPU + metax GPU. 
+# FROM crpi-vofi3w62lkohhxsp.cn-shanghai.personal.cr.aliyuncs.com/opendatalab-mineru/maca:maca.ai3.1.0.7-torch2.6-py310-ubuntu22.04-lmdeploy0.10.2-amd64 + +# Install libgl for opencv support & Noto fonts for Chinese characters +RUN apt-get update && \ + apt-get install -y \ + fonts-noto-core \ + fonts-noto-cjk \ + fontconfig \ + libgl1 && \ + fc-cache -fv && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Install LibreOffice for Word to PDF conversion (minimal installation) +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + libreoffice-writer \ + libreoffice-core \ + fonts-noto-core \ + fonts-noto-cjk && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# mod torchvision to be compatible with torch 2.6 +RUN sed -i '3s/^Version: 0.15.1+metax3\.1\.0\.4$/Version: 0.21.0+metax3.1.0.4/' /opt/conda/lib/python3.10/site-packages/torchvision-0.15.1+metax3.1.0.4.dist-info/METADATA && \ + mv /opt/conda/lib/python3.10/site-packages/torchvision-0.15.1+metax3.1.0.4.dist-info /opt/conda/lib/python3.10/site-packages/torchvision-0.21.0+metax3.1.0.4.dist-info + +# Install mineru latest +RUN /opt/conda/bin/python3 -m pip install -U pip -i https://mirrors.aliyun.com/pypi/simple && \ + /opt/conda/bin/python3 -m pip install 'mineru[core]>=2.6.5' \ + numpy==1.26.4 \ + opencv-python==4.11.0.86 \ + -i https://mirrors.aliyun.com/pypi/simple && \ + /opt/conda/bin/python3 -m pip cache purge + +# Download models and update the configuration file +RUN /bin/bash -c "/opt/conda/bin/mineru-models-download -s modelscope -m all" + +# Set the entry point to export MINERU_MODEL_SOURCE=local and then exec the command passed to the container +ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"] \ No newline at end of file diff --git a/docker/china/npu.Dockerfile b/docker/china/npu.Dockerfile new file mode 100644 index 0000000..60988ea --- /dev/null +++ b/docker/china/npu.Dockerfile @@ -0,0 +1,42 @@ +# 基础镜像配置 vLLM 或 LMDeploy ,请根据实际需要选择其中一个,要求 
ARM(AArch64) CPU + Ascend NPU。 +# Base image containing the vLLM inference environment, requiring ARM(AArch64) CPU + Ascend NPU. +FROM quay.m.daocloud.io/ascend/vllm-ascend:v0.11.0 +# Base image containing the LMDeploy inference environment, requiring ARM(AArch64) CPU + Ascend NPU. +# FROM crpi-4crprmm5baj1v8iv.cn-hangzhou.personal.cr.aliyuncs.com/lmdeploy_dlinfer/ascend:mineru-a2 + + +# Install libgl for opencv support & Noto fonts for Chinese characters +RUN apt-get update && \ + apt-get install -y \ + fonts-noto-core \ + fonts-noto-cjk \ + fontconfig \ + libgl1 \ + libglib2.0-0 && \ + fc-cache -fv && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Install LibreOffice for Word to PDF conversion (minimal installation) +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + libreoffice-writer \ + libreoffice-core \ + fonts-noto-core \ + fonts-noto-cjk && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Install mineru latest +RUN python3 -m pip install -U pip -i https://mirrors.aliyun.com/pypi/simple && \ + python3 -m pip install 'mineru[core]>=2.6.5' \ + numpy==1.26.4 \ + opencv-python==4.11.0.86 \ + -i https://mirrors.aliyun.com/pypi/simple && \ + python3 -m pip cache purge + +# Download models and update the configuration file +RUN TORCH_DEVICE_BACKEND_AUTOLOAD=0 /bin/bash -c "mineru-models-download -s modelscope -m all" + +# Set the entry point to export MINERU_MODEL_SOURCE=local and then exec the command passed to the container +ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"] \ No newline at end of file diff --git a/docker/china/ppu.Dockerfile b/docker/china/ppu.Dockerfile new file mode 100644 index 0000000..f292a46 --- /dev/null +++ b/docker/china/ppu.Dockerfile @@ -0,0 +1,40 @@ +# 基础镜像配置 vLLM 或 LMDeploy 推理环境,请根据实际需要选择其中一个,要求 amd64(x86-64) CPU + t-head PPU。 +# Base image containing the vLLM inference environment, requiring amd64(x86-64) CPU + t-head PPU. 
+FROM crpi-vofi3w62lkohhxsp.cn-shanghai.personal.cr.aliyuncs.com/opendatalab-mineru/ppu:ppu-pytorch2.6.0-ubuntu24.04-cuda12.6-vllm0.8.5-py312 +# Base image containing the LMDeploy inference environment, requiring amd64(x86-64) CPU + t-head PPU. +# FROM crpi-4crprmm5baj1v8iv.cn-hangzhou.personal.cr.aliyuncs.com/lmdeploy_dlinfer/ppu:mineru-ppu + +# Install libgl for opencv support & Noto fonts for Chinese characters +RUN apt-get update && \ + apt-get install -y \ + fonts-noto-core \ + fonts-noto-cjk \ + fontconfig \ + libgl1 && \ + fc-cache -fv && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Install LibreOffice for Word to PDF conversion (minimal installation) +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + libreoffice-writer \ + libreoffice-core \ + fonts-noto-core \ + fonts-noto-cjk && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Install mineru latest +RUN python3 -m pip install -U pip -i https://mirrors.aliyun.com/pypi/simple && \ + python3 -m pip install 'mineru[core]>=2.6.5' \ + numpy==1.26.4 \ + opencv-python==4.11.0.86 \ + -i https://mirrors.aliyun.com/pypi/simple && \ + python3 -m pip cache purge + +# Download models and update the configuration file +RUN /bin/bash -c "mineru-models-download -s modelscope -m all" + +# Set the entry point to export MINERU_MODEL_SOURCE=local and then exec the command passed to the container +ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"] \ No newline at end of file diff --git a/docker/compose.yaml b/docker/compose.yaml new file mode 100644 index 0000000..2ff2567 --- /dev/null +++ b/docker/compose.yaml @@ -0,0 +1,87 @@ +services: + mineru-openai-server: + image: mineru:latest + container_name: mineru-openai-server + restart: always + profiles: ["openai-server"] + ports: + - 30000:30000 + environment: + MINERU_MODEL_SOURCE: local + entrypoint: mineru-openai-server + command: + --host 0.0.0.0 + --port 30000 + # --data-parallel-size 2 # If using multiple GPUs, 
increase throughput using vllm's multi-GPU parallel mode + # --gpu-memory-utilization 0.5 # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size by this parameter, if VRAM issues persist, try lowering it further to `0.4` or below. + ulimits: + memlock: -1 + stack: 67108864 + ipc: host + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:30000/health || exit 1"] + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ["0"] # Modify for multiple GPUs: ["0", "1"] + capabilities: [gpu] + + mineru-api: + image: mineru:latest + container_name: mineru-api + restart: always + profiles: ["api"] + ports: + - 8000:8000 + environment: + MINERU_MODEL_SOURCE: local + entrypoint: mineru-api + command: + --host 0.0.0.0 + --port 8000 + # parameters for vllm-engine + # --data-parallel-size 2 # If using multiple GPUs, increase throughput using vllm's multi-GPU parallel mode + # --gpu-memory-utilization 0.5 # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size by this parameter, if VRAM issues persist, try lowering it further to `0.4` or below. 
+ ulimits: + memlock: -1 + stack: 67108864 + ipc: host + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ["0"] # Modify for multiple GPUs: ["0", "1"] + capabilities: [gpu] + + mineru-gradio: + image: mineru:latest + container_name: mineru-gradio + restart: always + profiles: ["gradio"] + ports: + - 7860:7860 + environment: + MINERU_MODEL_SOURCE: local + entrypoint: mineru-gradio + command: + --server-name 0.0.0.0 + --server-port 7860 + # --enable-api false # If you want to disable the API, set this to false + # --max-convert-pages 20 # If you want to limit the number of pages for conversion, set this to a specific number + # parameters for vllm-engine + # --data-parallel-size 2 # If using multiple GPUs, increase throughput using vllm's multi-GPU parallel mode + # --gpu-memory-utilization 0.5 # If running on a single GPU and encountering VRAM shortage, reduce the KV cache size by this parameter, if VRAM issues persist, try lowering it further to `0.4` or below. 
+ ulimits: + memlock: -1 + stack: 67108864 + ipc: host + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ["0"] # Modify for multiple GPUs: ["0", "1"] + capabilities: [gpu] diff --git a/docker/global/Dockerfile b/docker/global/Dockerfile new file mode 100644 index 0000000..f2e7b1f --- /dev/null +++ b/docker/global/Dockerfile @@ -0,0 +1,39 @@ +# Use the official vllm image for gpu with Ampere, Ada Lovelace, Hopper architecture (8.0 <= Compute Capability <= 9.0) +# Compute Capability version query (https://developer.nvidia.com/cuda-gpus) +# only supports the x86_64 architecture +FROM vllm/vllm-openai:v0.10.1.1 + +# Use the official vllm image for gpu with Volta, Turing, Blackwell architecture (7.0 <= Compute Capability < 8.0 or Compute Capability >= 10.0) +# supports both the x86_64 and ARM(AArch64) architectures +# FROM vllm/vllm-openai:v0.11.0 + +# Install libgl for opencv support & Noto fonts for Chinese characters +RUN apt-get update && \ + apt-get install -y \ + fonts-noto-core \ + fonts-noto-cjk \ + fontconfig \ + libgl1 && \ + fc-cache -fv && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Install LibreOffice for Word to PDF conversion (minimal installation) +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + libreoffice-writer \ + libreoffice-core \ + fonts-noto-core \ + fonts-noto-cjk && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Install mineru latest +RUN python3 -m pip install -U 'mineru[core]>=2.7.0' --break-system-packages && \ + python3 -m pip cache purge + +# Download models and update the configuration file +RUN /bin/bash -c "mineru-models-download -s huggingface -m all" + +# Set the entry point to export MINERU_MODEL_SOURCE=local and then exec the command passed to the container +ENTRYPOINT ["/bin/bash", "-c", "export MINERU_MODEL_SOURCE=local && exec \"$@\"", "--"] \ No newline at end of file diff --git a/docs/assets/images/BISHENG_01.png b/docs/assets/images/BISHENG_01.png new file mode 100644 
index 0000000..1291233 Binary files /dev/null and b/docs/assets/images/BISHENG_01.png differ diff --git a/docs/assets/images/Cherry_Studio_1.png b/docs/assets/images/Cherry_Studio_1.png new file mode 100644 index 0000000..dffb4a0 Binary files /dev/null and b/docs/assets/images/Cherry_Studio_1.png differ diff --git a/docs/assets/images/Cherry_Studio_2.png b/docs/assets/images/Cherry_Studio_2.png new file mode 100644 index 0000000..c1b1cfe Binary files /dev/null and b/docs/assets/images/Cherry_Studio_2.png differ diff --git a/docs/assets/images/Cherry_Studio_3.png b/docs/assets/images/Cherry_Studio_3.png new file mode 100644 index 0000000..d400e73 Binary files /dev/null and b/docs/assets/images/Cherry_Studio_3.png differ diff --git a/docs/assets/images/Cherry_Studio_4.png b/docs/assets/images/Cherry_Studio_4.png new file mode 100644 index 0000000..26a5ddd Binary files /dev/null and b/docs/assets/images/Cherry_Studio_4.png differ diff --git a/docs/assets/images/Cherry_Studio_5.png b/docs/assets/images/Cherry_Studio_5.png new file mode 100644 index 0000000..da88d85 Binary files /dev/null and b/docs/assets/images/Cherry_Studio_5.png differ diff --git a/docs/assets/images/Cherry_Studio_6.png b/docs/assets/images/Cherry_Studio_6.png new file mode 100644 index 0000000..35762af Binary files /dev/null and b/docs/assets/images/Cherry_Studio_6.png differ diff --git a/docs/assets/images/Cherry_Studio_7.png b/docs/assets/images/Cherry_Studio_7.png new file mode 100644 index 0000000..916ecfe Binary files /dev/null and b/docs/assets/images/Cherry_Studio_7.png differ diff --git a/docs/assets/images/Cherry_Studio_8.png b/docs/assets/images/Cherry_Studio_8.png new file mode 100644 index 0000000..ab2a9b9 Binary files /dev/null and b/docs/assets/images/Cherry_Studio_8.png differ diff --git a/docs/assets/images/Coze_1.png b/docs/assets/images/Coze_1.png new file mode 100644 index 0000000..7c834d1 Binary files /dev/null and b/docs/assets/images/Coze_1.png differ diff --git 
a/docs/assets/images/Coze_10.png b/docs/assets/images/Coze_10.png new file mode 100644 index 0000000..6feda8e Binary files /dev/null and b/docs/assets/images/Coze_10.png differ diff --git a/docs/assets/images/Coze_11.png b/docs/assets/images/Coze_11.png new file mode 100644 index 0000000..4207762 Binary files /dev/null and b/docs/assets/images/Coze_11.png differ diff --git a/docs/assets/images/Coze_12.png b/docs/assets/images/Coze_12.png new file mode 100644 index 0000000..ed8ef1e Binary files /dev/null and b/docs/assets/images/Coze_12.png differ diff --git a/docs/assets/images/Coze_13.png b/docs/assets/images/Coze_13.png new file mode 100644 index 0000000..fa2e79c Binary files /dev/null and b/docs/assets/images/Coze_13.png differ diff --git a/docs/assets/images/Coze_14.png b/docs/assets/images/Coze_14.png new file mode 100644 index 0000000..cc49165 Binary files /dev/null and b/docs/assets/images/Coze_14.png differ diff --git a/docs/assets/images/Coze_15.png b/docs/assets/images/Coze_15.png new file mode 100644 index 0000000..fd95fff Binary files /dev/null and b/docs/assets/images/Coze_15.png differ diff --git a/docs/assets/images/Coze_16.png b/docs/assets/images/Coze_16.png new file mode 100644 index 0000000..29c6598 Binary files /dev/null and b/docs/assets/images/Coze_16.png differ diff --git a/docs/assets/images/Coze_17.png b/docs/assets/images/Coze_17.png new file mode 100644 index 0000000..7127927 Binary files /dev/null and b/docs/assets/images/Coze_17.png differ diff --git a/docs/assets/images/Coze_18.png b/docs/assets/images/Coze_18.png new file mode 100644 index 0000000..ef2c54d Binary files /dev/null and b/docs/assets/images/Coze_18.png differ diff --git a/docs/assets/images/Coze_19.png b/docs/assets/images/Coze_19.png new file mode 100644 index 0000000..b7afb73 Binary files /dev/null and b/docs/assets/images/Coze_19.png differ diff --git a/docs/assets/images/Coze_2.png b/docs/assets/images/Coze_2.png new file mode 100644 index 0000000..fb8dac3 Binary 
files /dev/null and b/docs/assets/images/Coze_2.png differ diff --git a/docs/assets/images/Coze_20.png b/docs/assets/images/Coze_20.png new file mode 100644 index 0000000..a7505c3 Binary files /dev/null and b/docs/assets/images/Coze_20.png differ diff --git a/docs/assets/images/Coze_21.png b/docs/assets/images/Coze_21.png new file mode 100644 index 0000000..a8365e6 Binary files /dev/null and b/docs/assets/images/Coze_21.png differ diff --git a/docs/assets/images/Coze_3.png b/docs/assets/images/Coze_3.png new file mode 100644 index 0000000..f77f896 Binary files /dev/null and b/docs/assets/images/Coze_3.png differ diff --git a/docs/assets/images/Coze_4.png b/docs/assets/images/Coze_4.png new file mode 100644 index 0000000..c67f525 Binary files /dev/null and b/docs/assets/images/Coze_4.png differ diff --git a/docs/assets/images/Coze_5.png b/docs/assets/images/Coze_5.png new file mode 100644 index 0000000..e70ba9c Binary files /dev/null and b/docs/assets/images/Coze_5.png differ diff --git a/docs/assets/images/Coze_6.png b/docs/assets/images/Coze_6.png new file mode 100644 index 0000000..5a0cb10 Binary files /dev/null and b/docs/assets/images/Coze_6.png differ diff --git a/docs/assets/images/Coze_7.png b/docs/assets/images/Coze_7.png new file mode 100644 index 0000000..9f6f829 Binary files /dev/null and b/docs/assets/images/Coze_7.png differ diff --git a/docs/assets/images/Coze_8.png b/docs/assets/images/Coze_8.png new file mode 100644 index 0000000..f08e537 Binary files /dev/null and b/docs/assets/images/Coze_8.png differ diff --git a/docs/assets/images/Coze_9.png b/docs/assets/images/Coze_9.png new file mode 100644 index 0000000..fee7923 Binary files /dev/null and b/docs/assets/images/Coze_9.png differ diff --git a/docs/assets/images/DataFLow_01.png b/docs/assets/images/DataFLow_01.png new file mode 100644 index 0000000..17e3882 Binary files /dev/null and b/docs/assets/images/DataFLow_01.png differ diff --git a/docs/assets/images/DataFlow_02.png 
b/docs/assets/images/DataFlow_02.png new file mode 100644 index 0000000..a6182dd Binary files /dev/null and b/docs/assets/images/DataFlow_02.png differ diff --git a/docs/assets/images/Dify_1.png b/docs/assets/images/Dify_1.png new file mode 100644 index 0000000..dbb9e6d Binary files /dev/null and b/docs/assets/images/Dify_1.png differ diff --git a/docs/assets/images/Dify_10.png b/docs/assets/images/Dify_10.png new file mode 100644 index 0000000..d6626ab Binary files /dev/null and b/docs/assets/images/Dify_10.png differ diff --git a/docs/assets/images/Dify_11.png b/docs/assets/images/Dify_11.png new file mode 100644 index 0000000..bec7e91 Binary files /dev/null and b/docs/assets/images/Dify_11.png differ diff --git a/docs/assets/images/Dify_12.png b/docs/assets/images/Dify_12.png new file mode 100644 index 0000000..822fd7f Binary files /dev/null and b/docs/assets/images/Dify_12.png differ diff --git a/docs/assets/images/Dify_13.png b/docs/assets/images/Dify_13.png new file mode 100644 index 0000000..d5025f1 Binary files /dev/null and b/docs/assets/images/Dify_13.png differ diff --git a/docs/assets/images/Dify_14.png b/docs/assets/images/Dify_14.png new file mode 100644 index 0000000..f785542 Binary files /dev/null and b/docs/assets/images/Dify_14.png differ diff --git a/docs/assets/images/Dify_15.png b/docs/assets/images/Dify_15.png new file mode 100644 index 0000000..ef40173 Binary files /dev/null and b/docs/assets/images/Dify_15.png differ diff --git a/docs/assets/images/Dify_16.png b/docs/assets/images/Dify_16.png new file mode 100644 index 0000000..1f203b5 Binary files /dev/null and b/docs/assets/images/Dify_16.png differ diff --git a/docs/assets/images/Dify_17.png b/docs/assets/images/Dify_17.png new file mode 100644 index 0000000..f944a39 Binary files /dev/null and b/docs/assets/images/Dify_17.png differ diff --git a/docs/assets/images/Dify_18.png b/docs/assets/images/Dify_18.png new file mode 100644 index 0000000..a2b069d Binary files /dev/null and 
b/docs/assets/images/Dify_18.png differ diff --git a/docs/assets/images/Dify_19.png b/docs/assets/images/Dify_19.png new file mode 100644 index 0000000..dc278e0 Binary files /dev/null and b/docs/assets/images/Dify_19.png differ diff --git a/docs/assets/images/Dify_2.png b/docs/assets/images/Dify_2.png new file mode 100644 index 0000000..182c85f Binary files /dev/null and b/docs/assets/images/Dify_2.png differ diff --git a/docs/assets/images/Dify_20.png b/docs/assets/images/Dify_20.png new file mode 100644 index 0000000..91b910e Binary files /dev/null and b/docs/assets/images/Dify_20.png differ diff --git a/docs/assets/images/Dify_21.png b/docs/assets/images/Dify_21.png new file mode 100644 index 0000000..784de77 Binary files /dev/null and b/docs/assets/images/Dify_21.png differ diff --git a/docs/assets/images/Dify_22.png b/docs/assets/images/Dify_22.png new file mode 100644 index 0000000..304995f Binary files /dev/null and b/docs/assets/images/Dify_22.png differ diff --git a/docs/assets/images/Dify_23.png b/docs/assets/images/Dify_23.png new file mode 100644 index 0000000..9a1ac09 Binary files /dev/null and b/docs/assets/images/Dify_23.png differ diff --git a/docs/assets/images/Dify_24.png b/docs/assets/images/Dify_24.png new file mode 100644 index 0000000..4902617 Binary files /dev/null and b/docs/assets/images/Dify_24.png differ diff --git a/docs/assets/images/Dify_25.png b/docs/assets/images/Dify_25.png new file mode 100644 index 0000000..21315a3 Binary files /dev/null and b/docs/assets/images/Dify_25.png differ diff --git a/docs/assets/images/Dify_26.png b/docs/assets/images/Dify_26.png new file mode 100644 index 0000000..c59244b Binary files /dev/null and b/docs/assets/images/Dify_26.png differ diff --git a/docs/assets/images/Dify_3.png b/docs/assets/images/Dify_3.png new file mode 100644 index 0000000..0a7cb96 Binary files /dev/null and b/docs/assets/images/Dify_3.png differ diff --git a/docs/assets/images/Dify_4.png b/docs/assets/images/Dify_4.png new file 
mode 100644 index 0000000..759c070 Binary files /dev/null and b/docs/assets/images/Dify_4.png differ diff --git a/docs/assets/images/Dify_5.png b/docs/assets/images/Dify_5.png new file mode 100644 index 0000000..bc73986 Binary files /dev/null and b/docs/assets/images/Dify_5.png differ diff --git a/docs/assets/images/Dify_6.png b/docs/assets/images/Dify_6.png new file mode 100644 index 0000000..27225bf Binary files /dev/null and b/docs/assets/images/Dify_6.png differ diff --git a/docs/assets/images/Dify_7.png b/docs/assets/images/Dify_7.png new file mode 100644 index 0000000..82bb291 Binary files /dev/null and b/docs/assets/images/Dify_7.png differ diff --git a/docs/assets/images/Dify_8.png b/docs/assets/images/Dify_8.png new file mode 100644 index 0000000..9f9422a Binary files /dev/null and b/docs/assets/images/Dify_8.png differ diff --git a/docs/assets/images/Dify_9.png b/docs/assets/images/Dify_9.png new file mode 100644 index 0000000..b94f315 Binary files /dev/null and b/docs/assets/images/Dify_9.png differ diff --git a/docs/assets/images/DingTalk_01.png b/docs/assets/images/DingTalk_01.png new file mode 100644 index 0000000..413012a Binary files /dev/null and b/docs/assets/images/DingTalk_01.png differ diff --git a/docs/assets/images/FastGPT_01.png b/docs/assets/images/FastGPT_01.png new file mode 100644 index 0000000..25fbdfa Binary files /dev/null and b/docs/assets/images/FastGPT_01.png differ diff --git a/docs/assets/images/FastGPT_02.png b/docs/assets/images/FastGPT_02.png new file mode 100644 index 0000000..345f1ee Binary files /dev/null and b/docs/assets/images/FastGPT_02.png differ diff --git a/docs/assets/images/ModelWhale_01.png b/docs/assets/images/ModelWhale_01.png new file mode 100644 index 0000000..b2f768a Binary files /dev/null and b/docs/assets/images/ModelWhale_01.png differ diff --git a/docs/assets/images/ModelWhale_02.png b/docs/assets/images/ModelWhale_02.png new file mode 100644 index 0000000..bc964f1 Binary files /dev/null and 
b/docs/assets/images/ModelWhale_02.png differ diff --git a/docs/assets/images/ModelWhale_1.png b/docs/assets/images/ModelWhale_1.png new file mode 100644 index 0000000..c80e7f4 Binary files /dev/null and b/docs/assets/images/ModelWhale_1.png differ diff --git a/docs/assets/images/RagFlow_01.png b/docs/assets/images/RagFlow_01.png new file mode 100644 index 0000000..476e0de Binary files /dev/null and b/docs/assets/images/RagFlow_01.png differ diff --git a/docs/assets/images/RagFlow_02.png b/docs/assets/images/RagFlow_02.png new file mode 100644 index 0000000..f2fec68 Binary files /dev/null and b/docs/assets/images/RagFlow_02.png differ diff --git a/docs/assets/images/Sider_1.png b/docs/assets/images/Sider_1.png new file mode 100644 index 0000000..f682e38 Binary files /dev/null and b/docs/assets/images/Sider_1.png differ diff --git a/docs/assets/images/coze_0.png b/docs/assets/images/coze_0.png new file mode 100644 index 0000000..92ff213 Binary files /dev/null and b/docs/assets/images/coze_0.png differ diff --git a/docs/assets/images/n8n_0.png b/docs/assets/images/n8n_0.png new file mode 100644 index 0000000..31b42c0 Binary files /dev/null and b/docs/assets/images/n8n_0.png differ diff --git a/docs/assets/images/n8n_1.png b/docs/assets/images/n8n_1.png new file mode 100644 index 0000000..3f9fecb Binary files /dev/null and b/docs/assets/images/n8n_1.png differ diff --git a/docs/assets/images/n8n_10.png b/docs/assets/images/n8n_10.png new file mode 100644 index 0000000..6fdc12c Binary files /dev/null and b/docs/assets/images/n8n_10.png differ diff --git a/docs/assets/images/n8n_2.png b/docs/assets/images/n8n_2.png new file mode 100644 index 0000000..f93599a Binary files /dev/null and b/docs/assets/images/n8n_2.png differ diff --git a/docs/assets/images/n8n_3.png b/docs/assets/images/n8n_3.png new file mode 100644 index 0000000..c1ab880 Binary files /dev/null and b/docs/assets/images/n8n_3.png differ diff --git a/docs/assets/images/n8n_4.png 
b/docs/assets/images/n8n_4.png new file mode 100644 index 0000000..76657fa Binary files /dev/null and b/docs/assets/images/n8n_4.png differ diff --git a/docs/assets/images/n8n_5.png b/docs/assets/images/n8n_5.png new file mode 100644 index 0000000..f6aa18a Binary files /dev/null and b/docs/assets/images/n8n_5.png differ diff --git a/docs/assets/images/n8n_6.png b/docs/assets/images/n8n_6.png new file mode 100644 index 0000000..88c9ea3 Binary files /dev/null and b/docs/assets/images/n8n_6.png differ diff --git a/docs/assets/images/n8n_7.png b/docs/assets/images/n8n_7.png new file mode 100644 index 0000000..7a1e0f7 Binary files /dev/null and b/docs/assets/images/n8n_7.png differ diff --git a/docs/assets/images/n8n_8.png b/docs/assets/images/n8n_8.png new file mode 100644 index 0000000..9daff94 Binary files /dev/null and b/docs/assets/images/n8n_8.png differ diff --git a/docs/assets/images/n8n_9.png b/docs/assets/images/n8n_9.png new file mode 100644 index 0000000..77c6272 Binary files /dev/null and b/docs/assets/images/n8n_9.png differ diff --git a/docs/chemical_knowledge_introduction/introduction.pdf b/docs/chemical_knowledge_introduction/introduction.pdf new file mode 100644 index 0000000..9ececd8 Binary files /dev/null and b/docs/chemical_knowledge_introduction/introduction.pdf differ diff --git a/docs/chemical_knowledge_introduction/introduction.xmind b/docs/chemical_knowledge_introduction/introduction.xmind new file mode 100644 index 0000000..a73d2c1 Binary files /dev/null and b/docs/chemical_knowledge_introduction/introduction.xmind differ diff --git a/docs/en/demo/index.md b/docs/en/demo/index.md new file mode 100644 index 0000000..0b39f98 --- /dev/null +++ b/docs/en/demo/index.md @@ -0,0 +1,2 @@ + + diff --git a/docs/en/faq/index.md b/docs/en/faq/index.md new file mode 100644 index 0000000..0877424 --- /dev/null +++ b/docs/en/faq/index.md @@ -0,0 +1,30 @@ +# Frequently Asked Questions + +If your question is not listed, try using 
[DeepWiki](https://deepwiki.com/opendatalab/MinerU)'s AI assistant for common issues. + +For unresolved problems, join our [Discord](https://discord.gg/Tdedn9GTXq) or [WeChat](https://mineru.net/community-portal/?aliasId=3c430f94) community for support. + +??? question "Encountered the error `ImportError: libGL.so.1: cannot open shared object file: No such file or directory` in Ubuntu 22.04 on WSL2" + + The `libgl` library is missing in Ubuntu 22.04 on WSL2. You can install the `libgl` library with the following command to resolve the issue: + + ```bash + sudo apt-get install libgl1-mesa-glx + ``` + + Reference: [#388](https://github.com/opendatalab/MinerU/issues/388) + + +??? question "Missing text information in parsing results when installing and using on Linux systems." + + MinerU uses `pypdfium2` instead of `pymupdf` as the PDF page rendering engine in versions >=2.0 to resolve AGPLv3 license issues. On some Linux distributions, due to missing CJK fonts, some text may be lost during the process of rendering PDFs to images. + To solve this problem, you can install the noto font package with the following commands, which are effective on Ubuntu/Debian systems: + ```bash + sudo apt update + sudo apt install fonts-noto-core + sudo apt install fonts-noto-cjk + fc-cache -fv + ``` + You can also directly use our [Docker deployment](../quick_start/docker_deployment.md) method to build the image, which includes the above font packages by default. + + Reference: [#2915](https://github.com/opendatalab/MinerU/issues/2915) diff --git a/docs/en/index.md b/docs/en/index.md new file mode 100644 index 0000000..41d0a56 --- /dev/null +++ b/docs/en/index.md @@ -0,0 +1,70 @@ +
+ +

+ +

+
+ + + +[![stars](https://img.shields.io/github/stars/opendatalab/MinerU.svg)](https://github.com/opendatalab/MinerU) +[![forks](https://img.shields.io/github/forks/opendatalab/MinerU.svg)](https://github.com/opendatalab/MinerU) +[![open issues](https://img.shields.io/github/issues-raw/opendatalab/MinerU)](https://github.com/opendatalab/MinerU/issues) +[![issue resolution](https://img.shields.io/github/issues-closed-raw/opendatalab/MinerU)](https://github.com/opendatalab/MinerU/issues) +[![PyPI version](https://img.shields.io/pypi/v/mineru)](https://pypi.org/project/mineru/) +[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/mineru)](https://pypi.org/project/mineru/) +[![Downloads](https://static.pepy.tech/badge/mineru)](https://pepy.tech/project/mineru) +[![Downloads](https://static.pepy.tech/badge/mineru/month)](https://pepy.tech/project/mineru) +[![OpenDataLab](https://img.shields.io/badge/webapp_on_mineru.net-blue?logo=data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMTM0IiBoZWlnaHQ9IjEzNCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cGF0aCBkPSJtMTIyLDljMCw1LTQsOS05LDlzLTktNC05LTksNC05LDktOSw5LDQsOSw5eiIgZmlsbD0idXJsKCNhKSIvPjxwYXRoIGQ9Im0xMjIsOWMwLDUtNCw5LTksOXMtOS00LTktOSw0LTksOS05LDksNCw5LDl6IiBmaWxsPSIjMDEwMTAxIi8+PHBhdGggZD0ibTkxLDE4YzAsNS00LDktOSw5cy05LTQtOS05LDQtOSw5LTksOSw0LDksOXoiIGZpbGw9InVybCgjYikiLz48cGF0aCBkPSJtOTEsMThjMCw1LTQsOS05LDlzLTktNC05LTksNC05LDktOSw5LDQsOSw5eiIgZmlsbD0iIzAxMDEwMSIvPjxwYXRoIGZpbGwtcnVsZT0iZXZlbm9kZCIgY2xpcC1ydWxlPSJldmVub2RkIiBkPSJtMzksNjJjMCwxNiw4LDMwLDIwLDM4LDctNiwxMi0xNiwxMi0yNlY0OWMwLTQsMy03LDYtOGw0Ni0xMmM1LTEsMTEsMywxMSw4djMxYzAsMzctMzAsNjYtNjYsNjYtMzcsMC02Ni0zMC02Ni02NlY0NmMwLTQsMy03LDYtOGwyMC02YzUtMSwxMSwzLDExLDh2MjF6bS0yOSw2YzAsMTYsNiwzMCwxNyw0MCwzLDEsNSwxLDgsMSw1LDAsMTAtMSwxNS0zQzM3LDk1LDI5LDc5LDI5LDYyVjQybC0xOSw1djIweiIgZmlsbD0idXJsKCNjKSIvPjxwYXRoIGZpbGwtcnVsZT0iZXZlbm9kZCIgY2xpcC1ydWxlPSJldmVub2RkIiBkPSJtMzksNjJjMCwxNiw4LDMwLDIwLDM4LDctNiwxMi0xNiwxMi0yNlY0OWMwLTQsMy03LDYtOGw0Ni0xMmM1LTEsMTEsMywxMSw4djM
xYzAsMzctMzAsNjYtNjYsNjYtMzcsMC02Ni0zMC02Ni02NlY0NmMwLTQsMy03LDYtOGwyMC02YzUtMSwxMSwzLDExLDh2MjF6bS0yOSw2YzAsMTYsNiwzMCwxNyw0MCwzLDEsNSwxLDgsMSw1LDAsMTAtMSwxNS0zQzM3LDk1LDI5LDc5LDI5LDYyVjQybC0xOSw1djIweiIgZmlsbD0iIzAxMDEwMSIvPjxkZWZzPjxsaW5lYXJHcmFkaWVudCBpZD0iYSIgeDE9Ijg0IiB5MT0iNDEiIHgyPSI3NSIgeTI9IjEyMCIgZ3JhZGllbnRVbml0cz0idXNlclNwYWNlT25Vc2UiPjxzdG9wIHN0b3AtY29sb3I9IiNmZmYiLz48c3RvcCBvZmZzZXQ9IjEiIHN0b3AtY29sb3I9IiMyZTJlMmUiLz48L2xpbmVhckdyYWRpZW50PjxsaW5lYXJHcmFkaWVudCBpZD0iYiIgeDE9Ijg0IiB5MT0iNDEiIHgyPSI3NSIgeTI9IjEyMCIgZ3JhZGllbnRVbml0cz0idXNlclNwYWNlT25Vc2UiPjxzdG9wIHN0b3AtY29sb3I9IiNmZmYiLz48c3RvcCBvZmZzZXQ9IjEiIHN0b3AtY29sb3I9IiMyZTJlMmUiLz48L2xpbmVhckdyYWRpZW50PjxsaW5lYXJHcmFkaWVudCBpZD0iYyIgeDE9Ijg0IiB5MT0iNDEiIHgyPSI3NSIgeTI9IjEyMCIgZ3JhZGllbnRVbml0cz0idXNlclNwYWNlT25Vc2UiPjxzdG9wIHN0b3AtY29sb3I9IiNmZmYiLz48c3RvcCBvZmZzZXQ9IjEiIHN0b3AtY29sb3I9IiMyZTJlMmUiLz48L2xpbmVhckdyYWRpZW50PjwvZGVmcz48L3N2Zz4=&labelColor=white)](https://mineru.net/OpenSourceTools/Extractor?source=github) 
+[![HuggingFace](https://img.shields.io/badge/Demo_on_HuggingFace-yellow.svg?logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAF8AAABYCAMAAACkl9t/AAAAk1BMVEVHcEz/nQv/nQv/nQr/nQv/nQr/nQv/nQv/nQr/wRf/txT/pg7/yRr/rBD/zRz/ngv/oAz/zhz/nwv/txT/ngv/0B3+zBz/nQv/0h7/wxn/vRb/thXkuiT/rxH/pxD/ogzcqyf/nQvTlSz/czCxky7/SjifdjT/Mj3+Mj3wMj15aTnDNz+DSD9RTUBsP0FRO0Q6O0WyIxEIAAAAGHRSTlMADB8zSWF3krDDw8TJ1NbX5efv8ff9/fxKDJ9uAAAGKklEQVR42u2Z63qjOAyGC4RwCOfB2JAGqrSb2WnTw/1f3UaWcSGYNKTdf/P+mOkTrE+yJBulvfvLT2A5ruenaVHyIks33npl/6C4s/ZLAM45SOi/1FtZPyFur1OYofBX3w7d54Bxm+E8db+nDr12ttmESZ4zludJEG5S7TO72YPlKZFyE+YCYUJTBZsMiNS5Sd7NlDmKM2Eg2JQg8awbglfqgbhArjxkS7dgp2RH6hc9AMLdZYUtZN5DJr4molC8BfKrEkPKEnEVjLbgW1fLy77ZVOJagoIcLIl+IxaQZGjiX597HopF5CkaXVMDO9Pyix3AFV3kw4lQLCbHuMovz8FallbcQIJ5Ta0vks9RnolbCK84BtjKRS5uA43hYoZcOBGIG2Epbv6CvFVQ8m8loh66WNySsnN7htL58LNp+NXT8/PhXiBXPMjLSxtwp8W9f/1AngRierBkA+kk/IpUSOeKByzn8y3kAAAfh//0oXgV4roHm/kz4E2z//zRc3/lgwBzbM2mJxQEa5pqgX7d1L0htrhx7LKxOZlKbwcAWyEOWqYSI8YPtgDQVjpB5nvaHaSnBaQSD6hweDi8PosxD6/PT09YY3xQA7LTCTKfYX+QHpA0GCcqmEHvr/cyfKQTEuwgbs2kPxJEB0iNjfJcCTPyocx+A0griHSmADiC91oNGVwJ69RudYe65vJmoqfpul0lrqXadW0jFKH5BKwAeCq+Den7s+3zfRJzA61/Uj/9H/VzLKTx9jFPPdXeeP+L7WEvDLAKAIoF8bPTKT0+TM7W8ePj3Rz/Yn3kOAp2f1Kf0Weony7pn/cPydvhQYV+eFOfmOu7VB/ViPe34/EN3RFHY/yRuT8ddCtMPH/McBAT5s+vRde/gf2c/sPsjLK+m5IBQF5tO+h2tTlBGnP6693JdsvofjOPnnEHkh2TnV/X1fBl9S5zrwuwF8NFrAVJVwCAPTe8gaJlomqlp0pv4Pjn98tJ/t/fL++6unpR1YGC2n/KCoa0tTLoKiEeUPDl94nj+5/Tv3/eT5vBQ60X1S0oZr+IWRR8Ldhu7AlLjPISlJcO9vrFotky9SpzDequlwEir5beYAc0R7D9KS1DXva0jhYRDXoExPdc6yw5GShkZXe9QdO/uOvHofxjrV/TNS6iMJS+4TcSTgk9n5agJdBQbB//IfF/HpvPt3Tbi7b6I6K0R72p6ajryEJrENW2bbeVUGjfgoals4L443c7BEE4mJO2SpbRngxQrAKRudRzGQ8jVOL2qDVjjI8K1gc3TIJ5KiFZ1q+gdsARPB4NQS4AjwVSt72DSoXNyOWUrU5mQ9nRYyjp89Xo7oRI6Bga9QNT1mQ/ptaJq5T/7WcgAZywR/XlPGAUDdet3LE+qS0TI+g+aJU8MIqjo0Kx8Ly+maxLjJmjQ18rA0YCkxLQbUZP1WqdmyQGJLUm7VnQFqodmXSqmRrdVpqdzk5LvmvgtEcW8PMGdaS23EOWyDVbACZzUJPaqMbjDxpA3Qrgl0AikimGDbqmyT8P8NOYiqrldF8rX+YN7TopX4UoHuSCYY7cgX4gHwclQKl1zhx0THf+tCAUValz
jI7Wg9EhptrkIcfIJjA94evOn8B2eHaVzvBrnl2ig0So6hvPaz0IGcOvTHvUIlE2+prqAxLSQxZlU2stql1NqCCLdIiIN/i1DBEHUoElM9dBravbiAnKqgpi4IBkw+utSPIoBijDXJipSVV7MpOEJUAc5Qmm3BnUN+w3hteEieYKfRZSIUcXKMVf0u5wD4EwsUNVvZOtUT7A2GkffHjByWpHqvRBYrTV72a6j8zZ6W0DTE86Hn04bmyWX3Ri9WH7ZU6Q7h+ZHo0nHUAcsQvVhXRDZHChwiyi/hnPuOsSEF6Exk3o6Y9DT1eZ+6cASXk2Y9k+6EOQMDGm6WBK10wOQJCBwren86cPPWUcRAnTVjGcU1LBgs9FURiX/e6479yZcLwCBmTxiawEwrOcleuu12t3tbLv/N4RLYIBhYexm7Fcn4OJcn0+zc+s8/VfPeddZHAGN6TT8eGczHdR/Gts1/MzDkThr23zqrVfAMFT33Nx1RJsx1k5zuWILLnG/vsH+Fv5D4NTVcp1Gzo8AAAAAElFTkSuQmCC&labelColor=white)](https://huggingface.co/spaces/opendatalab/MinerU) +[![ModelScope](https://img.shields.io/badge/Demo_on_ModelScope-purple?logo=data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMjIzIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KCiA8Zz4KICA8dGl0bGU+TGF5ZXIgMTwvdGl0bGU+CiAgPHBhdGggaWQ9InN2Z18xNCIgZmlsbD0iIzYyNGFmZiIgZD0ibTAsODkuODRsMjUuNjUsMGwwLDI1LjY0OTk5bC0yNS42NSwwbDAsLTI1LjY0OTk5eiIvPgogIDxwYXRoIGlkPSJzdmdfMTUiIGZpbGw9IiM2MjRhZmYiIGQ9Im05OS4xNCwxMTUuNDlsMjUuNjUsMGwwLDI1LjY1bC0yNS42NSwwbDAsLTI1LjY1eiIvPgogIDxwYXRoIGlkPSJzdmdfMTYiIGZpbGw9IiM2MjRhZmYiIGQ9Im0xNzYuMDksMTQxLjE0bC0yNS42NDk5OSwwbDAsMjIuMTlsNDcuODQsMGwwLC00Ny44NGwtMjIuMTksMGwwLDI1LjY1eiIvPgogIDxwYXRoIGlkPSJzdmdfMTciIGZpbGw9IiMzNmNmZDEiIGQ9Im0xMjQuNzksODkuODRsMjUuNjUsMGwwLDI1LjY0OTk5bC0yNS42NSwwbDAsLTI1LjY0OTk5eiIvPgogIDxwYXRoIGlkPSJzdmdfMTgiIGZpbGw9IiMzNmNmZDEiIGQ9Im0wLDY0LjE5bDI1LjY1LDBsMCwyNS42NWwtMjUuNjUsMGwwLC0yNS42NXoiLz4KICA8cGF0aCBpZD0ic3ZnXzE5IiBmaWxsPSIjNjI0YWZmIiBkPSJtMTk4LjI4LDg5Ljg0bDI1LjY0OTk5LDBsMCwyNS42NDk5OWwtMjUuNjQ5OTksMGwwLC0yNS42NDk5OXoiLz4KICA8cGF0aCBpZD0ic3ZnXzIwIiBmaWxsPSIjMzZjZmQxIiBkPSJtMTk4LjI4LDY0LjE5bDI1LjY0OTk5LDBsMCwyNS42NWwtMjUuNjQ5OTksMGwwLC0yNS42NXoiLz4KICA8cGF0aCBpZD0ic3ZnXzIxIiBmaWxsPSIjNjI0YWZmIiBkPSJtMTUwLjQ0LDQybDAsMjIuMTlsMjUuNjQ5OTksMGwwLDI1LjY1bDIyLjE5LDBsMCwtNDcuODRsLTQ3Ljg0LDB6Ii8+CiAgPHBhdGggaWQ9InN2Z18yMiIgZmlsbD0iIzM2Y2ZkMSIgZD0ibTczLjQ5LDg5Ljg0bDI1LjY1LDBsMCwyNS42NDk5OWwtMjUuNjUsMGwwLC0yNS42NDk5
OXoiLz4KICA8cGF0aCBpZD0ic3ZnXzIzIiBmaWxsPSIjNjI0YWZmIiBkPSJtNDcuODQsNjQuMTlsMjUuNjUsMGwwLC0yMi4xOWwtNDcuODQsMGwwLDQ3Ljg0bDIyLjE5LDBsMCwtMjUuNjV6Ii8+CiAgPHBhdGggaWQ9InN2Z18yNCIgZmlsbD0iIzYyNGFmZiIgZD0ibTQ3Ljg0LDExNS40OWwtMjIuMTksMGwwLDQ3Ljg0bDQ3Ljg0LDBsMCwtMjIuMTlsLTI1LjY1LDBsMCwtMjUuNjV6Ii8+CiA8L2c+Cjwvc3ZnPg==&labelColor=white)](https://www.modelscope.cn/studios/OpenDataLab/MinerU) +[![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/gist/myhloli/a3cb16570ab3cfeadf9d8f0ac91b4fca/mineru_demo.ipynb) +[![arXiv](https://img.shields.io/badge/MinerU-Technical%20Report-b31b1b.svg?logo=arXiv)](https://arxiv.org/abs/2409.18839) +[![arXiv](https://img.shields.io/badge/MinerU2.5-Technical%20Report-b31b1b.svg?logo=arXiv)](https://arxiv.org/abs/2509.22186) +[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/opendatalab/MinerU) + +
+opendatalab%2FMinerU | Trendshift + + + +

+🚀MinerU Official Website→✅ Zero-Install Online Version ✅ Full-Featured Client ✅ Developer API Online Access, skip deployment hassles, get all product formats with one click, go fast! +

+ + + +

+ 👋 join us on Discord and WeChat +

+
+ +## Project Introduction + +MinerU is a tool that converts PDFs into machine-readable formats (e.g., markdown, JSON), allowing for easy extraction into any format. +MinerU was born during the pre-training process of [InternLM](https://github.com/InternLM/InternLM). We focus on solving symbol conversion issues in scientific literature and hope to contribute to technological development in the era of large models. +Compared to well-known commercial products domestically and internationally, MinerU is still young. If you encounter any issues or if the results are not as expected, please submit an issue on [GitHub Issues](https://github.com/opendatalab/MinerU/issues) and **attach the relevant PDF**. + +![type:video](https://github.com/user-attachments/assets/4bea02c9-6d54-4cd6-97ed-dff14340982c) + + +## Key Features + +- Remove headers, footers, footnotes, page numbers and other elements to ensure semantic coherence +- Output text in human reading order, suitable for single-column, multi-column and complex layouts +- Retain the original document structure, including titles, paragraphs, lists, etc. 
+- Extract images, image descriptions, tables, table titles and footnotes +- Automatically identify and convert formulas in documents to LaTeX format +- Automatically identify and convert tables in documents to HTML format +- Automatically detect scanned PDFs and garbled PDFs, and enable OCR functionality +- OCR supports detection and recognition of 109 languages +- Support multiple output formats, such as multimodal and NLP Markdown, reading-order-sorted JSON, and information-rich intermediate formats +- Support multiple visualization results, including layout visualization, span visualization, etc., for efficient confirmation of output effects and quality inspection +- Support pure CPU environment operation, and support GPU(CUDA)/NPU(CANN)/MPS acceleration +- Compatible with Windows, Linux and Mac platforms + + +## User Guide + +- [Quick Start Guide](./quick_start/index.md) +- [Detailed Usage Instructions](./usage/index.md) diff --git a/docs/en/quick_start/docker_deployment.md b/docs/en/quick_start/docker_deployment.md new file mode 100644 index 0000000..2f5217c --- /dev/null +++ b/docs/en/quick_start/docker_deployment.md @@ -0,0 +1,86 @@ +# Deploying MinerU with Docker + +MinerU provides a convenient Docker deployment method, which helps quickly set up the environment and solve some tricky environment compatibility issues. + +## Build Docker Image using Dockerfile + +```bash +wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/docker/global/Dockerfile +docker build -t mineru:latest -f Dockerfile . +``` + +> [!TIP] +> The [Dockerfile](https://github.com/opendatalab/MinerU/blob/master/docker/global/Dockerfile) uses `vllm/vllm-openai:v0.10.1.1` as the base image by default. +> The vLLM v1 engine in this version supports only a limited range of GPU models and may only function on Ampere, Ada Lovelace, and Hopper architectures.
If you cannot use vLLM for accelerated inference on Volta, Turing, or Blackwell GPUs, you can resolve this issue by changing the base image to `vllm/vllm-openai:v0.11.0`. + +## Docker Description + +MinerU's Docker image uses `vllm/vllm-openai` as the base image, so it includes the `vllm` inference acceleration framework and necessary dependencies by default. Therefore, on compatible devices, you can directly use `vllm` to accelerate VLM model inference. + +> [!NOTE] +> Requirements for using `vllm` to accelerate VLM model inference: +> +> - The device must have a graphics card of Volta architecture or later with 8GB+ of available VRAM. +> - The host machine's graphics driver should support CUDA 12.8 or higher; you can check the driver version using the `nvidia-smi` command. +> - The Docker container must have access to the host machine's graphics devices. + +## Start Docker Container + +```bash +docker run --gpus all \ + --shm-size 32g \ + -p 30000:30000 -p 7860:7860 -p 8000:8000 \ + --ipc=host \ + -it mineru:latest \ + /bin/bash +``` + +After executing this command, you will enter the Docker container's interactive terminal with some ports mapped for potential services. You can directly run MinerU-related commands within the container to use MinerU's features. +You can also directly start MinerU services by replacing `/bin/bash` with service startup commands. For detailed instructions, please refer to [Start the service via command](https://opendatalab.github.io/MinerU/usage/quick_usage/#advanced-usage-via-api-webui-http-clientserver). + +## Start Services Directly with Docker Compose + +We provide a [compose.yaml](https://github.com/opendatalab/MinerU/blob/master/docker/compose.yaml) file that you can use to quickly start MinerU services.
+ +```bash +# Download compose.yaml file +wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/docker/compose.yaml +``` + +>[!NOTE] +> +>- The `compose.yaml` file contains configurations for multiple MinerU services; you can choose to start specific services as needed. +>- Different services might have additional parameter configurations, which you can view and edit in the `compose.yaml` file. +>- Due to the pre-allocation of GPU memory by the `vllm` inference acceleration framework, you may not be able to run multiple `vllm` services simultaneously on the same machine. Therefore, ensure that other services that might use GPU memory have been stopped before starting the `vlm-openai-server` service or using the `vlm-vllm-engine` backend. + +--- + +### Start OpenAI-compatible server service +Connect to `openai-server` via the `vlm-http-client` backend. + ```bash + docker compose -f compose.yaml --profile openai-server up -d + ``` + >[!TIP] + >In another terminal, connect to the OpenAI-compatible server via the HTTP client (only requires CPU and network, no vllm environment needed) + > ```bash + > mineru -p <input_path> -o <output_path> -b vlm-http-client -u http://<server_ip>:30000 + > ``` + +--- + +### Start Web API service + ```bash + docker compose -f compose.yaml --profile api up -d + ``` + >[!TIP] + >Access `http://<server_ip>:8000/docs` in your browser to view the API documentation. + +--- + +### Start Gradio WebUI service + ```bash + docker compose -f compose.yaml --profile gradio up -d + ``` + >[!TIP] + > + >- Access `http://<server_ip>:7860` in your browser to use the Gradio WebUI. diff --git a/docs/en/quick_start/extension_modules.md b/docs/en/quick_start/extension_modules.md new file mode 100644 index 0000000..f6bf34a --- /dev/null +++ b/docs/en/quick_start/extension_modules.md @@ -0,0 +1,56 @@ +# MinerU Extension Modules Installation Guide +MinerU supports installing extension modules on demand based on different needs to enhance functionality or support specific model backends.
+ +## Common Scenarios + +### Core Functionality Installation +The `core` module is the core dependency of MinerU, containing all functional modules except `vllm`/`lmdeploy`. Installing this module ensures the basic functionality of MinerU works properly. +```bash +uv pip install "mineru[core]" +``` + +--- + +### Using `vllm` to Accelerate VLM Model Inference +> [!NOTE] +> `vllm` and `lmdeploy` have nearly identical VLM inference acceleration effects and usage methods. You can choose one of them to install and use based on your actual needs, but it is not recommended to install both modules simultaneously to avoid potential dependency conflicts. + +The `vllm` module provides acceleration support for VLM model inference, suitable for graphics cards with Volta architecture and later (8GB+ VRAM). Installing this module can significantly improve model inference speed. + +```bash +uv pip install "mineru[core,vllm]" +``` +> [!TIP] +> If exceptions occur during installation of the extra package including vllm, please refer to the [vllm official documentation](https://docs.vllm.ai/en/latest/getting_started/installation/index.html) to try to resolve the issue, or directly use the [Docker](./docker_deployment.md) deployment method. + +--- + +### Using `lmdeploy` to Accelerate VLM Model Inference +> [!NOTE] +> `vllm` and `lmdeploy` have nearly identical VLM inference acceleration effects and usage methods. You can choose one of them to install and use based on your actual needs, but it is not recommended to install both modules simultaneously to avoid potential dependency conflicts. + +The `lmdeploy` module provides acceleration support for VLM model inference, suitable for graphics cards with Volta architecture and later (8GB+ VRAM). Installing this module can significantly improve model inference speed. 
+ +```bash +uv pip install "mineru[core,lmdeploy]" +``` +> [!TIP] +> If exceptions occur during installation of the extra package including lmdeploy, please refer to the [lmdeploy official documentation](https://lmdeploy.readthedocs.io/en/latest/get_started/installation.html) to try to resolve the issue. + +--- + +### Installing Lightweight Client to Connect to OpenAI-compatible servers (for vlm-http-client mode) +If you need to install a lightweight client on edge devices to connect to an OpenAI-compatible server to use VLM mode, you can install the basic mineru package, which is very lightweight and suitable for devices with only CPU and network connectivity. +```bash +uv pip install mineru +mineru -p <input_path> -o <output_path> -b vlm-http-client -u http://127.0.0.1:30000 +``` + +--- + +### Installing Lightweight Client to Connect to OpenAI-compatible servers (for hybrid-http-client mode) +If you need to install a lightweight client on edge devices to connect to an OpenAI-compatible server to use hybrid mode, you can install the mineru pipeline extension package, which is relatively lightweight and can be used on devices with only CPU and network connectivity, while running faster on devices that support GPU acceleration. +```bash +uv pip install "mineru[pipeline]" +mineru -p <input_path> -o <output_path> -b hybrid-http-client -u http://127.0.0.1:30000 +``` \ No newline at end of file diff --git a/docs/en/quick_start/index.md b/docs/en/quick_start/index.md new file mode 100644 index 0000000..05c245d --- /dev/null +++ b/docs/en/quick_start/index.md @@ -0,0 +1,141 @@ +# Quick Start + +If you encounter any installation issues, please check the [FAQ](../faq/index.md) first.
+ +## Online Experience + +### Official online web application +The official online version has the same functionality as the client, with a beautiful interface and rich features, requires login to use + +- [![OpenDataLab](https://img.shields.io/badge/webapp_on_mineru.net-blue?logo=data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMTM0IiBoZWlnaHQ9IjEzNCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cGF0aCBkPSJtMTIyLDljMCw1LTQsOS05LDlzLTktNC05LTksNC05LDktOSw5LDQsOSw5eiIgZmlsbD0idXJsKCNhKSIvPjxwYXRoIGQ9Im0xMjIsOWMwLDUtNCw5LTksOXMtOS00LTktOSw0LTksOS05LDksNCw5LDl6IiBmaWxsPSIjMDEwMTAxIi8+PHBhdGggZD0ibTkxLDE4YzAsNS00LDktOSw5cy05LTQtOS05LDQtOSw5LTksOSw0LDksOXoiIGZpbGw9InVybCgjYikiLz48cGF0aCBkPSJtOTEsMThjMCw1LTQsOS05LDlzLTktNC05LTksNC05LDktOSw5LDQsOSw5eiIgZmlsbD0iIzAxMDEwMSIvPjxwYXRoIGZpbGwtcnVsZT0iZXZlbm9kZCIgY2xpcC1ydWxlPSJldmVub2RkIiBkPSJtMzksNjJjMCwxNiw4LDMwLDIwLDM4LDctNiwxMi0xNiwxMi0yNlY0OWMwLTQsMy03LDYtOGw0Ni0xMmM1LTEsMTEsMywxMSw4djMxYzAsMzctMzAsNjYtNjYsNjYtMzcsMC02Ni0zMC02Ni02NlY0NmMwLTQsMy03LDYtOGwyMC02YzUtMSwxMSwzLDExLDh2MjF6bS0yOSw2YzAsMTYsNiwzMCwxNyw0MCwzLDEsNSwxLDgsMSw1LDAsMTAtMSwxNS0zQzM3LDk1LDI5LDc5LDI5LDYyVjQybC0xOSw1djIweiIgZmlsbD0idXJsKCNjKSIvPjxwYXRoIGZpbGwtcnVsZT0iZXZlbm9kZCIgY2xpcC1ydWxlPSJldmVub2RkIiBkPSJtMzksNjJjMCwxNiw4LDMwLDIwLDM4LDctNiwxMi0xNiwxMi0yNlY0OWMwLTQsMy03LDYtOGw0Ni0xMmM1LTEsMTEsMywxMSw4djMxYzAsMzctMzAsNjYtNjYsNjYtMzcsMC02Ni0zMC02Ni02NlY0NmMwLTQsMy03LDYtOGwyMC02YzUtMSwxMSwzLDExLDh2MjF6bS0yOSw2YzAsMTYsNiwzMCwxNyw0MCwzLDEsNSwxLDgsMSw1LDAsMTAtMSwxNS0zQzM3LDk1LDI5LDc5LDI5LDYyVjQybC0xOSw1djIweiIgZmlsbD0iIzAxMDEwMSIvPjxkZWZzPjxsaW5lYXJHcmFkaWVudCBpZD0iYSIgeDE9Ijg0IiB5MT0iNDEiIHgyPSI3NSIgeTI9IjEyMCIgZ3JhZGllbnRVbml0cz0idXNlclNwYWNlT25Vc2UiPjxzdG9wIHN0b3AtY29sb3I9IiNmZmYiLz48c3RvcCBvZmZzZXQ9IjEiIHN0b3AtY29sb3I9IiMyZTJlMmUiLz48L2xpbmVhckdyYWRpZW50PjxsaW5lYXJHcmFkaWVudCBpZD0iYiIgeDE9Ijg0IiB5MT0iNDEiIHgyPSI3NSIgeTI9IjEyMCIgZ3JhZGllbnRVbml0cz0idXNlclNwYWNlT25Vc2UiPjxzdG9wIHN0b3AtY29sb3I9IiNmZmYiLz48c3RvcCBvZmZzZXQ9IjEiIHN0b3AtY29sb3I9IiMyZTJlMmUiLz48L2xpbmV
hckdyYWRpZW50PjxsaW5lYXJHcmFkaWVudCBpZD0iYyIgeDE9Ijg0IiB5MT0iNDEiIHgyPSI3NSIgeTI9IjEyMCIgZ3JhZGllbnRVbml0cz0idXNlclNwYWNlT25Vc2UiPjxzdG9wIHN0b3AtY29sb3I9IiNmZmYiLz48c3RvcCBvZmZzZXQ9IjEiIHN0b3AtY29sb3I9IiMyZTJlMmUiLz48L2xpbmVhckdyYWRpZW50PjwvZGVmcz48L3N2Zz4=&labelColor=white)](https://mineru.net/OpenSourceTools/Extractor?source=github) + +### Gradio-based online demo +A WebUI developed based on Gradio, with a simple interface and only core parsing functionality, no login required + +- [![ModelScope](https://img.shields.io/badge/Demo_on_ModelScope-purple?logo=data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMjIzIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KCiA8Zz4KICA8dGl0bGU+TGF5ZXIgMTwvdGl0bGU+CiAgPHBhdGggaWQ9InN2Z18xNCIgZmlsbD0iIzYyNGFmZiIgZD0ibTAsODkuODRsMjUuNjUsMGwwLDI1LjY0OTk5bC0yNS42NSwwbDAsLTI1LjY0OTk5eiIvPgogIDxwYXRoIGlkPSJzdmdfMTUiIGZpbGw9IiM2MjRhZmYiIGQ9Im05OS4xNCwxMTUuNDlsMjUuNjUsMGwwLDI1LjY1bC0yNS42NSwwbDAsLTI1LjY1eiIvPgogIDxwYXRoIGlkPSJzdmdfMTYiIGZpbGw9IiM2MjRhZmYiIGQ9Im0xNzYuMDksMTQxLjE0bC0yNS42NDk5OSwwbDAsMjIuMTlsNDcuODQsMGwwLC00Ny44NGwtMjIuMTksMGwwLDI1LjY1eiIvPgogIDxwYXRoIGlkPSJzdmdfMTciIGZpbGw9IiMzNmNmZDEiIGQ9Im0xMjQuNzksODkuODRsMjUuNjUsMGwwLDI1LjY0OTk5bC0yNS42NSwwbDAsLTI1LjY0OTk5eiIvPgogIDxwYXRoIGlkPSJzdmdfMTgiIGZpbGw9IiMzNmNmZDEiIGQ9Im0wLDY0LjE5bDI1LjY1LDBsMCwyNS42NWwtMjUuNjUsMGwwLC0yNS42NXoiLz4KICA8cGF0aCBpZD0ic3ZnXzE5IiBmaWxsPSIjNjI0YWZmIiBkPSJtMTk4LjI4LDg5Ljg0bDI1LjY0OTk5LDBsMCwyNS42NDk5OWwtMjUuNjQ5OTksMGwwLC0yNS42NDk5OXoiLz4KICA8cGF0aCBpZD0ic3ZnXzIwIiBmaWxsPSIjMzZjZmQxIiBkPSJtMTk4LjI4LDY0LjE5bDI1LjY0OTk5LDBsMCwyNS42NWwtMjUuNjQ5OTksMGwwLC0yNS42NXoiLz4KICA8cGF0aCBpZD0ic3ZnXzIxIiBmaWxsPSIjNjI0YWZmIiBkPSJtMTUwLjQ0LDQybDAsMjIuMTlsMjUuNjQ5OTksMGwwLDI1LjY1bDIyLjE5LDBsMCwtNDcuODRsLTQ3Ljg0LDB6Ii8+CiAgPHBhdGggaWQ9InN2Z18yMiIgZmlsbD0iIzM2Y2ZkMSIgZD0ibTczLjQ5LDg5Ljg0bDI1LjY1LDBsMCwyNS42NDk5OWwtMjUuNjUsMGwwLC0yNS42NDk5OXoiLz4KICA8cGF0aCBpZD0ic3ZnXzIzIiBmaWxsPSIjNjI0YWZmIiBkPSJtNDcuODQsNjQuMTlsMjUuNjUsMGwwLC0yMi4xOWwtNDcuODQsMGwwLDQ3Ljg0bDIyLj
E5LDBsMCwtMjUuNjV6Ii8+CiAgPHBhdGggaWQ9InN2Z18yNCIgZmlsbD0iIzYyNGFmZiIgZD0ibTQ3Ljg0LDExNS40OWwtMjIuMTksMGwwLDQ3Ljg0bDQ3Ljg0LDBsMCwtMjIuMTlsLTI1LjY1LDBsMCwtMjUuNjV6Ii8+CiA8L2c+Cjwvc3ZnPg==&labelColor=white)](https://www.modelscope.cn/studios/OpenDataLab/MinerU) +- [![HuggingFace](https://img.shields.io/badge/Demo_on_HuggingFace-yellow.svg?logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAF8AAABYCAMAAACkl9t/AAAAk1BMVEVHcEz/nQv/nQv/nQr/nQv/nQr/nQv/nQv/nQr/wRf/txT/pg7/yRr/rBD/zRz/ngv/oAz/zhz/nwv/txT/ngv/0B3+zBz/nQv/0h7/wxn/vRb/thXkuiT/rxH/pxD/ogzcqyf/nQvTlSz/czCxky7/SjifdjT/Mj3+Mj3wMj15aTnDNz+DSD9RTUBsP0FRO0Q6O0WyIxEIAAAAGHRSTlMADB8zSWF3krDDw8TJ1NbX5efv8ff9/fxKDJ9uAAAGKklEQVR42u2Z63qjOAyGC4RwCOfB2JAGqrSb2WnTw/1f3UaWcSGYNKTdf/P+mOkTrE+yJBulvfvLT2A5ruenaVHyIks33npl/6C4s/ZLAM45SOi/1FtZPyFur1OYofBX3w7d54Bxm+E8db+nDr12ttmESZ4zludJEG5S7TO72YPlKZFyE+YCYUJTBZsMiNS5Sd7NlDmKM2Eg2JQg8awbglfqgbhArjxkS7dgp2RH6hc9AMLdZYUtZN5DJr4molC8BfKrEkPKEnEVjLbgW1fLy77ZVOJagoIcLIl+IxaQZGjiX597HopF5CkaXVMDO9Pyix3AFV3kw4lQLCbHuMovz8FallbcQIJ5Ta0vks9RnolbCK84BtjKRS5uA43hYoZcOBGIG2Epbv6CvFVQ8m8loh66WNySsnN7htL58LNp+NXT8/PhXiBXPMjLSxtwp8W9f/1AngRierBkA+kk/IpUSOeKByzn8y3kAAAfh//0oXgV4roHm/kz4E2z//zRc3/lgwBzbM2mJxQEa5pqgX7d1L0htrhx7LKxOZlKbwcAWyEOWqYSI8YPtgDQVjpB5nvaHaSnBaQSD6hweDi8PosxD6/PT09YY3xQA7LTCTKfYX+QHpA0GCcqmEHvr/cyfKQTEuwgbs2kPxJEB0iNjfJcCTPyocx+A0griHSmADiC91oNGVwJ69RudYe65vJmoqfpul0lrqXadW0jFKH5BKwAeCq+Den7s+3zfRJzA61/Uj/9H/VzLKTx9jFPPdXeeP+L7WEvDLAKAIoF8bPTKT0+TM7W8ePj3Rz/Yn3kOAp2f1Kf0Weony7pn/cPydvhQYV+eFOfmOu7VB/ViPe34/EN3RFHY/yRuT8ddCtMPH/McBAT5s+vRde/gf2c/sPsjLK+m5IBQF5tO+h2tTlBGnP6693JdsvofjOPnnEHkh2TnV/X1fBl9S5zrwuwF8NFrAVJVwCAPTe8gaJlomqlp0pv4Pjn98tJ/t/fL++6unpR1YGC2n/KCoa0tTLoKiEeUPDl94nj+5/Tv3/eT5vBQ60X1S0oZr+IWRR8Ldhu7AlLjPISlJcO9vrFotky9SpzDequlwEir5beYAc0R7D9KS1DXva0jhYRDXoExPdc6yw5GShkZXe9QdO/uOvHofxjrV/TNS6iMJS+4TcSTgk9n5agJdBQbB//IfF/HpvPt3Tbi7b6I6K0R72p6ajryEJrENW2bbeVUGjfgoals4L443c7BEE4mJO2SpbRngxQrAKRudRzGQ8jVOL2qDVjjI8K1gc3TIJ5KiFZ1q+gdsARPB4NQS4AjwVSt72DSoXNyOWUrU5mQ9
nRYyjp89Xo7oRI6Bga9QNT1mQ/ptaJq5T/7WcgAZywR/XlPGAUDdet3LE+qS0TI+g+aJU8MIqjo0Kx8Ly+maxLjJmjQ18rA0YCkxLQbUZP1WqdmyQGJLUm7VnQFqodmXSqmRrdVpqdzk5LvmvgtEcW8PMGdaS23EOWyDVbACZzUJPaqMbjDxpA3Qrgl0AikimGDbqmyT8P8NOYiqrldF8rX+YN7TopX4UoHuSCYY7cgX4gHwclQKl1zhx0THf+tCAUValzjI7Wg9EhptrkIcfIJjA94evOn8B2eHaVzvBrnl2ig0So6hvPaz0IGcOvTHvUIlE2+prqAxLSQxZlU2stql1NqCCLdIiIN/i1DBEHUoElM9dBravbiAnKqgpi4IBkw+utSPIoBijDXJipSVV7MpOEJUAc5Qmm3BnUN+w3hteEieYKfRZSIUcXKMVf0u5wD4EwsUNVvZOtUT7A2GkffHjByWpHqvRBYrTV72a6j8zZ6W0DTE86Hn04bmyWX3Ri9WH7ZU6Q7h+ZHo0nHUAcsQvVhXRDZHChwiyi/hnPuOsSEF6Exk3o6Y9DT1eZ+6cASXk2Y9k+6EOQMDGm6WBK10wOQJCBwren86cPPWUcRAnTVjGcU1LBgs9FURiX/e6479yZcLwCBmTxiawEwrOcleuu12t3tbLv/N4RLYIBhYexm7Fcn4OJcn0+zc+s8/VfPeddZHAGN6TT8eGczHdR/Gts1/MzDkThr23zqrVfAMFT33Nx1RJsx1k5zuWILLnG/vsH+Fv5D4NTVcp1Gzo8AAAAAElFTkSuQmCC&labelColor=white)](https://huggingface.co/spaces/opendatalab/MinerU) + +## Local Deployment + +> [!WARNING] +> **Prerequisites - Hardware and Software Environment Support** +> +> To ensure the stability and reliability of the project, we have optimized and tested only specific hardware and software environments during development. This ensures that users can achieve optimal performance and encounter the fewest compatibility issues when deploying and running the project on recommended system configurations. +> +> By concentrating our resources and efforts on mainstream environments, our team can more efficiently resolve potential bugs and timely develop new features. +> +> In non-mainstream environments, due to the diversity of hardware and software configurations, as well as compatibility issues with third-party dependencies, we cannot guarantee 100% usability of the project. Therefore, for users who wish to use this project in non-recommended environments, we suggest carefully reading the documentation and FAQ first, as most issues have corresponding solutions in the FAQ. 
Additionally, we encourage community feedback on issues so that we can gradually expand our support range. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Parsing Backend | pipeline | *-auto-engine | *-http-client
hybrid | vlm | hybrid | vlm
Backend Features | Good Compatibility | High Hardware Requirements | For OpenAI Compatible Servers<sup>2</sup>
Accuracy<sup>1</sup> | 82+ | 90+
Operating System | Linux<sup>3</sup> / Windows<sup>4</sup> / macOS<sup>5</sup>
Pure CPU Support
GPU Acceleration | Volta and later architecture GPUs or Apple Silicon | Not Required
Min VRAM | 6GB | 10GB | 8GB | 3GB
RAM | Min 16GB+, Recommended 32GB+ | 8GB
Disk Space | 20GB+, SSD Recommended | 2GB
Python Version | 3.10-3.13
+ +<sup>1</sup> Accuracy metrics are the End-to-End Evaluation Overall scores from OmniDocBench (v1.5), based on the latest version of `MinerU`. +<sup>2</sup> Servers compatible with the OpenAI API, such as local model servers or remote model services deployed via inference frameworks like `vLLM`/`SGLang`/`LMDeploy`. +<sup>3</sup> On Linux, only distributions released in 2019 or later are supported. +<sup>4</sup> Since the key dependency `ray` does not support Python 3.13 on Windows, only versions 3.10~3.12 are supported. +<sup>5</sup> macOS requires version 14.0 or later. + + +### Install MinerU + +#### Install MinerU using pip or uv +```bash +pip install --upgrade pip +pip install uv +uv pip install -U "mineru[all]" +``` + +#### Install MinerU from source code +```bash +git clone https://github.com/opendatalab/MinerU.git +cd MinerU +uv pip install -e .[all] +``` + +> [!TIP] +> `mineru[all]` includes all core features, is compatible with Windows / Linux / macOS systems, and is suitable for most users. +> If you need to specify the inference framework for the VLM model, or only intend to install a lightweight client on an edge device, please refer to the [Extension Modules Installation Guide](https://opendatalab.github.io/MinerU/quick_start/extension_modules/). + +--- + +#### Deploy MinerU using Docker +MinerU provides a convenient Docker deployment method, which helps quickly set up the environment and solve some tricky environment compatibility issues. +You can get the [Docker Deployment Instructions](./docker_deployment.md) in the documentation. + +--- + +### Using MinerU + +If your device meets the GPU acceleration requirements in the table above, you can use a simple command line for document parsing: +```bash +mineru -p <input_path> -o <output_path> +``` +If your device does not meet the GPU acceleration requirements, you can specify the backend as `pipeline` to run in a pure CPU environment: +```bash +mineru -p <input_path> -o <output_path> -b pipeline +``` + +You can use MinerU for PDF parsing through various methods such as command line, API, and WebUI.
For detailed instructions, please refer to the [Usage Guide](../usage/index.md). \ No newline at end of file diff --git a/docs/en/reference/changelog.md b/docs/en/reference/changelog.md new file mode 100644 index 0000000..9a54d25 --- /dev/null +++ b/docs/en/reference/changelog.md @@ -0,0 +1,461 @@ +# Changelog + +This document records the update history of the MinerU project for versions 2.6.7 and earlier. For the latest version updates, please check the project [README](https://github.com/opendatalab/MinerU/blob/master/README.md). + +--- + +## 2.6 Series Versions + +### 2.6.7 (2025/12/12) + +- Bug fix: #4168 + +### 2.6.6 (2025/12/02) + +**`mineru-api` tool optimizations** + +- Added descriptive text to `mineru-api` interface parameters to improve API documentation readability. +- You can use the environment variable `MINERU_API_ENABLE_FASTAPI_DOCS` to control whether the auto-generated interface documentation page is enabled (enabled by default). +- Added concurrency configuration options for the `vlm-vllm-async-engine`, `vlm-lmdeploy-engine`, and `vlm-http-client` backends. Users can use the environment variable `MINERU_API_MAX_CONCURRENT_REQUESTS` to set the maximum number of concurrent API requests (unlimited by default). + +### 2.6.5 (2025/11/26) + +- Added support for a new backend, `vlm-lmdeploy-engine`. Its usage is similar to `vlm-vllm-(async)engine`, but it uses lmdeploy as the inference engine and, unlike vllm, additionally supports native inference acceleration on Windows platforms. + +### 2.6.4 (2025/11/04) + +- Added a timeout configuration for PDF image rendering. The default is 300 seconds, and it can be configured via the environment variable `MINERU_PDF_RENDER_TIMEOUT` to prevent abnormal PDF files from blocking the rendering process for a long time.
+- Added CPU thread count configuration options for ONNX models, default is the system CPU core count, can be configured via environment variables `MINERU_INTRA_OP_NUM_THREADS` and `MINERU_INTER_OP_NUM_THREADS` to reduce CPU resource contention conflicts in high concurrency scenarios. + +### 2.6.3 (2025/10/31) + +- Added support for a new backend `vlm-mlx-engine`, enabling MLX-accelerated inference for the MinerU2.5 model on Apple Silicon devices. Compared to the `vlm-transformers` backend, `vlm-mlx-engine` delivers a 100%–200% speed improvement. +- Bug fixes: #3849, #3859 + +### 2.6.2 (2025/10/24) + +**`pipeline` backend optimizations** + +- Added experimental support for Chinese formulas, which can be enabled by setting the environment variable `export MINERU_FORMULA_CH_SUPPORT=1`. This feature may cause a slight decrease in MFR speed and failures in recognizing some long formulas. It is recommended to enable it only when parsing Chinese formulas is needed. To disable this feature, set the environment variable to `0`. 
+- `OCR` speed significantly improved by 200%~300%, thanks to the optimization solution provided by [@cjsdurj](https://github.com/cjsdurj) +- `OCR` models optimized for improved accuracy and coverage of Latin script recognition, and updated Cyrillic, Arabic, Devanagari, Telugu (te), and Tamil (ta) language systems to `ppocr-v5` version, with accuracy improved by over 40% compared to previous models + +**`vlm` backend optimizations** + +- `table_caption` and `table_footnote` matching logic optimized to improve the accuracy of table caption and footnote matching and reading order rationality in scenarios with multiple consecutive tables on a page +- Optimized CPU resource usage during high concurrency when using `vllm` backend, reducing server pressure +- Adapted to `vllm` version 0.11.0 + +**General optimizations** + +- Cross-page table merging effect optimized, added support for cross-page continuation table merging, improving table merging effectiveness in multi-column merge scenarios +- Added environment variable configuration option `MINERU_TABLE_MERGE_ENABLE` for table merging feature. Table merging is enabled by default and can be disabled by setting this variable to `0` + +--- + +## 2.5 Series Versions + +### 2.5.4 (2025/09/26) + +- 🎉🎉 The MinerU2.5 [Technical Report](https://arxiv.org/abs/2509.22186) is now available! We welcome you to read it for a comprehensive overview of its model architecture, training strategy, data engineering and evaluation results. +- Fixed an issue where some `PDF` files were mistakenly identified as `AI` files, causing parsing failures + +### 2.5.3 (2025/09/20) + +- Dependency version range adjustment to enable Turing and earlier architecture GPUs to use vLLM acceleration for MinerU2.5 model inference. +- `pipeline` backend compatibility fixes for torch 2.8.0. +- Reduced default concurrency for vLLM async backend to lower server pressure and avoid connection closure issues caused by high load. 
+- More compatibility-related details can be found in the [announcement](https://github.com/opendatalab/MinerU/discussions/3548) + +### 2.5.2 (2025/09/19) + +We are officially releasing MinerU2.5, currently the most powerful multimodal large model for document parsing. + +With only 1.2B parameters, MinerU2.5's accuracy on the OmniDocBench benchmark comprehensively surpasses top-tier multimodal models like Gemini 2.5 Pro, GPT-4o, and Qwen2.5-VL-72B. It also significantly outperforms leading specialized models such as dots.ocr, MonkeyOCR, and PP-StructureV3. + +The model has been released on [HuggingFace](https://huggingface.co/opendatalab/MinerU2.5-2509-1.2B) and [ModelScope](https://modelscope.cn/models/opendatalab/MinerU2.5-2509-1.2B) platforms. Welcome to download and use! + +**Core Highlights** + +- SOTA Performance with Extreme Efficiency: As a 1.2B model, it achieves State-of-the-Art (SOTA) results that exceed models in the 10B and 100B+ classes, redefining the performance-per-parameter standard in document AI. +- Advanced Architecture for Across-the-Board Leadership: By combining a two-stage inference pipeline (decoupling layout analysis from content recognition) with a native high-resolution architecture, it achieves SOTA performance across five key areas: layout analysis, text recognition, formula recognition, table recognition, and reading order. + +**Key Capability Enhancements** + +- Layout Detection: Delivers more complete results by accurately covering non-body content like headers, footers, and page numbers. It also provides more precise element localization and natural format reconstruction for lists and references. +- Table Parsing: Drastically improves parsing for challenging cases, including rotated tables, borderless/semi-structured tables, and long/complex tables. +- Formula Recognition: Significantly boosts accuracy for complex, long-form, and hybrid Chinese-English formulas, greatly enhancing the parsing capability for mathematical documents. 
+ +**Repository Adjustments** + +Additionally, with the release of vlm 2.5, we have made some adjustments to the repository: + +- The vlm backend has been upgraded to version 2.5, supporting the MinerU2.5 model and no longer compatible with the MinerU2.0-2505-0.9B model. The last version supporting the 2.0 model is mineru-2.2.2. +- VLM inference-related code has been moved to [mineru_vl_utils](https://github.com/opendatalab/mineru-vl-utils), reducing coupling with the main mineru repository and facilitating independent iteration in the future. +- The vlm accelerated inference framework has been switched from `sglang` to `vllm`, achieving full compatibility with the vllm ecosystem, allowing users to use the MinerU2.5 model and accelerated inference on any platform that supports the vllm framework. +- Due to major upgrades in the vlm model supporting more layout types, we have made some adjustments to the structure of the parsing intermediate file `middle.json` and result file `content_list.json`. Please refer to the [documentation](https://opendatalab.github.io/MinerU/reference/output_files/) for details. + +**Other Repository Optimizations** + +- Removed file extension whitelist validation for input files. When input files are PDF documents or images, there are no longer requirements for file extensions, improving usability. + +--- + +## 2.2 - 2.4 Series Versions + +### 2.2.2 (2025/09/10) + +- Fixed the issue where the new table recognition model would affect the overall parsing task when some table parsing failed + +### 2.2.1 (2025/09/08) + +- Fixed the issue where some newly added models were not downloaded when using the model download command. 
+ +### 2.2.0 (2025/09/05) + +**Major Updates** + +- In this version, we focused on improving table parsing accuracy by introducing a new [wired table recognition model](https://github.com/RapidAI/TableStructureRec) and a brand-new hybrid table structure parsing algorithm, significantly enhancing the table recognition capabilities of the `pipeline` backend. +- We also added support for cross-page table merging, which is supported by both `pipeline` and `vlm` backends, further improving the completeness and accuracy of table parsing. + +**Other Updates** + +- The `pipeline` backend now supports 270-degree rotated table parsing, bringing support for table parsing in 0/90/270-degree orientations +- `pipeline` added OCR capability support for Thai and Greek, and updated the English OCR model to the latest version. English recognition accuracy improved by 11%, Thai recognition model accuracy is 82.68%, and Greek recognition model accuracy is 89.28% (by PPOCRv5) +- Added `bbox` field (mapped to 0-1000 range) in the output `content_list.json`, making it convenient for users to directly obtain position information for each content block +- Removed the `pipeline_old_linux` installation option, no longer supporting legacy Linux systems such as `CentOS 7`, to provide better support for `uv`'s `sync`/`run` commands + +--- + +## 2.1 Series Versions + +### 2.1.10 (2025/08/01) + +- Fixed an issue in the `pipeline` backend where block overlap caused the parsing results to deviate from expectations #3232 + +### 2.1.9 (2025/07/30) + +- `transformers` 4.54.1 version adaptation + +### 2.1.8 (2025/07/28) + +- `sglang` 0.4.9.post5 version adaptation + +### 2.1.7 (2025/07/27) + +- `transformers` 4.54.0 version adaptation + +### 2.1.6 (2025/07/26) + +- Fixed table parsing issues in handwritten documents when using `vlm` backend +- Fixed visualization box position drift issue when document is rotated #3175 + +### 2.1.5 (2025/07/24) + +- `sglang` 0.4.9 version adaptation, synchronously 
upgrading the dockerfile base image to sglang 0.4.9.post3 + +### 2.1.4 (2025/07/23) + +**Bug Fixes** + +- Fixed the issue of excessive memory consumption during the `MFR` step in the `pipeline` backend under certain scenarios #2771 +- Fixed the inaccurate matching between `image`/`table` and `caption`/`footnote` under certain conditions #3129 + +### 2.1.1 (2025/07/16) + +**Bug fixes** + +- Fixed text block content loss issue that could occur in certain `pipeline` scenarios #3005 +- Fixed issue where `sglang-client` required unnecessary packages like `torch` #2968 +- Updated `dockerfile` to fix incomplete text content parsing due to missing fonts in Linux #2915 + +**Usability improvements** + +- Updated `compose.yaml` to facilitate direct startup of `sglang-server`, `mineru-api`, and `mineru-gradio` services +- Launched brand new [online documentation site](https://opendatalab.github.io/MinerU/), simplified readme, providing better documentation experience + +### 2.1.0 (2025/07/05) + +This is the first major update of MinerU 2, which includes a large number of new features and improvements, covering significant performance optimizations, user experience enhancements, and bug fixes. The detailed update contents are as follows: + +**Performance Optimizations** + +- Significantly improved preprocessing speed for documents with specific resolutions (around 2000 pixels on the long side). +- Greatly enhanced post-processing speed when the `pipeline` backend handles batch processing of documents with fewer pages (<10 pages). +- Layout analysis speed of the `pipeline` backend has been increased by approximately 20%. + +**Experience Enhancements** + +- Built-in ready-to-use `fastapi service` and `gradio webui`. For detailed usage instructions, please refer to [Documentation](https://opendatalab.github.io/MinerU/usage/quick_usage/#advanced-usage-via-api-webui-sglang-clientserver). 
+- Adapted to `sglang` version `0.4.8`, significantly reducing the GPU memory requirements for the `vlm-sglang` backend. It can now run on graphics cards with as little as `8GB GPU memory` (Turing architecture or newer). +- Added transparent parameter passing for all commands related to `sglang`, allowing the `sglang-engine` backend to receive all `sglang` parameters consistently with the `sglang-server`. +- Supports feature extensions based on configuration files, including `custom formula delimiters`, `enabling heading classification`, and `customizing local model directories`. For detailed usage instructions, please refer to [Documentation](https://opendatalab.github.io/MinerU/usage/quick_usage/#extending-mineru-functionality-with-configuration-files). + +**New Features** + +- Updated the `pipeline` backend with the PP-OCRv5 multilingual text recognition model, supporting text recognition in 37 languages such as French, Spanish, Portuguese, Russian, and Korean, with an average accuracy improvement of over 30%. [Details](https://paddlepaddle.github.io/PaddleOCR/latest/en/version3.x/algorithm/PP-OCRv5/PP-OCRv5_multi_languages.html) +- Introduced limited support for vertical text layout in the `pipeline` backend. + +--- + +## 2.0 Series Versions + +### 2.0.6 (2025/06/20) + +- Fixed occasional parsing interruptions caused by invalid block content in `vlm` mode +- Fixed parsing interruptions caused by incomplete table structures in `vlm` mode + +### 2.0.5 (2025/06/17) + +- Fixed the issue where models were still required to be downloaded in the `sglang-client` mode +- Fixed the issue where the `sglang-client` mode unnecessarily depended on packages like `torch` during runtime. 
+- Fixed the issue where only the first instance would take effect when attempting to launch multiple `sglang-client` instances via multiple URLs within the same process + +### 2.0.3 (2025/06/15) + +- Fixed a configuration file key-value update error that occurred when downloading model type was set to `all` +- Fixed the issue where the formula and table feature toggle switches were not working in `command line mode`, causing the features to remain enabled. +- Fixed compatibility issues with sglang version 0.4.7 in the `sglang-engine` mode. +- Updated Dockerfile and installation documentation for deploying the full version of MinerU in sglang environment + +### 2.0.0 (2025/06/13) + +**New Architecture** + +MinerU 2.0 has been deeply restructured in code organization and interaction methods, significantly improving system usability, maintainability, and extensibility. + +- **Removal of Third-party Dependency Limitations**: Completely eliminated the dependency on `pymupdf`, moving the project toward a more open and compliant open-source direction. +- **Ready-to-use, Easy Configuration**: No need to manually edit JSON configuration files; most parameters can now be set directly via command line or API. +- **Automatic Model Management**: Added automatic model download and update mechanisms, allowing users to complete model deployment without manual intervention. +- **Offline Deployment Friendly**: Provides built-in model download commands, supporting deployment requirements in completely offline environments. +- **Streamlined Code Structure**: Removed thousands of lines of redundant code, simplified class inheritance logic, significantly improving code readability and development efficiency. +- **Unified Intermediate Format Output**: Adopted standardized `middle_json` format, compatible with most secondary development scenarios based on this format, ensuring seamless ecosystem business migration. 
+ +**New Model** + +MinerU 2.0 integrates our latest small-parameter, high-performance multimodal document parsing model, achieving end-to-end high-speed, high-precision document understanding. + +- **Small Model, Big Capabilities**: With parameters under 1B, yet surpassing traditional 72B-level vision-language models (VLMs) in parsing accuracy. +- **Multiple Functions in One**: A single model covers multilingual recognition, handwriting recognition, layout analysis, table parsing, formula recognition, reading order sorting, and other core tasks. +- **Ultimate Inference Speed**: Achieves peak throughput exceeding 10,000 tokens/s through `sglang` acceleration on a single NVIDIA 4090 card, easily handling large-scale document processing requirements. +- **Online Experience**: You can experience our brand-new VLM model on [MinerU.net](https://mineru.net/OpenSourceTools/Extractor), [Hugging Face](https://huggingface.co/spaces/opendatalab/MinerU), and [ModelScope](https://www.modelscope.cn/studios/OpenDataLab/MinerU). + +**Incompatible Changes Notice** + +To improve overall architectural rationality and long-term maintainability, this version contains some incompatible changes: + +- Python package name changed from `magic-pdf` to `mineru`, and the command-line tool changed from `magic-pdf` to `mineru`. Please update your scripts and command calls accordingly. +- For modular system design and ecosystem consistency considerations, MinerU 2.0 no longer includes the LibreOffice document conversion module. If you need to process Office documents, we recommend converting them to PDF format through an independently deployed LibreOffice service before proceeding with subsequent parsing operations. 
+ +--- + +## 1.x Series Historical Versions + +### 1.3.12 (2025/05/24) + +Added support for PPOCRv5 models, updated `ch_server` model to `PP-OCRv5_rec_server`, and `ch_lite` model to `PP-OCRv5_rec_mobile` (model update required) + +- In testing, we found that PPOCRv5(server) has some improvement for handwritten documents, but has slightly lower accuracy than v4_server_doc for other document types, so the default ch model remains unchanged as `PP-OCRv4_server_rec_doc`. +- Since PPOCRv5 has enhanced recognition capabilities for handwriting and special characters, you can manually choose the PPOCRv5 model for Japanese-Traditional Chinese mixed scenarios and handwritten documents +- You can select the appropriate model through the lang parameter `lang='ch_server'` (Python API) or `--lang ch_server` (command line): + - `ch`: `PP-OCRv4_server_rec_doc` (default) (Chinese/English/Japanese/Traditional Chinese mixed/15K dictionary) + - `ch_server`: `PP-OCRv5_rec_server` (Chinese/English/Japanese/Traditional Chinese mixed + handwriting/18K dictionary) + - `ch_lite`: `PP-OCRv5_rec_mobile` (Chinese/English/Japanese/Traditional Chinese mixed + handwriting/18K dictionary) + - `ch_server_v4`: `PP-OCRv4_rec_server` (Chinese/English mixed/6K dictionary) + - `ch_lite_v4`: `PP-OCRv4_rec_mobile` (Chinese/English mixed/6K dictionary) + +Added support for handwritten documents through optimized layout recognition of handwritten text areas + +- This feature is supported by default, no additional configuration required +- You can refer to the instructions above to manually select the PPOCRv5 model for better handwritten document parsing results + +The `huggingface` and `modelscope` demos have been updated to versions that support handwriting recognition and PPOCRv5 models, which you can experience online + +### 1.3.10 (2025/04/29) + +- Added support for custom formula delimiters, which can be configured by modifying the `latex-delimiter-config` section in the `magic-pdf.json` file in your 
user directory. + +### 1.3.9 (2025/04/27) + +- Optimized formula parsing functionality, improved formula rendering success rate + +### 1.3.8 (2025/04/23) + +The default `ocr` model (`ch`) has been updated to `PP-OCRv4_server_rec_doc` (model update required) + +- `PP-OCRv4_server_rec_doc` is trained on a mixture of more Chinese document data and PP-OCR training data based on `PP-OCRv4_server_rec`, adding recognition capabilities for some traditional Chinese characters, Japanese, and special characters. It can recognize over 15,000 characters and improves both document-specific and general text recognition abilities. +- [Performance comparison of PP-OCRv4_server_rec_doc/PP-OCRv4_server_rec/PP-OCRv4_mobile_rec](https://paddlepaddle.github.io/PaddleX/latest/module_usage/tutorials/ocr_modules/text_recognition.html#_3) +- After verification, the `PP-OCRv4_server_rec_doc` model shows significant accuracy improvements in Chinese/English/Japanese/Traditional Chinese in both single language and mixed language scenarios, with comparable speed to `PP-OCRv4_server_rec`, making it suitable for most use cases. +- In some pure English scenarios, `PP-OCRv4_server_rec_doc` may have word adhesion issues, while `PP-OCRv4_server_rec` performs better in these cases. Therefore, we've kept the `PP-OCRv4_server_rec` model, which users can access by adding the parameter `lang='ch_server'` (Python API) or `--lang ch_server` (command line). 
+ +### 1.3.7 (2025/04/22) + +- Fixed the issue where the lang parameter was ineffective during table parsing model initialization +- Fixed the significant speed reduction of OCR and table parsing in `cpu` mode + +### 1.3.4 (2025/04/16) + +- Slightly improved OCR-det speed by removing some unnecessary blocks +- Fixed page-internal sorting errors caused by footnotes in certain cases + +### 1.3.2 (2025/04/12) + +- Fixed dependency version incompatibility issues when installing on Windows with Python 3.13 +- Optimized memory usage during batch inference +- Improved parsing of tables rotated 90 degrees +- Enhanced parsing of oversized tables in financial report samples +- Fixed the occasional word adhesion issue in English text areas when OCR language is not specified (model update required) + +### 1.3.1 (2025/04/08) + +Fixed several compatibility issues + +- Added support for Python 3.13 +- Made final adaptations for outdated Linux systems (such as CentOS 7) with no guarantee of continued support in future versions, [installation instructions](https://github.com/opendatalab/MinerU/issues/1004) + +### 1.3.0 (2025/04/03) + +**Installation and compatibility optimizations** + +- Resolved compatibility issues caused by `detectron2` by removing `layoutlmv3` usage in layout +- Extended torch version compatibility to 2.2~2.6 (excluding 2.5) +- Added CUDA compatibility for versions 11.8/12.4/12.6/12.8 (CUDA version determined by torch), solving compatibility issues for users with 50-series and H-series GPUs +- Extended Python compatibility to versions 3.10~3.12, fixing the issue of automatic downgrade to version 0.6.1 when installing in non-3.10 environments +- Optimized offline deployment process, eliminating the need to download any model files after successful deployment + +**Performance optimizations** + +- Enhanced parsing speed for batches of small files by supporting batch processing of multiple PDF files ([script example](demo/batch_demo.py)), with formula parsing speed 
improved by up to 1400% and overall parsing speed improved by up to 500% compared to version 1.0.1 +- Reduced memory usage and improved parsing speed by optimizing MFR model loading and usage (requires re-running the [model download process](docs/how_to_download_models_zh_cn.md) to get incremental updates to model files) +- Optimized GPU memory usage, requiring only 6GB minimum to run this project +- Improved running speed on MPS devices + +**Parsing effect optimizations** + +- Updated MFR model to `unimernet(2503)`, fixing line break loss issues in multi-line formulas + +**Usability optimizations** + +- Completely replaced the `paddle` framework and `paddleocr` in the project by using `paddleocr2torch`, resolving conflicts between `paddle` and `torch`, as well as thread safety issues caused by the `paddle` framework +- Added real-time progress bar display during parsing, allowing precise tracking of parsing progress and making the waiting process more bearable + +### 1.2.1 (2025/03/03) + +Fixed some issues + +- Fixed the impact on punctuation marks during full-width to half-width conversion of letters and numbers +- Fixed caption matching inaccuracies in certain scenarios +- Fixed formula span loss issues in certain scenarios + +### 1.2.0 (2025/02/24) + +This version includes several fixes and improvements to enhance parsing efficiency and accuracy: + +**Performance Optimization** + +- Increased classification speed for PDF documents in auto mode. + +**Parsing Optimization** + +- Improved parsing logic for documents containing watermarks, significantly enhancing the parsing results for such documents. +- Enhanced the matching logic for multiple images/tables and captions within a single page, improving the accuracy of image-text matching in complex layouts. + +**Bug Fixes** + +- Fixed an issue where image/table spans were incorrectly filled into text blocks under certain conditions. +- Resolved an issue where title blocks were empty in some cases. 
+ +### 1.1.0 (2025/01/22) + +In this version we have focused on improving parsing accuracy and efficiency: + +**Model capability upgrade** (requires re-executing the [model download process](https://github.com/opendatalab/MinerU/blob/master/docs/how_to_download_models_en.md) to obtain incremental updates of model files) + +- The layout recognition model has been upgraded to the latest `doclayout_yolo(2501)` model, improving layout recognition accuracy. +- The formula parsing model has been upgraded to the latest `unimernet(2501)` model, improving formula recognition accuracy. + +**Performance optimization** + +- On devices that meet certain configuration requirements (16GB+ VRAM), by optimizing resource usage and restructuring the processing pipeline, overall parsing speed has been increased by more than 50%. + +**Parsing effect optimization** + +- Added a new heading classification feature (testing version, enabled by default) to the online demo ([mineru.net](https://mineru.net/OpenSourceTools/Extractor)/[huggingface](https://huggingface.co/spaces/opendatalab/MinerU)/[modelscope](https://www.modelscope.cn/studios/OpenDataLab/MinerU)), which supports hierarchical classification of headings, thereby enhancing document structuring. + +### 1.0.1 (2025/01/10) + +This is our first official release, where we have introduced a completely new API interface and enhanced compatibility through extensive refactoring, as well as a brand new automatic language identification feature: + +**New API Interface** + +- For the data-side API, we have introduced the Dataset class, designed to provide a robust and flexible data processing framework. This framework currently supports a variety of document formats, including images (.jpg and .png), PDFs, Word documents (.doc and .docx), and PowerPoint presentations (.ppt and .pptx). It ensures effective support for data processing tasks ranging from simple to complex. 
+- For the user-side API, we have meticulously designed the MinerU processing workflow as a series of composable Stages. Each Stage represents a specific processing step, allowing users to define new Stages according to their needs and creatively combine these stages to customize their data processing workflows. + +**Enhanced Compatibility** + +- By optimizing the dependency environment and configuration items, we ensure stable and efficient operation on ARM architecture Linux systems. +- We have deeply integrated with Huawei Ascend NPU acceleration, providing autonomous and controllable high-performance computing capabilities. This supports the localization and development of AI application platforms in China. [Ascend NPU Acceleration](https://github.com/opendatalab/MinerU/blob/master/docs/README_Ascend_NPU_Acceleration_zh_CN.md) + +**Automatic Language Identification** + +- By introducing a new language recognition model, setting the `lang` configuration to `auto` during document parsing will automatically select the appropriate OCR language model, improving the accuracy of scanned document parsing. + +--- + +## 0.x Series Historical Versions + +### 0.10.0 (2024/11/22) + +Introducing hybrid OCR text extraction capabilities: + +- Significantly improved parsing performance in complex text distribution scenarios such as dense formulas, irregular span regions, and text represented by images. +- Combines the dual advantages of accurate content extraction and faster speed in text mode, and more precise span/line region recognition in OCR mode. + +### 0.9.3 (2024/11/15) + +Integrated [RapidTable](https://github.com/RapidAI/RapidTable) for table recognition, improving single-table parsing speed by more than 10 times, with higher accuracy and lower GPU memory usage. + +### 0.9.2 (2024/11/06) + +Integrated the [StructTable-InternVL2-1B](https://huggingface.co/U4R/StructTable-InternVL2-1B) model for table recognition functionality. 
+ +### 0.9.0 (2024/10/31) + +This is a major new version with extensive code refactoring, addressing numerous issues, improving performance, reducing hardware requirements, and enhancing usability: + +- Refactored the sorting module code to use [layoutreader](https://github.com/ppaanngggg/layoutreader) for reading order sorting, ensuring high accuracy in various layouts. +- Refactored the paragraph concatenation module to achieve good results in cross-column, cross-page, cross-figure, and cross-table scenarios. +- Refactored the list and table of contents recognition functions, significantly improving the accuracy of list blocks and table of contents blocks, as well as the parsing of corresponding text paragraphs. +- Refactored the matching logic for figures, tables, and descriptive text, greatly enhancing the accuracy of matching captions and footnotes to figures and tables, and reducing the loss rate of descriptive text to near zero. +- Added multi-language support for OCR, supporting detection and recognition of 84 languages. For the list of supported languages, see [OCR Language Support List](https://paddlepaddle.github.io/PaddleOCR/latest/en/ppocr/blog/multi_languages.html#5). +- Added memory recycling logic and other memory optimization measures, significantly reducing memory usage. The memory requirement for enabling all acceleration features except table acceleration (layout/formula/OCR) has been reduced from 16GB to 8GB, and the memory requirement for enabling all acceleration features has been reduced from 24GB to 10GB. +- Optimized configuration file feature switches, adding an independent formula detection switch to significantly improve speed and parsing results when formula detection is not needed. 
+- Integrated [PDF-Extract-Kit 1.0](https://github.com/opendatalab/PDF-Extract-Kit): + - Added the self-developed `doclayout_yolo` model, which speeds up processing by more than 10 times compared to the original solution while maintaining similar parsing effects, and can be freely switched with `layoutlmv3` via the configuration file. + - Upgraded formula parsing to `unimernet 0.2.1`, improving formula parsing accuracy while significantly reducing memory usage. + - Due to the repository change for `PDF-Extract-Kit 1.0`, you need to re-download the model. Please refer to [How to Download Models](https://github.com/opendatalab/MinerU/blob/master/docs/how_to_download_models_en.md) for detailed steps. + +### 0.8.1 (2024/09/27) + +Fixed some bugs and provided a [localized deployment version](https://github.com/opendatalab/MinerU/blob/master/projects/web_demo/README.md) of the [online demo](https://opendatalab.com/OpenSourceTools/Extractor/PDF/) and the [front-end interface](https://github.com/opendatalab/MinerU/blob/master/projects/web/README.md). + +### 0.8.0 (2024/09/09) + +Added support for fast deployment with Dockerfile and launched demos on Hugging Face and ModelScope. + +### 0.7.1 (2024/08/30) + +Added the paddle tablemaster table recognition option + +### 0.7.0b1 (2024/08/09) + +Simplified the installation process and added table recognition functionality + +### 0.6.2b1 (2024/08/01) + +Resolved dependency conflict issues and improved the installation documentation + +### Initial Open-Source Release (2024/07/05) + +MinerU project's first open-source release + diff --git a/docs/en/reference/index.md b/docs/en/reference/index.md new file mode 100644 index 0000000..638dbe3 --- /dev/null +++ b/docs/en/reference/index.md @@ -0,0 +1,27 @@ +# Reference Documentation + +This section provides detailed reference materials for the MinerU project. Here you can find technical specifications, API documentation, output file formats, and version history. 
+ +## Table of Contents + +- [Output Files Documentation](./output_files.md) - Detailed explanation of all output files and their formats +- [Changelog](./changelog.md) - Version update history and release notes + +## Documentation Overview + +### Output Files Documentation + +Understanding the output files generated by MinerU is crucial for effective use of the tool. The output files documentation provides: + +- **Visual debugging files**: Help you understand the document parsing process +- **Structured data files**: Contain detailed parsing results for further processing +- **File format specifications**: Detailed descriptions of each output file type + +### Changelog + +The changelog documents the evolution of MinerU, including: + +- **Version updates**: New features and improvements for each release +- **Bug fixes**: Issues resolved in each version +- **Breaking changes**: Important changes that may affect your usage +- **Deprecations**: Features that are being phased out diff --git a/docs/en/reference/output_files.md b/docs/en/reference/output_files.md new file mode 100644 index 0000000..ceda518 --- /dev/null +++ b/docs/en/reference/output_files.md @@ -0,0 +1,712 @@ +# MinerU Output Files Documentation + +## Overview + +After executing the `mineru` command, in addition to the main markdown file output, multiple auxiliary files are generated for debugging, quality inspection, and further processing. These files include: + +- **Visual debugging files**: Help users intuitively understand the document parsing process and results +- **Structured data files**: Contain detailed parsing data for secondary development + +The following sections provide detailed descriptions of each file's purpose and format. 
+ +## Visual Debugging Files + +### Layout Analysis File (layout.pdf) + +**File naming format**: `{original_filename}_layout.pdf` + +**Functionality**: + +- Visualizes layout analysis results for each page +- Numbers in the top-right corner of each detection box indicate reading order +- Different background colors distinguish different types of content blocks + +**Use cases**: + +- Check if layout analysis is correct +- Verify if reading order is reasonable +- Debug layout-related issues + +![layout page example](../images/layout_example.png) + +### Text Spans File (spans.pdf) + +> [!NOTE] +> Only applicable to pipeline backend + +**File naming format**: `{original_filename}_spans.pdf` + +**Functionality**: + +- Uses different colored line boxes to annotate page content based on span type +- Used for quality inspection and issue troubleshooting + +**Use cases**: + +- Quickly troubleshoot text loss issues +- Check inline formula recognition +- Verify text segmentation accuracy + +![span page example](../images/spans_example.png) + +## Structured Data Files + +> [!IMPORTANT] +> The VLM backend output has significant changes in version 2.5 and is not backward-compatible with the pipeline backend. If you plan to build secondary development on structured outputs, please read this document carefully. 
+ +### Pipeline Backend Output Results + +#### Model Inference Results (model.json) + +**File naming format**: `{original_filename}_model.json` + +##### Data Structure Definition + +```python +from pydantic import BaseModel, Field +from enum import IntEnum + +class CategoryType(IntEnum): + """Content category enumeration""" + title = 0 # Title + plain_text = 1 # Text + abandon = 2 # Including headers, footers, page numbers, and page annotations + figure = 3 # Image + figure_caption = 4 # Image caption + table = 5 # Table + table_caption = 6 # Table caption + table_footnote = 7 # Table footnote + isolate_formula = 8 # Interline formula + formula_caption = 9 # Interline formula number + embedding = 13 # Inline formula + isolated = 14 # Interline formula + text = 15 # OCR recognition result + +class PageInfo(BaseModel): + """Page information""" + page_no: int = Field(description="Page number, first page is 0", ge=0) + height: int = Field(description="Page height", gt=0) + width: int = Field(description="Page width", ge=0) + +class ObjectInferenceResult(BaseModel): + """Object recognition result""" + category_id: CategoryType = Field(description="Category", ge=0) + poly: list[float] = Field(description="Quadrilateral coordinates, format: [x0,y0,x1,y1,x2,y2,x3,y3]") + score: float = Field(description="Confidence score of inference result") + latex: str | None = Field(description="LaTeX parsing result", default=None) + html: str | None = Field(description="HTML parsing result", default=None) + +class PageInferenceResults(BaseModel): + """Page inference results""" + layout_dets: list[ObjectInferenceResult] = Field(description="Page recognition results") + page_info: PageInfo = Field(description="Page metadata") + +# Complete inference results +inference_result: list[PageInferenceResults] = [] +``` + +##### Coordinate System Description + +`poly` coordinate format: `[x0, y0, x1, y1, x2, y2, x3, y3]` + +- Represents coordinates of top-left, top-right, bottom-right, 
bottom-left points respectively +- Coordinate origin is at the top-left corner of the page + +![poly coordinate diagram](../images/poly.png) + +##### Sample Data + +```json +[ + { + "layout_dets": [ + { + "category_id": 2, + "poly": [ + 99.1906967163086, + 100.3119125366211, + 730.3707885742188, + 100.3119125366211, + 730.3707885742188, + 245.81326293945312, + 99.1906967163086, + 245.81326293945312 + ], + "score": 0.9999997615814209 + } + ], + "page_info": { + "page_no": 0, + "height": 2339, + "width": 1654 + } + }, + { + "layout_dets": [ + { + "category_id": 5, + "poly": [ + 99.13092803955078, + 2210.680419921875, + 497.3183898925781, + 2210.680419921875, + 497.3183898925781, + 2264.78076171875, + 99.13092803955078, + 2264.78076171875 + ], + "score": 0.9999997019767761 + } + ], + "page_info": { + "page_no": 1, + "height": 2339, + "width": 1654 + } + } +] +``` + +#### Intermediate Processing Results (middle.json) + +**File naming format**: `{original_filename}_middle.json` + +##### Top-level Structure + +| Field Name | Type | Description | +|------------|------|-------------| +| `pdf_info` | `list[dict]` | Array of parsing results for each page | +| `_backend` | `string` | Parsing mode: `pipeline` or `vlm` | +| `_version_name` | `string` | MinerU version number | + +##### Page Information Structure (pdf_info) + +| Field Name | Description | +|------------|-------------| +| `preproc_blocks` | Unsegmented intermediate results after PDF preprocessing | +| `page_idx` | Page number, starting from 0 | +| `page_size` | Page width and height `[width, height]` | +| `images` | Image block information list | +| `tables` | Table block information list | +| `interline_equations` | Interline formula block information list | +| `discarded_blocks` | Block information to be discarded | +| `para_blocks` | Content block results after segmentation | + +##### Block Structure Hierarchy + +``` +Level 1 blocks (table | image) +└── Level 2 blocks + └── Lines + └── Spans +``` + +##### Level 
1 Block Fields + +| Field Name | Description | +|------------|-------------| +| `type` | Block type: `table` or `image` | +| `bbox` | Rectangular box coordinates of the block `[x0, y0, x1, y1]` | +| `blocks` | List of contained level 2 blocks | + +##### Level 2 Block Fields + +| Field Name | Description | +|------------|-------------| +| `type` | Block type (see table below) | +| `bbox` | Rectangular box coordinates of the block | +| `lines` | List of contained line information | + +##### Level 2 Block Types + +| Type | Description | +|------|-------------| +| `image_body` | Image body | +| `image_caption` | Image caption text | +| `image_footnote` | Image footnote | +| `table_body` | Table body | +| `table_caption` | Table caption text | +| `table_footnote` | Table footnote | +| `text` | Text block | +| `title` | Title block | +| `index` | Index block | +| `list` | List block | +| `interline_equation` | Interline formula block | + +##### Line and Span Structure + +**Line fields**: +- `bbox`: Rectangular box coordinates of the line +- `spans`: List of contained spans + +**Span fields**: +- `bbox`: Rectangular box coordinates of the span +- `type`: Span type (`image`, `table`, `text`, `inline_equation`, `interline_equation`) +- `content` | `img_path`: Text content or image path + +##### Sample Data + +```json +{ + "pdf_info": [ + { + "preproc_blocks": [ + { + "type": "text", + "bbox": [ + 52, + 61.956024169921875, + 294, + 82.99800872802734 + ], + "lines": [ + { + "bbox": [ + 52, + 61.956024169921875, + 294, + 72.0000228881836 + ], + "spans": [ + { + "bbox": [ + 54.0, + 61.956024169921875, + 296.2261657714844, + 72.0000228881836 + ], + "content": "dependent on the service headway and the reliability of the departure ", + "type": "text", + "score": 1.0 + } + ] + } + ] + } + ], + "layout_bboxes": [ + { + "layout_bbox": [ + 52, + 61, + 294, + 731 + ], + "layout_label": "V", + "sub_layout": [] + } + ], + "page_idx": 0, + "page_size": [ + 612.0, + 792.0 + ], + 
"_layout_tree": [], + "images": [], + "tables": [], + "interline_equations": [], + "discarded_blocks": [], + "para_blocks": [ + { + "type": "text", + "bbox": [ + 52, + 61.956024169921875, + 294, + 82.99800872802734 + ], + "lines": [ + { + "bbox": [ + 52, + 61.956024169921875, + 294, + 72.0000228881836 + ], + "spans": [ + { + "bbox": [ + 54.0, + 61.956024169921875, + 296.2261657714844, + 72.0000228881836 + ], + "content": "dependent on the service headway and the reliability of the departure ", + "type": "text", + "score": 1.0 + } + ] + } + ] + } + ] + } + ], + "_backend": "pipeline", + "_version_name": "0.6.1" +} +``` + +#### Content List (content_list.json) + +**File naming format**: `{original_filename}_content_list.json` + +##### Functionality + +This is a simplified version of `middle.json` that stores all readable content blocks in reading order as a flat structure, removing complex layout information for easier subsequent processing. + +##### Content Types + +| Type | Description | +|------|-------------| +| `image` | Image | +| `table` | Table | +| `text` | Text/Title | +| `equation` | Interline formula | + +##### Text Level Identification + +Text levels are distinguished through the `text_level` field: + +- No `text_level` or `text_level: 0`: Body text +- `text_level: 1`: Level 1 heading +- `text_level: 2`: Level 2 heading +- And so on... + +##### Common Fields + +- All content blocks include a `page_idx` field indicating the page number (starting from 0). +- All content blocks include a `bbox` field representing the bounding box coordinates of the content block `[x0, y0, x1, y1]`, mapped to a range of 0-1000. + +##### Sample Data + +```json +[ + { + "type": "text", + "text": "The response of flow duration curves to afforestation ", + "text_level": 1, + "bbox": [ + 62, + 480, + 946, + 904 + ], + "page_idx": 0 + }, + { + "type": "image", + "img_path": "images/a8ecda1c69b27e4f79fce1589175a9d721cbdc1cf78b4cc06a015f3746f6b9d8.jpg", + "image_caption": [ + "Fig. 
1. Annual flow duration curves of daily flows from Pine Creek, Australia, 1989–2000. " + ], + "image_footnote": [], + "bbox": [ + 62, + 480, + 946, + 904 + ], + "page_idx": 1 + }, + { + "type": "equation", + "img_path": "images/181ea56ef185060d04bf4e274685f3e072e922e7b839f093d482c29bf89b71e8.jpg", + "text": "$$\nQ _ { \\% } = f ( P ) + g ( T )\n$$", + "text_format": "latex", + "bbox": [ + 62, + 480, + 946, + 904 + ], + "page_idx": 2 + }, + { + "type": "table", + "img_path": "images/e3cb413394a475e555807ffdad913435940ec637873d673ee1b039e3bc3496d0.jpg", + "table_caption": [ + "Table 2 Significance of the rainfall and time terms " + ], + "table_footnote": [ + "indicates that the rainfall term was significant at the $5 \\%$ level, $T$ indicates that the time term was significant at the $5 \\%$ level, \\* represents significance at the $10 \\%$ level, and na denotes too few data points for meaningful analysis. " + ], + "table_body": "
SitePercentile
102030405060708090100
Traralgon CkPP,*PPP,P,P,P,PP
RedhillP,TP,T,***P.TP,*P*P**,*
Pine CkP,TP,TP,TP,TTTTnana
Stewarts Ck 5P,TP,TP,TP,TP.TP.TP,Tnanana
Glendhu 2PP,TP,*P,TP.TP,nsP,TP,TP,TP,T
Cathedral Peak 2P,TP,TP,TP,TP,T*,TP,TP,TP,TT
Cathedral Peak 3P.TP.TP,TP,TP,TTP,TP,TP,TT
Lambrechtsbos AP,TPPP,T*,T*,T*,T*,T*,TT
Lambrechtsbos BP,TP,TP,TP,TP,TP,TP,TP,TTT
BiesievleiP,TP.TP,TP,T*,T*,TTTP,TP,T
", + "bbox": [ + 62, + 480, + 946, + 904 + ], + "page_idx": 5 + } +] +``` + +### VLM Backend Output Results + +#### Model Inference Results (model.json) + +**File naming format**: `{original_filename}_model.json` + +##### File format description +- Two-level nested list: outer list = pages; inner list = content blocks of that page +- Each block is a dict with at least: `type`, `bbox`, `angle`, `content` (some types add extra fields like `score`, `block_tags`, `content_tags`, `format`) +- Designed for direct, raw model inspection + +##### Supported content types (type field values) +```json +{ + "text": "Plain text", + "title": "Title", + "equation": "Display (interline) formula", + "image": "Image", + "image_caption": "Image caption", + "image_footnote": "Image footnote", + "table": "Table", + "table_caption": "Table caption", + "table_footnote": "Table footnote", + "phonetic": "Phonetic annotation", + "code": "Code block", + "code_caption": "Code caption", + "ref_text": "Reference / citation entry", + "algorithm": "Algorithm block (treated as code subtype)", + "list": "List container", + "header": "Page header", + "footer": "Page footer", + "page_number": "Page number", + "aside_text": "Side / margin note", + "page_footnote": "Page footnote" +} +``` + +##### Coordinate system +- `bbox` = `[x0, y0, x1, y1]` (top-left, bottom-right) +- Origin at top-left of the page +- All coordinates are normalized percentages in `[0,1]` + +##### Sample data +```json +[ + [ + { + "type": "header", + "bbox": [0.077, 0.095, 0.18, 0.181], + "angle": 0, + "score": null, + "block_tags": null, + "content": "ELSEVIER", + "format": null, + "content_tags": null + }, + { + "type": "title", + "bbox": [0.157, 0.228, 0.833, 0.253], + "angle": 0, + "score": null, + "block_tags": null, + "content": "The response of flow duration curves to afforestation", + "format": null, + "content_tags": null + } + ] +] +``` + +#### Intermediate Processing Results (middle.json) + +**File naming format**: 
`{original_filename}_middle.json` + +Structure is broadly similar to the pipeline backend, but with these differences: + +- `list` becomes a second‑level block, a new field `sub_type` distinguishes list categories: + * `text`: ordinary list + * `ref_text`: reference / bibliography style list +- New `code` block type with `sub_type`(a code block always has at least a `code_body`, it may optionally have a `code_caption`): + * `code` + * `algorithm` +- `discarded_blocks` may contain additional types: + * `header` + * `footer` + * `page_number` + * `aside_text` + * `page_footnote` +- All blocks include an `angle` field indicating rotation (one of `0, 90, 180, 270`). + +##### Examples +- Example: list block + ```json + { + "bbox": [174,155,818,333], + "type": "list", + "angle": 0, + "index": 11, + "blocks": [ + { + "bbox": [174,157,311,175], + "type": "text", + "angle": 0, + "lines": [ + { + "bbox": [174,157,311,175], + "spans": [ + { + "bbox": [174,157,311,175], + "type": "text", + "content": "H.1 Introduction" + } + ] + } + ], + "index": 3 + }, + { + "bbox": [175,182,464,229], + "type": "text", + "angle": 0, + "lines": [ + { + "bbox": [175,182,464,229], + "spans": [ + { + "bbox": [175,182,464,229], + "type": "text", + "content": "H.2 Example: Divide by Zero without Exception Handling" + } + ] + } + ], + "index": 4 + } + ], + "sub_type": "text" + } + ``` + +- Example: code block with optional caption: + ```json + { + "type": "code", + "bbox": [114,780,885,1231], + "blocks": [ + { + "bbox": [114,780,885,1231], + "lines": [ + { + "bbox": [114,780,885,1231], + "spans": [ + { + "bbox": [114,780,885,1231], + "type": "text", + "content": "1 // Fig. H.1: DivideByZeroNoExceptionHandling.java \n2 // Integer division without exception handling. 
\n3 import java.util.Scanner; \n4 \n5 public class DivideByZeroNoExceptionHandling \n6 { \n7 // demonstrates throwing an exception when a divide-by-zero occurs \n8 public static int quotient( int numerator, int denominator ) \n9 { \n10 return numerator / denominator; // possible division by zero \n11 } // end method quotient \n12 \n13 public static void main(String[] args) \n14 { \n15 Scanner scanner = new Scanner(System.in); // scanner for input \n16 \n17 System.out.print(\"Please enter an integer numerator: \"); \n18 int numerator = scanner.nextInt(); \n19 System.out.print(\"Please enter an integer denominator: \"); \n20 int denominator = scanner.nextInt(); \n21" + } + ] + } + ], + "index": 17, + "angle": 0, + "type": "code_body" + }, + { + "bbox": [867,160,1280,189], + "lines": [ + { + "bbox": [867,160,1280,189], + "spans": [ + { + "bbox": [867,160,1280,189], + "type": "text", + "content": "Algorithm 1 Modules for MCTSteg" + } + ] + } + ], + "index": 19, + "angle": 0, + "type": "code_caption" + } + ], + "index": 17, + "sub_type": "code" + } + ``` + +#### Content List (content_list.json) + +**File naming format**: `{original_filename}_content_list.json` + +Based on the pipeline format, with these VLM-specific extensions: + +- New `code` type with `sub_type` (`code` | `algorithm`): + * Fields: `code_body` (string), optional `code_caption` (list of strings) +- New `list` type with `sub_type` (`text` | `ref_text`): + * Field: `list_items` (array of strings) +- All `discarded_blocks` entries are also output (e.g., headers, footers, page numbers, margin notes, page footnotes). +- Existing types (`image`, `table`, `text`, `equation`) remain unchanged. +- `bbox` still uses the 0–1000 normalized coordinate mapping. 
+ + +##### Examples +Example: code (algorithm) entry +```json +{ + "type": "code", + "sub_type": "algorithm", + "code_caption": ["Algorithm 1 Modules for MCTSteg"], + "code_body": "1: function GETCOORDINATE(d) \n2: $x \\gets d / l$ , $y \\gets d$ mod $l$ \n3: return $(x, y)$ \n4: end function \n5: function BESTCHILD(v) \n6: $C \\gets$ child set of $v$ \n7: $v' \\gets \\arg \\max_{c \\in C} \\mathrm{UCTScore}(c)$ \n8: $v'.n \\gets v'.n + 1$ \n9: return $v'$ \n10: end function \n11: function BACK PROPAGATE(v) \n12: Calculate $R$ using Equation 11 \n13: while $v$ is not a root node do \n14: $v.r \\gets v.r + R$ , $v \\gets v.p$ \n15: end while \n16: end function \n17: function RANDOMSEARCH(v) \n18: while $v$ is not a leaf node do \n19: Randomly select an untried action $a \\in A(v)$ \n20: Create a new node $v'$ \n21: $(x, y) \\gets \\mathrm{GETCOORDINATE}(v'.d)$ \n22: $v'.p \\gets v$ , $v'.d \\gets v.d + 1$ , $v'.\\Gamma \\gets v.\\Gamma$ \n23: $v'.\\gamma_{x,y} \\gets a$ \n24: if $a = -1$ then \n25: $v.lc \\gets v'$ \n26: else if $a = 0$ then \n27: $v.mc \\gets v'$ \n28: else \n29: $v.rc \\gets v'$ \n30: end if \n31: $v \\gets v'$ \n32: end while \n33: return $v$ \n34: end function \n35: function SEARCH(v) \n36: while $v$ is fully expanded do \n37: $v \\gets$ BESTCHILD(v) \n38: end while \n39: if $v$ is not a leaf node then \n40: $v \\gets$ RANDOMSEARCH(v) \n41: end if \n42: return $v$ \n43: end function", + "bbox": [510,87,881,740], + "page_idx": 0 +} +``` + +Example: list (text) entry +```json +{ + "type": "list", + "sub_type": "text", + "list_items": [ + "H.1 Introduction", + "H.2 Example: Divide by Zero without Exception Handling", + "H.3 Example: Divide by Zero with Exception Handling", + "H.4 Summary" + ], + "bbox": [174,155,818,333], + "page_idx": 0 +} +``` + +Example: discarded blocks output +```json +[ + { + "type": "header", + "text": "Journal of Hydrology 310 (2005) 253-265", + "bbox": [363,164,623,177], + "page_idx": 0 + }, + { + "type": "page_footnote", 
+ "text": "* Corresponding author. Address: Forest Science Centre, Department of Sustainability and Environment, P.O. Box 137, Heidelberg, Vic. 3084, Australia. Tel.: +61 3 9450 8719; fax: +61 3 9450 8644.", + "bbox": [71,815,915,841], + "page_idx": 0 + } +] +``` + +## Summary + +The above files constitute MinerU's complete output results. Users can choose appropriate files for subsequent processing based on their needs: + +- **Model outputs** (Use raw outputs): + * model.json + +- **Debugging and verification** (Use visualization files): + * layout.pdf + * spans.pdf + +- **Content extraction**: (Use simplified files): + * *.md + * content_list.json + +- **Secondary development**: (Use structured files): + * middle.json diff --git a/docs/en/usage/advanced_cli_parameters.md b/docs/en/usage/advanced_cli_parameters.md new file mode 100644 index 0000000..9b57144 --- /dev/null +++ b/docs/en/usage/advanced_cli_parameters.md @@ -0,0 +1,53 @@ +# Advanced Command Line Parameters + +## Pass-through of inference engine parameters + +### vllm Acceleration Parameter Optimization +> [!TIP] +> If you can already use vllm normally for accelerated VLM model inference but still want to further improve inference speed, you can try the following parameters: +> +> - If you have multiple graphics cards, you can use vllm's multi-card parallel mode to increase throughput: `--data-parallel-size 2` + +### Parameter Passing Instructions +> [!TIP] +> - All officially supported vllm/lmdeploy parameters can be passed to MinerU through command line arguments, including the following commands: `mineru`, `mineru-openai-server`, `mineru-gradio`, `mineru-api` +> - If you want to learn more about `vllm` parameter usage, please refer to the [vllm official documentation](https://docs.vllm.ai/en/latest/cli/serve.html) +> - If you want to learn more about `lmdeploy` parameter usage, please refer to the [lmdeploy official documentation](https://lmdeploy.readthedocs.io/en/latest/llm/api_server.html) + +## 
GPU Device Selection and Configuration + +### CUDA_VISIBLE_DEVICES Basic Usage +> [!TIP] +> - In any situation, you can specify visible GPU devices by adding the `CUDA_VISIBLE_DEVICES` environment variable at the beginning of the command line. For example: +> ```bash +> CUDA_VISIBLE_DEVICES=1 mineru -p <input_path> -o <output_path> +> ``` +> - This specification method is effective for all command line calls, including `mineru`, `mineru-openai-server`, `mineru-gradio`, and `mineru-api`, and applies to both `pipeline` and `vlm` backends. + +### Common Device Configuration Examples +> [!TIP] +> Here are some common `CUDA_VISIBLE_DEVICES` setting examples: +> ```bash +> CUDA_VISIBLE_DEVICES=1 # Only device 1 will be seen +> CUDA_VISIBLE_DEVICES=0,1 # Devices 0 and 1 will be visible +> CUDA_VISIBLE_DEVICES="0,1" # Same as above, quotation marks are optional +> CUDA_VISIBLE_DEVICES=0,2,3 # Devices 0, 2, 3 will be visible; device 1 is masked +> CUDA_VISIBLE_DEVICES="" # No GPU will be visible +> ``` + +## Practical Application Scenarios +> [!TIP] +> Here are some possible usage scenarios: +> +> - If you have multiple graphics cards and need to specify cards 0 and 1, using multi-card parallelism to start `openai-server`, you can use the following command: +> ```bash +> CUDA_VISIBLE_DEVICES=0,1 mineru-openai-server --engine vllm --port 30000 --data-parallel-size 2 +> ``` +> +> - If you have multiple graphics cards and need to start two `fastapi` services on cards 0 and 1, listening on different ports respectively, you can use the following commands: +> ```bash +> # In terminal 1 +> CUDA_VISIBLE_DEVICES=0 mineru-api --host 127.0.0.1 --port 8000 +> # In terminal 2 +> CUDA_VISIBLE_DEVICES=1 mineru-api --host 127.0.0.1 --port 8001 +> ``` diff --git a/docs/en/usage/cli_tools.md b/docs/en/usage/cli_tools.md new file mode 100644 index 0000000..946d63d --- /dev/null +++ b/docs/en/usage/cli_tools.md @@ -0,0 +1,127 @@ +# Command Line Tools Usage Instructions + +## View Help Information +To view help
information for MinerU command line tools, you can use the `--help` parameter. Here are help information examples for various command line tools: +```bash +mineru --help +Usage: mineru [OPTIONS] + +Options: + -v, --version Show version and exit + -p, --path PATH Input file path or directory (required) + -o, --output PATH Output directory (required) + -m, --method [auto|txt|ocr] Parsing method: auto (default), txt, ocr (pipeline and hybrid* backend only) + -b, --backend [pipeline|hybrid-auto-engine|hybrid-http-client|vlm-auto-engine|vlm-http-client] + Parsing backend (default: hybrid-auto-engine) + -l, --lang [ch|ch_server|ch_lite|en|korean|japan|chinese_cht|ta|te|ka|th|el|latin|arabic|east_slavic|cyrillic|devanagari] + Specify document language (improves OCR accuracy, pipeline and hybrid* backend only) + -u, --url TEXT Service address when using http-client + -s, --start INTEGER Starting page number for parsing (0-based) + -e, --end INTEGER Ending page number for parsing (0-based) + -f, --formula BOOLEAN Enable formula parsing (default: enabled) + -t, --table BOOLEAN Enable table parsing (default: enabled) + -d, --device TEXT Inference device (e.g., cpu/cuda/cuda:0/npu/mps, pipeline and vlm-transformers backend only) + --vram INTEGER Maximum GPU VRAM usage per process (GB) (pipeline backend only) + --source [huggingface|modelscope|local] + Model source, default: huggingface + --help Show help information +``` +```bash +mineru-api --help +Usage: mineru-api [OPTIONS] + +Options: + --host TEXT Server host (default: 127.0.0.1) + --port INTEGER Server port (default: 8000) + --reload Enable auto-reload (development mode) + --help Show this message and exit. +``` +```bash +mineru-gradio --help +Usage: mineru-gradio [OPTIONS] + +Options: + --enable-example BOOLEAN Enable example files for input. The example + files to be input need to be placed in the + `example` folder within the directory where + the command is currently executed. 
+ --enable-http-client BOOLEAN Enable http-client backend to link openai- + compatible servers. + --enable-api BOOLEAN Enable gradio API for serving the + application. + --max-convert-pages INTEGER Set the maximum number of pages to convert + from PDF to Markdown. + --server-name TEXT Set the server name for the Gradio app. + --server-port INTEGER Set the server port for the Gradio app. + --latex-delimiters-type [a|b|all] + Set the type of LaTeX delimiters to use in + Markdown rendering: 'a' for type '$', 'b' for + type '()[]', 'all' for both types. + --help Show this message and exit. +``` + +## Environment Variables Description + +Some parameters of MinerU command line tools have equivalent environment variable configurations. Generally, environment variable configurations have higher priority than command line parameters and take effect across all command line tools. +Here are the environment variables and their descriptions: + +- `MINERU_DEVICE_MODE`: + * Used to specify inference device + * supports device types like `cpu/cuda/cuda:0/npu/mps` + * only effective for `pipeline` and `vlm-transformers` backends. + +- `MINERU_VIRTUAL_VRAM_SIZE`: + * Used to specify maximum GPU VRAM usage per process (GB) + * only effective for `pipeline` backend. + +- `MINERU_MODEL_SOURCE`: + * Used to specify model source + * supports `huggingface/modelscope/local` + * defaults to `huggingface`, can be switched to `modelscope` or local models through environment variables. + +- `MINERU_TOOLS_CONFIG_JSON`: + * Used to specify configuration file path + * defaults to `mineru.json` in user directory, can specify other configuration file paths through environment variables. + +- `MINERU_FORMULA_ENABLE`: + * Used to enable formula parsing + * defaults to `true`, can be set to `false` through environment variables to disable formula parsing. 
+ +- `MINERU_FORMULA_CH_SUPPORT`: + * Used to enable Chinese formula parsing optimization (experimental feature) + * Default is `false`, can be set to `true` via environment variable to enable Chinese formula parsing optimization. + * Only effective for `pipeline` backend. + +- `MINERU_TABLE_ENABLE`: + * Used to enable table parsing + * Default is `true`, can be set to `false` via environment variable to disable table parsing. + +- `MINERU_TABLE_MERGE_ENABLE`: + * Used to enable table merging functionality + * Default is `true`, can be set to `false` via environment variable to disable table merging functionality. + +- `MINERU_PDF_RENDER_TIMEOUT`: + * Used to set the timeout period (in seconds) for rendering PDF to images + * Default is `300` seconds, can be set to other values via environment variable to adjust the image rendering timeout. + +- `MINERU_INTRA_OP_NUM_THREADS`: + * Used to set the intra_op thread count for ONNX models, affects the computation speed of individual operators + * Default is `-1` (auto-select), can be set to other values via environment variable to adjust the thread count. + +- `MINERU_INTER_OP_NUM_THREADS`: + * Used to set the inter_op thread count for ONNX models, affects the parallel execution of multiple operators + * Default is `-1` (auto-select), can be set to other values via environment variable to adjust the thread count. + +- `MINERU_HYBRID_BATCH_RATIO`: + * Used to set the batch ratio for small model processing in `hybrid-*` backends. + * Commonly used in `hybrid-http-client`, it allows adjusting the VRAM usage of a single client by controlling the batch ratio of small models. + * Single Client VRAM Size | MINERU_HYBRID_BATCH_RATIO + ------------------------|-------------------------- + <= 6 GB | 8 + <= 4.5 GB | 4 + <= 3 GB | 2 + <= 2.5 GB | 1 + +- `MINERU_HYBRID_FORCE_PIPELINE_ENABLE`: + * Used to force the text extraction part in `hybrid-*` backends to be processed using small models. + * Defaults to `false`. 
Can be set to `true` via environment variable to enable this feature, thereby reducing hallucinations in certain extreme cases. diff --git a/docs/en/usage/index.md b/docs/en/usage/index.md new file mode 100644 index 0000000..bee4978 --- /dev/null +++ b/docs/en/usage/index.md @@ -0,0 +1,16 @@ +# Usage Guide + +This section provides comprehensive usage instructions for the project. The following sections take you step by step from basic to advanced usage: + +## Table of Contents + +- [Quick Usage](./quick_usage.md) - Quick setup and basic usage +- [Model Source Configuration](./model_source.md) - Detailed configuration instructions for model sources +- [Command Line Tools](./cli_tools.md) - Detailed parameter descriptions for command line tools +- [Advanced Optimization Parameters](./advanced_cli_parameters.md) - Advanced parameter descriptions for command line tool adaptation + +## Getting Started + +We recommend reading the documentation in the order listed above, which will help you better understand and use the project features. + +If you encounter issues during usage, please check the [FAQ](../faq/index.md). \ No newline at end of file diff --git a/docs/en/usage/model_source.md b/docs/en/usage/model_source.md new file mode 100644 index 0000000..a6a840c --- /dev/null +++ b/docs/en/usage/model_source.md @@ -0,0 +1,55 @@ +# Model Source Documentation + +MinerU uses `HuggingFace` and `ModelScope` as model repositories. Users can switch model sources or use local models as needed. + +- `HuggingFace` is the default model source, providing excellent loading speed and high stability globally. +- `ModelScope` is the best choice for users in mainland China, providing seamlessly compatible `hf` SDK modules, suitable for users who cannot access HuggingFace.
+ +## Methods to Switch Model Sources + +### Switch via Command Line Parameters +Currently, only the `mineru` command line tool supports switching model sources through command line parameters. Other command line tools such as `mineru-api`, `mineru-gradio`, etc., do not support this yet. +```bash +mineru -p <input_path> -o <output_path> --source modelscope +``` + +### Switch via Environment Variables +You can switch model sources by setting environment variables in any situation. This applies to all command line tools and API calls. +```bash +export MINERU_MODEL_SOURCE=modelscope +``` +or +```python +import os +os.environ["MINERU_MODEL_SOURCE"] = "modelscope" +``` +>[!TIP] +> Model sources set through environment variables will take effect in the current terminal session until the terminal is closed or the environment variable is modified. They have higher priority than command line parameters - if both command line parameters and environment variables are set, the command line parameters will be ignored. + +## Using Local Models + +### 1. Download Models to Local Storage +```bash +mineru-models-download --help +``` +or use the interactive command line tool to select model downloads: +```bash +mineru-models-download +``` +> [!NOTE] +>- After download completion, the model path will be output in the current terminal window and automatically written to `mineru.json` in the user directory. +>- You can also create it by copying the [configuration template file](https://github.com/opendatalab/MinerU/blob/master/mineru.template.json) to your user directory and renaming it to `mineru.json`. +>- After downloading models locally, you can freely move the model folder to another location, as long as you update the model path in `mineru.json` accordingly. +>- If you deploy the model folder to another server, please ensure you move the `mineru.json` file to the user directory of the new device and configure the model path correctly.
+>- If you need to update model files, you can run the `mineru-models-download` command again. Model updates do not support custom paths currently - if you haven't moved the local model folder, model files will be incrementally updated; if you have moved the model folder, model files will be re-downloaded to the default location and `mineru.json` will be updated. + +### 2. Use Local Models for Parsing + +```bash +mineru -p <input_path> -o <output_path> --source local +``` +or enable through environment variables: +```bash +export MINERU_MODEL_SOURCE=local +mineru -p <input_path> -o <output_path> +``` diff --git a/docs/en/usage/quick_usage.md b/docs/en/usage/quick_usage.md new file mode 100644 index 0000000..fe35020 --- /dev/null +++ b/docs/en/usage/quick_usage.md @@ -0,0 +1,100 @@ +# Using MinerU + +## Quick Model Source Configuration +MinerU uses `huggingface` as the default model source. If users cannot access `huggingface` due to network restrictions, they can conveniently switch the model source to `modelscope` through environment variables: +```bash +export MINERU_MODEL_SOURCE=modelscope +``` +For more information about model source configuration and custom local model paths, please refer to the [Model Source Documentation](./model_source.md) in the documentation. + +## Quick Usage via Command Line +MinerU has built-in command line tools that allow users to quickly use MinerU for PDF parsing through the command line: +```bash +mineru -p <input_path> -o <output_path> +``` +> [!TIP] +>- `<input_path>`: Local PDF/image file or directory +>- `<output_path>`: Output directory +> +> For more information about output files, please refer to [Output File Documentation](../reference/output_files.md). + +> [!NOTE] +> The command line tool will automatically attempt cuda/mps acceleration on Linux and macOS systems. +> Windows users who need cuda acceleration should visit the [PyTorch official website](https://pytorch.org/get-started/locally/) to select the appropriate command for their cuda version to install acceleration-enabled `torch` and `torchvision`.
+ +If you need to adjust parsing options through custom parameters, you can also check the more detailed [Command Line Tools Usage Instructions](./cli_tools.md) in the documentation. + +## Advanced Usage via API, WebUI, http-client/server + +- Direct Python API calls: [Python Usage Example](https://github.com/opendatalab/MinerU/blob/master/demo/demo.py) +- FastAPI calls: + ```bash + mineru-api --host 0.0.0.0 --port 8000 + ``` + >[!TIP] + >Access `http://127.0.0.1:8000/docs` in your browser to view the API documentation. +- Start Gradio WebUI visual frontend: + ```bash + mineru-gradio --server-name 0.0.0.0 --server-port 7860 + ``` + >[!TIP] + > + >- Access `http://127.0.0.1:7860` in your browser to use the Gradio WebUI. + +- Using the `http-client/server` method: + ```bash + # Start openai compatible server (requires vllm or lmdeploy environment) + mineru-openai-server --port 30000 + ``` + >[!TIP] + >In another terminal, connect to openai server via http client + > ```bash + > mineru -p <input_path> -o <output_path> -b hybrid-http-client -u http://127.0.0.1:30000 + > ``` + +> [!NOTE] +> All officially supported `vllm/lmdeploy` parameters can be passed to MinerU through command line arguments, including the following commands: `mineru`, `mineru-openai-server`, `mineru-gradio`, `mineru-api`. +> We have compiled some commonly used parameters and usage methods for `vllm/lmdeploy`, which can be found in the documentation [Advanced Command Line Parameters](./advanced_cli_parameters.md). + +## Extending MinerU Functionality with Configuration Files + +MinerU is now ready to use out of the box, but also supports extending functionality through configuration files. You can edit the `mineru.json` file in your user directory to add custom configurations.
+ +>[!IMPORTANT] +>The `mineru.json` file will be automatically generated when you use the built-in model download command `mineru-models-download`, or you can create it by copying the [configuration template file](https://github.com/opendatalab/MinerU/blob/master/mineru.template.json) to your user directory and renaming it to `mineru.json`. + +Here are some available configuration options: + +- `latex-delimiter-config`: + * Used to configure LaTeX formula delimiters + * Defaults to `$` symbol, can be modified to other symbols or strings as needed. + +- `llm-aided-config`: + * Used to configure parameters for LLM-assisted title hierarchy + * Compatible with all LLM models supporting `openai protocol`, defaults to using Alibaba Cloud Bailian's `qwen3-next-80b-a3b-instruct` model. + * You need to configure your own API key and set `enable` to `true` to enable this feature. + * If your API provider does not support the `enable_thinking` parameter, please manually remove it. + * For example, in your configuration file, the `llm-aided-config` section may look like: + ```json + "llm-aided-config": { + "api_key": "your_api_key", + "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1", + "model": "qwen3-next-80b-a3b-instruct", + "enable_thinking": false, + "enable": false + } + ``` + * To remove the `enable_thinking` parameter, simply delete the line containing `"enable_thinking": false`, resulting in: + ```json + "llm-aided-config": { + "api_key": "your_api_key", + "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1", + "model": "qwen3-next-80b-a3b-instruct", + "enable": false + } + ``` + +- `models-dir`: + * Used to specify local model storage directory + * Please specify model directories for `pipeline` and `vlm` backends separately. + * After specifying the directory, you can use local models by configuring the environment variable `export MINERU_MODEL_SOURCE=local`. 
diff --git a/docs/images/MinerU-logo.png b/docs/images/MinerU-logo.png new file mode 100644 index 0000000..09ab46b Binary files /dev/null and b/docs/images/MinerU-logo.png differ diff --git a/docs/images/datalab_logo.png b/docs/images/datalab_logo.png new file mode 100644 index 0000000..5019ae7 Binary files /dev/null and b/docs/images/datalab_logo.png differ diff --git a/docs/images/flowchart_en.png b/docs/images/flowchart_en.png new file mode 100644 index 0000000..b490011 Binary files /dev/null and b/docs/images/flowchart_en.png differ diff --git a/docs/images/flowchart_zh_cn.png b/docs/images/flowchart_zh_cn.png new file mode 100644 index 0000000..32e0a14 Binary files /dev/null and b/docs/images/flowchart_zh_cn.png differ diff --git a/docs/images/layout_example.png b/docs/images/layout_example.png new file mode 100644 index 0000000..4a57dff Binary files /dev/null and b/docs/images/layout_example.png differ diff --git a/docs/images/logo.png b/docs/images/logo.png new file mode 100644 index 0000000..0181808 Binary files /dev/null and b/docs/images/logo.png differ diff --git a/docs/images/logo.svg b/docs/images/logo.svg new file mode 100644 index 0000000..6553978 --- /dev/null +++ b/docs/images/logo.svg @@ -0,0 +1,22 @@ + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/images/poly.png b/docs/images/poly.png new file mode 100644 index 0000000..14af772 Binary files /dev/null and b/docs/images/poly.png differ diff --git a/docs/images/project_panorama_en.png b/docs/images/project_panorama_en.png new file mode 100644 index 0000000..19616da Binary files /dev/null and b/docs/images/project_panorama_en.png differ diff --git a/docs/images/project_panorama_zh_cn.png b/docs/images/project_panorama_zh_cn.png new file mode 100644 index 0000000..3cd6843 Binary files /dev/null and b/docs/images/project_panorama_zh_cn.png differ diff --git a/docs/images/spans_example.png b/docs/images/spans_example.png new file mode 100644 index 0000000..14de87e Binary files /dev/null 
and b/docs/images/spans_example.png differ diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000..6669ada --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,4 @@ +mkdocs +mkdocs-static-i18n +markdown-gfm-admonition +mkdocs-video \ No newline at end of file diff --git a/docs/zh/demo/index.md b/docs/zh/demo/index.md new file mode 100644 index 0000000..103ffc5 --- /dev/null +++ b/docs/zh/demo/index.md @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/docs/zh/faq/index.md b/docs/zh/faq/index.md new file mode 100644 index 0000000..e4a198d --- /dev/null +++ b/docs/zh/faq/index.md @@ -0,0 +1,29 @@ +# 常见问题解答 + +如果未能列出您的问题,您也可以使用[DeepWiki](https://deepwiki.com/opendatalab/MinerU)与AI助手交流,这可以解决大部分常见问题。 + +如果您仍然无法解决问题,您可通过[Discord](https://discord.gg/Tdedn9GTXq)或[WeChat](https://mineru.net/community-portal/?aliasId=3c430f94)加入社区,与其他用户和开发者交流。 + +??? question "在WSL2的Ubuntu22.04中遇到报错`ImportError: libGL.so.1: cannot open shared object file: No such file or directory`" + + WSL2的Ubuntu22.04中缺少`libgl`库,可通过以下命令安装`libgl`库解决: + + ```bash + sudo apt-get install libgl1-mesa-glx + ``` + + 参考:[#388](https://github.com/opendatalab/MinerU/issues/388) + +??? question "在 Linux 系统安装并使用时,解析结果缺失部份文字信息。" + + MinerU在>=2.0的版本中使用`pypdfium2`代替`pymupdf`作为PDF页面的渲染引擎,以解决AGPLv3的许可证问题,在某些Linux发行版,由于缺少CJK字体,可能会在将PDF渲染成图片的过程中丢失部份文字。 + 为了解决这个问题,您可以通过以下命令安装noto字体包,这在Ubuntu/debian系统中有效: + ```bash + sudo apt update + sudo apt install fonts-noto-core + sudo apt install fonts-noto-cjk + fc-cache -fv + ``` + 也可以直接使用我们的[Docker部署](../quick_start/docker_deployment.md)方式构建镜像,镜像中默认包含以上字体包。 + + 参考:[#2915](https://github.com/opendatalab/MinerU/issues/2915) \ No newline at end of file diff --git a/docs/zh/index.md b/docs/zh/index.md new file mode 100644 index 0000000..48af523 --- /dev/null +++ b/docs/zh/index.md @@ -0,0 +1,69 @@ +
+ +

+ +

+
+ + + +[![stars](https://img.shields.io/github/stars/opendatalab/MinerU.svg)](https://github.com/opendatalab/MinerU) +[![forks](https://img.shields.io/github/forks/opendatalab/MinerU.svg)](https://github.com/opendatalab/MinerU) +[![open issues](https://img.shields.io/github/issues-raw/opendatalab/MinerU)](https://github.com/opendatalab/MinerU/issues) +[![issue resolution](https://img.shields.io/github/issues-closed-raw/opendatalab/MinerU)](https://github.com/opendatalab/MinerU/issues) +[![PyPI version](https://img.shields.io/pypi/v/mineru)](https://pypi.org/project/mineru/) +[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/mineru)](https://pypi.org/project/mineru/) +[![Downloads](https://static.pepy.tech/badge/mineru)](https://pepy.tech/project/mineru) +[![Downloads](https://static.pepy.tech/badge/mineru/month)](https://pepy.tech/project/mineru) +[![OpenDataLab](https://img.shields.io/badge/webapp_on_mineru.net-blue?logo=data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMTM0IiBoZWlnaHQ9IjEzNCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cGF0aCBkPSJtMTIyLDljMCw1LTQsOS05LDlzLTktNC05LTksNC05LDktOSw5LDQsOSw5eiIgZmlsbD0idXJsKCNhKSIvPjxwYXRoIGQ9Im0xMjIsOWMwLDUtNCw5LTksOXMtOS00LTktOSw0LTksOS05LDksNCw5LDl6IiBmaWxsPSIjMDEwMTAxIi8+PHBhdGggZD0ibTkxLDE4YzAsNS00LDktOSw5cy05LTQtOS05LDQtOSw5LTksOSw0LDksOXoiIGZpbGw9InVybCgjYikiLz48cGF0aCBkPSJtOTEsMThjMCw1LTQsOS05LDlzLTktNC05LTksNC05LDktOSw5LDQsOSw5eiIgZmlsbD0iIzAxMDEwMSIvPjxwYXRoIGZpbGwtcnVsZT0iZXZlbm9kZCIgY2xpcC1ydWxlPSJldmVub2RkIiBkPSJtMzksNjJjMCwxNiw4LDMwLDIwLDM4LDctNiwxMi0xNiwxMi0yNlY0OWMwLTQsMy03LDYtOGw0Ni0xMmM1LTEsMTEsMywxMSw4djMxYzAsMzctMzAsNjYtNjYsNjYtMzcsMC02Ni0zMC02Ni02NlY0NmMwLTQsMy03LDYtOGwyMC02YzUtMSwxMSwzLDExLDh2MjF6bS0yOSw2YzAsMTYsNiwzMCwxNyw0MCwzLDEsNSwxLDgsMSw1LDAsMTAtMSwxNS0zQzM3LDk1LDI5LDc5LDI5LDYyVjQybC0xOSw1djIweiIgZmlsbD0idXJsKCNjKSIvPjxwYXRoIGZpbGwtcnVsZT0iZXZlbm9kZCIgY2xpcC1ydWxlPSJldmVub2RkIiBkPSJtMzksNjJjMCwxNiw4LDMwLDIwLDM4LDctNiwxMi0xNiwxMi0yNlY0OWMwLTQsMy03LDYtOGw0Ni0xMmM1LTEsMTEsMywxMSw4djM
xYzAsMzctMzAsNjYtNjYsNjYtMzcsMC02Ni0zMC02Ni02NlY0NmMwLTQsMy03LDYtOGwyMC02YzUtMSwxMSwzLDExLDh2MjF6bS0yOSw2YzAsMTYsNiwzMCwxNyw0MCwzLDEsNSwxLDgsMSw1LDAsMTAtMSwxNS0zQzM3LDk1LDI5LDc5LDI5LDYyVjQybC0xOSw1djIweiIgZmlsbD0iIzAxMDEwMSIvPjxkZWZzPjxsaW5lYXJHcmFkaWVudCBpZD0iYSIgeDE9Ijg0IiB5MT0iNDEiIHgyPSI3NSIgeTI9IjEyMCIgZ3JhZGllbnRVbml0cz0idXNlclNwYWNlT25Vc2UiPjxzdG9wIHN0b3AtY29sb3I9IiNmZmYiLz48c3RvcCBvZmZzZXQ9IjEiIHN0b3AtY29sb3I9IiMyZTJlMmUiLz48L2xpbmVhckdyYWRpZW50PjxsaW5lYXJHcmFkaWVudCBpZD0iYiIgeDE9Ijg0IiB5MT0iNDEiIHgyPSI3NSIgeTI9IjEyMCIgZ3JhZGllbnRVbml0cz0idXNlclNwYWNlT25Vc2UiPjxzdG9wIHN0b3AtY29sb3I9IiNmZmYiLz48c3RvcCBvZmZzZXQ9IjEiIHN0b3AtY29sb3I9IiMyZTJlMmUiLz48L2xpbmVhckdyYWRpZW50PjxsaW5lYXJHcmFkaWVudCBpZD0iYyIgeDE9Ijg0IiB5MT0iNDEiIHgyPSI3NSIgeTI9IjEyMCIgZ3JhZGllbnRVbml0cz0idXNlclNwYWNlT25Vc2UiPjxzdG9wIHN0b3AtY29sb3I9IiNmZmYiLz48c3RvcCBvZmZzZXQ9IjEiIHN0b3AtY29sb3I9IiMyZTJlMmUiLz48L2xpbmVhckdyYWRpZW50PjwvZGVmcz48L3N2Zz4=&labelColor=white)](https://mineru.net/OpenSourceTools/Extractor?source=github) 
+[![ModelScope](https://img.shields.io/badge/Demo_on_ModelScope-purple?logo=data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMjIzIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KCiA8Zz4KICA8dGl0bGU+TGF5ZXIgMTwvdGl0bGU+CiAgPHBhdGggaWQ9InN2Z18xNCIgZmlsbD0iIzYyNGFmZiIgZD0ibTAsODkuODRsMjUuNjUsMGwwLDI1LjY0OTk5bC0yNS42NSwwbDAsLTI1LjY0OTk5eiIvPgogIDxwYXRoIGlkPSJzdmdfMTUiIGZpbGw9IiM2MjRhZmYiIGQ9Im05OS4xNCwxMTUuNDlsMjUuNjUsMGwwLDI1LjY1bC0yNS42NSwwbDAsLTI1LjY1eiIvPgogIDxwYXRoIGlkPSJzdmdfMTYiIGZpbGw9IiM2MjRhZmYiIGQ9Im0xNzYuMDksMTQxLjE0bC0yNS42NDk5OSwwbDAsMjIuMTlsNDcuODQsMGwwLC00Ny44NGwtMjIuMTksMGwwLDI1LjY1eiIvPgogIDxwYXRoIGlkPSJzdmdfMTciIGZpbGw9IiMzNmNmZDEiIGQ9Im0xMjQuNzksODkuODRsMjUuNjUsMGwwLDI1LjY0OTk5bC0yNS42NSwwbDAsLTI1LjY0OTk5eiIvPgogIDxwYXRoIGlkPSJzdmdfMTgiIGZpbGw9IiMzNmNmZDEiIGQ9Im0wLDY0LjE5bDI1LjY1LDBsMCwyNS42NWwtMjUuNjUsMGwwLC0yNS42NXoiLz4KICA8cGF0aCBpZD0ic3ZnXzE5IiBmaWxsPSIjNjI0YWZmIiBkPSJtMTk4LjI4LDg5Ljg0bDI1LjY0OTk5LDBsMCwyNS42NDk5OWwtMjUuNjQ5OTksMGwwLC0yNS42NDk5OXoiLz4KICA8cGF0aCBpZD0ic3ZnXzIwIiBmaWxsPSIjMzZjZmQxIiBkPSJtMTk4LjI4LDY0LjE5bDI1LjY0OTk5LDBsMCwyNS42NWwtMjUuNjQ5OTksMGwwLC0yNS42NXoiLz4KICA8cGF0aCBpZD0ic3ZnXzIxIiBmaWxsPSIjNjI0YWZmIiBkPSJtMTUwLjQ0LDQybDAsMjIuMTlsMjUuNjQ5OTksMGwwLDI1LjY1bDIyLjE5LDBsMCwtNDcuODRsLTQ3Ljg0LDB6Ii8+CiAgPHBhdGggaWQ9InN2Z18yMiIgZmlsbD0iIzM2Y2ZkMSIgZD0ibTczLjQ5LDg5Ljg0bDI1LjY1LDBsMCwyNS42NDk5OWwtMjUuNjUsMGwwLC0yNS42NDk5OXoiLz4KICA8cGF0aCBpZD0ic3ZnXzIzIiBmaWxsPSIjNjI0YWZmIiBkPSJtNDcuODQsNjQuMTlsMjUuNjUsMGwwLC0yMi4xOWwtNDcuODQsMGwwLDQ3Ljg0bDIyLjE5LDBsMCwtMjUuNjV6Ii8+CiAgPHBhdGggaWQ9InN2Z18yNCIgZmlsbD0iIzYyNGFmZiIgZD0ibTQ3Ljg0LDExNS40OWwtMjIuMTksMGwwLDQ3Ljg0bDQ3Ljg0LDBsMCwtMjIuMTlsLTI1LjY1LDBsMCwtMjUuNjV6Ii8+CiA8L2c+Cjwvc3ZnPg==&labelColor=white)](https://www.modelscope.cn/studios/OpenDataLab/MinerU) 
+[![HuggingFace](https://img.shields.io/badge/Demo_on_HuggingFace-yellow.svg?logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAF8AAABYCAMAAACkl9t/AAAAk1BMVEVHcEz/nQv/nQv/nQr/nQv/nQr/nQv/nQv/nQr/wRf/txT/pg7/yRr/rBD/zRz/ngv/oAz/zhz/nwv/txT/ngv/0B3+zBz/nQv/0h7/wxn/vRb/thXkuiT/rxH/pxD/ogzcqyf/nQvTlSz/czCxky7/SjifdjT/Mj3+Mj3wMj15aTnDNz+DSD9RTUBsP0FRO0Q6O0WyIxEIAAAAGHRSTlMADB8zSWF3krDDw8TJ1NbX5efv8ff9/fxKDJ9uAAAGKklEQVR42u2Z63qjOAyGC4RwCOfB2JAGqrSb2WnTw/1f3UaWcSGYNKTdf/P+mOkTrE+yJBulvfvLT2A5ruenaVHyIks33npl/6C4s/ZLAM45SOi/1FtZPyFur1OYofBX3w7d54Bxm+E8db+nDr12ttmESZ4zludJEG5S7TO72YPlKZFyE+YCYUJTBZsMiNS5Sd7NlDmKM2Eg2JQg8awbglfqgbhArjxkS7dgp2RH6hc9AMLdZYUtZN5DJr4molC8BfKrEkPKEnEVjLbgW1fLy77ZVOJagoIcLIl+IxaQZGjiX597HopF5CkaXVMDO9Pyix3AFV3kw4lQLCbHuMovz8FallbcQIJ5Ta0vks9RnolbCK84BtjKRS5uA43hYoZcOBGIG2Epbv6CvFVQ8m8loh66WNySsnN7htL58LNp+NXT8/PhXiBXPMjLSxtwp8W9f/1AngRierBkA+kk/IpUSOeKByzn8y3kAAAfh//0oXgV4roHm/kz4E2z//zRc3/lgwBzbM2mJxQEa5pqgX7d1L0htrhx7LKxOZlKbwcAWyEOWqYSI8YPtgDQVjpB5nvaHaSnBaQSD6hweDi8PosxD6/PT09YY3xQA7LTCTKfYX+QHpA0GCcqmEHvr/cyfKQTEuwgbs2kPxJEB0iNjfJcCTPyocx+A0griHSmADiC91oNGVwJ69RudYe65vJmoqfpul0lrqXadW0jFKH5BKwAeCq+Den7s+3zfRJzA61/Uj/9H/VzLKTx9jFPPdXeeP+L7WEvDLAKAIoF8bPTKT0+TM7W8ePj3Rz/Yn3kOAp2f1Kf0Weony7pn/cPydvhQYV+eFOfmOu7VB/ViPe34/EN3RFHY/yRuT8ddCtMPH/McBAT5s+vRde/gf2c/sPsjLK+m5IBQF5tO+h2tTlBGnP6693JdsvofjOPnnEHkh2TnV/X1fBl9S5zrwuwF8NFrAVJVwCAPTe8gaJlomqlp0pv4Pjn98tJ/t/fL++6unpR1YGC2n/KCoa0tTLoKiEeUPDl94nj+5/Tv3/eT5vBQ60X1S0oZr+IWRR8Ldhu7AlLjPISlJcO9vrFotky9SpzDequlwEir5beYAc0R7D9KS1DXva0jhYRDXoExPdc6yw5GShkZXe9QdO/uOvHofxjrV/TNS6iMJS+4TcSTgk9n5agJdBQbB//IfF/HpvPt3Tbi7b6I6K0R72p6ajryEJrENW2bbeVUGjfgoals4L443c7BEE4mJO2SpbRngxQrAKRudRzGQ8jVOL2qDVjjI8K1gc3TIJ5KiFZ1q+gdsARPB4NQS4AjwVSt72DSoXNyOWUrU5mQ9nRYyjp89Xo7oRI6Bga9QNT1mQ/ptaJq5T/7WcgAZywR/XlPGAUDdet3LE+qS0TI+g+aJU8MIqjo0Kx8Ly+maxLjJmjQ18rA0YCkxLQbUZP1WqdmyQGJLUm7VnQFqodmXSqmRrdVpqdzk5LvmvgtEcW8PMGdaS23EOWyDVbACZzUJPaqMbjDxpA3Qrgl0AikimGDbqmyT8P8NOYiqrldF8rX+YN7TopX4UoHuSCYY7cgX4gHwclQKl1zhx0THf+tCAUValz
jI7Wg9EhptrkIcfIJjA94evOn8B2eHaVzvBrnl2ig0So6hvPaz0IGcOvTHvUIlE2+prqAxLSQxZlU2stql1NqCCLdIiIN/i1DBEHUoElM9dBravbiAnKqgpi4IBkw+utSPIoBijDXJipSVV7MpOEJUAc5Qmm3BnUN+w3hteEieYKfRZSIUcXKMVf0u5wD4EwsUNVvZOtUT7A2GkffHjByWpHqvRBYrTV72a6j8zZ6W0DTE86Hn04bmyWX3Ri9WH7ZU6Q7h+ZHo0nHUAcsQvVhXRDZHChwiyi/hnPuOsSEF6Exk3o6Y9DT1eZ+6cASXk2Y9k+6EOQMDGm6WBK10wOQJCBwren86cPPWUcRAnTVjGcU1LBgs9FURiX/e6479yZcLwCBmTxiawEwrOcleuu12t3tbLv/N4RLYIBhYexm7Fcn4OJcn0+zc+s8/VfPeddZHAGN6TT8eGczHdR/Gts1/MzDkThr23zqrVfAMFT33Nx1RJsx1k5zuWILLnG/vsH+Fv5D4NTVcp1Gzo8AAAAAElFTkSuQmCC&labelColor=white)](https://huggingface.co/spaces/opendatalab/MinerU) +[![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/gist/myhloli/a3cb16570ab3cfeadf9d8f0ac91b4fca/mineru_demo.ipynb) +[![arXiv](https://img.shields.io/badge/MinerU-Technical%20Report-b31b1b.svg?logo=arXiv)](https://arxiv.org/abs/2409.18839) +[![arXiv](https://img.shields.io/badge/MinerU2.5-Technical%20Report-b31b1b.svg?logo=arXiv)](https://arxiv.org/abs/2509.22186) +[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/opendatalab/MinerU) + + + +## 项目简介 + +MinerU是一款将PDF转化为机器可读格式的工具(如markdown、json),可以很方便地抽取为任意格式。 +MinerU诞生于[书生-浦语](https://github.com/InternLM/InternLM)的预训练过程中,我们将会集中精力解决科技文献中的符号转化问题,希望在大模型时代为科技发展做出贡献。 +相比国内外知名商用产品MinerU还很年轻,如果遇到问题或者结果不及预期请到[issue](https://github.com/opendatalab/MinerU/issues)提交问题,同时**附上相关PDF**。 + +![type:video](https://github.com/user-attachments/assets/4bea02c9-6d54-4cd6-97ed-dff14340982c) + + +## 主要功能 + +- 删除页眉、页脚、脚注、页码等元素,确保语义连贯 +- 输出符合人类阅读顺序的文本,适用于单栏、多栏及复杂排版 +- 保留原文档的结构,包括标题、段落、列表等 +- 提取图像、图片描述、表格、表格标题及脚注 +- 自动识别并转换文档中的公式为LaTeX格式 +- 自动识别并转换文档中的表格为HTML格式 +- 自动检测扫描版PDF和乱码PDF,并启用OCR功能 +- OCR支持109种语言的检测与识别 +- 支持多种输出格式,如多模态与NLP的Markdown、按阅读顺序排序的JSON、含有丰富信息的中间格式等 +- 支持多种可视化结果,包括layout可视化、span可视化等,便于高效确认输出效果与质检 +- 支持纯CPU环境运行,并支持 GPU(CUDA)/NPU(CANN)/MPS 加速 +- 兼容Windows、Linux和Mac平台 + + +## 使用指南 + +- [快速上手指南](./quick_start/index.md) +- [详细使用说明](./usage/index.md) \ No 
newline at end of file diff --git a/docs/zh/quick_start/docker_deployment.md b/docs/zh/quick_start/docker_deployment.md new file mode 100644 index 0000000..2a5d4dd --- /dev/null +++ b/docs/zh/quick_start/docker_deployment.md @@ -0,0 +1,85 @@ +# 使用docker部署MinerU + +MinerU提供了便捷的docker部署方式,这有助于快速搭建环境并解决一些棘手的环境兼容问题。 + +## 使用 Dockerfile 构建镜像 + +```bash +wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/docker/china/Dockerfile +docker build -t mineru:latest -f Dockerfile . +``` + +> [!TIP] +> [Dockerfile](https://github.com/opendatalab/MinerU/blob/master/docker/china/Dockerfile)默认使用`vllm/vllm-openai:v0.10.1.1`作为基础镜像, +> 该版本的显卡型号支持有限,可能仅在 Ampere、Ada Lovelace、Hopper架构上工作,如您无法在Volta、Turing、Blackwell显卡上使用vLLM加速推理,可通过更改基础镜像为`vllm/vllm-openai:v0.11.0`来解决该问题。 + +## Docker说明 + +MinerU的docker使用了`vllm/vllm-openai`作为基础镜像,因此在docker中默认集成了`vllm`推理加速框架和必需的依赖环境。因此在满足条件的设备上,您可以直接使用`vllm`加速VLM模型推理。 +> [!NOTE] +> 使用`vllm`加速VLM模型推理需要满足的条件是: +> +> - 设备包含Volta及以后架构的显卡,且可用显存大于等于8G。 +> - 物理机的显卡驱动应支持CUDA 12.8或更高版本,可通过`nvidia-smi`命令检查驱动版本。 +> - docker中能够访问物理机的显卡设备。 + + +## 启动 Docker 容器 + +```bash +docker run --gpus all \ + --shm-size 32g \ + -p 30000:30000 -p 7860:7860 -p 8000:8000 \ + --ipc=host \ + -it mineru:latest \ + /bin/bash +``` + +执行该命令后,您将进入到Docker容器的交互式终端,并映射了一些端口用于可能会使用的服务,您可以直接在容器内运行MinerU相关命令来使用MinerU的功能。 +您也可以直接通过替换`/bin/bash`为服务启动命令来启动MinerU服务,详细说明请参考[通过命令启动服务](https://opendatalab.github.io/MinerU/zh/usage/quick_usage/#apiwebuihttp-clientserver)。 + +## 通过 Docker Compose 直接启动服务 + +我们提供了[compose.yaml](https://github.com/opendatalab/MinerU/blob/master/docker/compose.yaml)文件,您可以通过它来快速启动MinerU服务。 + +```bash +# 下载 compose.yaml 文件 +wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/docker/compose.yaml +``` +>[!NOTE] +> +>- `compose.yaml`文件中包含了MinerU的多个服务配置,您可以根据需要选择启动特定的服务。 +>- 不同的服务可能会有额外的参数配置,您可以在`compose.yaml`文件中查看并编辑。 +>- 由于`vllm`推理加速框架预分配显存的特性,您可能无法在同一台机器上同时运行多个`vllm`服务,因此请确保在启动`vlm-openai-server`服务或使用`vlm-vllm-engine`后端时,其他可能使用显存的服务已停止。 + +--- + +### 启动
openai兼容接口 服务 +并通过`vlm-http-client`后端连接`openai-server` + ```bash + docker compose -f compose.yaml --profile openai-server up -d + ``` + >[!TIP] + >在另一个终端中通过http client连接openai server(只需cpu与网络,不需要vllm环境) + > ```bash + > mineru -p <input_path> -o <output_path> -b vlm-http-client -u http://<server_ip>:30000 + > ``` + +--- + +### 启动 Web API 服务 + ```bash + docker compose -f compose.yaml --profile api up -d + ``` + >[!TIP] + >在浏览器中访问 `http://<server_ip>:8000/docs` 查看API文档。 + +--- + +### 启动 Gradio WebUI 服务 + ```bash + docker compose -f compose.yaml --profile gradio up -d + ``` + >[!TIP] + > + >- 在浏览器中访问 `http://<server_ip>:7860` 使用 Gradio WebUI。 \ No newline at end of file diff --git a/docs/zh/quick_start/extension_modules.md b/docs/zh/quick_start/extension_modules.md new file mode 100644 index 0000000..72bad26 --- /dev/null +++ b/docs/zh/quick_start/extension_modules.md @@ -0,0 +1,54 @@ +# MinerU 扩展模块安装指南 +MinerU 支持根据不同需求,按需安装扩展模块,以增强功能或支持特定的模型后端。 + +## 常见场景 + +### 核心功能安装 +`core` 模块是 MinerU 的核心依赖,包含了除`vllm`/`lmdeploy`外的所有功能模块。安装此模块可以确保 MinerU 的基本功能正常运行。 +```bash +uv pip install "mineru[core]" +``` + +--- + +### 使用`vllm`加速 VLM 模型推理 +> [!NOTE] +> `vllm`和`lmdeploy`对vlm的推理加速效果和使用方式几乎相同,您可以根据实际情况选择其中之一进行安装和使用,但不建议同时安装这两个模块,以避免潜在的依赖冲突。 + +`vllm` 模块提供了对 VLM 模型推理的加速支持,适用于具有 Volta 及以后架构的显卡(8G 显存及以上)。安装此模块可以显著提升模型推理速度。 +```bash +uv pip install "mineru[core,vllm]" +``` +> [!TIP] +> 如在安装包含`vllm`的扩展包过程中发生异常,请参考 [vllm 官方文档](https://docs.vllm.ai/en/latest/getting_started/installation/index.html) 尝试解决,或直接使用 [Docker](./docker_deployment.md) 方式部署镜像。 + +--- + +### 使用`lmdeploy`加速 VLM 模型推理 +> [!NOTE] +> `vllm`和`lmdeploy`对vlm的推理加速效果和使用方式几乎相同,您可以根据实际情况选择其中之一进行安装和使用,但不建议同时安装这两个模块,以避免潜在的依赖冲突。 + +`lmdeploy` 模块提供了对 VLM 模型推理的加速支持,适用于具有 Volta 及以后架构的显卡(8G 显存及以上)。安装此模块可以显著提升模型推理速度。 +```bash +uv pip install "mineru[core,lmdeploy]" +``` +> [!TIP] +> 如在安装包含`lmdeploy`的扩展包过程中发生异常,请参考 [lmdeploy 官方文档](https://lmdeploy.readthedocs.io/en/latest/get_started/installation.html) 尝试解决。 + +--- + +### 安装轻量版client连接兼容openai服务器使用 (适用vlm-http-client模式) +如果您需要在边缘设备上安装轻量版的 client
端以连接兼容 openai 接口的服务端来使用vlm模式,可以安装mineru的基础包,非常轻量,适合在只有cpu和网络连接的设备上使用。 +```bash +uv pip install mineru +mineru -p -o -b vlm-http-client -u http://127.0.0.1:30000 +``` + +--- + +### 安装轻量版client连接兼容openai服务器使用 (适用hybrid-http-client模式) +如果您需要在边缘设备上安装轻量版的 client 端以连接兼容 openai 接口的服务端来使用hybrid模式,可以安装mineru的pipeline扩展包,相对较轻量,可以在只有cpu和网络连接的设备上使用,同时在支持gpu加速的设备上可以更快运行。 +```bash +uv pip install "mineru[pipeline]" +mineru -p -o -b hybrid-http-client -u http://127.0.0.1:30000 +``` diff --git a/docs/zh/quick_start/index.md b/docs/zh/quick_start/index.md new file mode 100644 index 0000000..e66d983 --- /dev/null +++ b/docs/zh/quick_start/index.md @@ -0,0 +1,152 @@ +# 快速入门 + +如果遇到任何安装问题,请先查询 [FAQ](../faq/index.md) + +## 在线体验 + +### 官网在线应用 +官网在线版功能与客户端一致,界面美观,功能丰富,需要登录使用 + +- [![OpenDataLab](https://img.shields.io/badge/webapp_on_mineru.net-blue?logo=data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMTM0IiBoZWlnaHQ9IjEzNCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cGF0aCBkPSJtMTIyLDljMCw1LTQsOS05LDlzLTktNC05LTksNC05LDktOSw5LDQsOSw5eiIgZmlsbD0idXJsKCNhKSIvPjxwYXRoIGQ9Im0xMjIsOWMwLDUtNCw5LTksOXMtOS00LTktOSw0LTksOS05LDksNCw5LDl6IiBmaWxsPSIjMDEwMTAxIi8+PHBhdGggZD0ibTkxLDE4YzAsNS00LDktOSw5cy05LTQtOS05LDQtOSw5LTksOSw0LDksOXoiIGZpbGw9InVybCgjYikiLz48cGF0aCBkPSJtOTEsMThjMCw1LTQsOS05LDlzLTktNC05LTksNC05LDktOSw5LDQsOSw5eiIgZmlsbD0iIzAxMDEwMSIvPjxwYXRoIGZpbGwtcnVsZT0iZXZlbm9kZCIgY2xpcC1ydWxlPSJldmVub2RkIiBkPSJtMzksNjJjMCwxNiw4LDMwLDIwLDM4LDctNiwxMi0xNiwxMi0yNlY0OWMwLTQsMy03LDYtOGw0Ni0xMmM1LTEsMTEsMywxMSw4djMxYzAsMzctMzAsNjYtNjYsNjYtMzcsMC02Ni0zMC02Ni02NlY0NmMwLTQsMy03LDYtOGwyMC02YzUtMSwxMSwzLDExLDh2MjF6bS0yOSw2YzAsMTYsNiwzMCwxNyw0MCwzLDEsNSwxLDgsMSw1LDAsMTAtMSwxNS0zQzM3LDk1LDI5LDc5LDI5LDYyVjQybC0xOSw1djIweiIgZmlsbD0idXJsKCNjKSIvPjxwYXRoIGZpbGwtcnVsZT0iZXZlbm9kZCIgY2xpcC1ydWxlPSJldmVub2RkIiBkPSJtMzksNjJjMCwxNiw4LDMwLDIwLDM4LDctNiwxMi0xNiwxMi0yNlY0OWMwLTQsMy03LDYtOGw0Ni0xMmM1LTEsMTEsMywxMSw4djMxYzAsMzctMzAsNjYtNjYsNjYtMzcsMC02Ni0zMC02Ni02NlY0NmMwLTQsMy03LDYtOGwyMC02YzUtMSwxMSwzLDExLDh2MjF6bS0yOSw2Yz
AsMTYsNiwzMCwxNyw0MCwzLDEsNSwxLDgsMSw1LDAsMTAtMSwxNS0zQzM3LDk1LDI5LDc5LDI5LDYyVjQybC0xOSw1djIweiIgZmlsbD0iIzAxMDEwMSIvPjxkZWZzPjxsaW5lYXJHcmFkaWVudCBpZD0iYSIgeDE9Ijg0IiB5MT0iNDEiIHgyPSI3NSIgeTI9IjEyMCIgZ3JhZGllbnRVbml0cz0idXNlclNwYWNlT25Vc2UiPjxzdG9wIHN0b3AtY29sb3I9IiNmZmYiLz48c3RvcCBvZmZzZXQ9IjEiIHN0b3AtY29sb3I9IiMyZTJlMmUiLz48L2xpbmVhckdyYWRpZW50PjxsaW5lYXJHcmFkaWVudCBpZD0iYiIgeDE9Ijg0IiB5MT0iNDEiIHgyPSI3NSIgeTI9IjEyMCIgZ3JhZGllbnRVbml0cz0idXNlclNwYWNlT25Vc2UiPjxzdG9wIHN0b3AtY29sb3I9IiNmZmYiLz48c3RvcCBvZmZzZXQ9IjEiIHN0b3AtY29sb3I9IiMyZTJlMmUiLz48L2xpbmVhckdyYWRpZW50PjxsaW5lYXJHcmFkaWVudCBpZD0iYyIgeDE9Ijg0IiB5MT0iNDEiIHgyPSI3NSIgeTI9IjEyMCIgZ3JhZGllbnRVbml0cz0idXNlclNwYWNlT25Vc2UiPjxzdG9wIHN0b3AtY29sb3I9IiNmZmYiLz48c3RvcCBvZmZzZXQ9IjEiIHN0b3AtY29sb3I9IiMyZTJlMmUiLz48L2xpbmVhckdyYWRpZW50PjwvZGVmcz48L3N2Zz4=&labelColor=white)](https://mineru.net/OpenSourceTools/Extractor?source=github) + +### 基于Gradio的在线demo +基于gradio开发的webui,界面简洁,仅包含核心解析功能,免登录 + +- [![ModelScope](https://img.shields.io/badge/Demo_on_ModelScope-purple?logo=data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMjIzIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KCiA8Zz4KICA8dGl0bGU+TGF5ZXIgMTwvdGl0bGU+CiAgPHBhdGggaWQ9InN2Z18xNCIgZmlsbD0iIzYyNGFmZiIgZD0ibTAsODkuODRsMjUuNjUsMGwwLDI1LjY0OTk5bC0yNS42NSwwbDAsLTI1LjY0OTk5eiIvPgogIDxwYXRoIGlkPSJzdmdfMTUiIGZpbGw9IiM2MjRhZmYiIGQ9Im05OS4xNCwxMTUuNDlsMjUuNjUsMGwwLDI1LjY1bC0yNS42NSwwbDAsLTI1LjY1eiIvPgogIDxwYXRoIGlkPSJzdmdfMTYiIGZpbGw9IiM2MjRhZmYiIGQ9Im0xNzYuMDksMTQxLjE0bC0yNS42NDk5OSwwbDAsMjIuMTlsNDcuODQsMGwwLC00Ny44NGwtMjIuMTksMGwwLDI1LjY1eiIvPgogIDxwYXRoIGlkPSJzdmdfMTciIGZpbGw9IiMzNmNmZDEiIGQ9Im0xMjQuNzksODkuODRsMjUuNjUsMGwwLDI1LjY0OTk5bC0yNS42NSwwbDAsLTI1LjY0OTk5eiIvPgogIDxwYXRoIGlkPSJzdmdfMTgiIGZpbGw9IiMzNmNmZDEiIGQ9Im0wLDY0LjE5bDI1LjY1LDBsMCwyNS42NWwtMjUuNjUsMGwwLC0yNS42NXoiLz4KICA8cGF0aCBpZD0ic3ZnXzE5IiBmaWxsPSIjNjI0YWZmIiBkPSJtMTk4LjI4LDg5Ljg0bDI1LjY0OTk5LDBsMCwyNS42NDk5OWwtMjUuNjQ5OTksMGwwLC0yNS42NDk5OXoiLz4KICA8cGF0aCBpZD0ic3ZnXzIwIiBmaWxsPSIjMzZj
ZmQxIiBkPSJtMTk4LjI4LDY0LjE5bDI1LjY0OTk5LDBsMCwyNS42NWwtMjUuNjQ5OTksMGwwLC0yNS42NXoiLz4KICA8cGF0aCBpZD0ic3ZnXzIxIiBmaWxsPSIjNjI0YWZmIiBkPSJtMTUwLjQ0LDQybDAsMjIuMTlsMjUuNjQ5OTksMGwwLDI1LjY1bDIyLjE5LDBsMCwtNDcuODRsLTQ3Ljg0LDB6Ii8+CiAgPHBhdGggaWQ9InN2Z18yMiIgZmlsbD0iIzM2Y2ZkMSIgZD0ibTczLjQ5LDg5Ljg0bDI1LjY1LDBsMCwyNS42NDk5OWwtMjUuNjUsMGwwLC0yNS42NDk5OXoiLz4KICA8cGF0aCBpZD0ic3ZnXzIzIiBmaWxsPSIjNjI0YWZmIiBkPSJtNDcuODQsNjQuMTlsMjUuNjUsMGwwLC0yMi4xOWwtNDcuODQsMGwwLDQ3Ljg0bDIyLjE5LDBsMCwtMjUuNjV6Ii8+CiAgPHBhdGggaWQ9InN2Z18yNCIgZmlsbD0iIzYyNGFmZiIgZD0ibTQ3Ljg0LDExNS40OWwtMjIuMTksMGwwLDQ3Ljg0bDQ3Ljg0LDBsMCwtMjIuMTlsLTI1LjY1LDBsMCwtMjUuNjV6Ii8+CiA8L2c+Cjwvc3ZnPg==&labelColor=white)](https://www.modelscope.cn/studios/OpenDataLab/MinerU) +- [![HuggingFace](https://img.shields.io/badge/Demo_on_HuggingFace-yellow.svg?logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAF8AAABYCAMAAACkl9t/AAAAk1BMVEVHcEz/nQv/nQv/nQr/nQv/nQr/nQv/nQv/nQr/wRf/txT/pg7/yRr/rBD/zRz/ngv/oAz/zhz/nwv/txT/ngv/0B3+zBz/nQv/0h7/wxn/vRb/thXkuiT/rxH/pxD/ogzcqyf/nQvTlSz/czCxky7/SjifdjT/Mj3+Mj3wMj15aTnDNz+DSD9RTUBsP0FRO0Q6O0WyIxEIAAAAGHRSTlMADB8zSWF3krDDw8TJ1NbX5efv8ff9/fxKDJ9uAAAGKklEQVR42u2Z63qjOAyGC4RwCOfB2JAGqrSb2WnTw/1f3UaWcSGYNKTdf/P+mOkTrE+yJBulvfvLT2A5ruenaVHyIks33npl/6C4s/ZLAM45SOi/1FtZPyFur1OYofBX3w7d54Bxm+E8db+nDr12ttmESZ4zludJEG5S7TO72YPlKZFyE+YCYUJTBZsMiNS5Sd7NlDmKM2Eg2JQg8awbglfqgbhArjxkS7dgp2RH6hc9AMLdZYUtZN5DJr4molC8BfKrEkPKEnEVjLbgW1fLy77ZVOJagoIcLIl+IxaQZGjiX597HopF5CkaXVMDO9Pyix3AFV3kw4lQLCbHuMovz8FallbcQIJ5Ta0vks9RnolbCK84BtjKRS5uA43hYoZcOBGIG2Epbv6CvFVQ8m8loh66WNySsnN7htL58LNp+NXT8/PhXiBXPMjLSxtwp8W9f/1AngRierBkA+kk/IpUSOeKByzn8y3kAAAfh//0oXgV4roHm/kz4E2z//zRc3/lgwBzbM2mJxQEa5pqgX7d1L0htrhx7LKxOZlKbwcAWyEOWqYSI8YPtgDQVjpB5nvaHaSnBaQSD6hweDi8PosxD6/PT09YY3xQA7LTCTKfYX+QHpA0GCcqmEHvr/cyfKQTEuwgbs2kPxJEB0iNjfJcCTPyocx+A0griHSmADiC91oNGVwJ69RudYe65vJmoqfpul0lrqXadW0jFKH5BKwAeCq+Den7s+3zfRJzA61/Uj/9H/VzLKTx9jFPPdXeeP+L7WEvDLAKAIoF8bPTKT0+TM7W8ePj3Rz/Yn3kOAp2f1Kf0Weony7pn/cPydvhQYV+eFOfmOu7VB/ViPe34/EN
3RFHY/yRuT8ddCtMPH/McBAT5s+vRde/gf2c/sPsjLK+m5IBQF5tO+h2tTlBGnP6693JdsvofjOPnnEHkh2TnV/X1fBl9S5zrwuwF8NFrAVJVwCAPTe8gaJlomqlp0pv4Pjn98tJ/t/fL++6unpR1YGC2n/KCoa0tTLoKiEeUPDl94nj+5/Tv3/eT5vBQ60X1S0oZr+IWRR8Ldhu7AlLjPISlJcO9vrFotky9SpzDequlwEir5beYAc0R7D9KS1DXva0jhYRDXoExPdc6yw5GShkZXe9QdO/uOvHofxjrV/TNS6iMJS+4TcSTgk9n5agJdBQbB//IfF/HpvPt3Tbi7b6I6K0R72p6ajryEJrENW2bbeVUGjfgoals4L443c7BEE4mJO2SpbRngxQrAKRudRzGQ8jVOL2qDVjjI8K1gc3TIJ5KiFZ1q+gdsARPB4NQS4AjwVSt72DSoXNyOWUrU5mQ9nRYyjp89Xo7oRI6Bga9QNT1mQ/ptaJq5T/7WcgAZywR/XlPGAUDdet3LE+qS0TI+g+aJU8MIqjo0Kx8Ly+maxLjJmjQ18rA0YCkxLQbUZP1WqdmyQGJLUm7VnQFqodmXSqmRrdVpqdzk5LvmvgtEcW8PMGdaS23EOWyDVbACZzUJPaqMbjDxpA3Qrgl0AikimGDbqmyT8P8NOYiqrldF8rX+YN7TopX4UoHuSCYY7cgX4gHwclQKl1zhx0THf+tCAUValzjI7Wg9EhptrkIcfIJjA94evOn8B2eHaVzvBrnl2ig0So6hvPaz0IGcOvTHvUIlE2+prqAxLSQxZlU2stql1NqCCLdIiIN/i1DBEHUoElM9dBravbiAnKqgpi4IBkw+utSPIoBijDXJipSVV7MpOEJUAc5Qmm3BnUN+w3hteEieYKfRZSIUcXKMVf0u5wD4EwsUNVvZOtUT7A2GkffHjByWpHqvRBYrTV72a6j8zZ6W0DTE86Hn04bmyWX3Ri9WH7ZU6Q7h+ZHo0nHUAcsQvVhXRDZHChwiyi/hnPuOsSEF6Exk3o6Y9DT1eZ+6cASXk2Y9k+6EOQMDGm6WBK10wOQJCBwren86cPPWUcRAnTVjGcU1LBgs9FURiX/e6479yZcLwCBmTxiawEwrOcleuu12t3tbLv/N4RLYIBhYexm7Fcn4OJcn0+zc+s8/VfPeddZHAGN6TT8eGczHdR/Gts1/MzDkThr23zqrVfAMFT33Nx1RJsx1k5zuWILLnG/vsH+Fv5D4NTVcp1Gzo8AAAAAElFTkSuQmCC&labelColor=white)](https://huggingface.co/spaces/opendatalab/MinerU) + +## 本地部署 + +> [!WARNING] +> **安装前必看——软硬件环境支持说明** +> +> 为了确保项目的稳定性和可靠性,我们在开发过程中仅对特定的软硬件环境进行优化和测试。这样当用户在推荐的系统配置上部署和运行项目时,能够获得最佳的性能表现和最少的兼容性问题。 +> +> 通过集中资源和精力于主线环境,我们团队能够更高效地解决潜在的BUG,及时开发新功能。 +> +> 在非主线环境中,由于硬件、软件配置的多样性,以及第三方依赖项的兼容性问题,我们无法100%保证项目的完全可用性。因此,对于希望在非推荐环境中使用本项目的用户,我们建议先仔细阅读文档以及FAQ,大多数问题已经在FAQ中有对应的解决方案,除此之外我们鼓励社区反馈问题,以便我们能够逐步扩大支持范围。 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
解析后端 | pipeline | *-auto-engine | *-http-client
hybrid | vlm | hybrid | vlm
后端特性 | 兼容性好 | 硬件配置要求较高 | 适用于OpenAI兼容服务器<sup>2</sup>
精度指标<sup>1</sup> | 82+ | 90+
操作系统 | Linux<sup>3</sup> / Windows<sup>4</sup> / macOS<sup>5</sup>
纯CPU平台支持
GPU加速支持 | Volta及以后架构GPU或Apple Silicon | 不需要
显存最低要求 | 6GB | 10GB | 8GB | 3GB
内存要求 | 最低16GB以上,推荐32GB以上 | 8GB
磁盘空间要求 | 20GB以上,推荐使用SSD | 2GB
python版本 | 3.10-3.13
+ +1 精度指标为OmniDocBench (v1.5)的End-to-End Evaluation Overall分数,基于`MinerU`最新版本测试 +2 兼容OpenAI API的服务器,如通过`vLLM`/`SGLang`/`LMDeploy`等推理框架部署的本地模型服务器或远程模型服务 +3 Linux仅支持2019年及以后发行版 +4 由于关键依赖`ray`未能在windows平台支持Python 3.13,故仅支持至3.10~3.12版本 +5 macOS 需使用14.0以上版本 + + +> [!TIP] +> 除以上主流环境与平台外,我们也收录了一些社区用户反馈的其他平台支持情况,详情请参考[其他加速卡适配](https://opendatalab.github.io/MinerU/zh/usage/)。 +> 如果您有意将自己的环境适配经验分享给社区,欢迎通过[show-and-tell](https://github.com/opendatalab/MinerU/discussions/categories/show-and-tell)提交或提交PR至[其他加速卡适配](https://github.com/opendatalab/MinerU/tree/master/docs/zh/usage/acceleration_cards)文档。 + + +### 安装 MinerU + +#### 使用pip或uv安装MinerU +```bash +pip install --upgrade pip -i https://mirrors.aliyun.com/pypi/simple +pip install uv -i https://mirrors.aliyun.com/pypi/simple +uv pip install -U "mineru[all]" -i https://mirrors.aliyun.com/pypi/simple +``` + +#### 通过源码安装MinerU +```bash +git clone https://github.com/opendatalab/MinerU.git +cd MinerU +uv pip install -e .[all] -i https://mirrors.aliyun.com/pypi/simple +``` + +> [!TIP] +> `mineru[all]`包含所有核心功能,兼容Windows / Linux / macOS系统,适合绝大多数用户。 +> 如果您需要指定vlm模型的推理框架,或是仅准备在边缘设备安装轻量版client端,可以参考文档[扩展模块安装指南](https://opendatalab.github.io/MinerU/zh/quick_start/extension_modules/)。 + +--- + +#### 使用docker部署MinerU +MinerU提供了便捷的docker部署方式,这有助于快速搭建环境并解决一些棘手的环境兼容问题。 +您可以在文档中获取[Docker部署说明](./docker_deployment.md)。 + +--- + +### 使用 MinerU + +>[!TIP] +>默认使用托管在`huggingface`的模型进行解析,首次使用时会自动下载所需模型文件,后续使用将直接加载本地缓存的模型。如果您无法访问`huggingface`,可以通过以下命令切换至国内镜像源: +>```bash +>export MINERU_MODEL_SOURCE=modelscope +>``` + +如果您的设备满足上表中GPU加速的条件,可以使用简单的命令行进行文档解析: +```bash +mineru -p <input_path> -o <output_path> +``` +如果您的设备不满足GPU加速条件,可以指定后端为`pipeline`,以在纯CPU环境下运行: +```bash +mineru -p <input_path> -o <output_path> -b pipeline +``` + +您可以通过命令行、API、WebUI等多种方式使用MinerU进行PDF解析,具体使用方法请参考[使用指南](../usage/index.md)。 \ No newline at end of file diff --git a/docs/zh/reference/changelog.md b/docs/zh/reference/changelog.md new file mode 100644 index 0000000..a347932 --- /dev/null +++ b/docs/zh/reference/changelog.md @@ -0,0
+1,466 @@ +# 更新日志 + +本文档记录了MinerU项目2.6.7及更早版本的更新历史。最新版本的更新请查看项目[README](https://github.com/opendatalab/MinerU/blob/master/README_zh-CN.md)。 + +--- + +## 2.6 系列版本 + +### 2.6.7 (2025/12/12) + +- bug修复: #4168 + +### 2.6.6 (2025/12/02) + +**`Ascend`适配优化** + +- 优化命令行工具初始化流程,使Ascend适配方案中`vlm-vllm-engine`后端在命令行工具中可用。 +- 为Atlas 300I Duo(310p)设备更新适配文档。 + +**`mineru-api`工具优化** + +- 为`mineru-api`接口参数增加描述性文本,优化接口文档可读性。 +- 可通过环境变量`MINERU_API_ENABLE_FASTAPI_DOCS`控制是否启用自动生成的接口文档页面,默认为启用。 +- 为`vlm-vllm-async-engine`、`vlm-lmdeploy-engine`、`vlm-http-client`后端增加并发数配置选项,用户可通过环境变量`MINERU_API_MAX_CONCURRENT_REQUESTS`控制api接口的最大并发请求数,默认为不限制数量。 + +### 2.6.5 (2025/11/26) + +- 增加新后端`vlm-lmdeploy-engine`支持,使用方式与`vlm-vllm-(async)engine`类似,但使用`lmdeploy`作为推理引擎,与`vllm`相比额外支持Windows平台原生推理加速。 +- 新增国产算力平台`昇腾/npu`、`平头哥/ppu`、`沐曦/maca`的适配支持,用户可在对应平台上使用`pipeline`与`vlm`模型,并使用`vllm`/`lmdeploy`引擎加速vlm模型推理,具体使用方式请参考[其他加速卡适配](https://opendatalab.github.io/MinerU/zh/usage/)。 + - 国产平台适配不易,我们已尽量确保适配的完整性和稳定性,但仍可能存在一些稳定性/兼容问题与精度对齐问题,请大家根据适配文档页面内红绿灯情况自行选择合适的环境与场景进行使用。 + - 如在使用国产化平台适配方案的过程中遇到任何文档未提及的问题,为便于其他用户查找解决方案,请在discussions的[指定帖子](https://github.com/opendatalab/MinerU/discussions/4064)中进行反馈。 + +### 2.6.4 (2025/11/04) + +- 为pdf渲染图片增加超时配置,默认为300秒,可通过环境变量`MINERU_PDF_RENDER_TIMEOUT`进行配置,防止部分异常pdf文件导致渲染过程长时间阻塞。 +- 为onnx模型增加cpu线程数配置选项,默认为系统cpu核心数,可通过环境变量`MINERU_INTRA_OP_NUM_THREADS`和`MINERU_INTER_OP_NUM_THREADS`进行配置,以减少高并发场景下对cpu资源的抢占冲突。 + +### 2.6.3 (2025/10/31) + +- 增加新后端`vlm-mlx-engine`支持,在Apple Silicon设备上支持使用`MLX`加速`MinerU2.5`模型推理,相比`vlm-transformers`后端,`vlm-mlx-engine`后端速度提升100%~200%。 +- bug修复: #3849 #3859 + +### 2.6.2 (2025/10/24) + +**`pipeline`后端优化** + +- 增加对中文公式的实验性支持,可通过配置环境变量`export MINERU_FORMULA_CH_SUPPORT=1`开启。该功能可能会导致MFR速率略微下降、部分长公式识别失败等问题,建议仅在需要解析中文公式的场景下开启。如需关闭该功能,可将环境变量设置为`0`。 +- `OCR`速度大幅提升200%~300%,感谢 [@cjsdurj](https://github.com/cjsdurj) 提供的优化方案 +- `OCR`模型优化拉丁文识别的准度和广度,并更新西里尔文(cyrillic)、阿拉伯文(arabic)、天城文(devanagari)、泰卢固语(te)、泰米尔语(ta)语系至`ppocr-v5`版本,精度相比上代模型提升40%以上 + +**`vlm`后端优化** + +-
`table_caption`、`table_footnote`匹配逻辑优化,提升页内多张连续表场景下的表格标题和脚注的匹配准确率和阅读顺序合理性 +- 优化使用`vllm`后端时高并发时的cpu资源占用,降低服务端压力 +- 适配`vllm`0.11.0版本 + +**通用优化** + +- 跨页表格合并效果优化,新增跨页续表合并支持,提升在多列合并场景下的表格合并效果 +- 为表格合并功能增加环境变量配置选项`MINERU_TABLE_MERGE_ENABLE`,表格合并功能默认开启,可通过设置该变量为`0`来关闭表格合并功能 + +--- + +## 2.5 系列版本 + +### 2.5.4 (2025/09/26) + +- 🎉🎉 MinerU2.5[技术报告](https://arxiv.org/abs/2509.22186)现已发布,欢迎阅读全面了解其模型架构、训练策略、数据工程和评测结果。 +- 修复部分`pdf`文件被识别成`ai`文件导致无法解析的问题 + +### 2.5.3 (2025/09/20) + +- 依赖版本范围调整,使得Turing及更早架构显卡可以使用vLLM加速推理MinerU2.5模型。 +- `pipeline`后端对torch 2.8.0的一些兼容性修复。 +- 降低vLLM异步后端默认的并发数,降低服务端压力以避免高压导致的链接关闭问题。 +- 更多兼容性相关内容详见[公告](https://github.com/opendatalab/MinerU/discussions/3547) + +### 2.5.2 (2025/09/19) + +我们正式发布 MinerU2.5,当前最强文档解析多模态大模型。仅凭 1.2B 参数,MinerU2.5 在 OmniDocBench 文档解析评测中,精度已全面超越 Gemini2.5-Pro、GPT-4o、Qwen2.5-VL-72B等顶级多模态大模型,并显著领先于主流文档解析专用模型(如 dots.ocr, MonkeyOCR, PP-StructureV3 等)。 + +模型已发布至[HuggingFace](https://huggingface.co/opendatalab/MinerU2.5-2509-1.2B)和[ModelScope](https://modelscope.cn/models/opendatalab/MinerU2.5-2509-1.2B)平台,欢迎大家下载使用! 
+ +**核心亮点** + +- 极致能效,性能SOTA: 以 1.2B 的轻量化规模,实现了超越百亿乃至千亿级模型的SOTA性能,重新定义了文档解析的能效比。 +- 先进架构,全面领先: 通过 "两阶段推理" (解耦布局分析与内容识别) 与 原生高分辨率架构 的结合,在布局分析、文本识别、公式识别、表格识别及阅读顺序五大方面均达到 SOTA 水平。 + +**关键能力提升** + +- 布局检测: 结果更完整,精准覆盖页眉、页脚、页码等非正文内容;同时提供更精准的元素定位与更自然的格式还原(如列表、参考文献)。 +- 表格解析: 大幅优化了对旋转表格、无线/少线表、以及长难表格的解析能力。 +- 公式识别: 显著提升中英混合公式及复杂长公式的识别准确率,大幅改善数学类文档解析能力。 + +**仓库调整** + +此外,伴随vlm 2.5的发布,我们对仓库做出一些调整: + +- vlm后端升级至2.5版本,支持MinerU2.5模型,不再兼容MinerU2.0-2505-0.9B模型,最后一个支持2.0模型的版本为mineru-2.2.2。 +- vlm推理相关代码已移至[mineru_vl_utils](https://github.com/opendatalab/mineru-vl-utils),降低与mineru主仓库的耦合度,便于后续独立迭代。 +- vlm加速推理框架从`sglang`切换至`vllm`,并实现对vllm生态的完全兼容,使得用户可以在任何支持vllm框架的平台上使用MinerU2.5模型并加速推理。 +- 由于vlm模型的重大升级,支持更多layout type,因此我们对解析的中间文件`middle.json`和结果文件`content_list.json`的结构做出一些调整,请参考[文档](https://opendatalab.github.io/MinerU/zh/reference/output_files/)了解详情。 + +**其他仓库优化** + +- 移除对输入文件的后缀名白名单校验,当输入文件为PDF文档或图片时,对文件的后缀名不再有要求,提升易用性。 + +--- + +## 2.2 - 2.4 系列版本 + +### 2.2.2 (2025/09/10) + +- 修复新的表格识别模型在部分表格解析失败时影响整体解析任务的问题 + +### 2.2.1 (2025/09/08) + +- 修复使用模型下载命令时,部分新增模型未下载的问题 + +### 2.2.0 (2025/09/05) + +**主要更新** + +- 在这个版本我们重点提升了表格的解析精度,通过引入新的[有线表识别模型](https://github.com/RapidAI/TableStructureRec)和全新的混合表格结构解析算法,显著提升了`pipeline`后端的表格识别能力。 +- 另外我们增加了对跨页表格合并的支持,这一功能同时支持`pipeline`和`vlm`后端,进一步提升了表格解析的完整性和准确性。 + +**其他更新** + +- `pipeline`后端增加270度旋转的表格解析能力,现已支持0/90/270度三个方向的表格解析 +- `pipeline`增加对泰文、希腊文的ocr能力支持,并更新了英文ocr模型至最新,英文识别精度提升11%,泰文识别模型精度 82.68%,希腊文识别模型精度 89.28%(by PPOCRv5) +- 在输出的`content_list.json`中增加了`bbox`字段(映射至0-1000范围内),方便用户直接获取每个内容块的位置信息 +- 移除`pipeline_old_linux`安装可选项,不再支持老版本的Linux系统如`Centos 7`等,以便对`uv`的`sync`/`run`等命令进行更好的支持 + +--- + +## 2.1 系列版本 + +### 2.1.10 (2025/08/01) + +- 修复`pipeline`后端因block覆盖导致的解析结果与预期不符 #3232 + +### 2.1.9 (2025/07/30) + +- `transformers` 4.54.1 版本适配 + +### 2.1.8 (2025/07/28) + +- `sglang` 0.4.9.post5 版本适配 + +### 2.1.7 (2025/07/27) + +- `transformers` 4.54.0 版本适配 + +### 2.1.6 (2025/07/26) + +- 修复`vlm`后端解析部分手写文档时的表格异常问题 +- 修复文档旋转时可视化框位置漂移问题 #3175 + +### 2.1.5 
(2025/07/24) + +- `sglang` 0.4.9 版本适配,同步升级dockerfile基础镜像为sglang 0.4.9.post3 + +### 2.1.4 (2025/07/23) + +**bug修复** + +- 修复`pipeline`后端中`MFR`步骤在某些情况下显存消耗过大的问题 #2771 +- 修复某些情况下`image`/`table`与`caption`/`footnote`匹配不准确的问题 #3129 + +### 2.1.1 (2025/07/16) + +**bug修复** + +- 修复`pipeline`在某些情况可能发生的文本块内容丢失问题 #3005 +- 修复`sglang-client`需要安装`torch`等不必要的包的问题 #2968 +- 更新`dockerfile`以修复linux字体缺失导致的解析文本内容不完整问题 #2915 + +**易用性更新** + +- 更新`compose.yaml`,便于用户直接启动`sglang-server`、`mineru-api`、`mineru-gradio`服务 +- 启用全新的[在线文档站点](https://opendatalab.github.io/MinerU/zh/),简化readme,提供更好的文档体验 + +### 2.1.0 (2025/07/05) + +这是 MinerU 2 的第一个大版本更新,包含了大量新功能和改进,包含众多性能优化、体验优化和bug修复,具体更新内容如下: + +**性能优化** + +- 大幅提升某些特定分辨率(长边2000像素左右)文档的预处理速度 +- 大幅提升`pipeline`后端批量处理大量页数较少(<10)文档时的后处理速度 +- `pipeline`后端的layout分析速度提升约20% + +**体验优化** + +- 内置开箱即用的`fastapi服务`和`gradio webui`,详细使用方法请参考[文档](https://opendatalab.github.io/MinerU/zh/usage/quick_usage/#apiwebuisglang-clientserver) +- `sglang`适配`0.4.8`版本,大幅降低`vlm-sglang`后端的显存要求,最低可在`8G显存`(Turing及以后架构)的显卡上运行 +- 对所有命令增加`sglang`的参数透传,使得`sglang-engine`后端可以与`sglang-server`一致,接收`sglang`的所有参数 +- 支持基于配置文件的功能扩展,包含`自定义公式标识符`、`开启标题分级功能`、`自定义本地模型目录`,详细使用方法请参考[文档](https://opendatalab.github.io/MinerU/zh/usage/quick_usage/#mineru_1) + +**新特性** + +- `pipeline`后端更新 PP-OCRv5 多语种文本识别模型,支持法语、西班牙语、葡萄牙语、俄语、韩语等 37 种语言的文字识别,平均精度涨幅超30%。[详情](https://paddlepaddle.github.io/PaddleOCR/latest/version3.x/algorithm/PP-OCRv5/PP-OCRv5_multi_languages.html) +- `pipeline`后端增加对竖排文本的有限支持 + +--- + +## 2.0 系列版本 + +### 2.0.6 (2025/06/20) + +- 修复`vlm`模式下,某些偶发的无效块内容导致解析中断问题 +- 修复`vlm`模式下,某些不完整的表结构导致的解析中断问题 + +### 2.0.5 (2025/06/17) + +- 修复了`sglang-client`模式下依然需要下载模型的问题 +- 修复了`sglang-client`模式需要依赖`torch`等实际运行不需要的包的问题 +- 修复了同一进程内尝试通过多个url启动多个`sglang-client`实例时,只有第一个生效的问题 + +### 2.0.3 (2025/06/15) + +- 修复了当下载模型类型设置为`all`时,配置文件出现键值更新错误的问题 +- 修复了命令行模式下公式和表格功能开关不生效导致功能无法关闭的问题 +- 修复了`sglang-engine`模式下,0.4.7版本sglang的兼容性问题 +- 更新了sglang环境下部署完整版MinerU的Dockerfile和相关安装文档 + +### 2.0.0 (2025/06/13) + +**全新架构** + +MinerU 
2.0 在代码结构和交互方式上进行了深度重构,显著提升了系统的易用性、可维护性与扩展能力。 + +- **去除第三方依赖限制**:彻底移除对 `pymupdf` 的依赖,推动项目向更开放、合规的开源方向迈进。 +- **开箱即用,配置便捷**:无需手动编辑 JSON 配置文件,绝大多数参数已支持命令行或 API 直接设置。 +- **模型自动管理**:新增模型自动下载与更新机制,用户无需手动干预即可完成模型部署。 +- **离线部署友好**:提供内置模型下载命令,支持完全断网环境下的部署需求。 +- **代码结构精简**:移除数千行冗余代码,简化类继承逻辑,显著提升代码可读性与开发效率。 +- **统一中间格式输出**:采用标准化的 `middle_json` 格式,兼容多数基于该格式的二次开发场景,确保生态业务无缝迁移。 + +**全新模型** + +MinerU 2.0 集成了我们最新研发的小参数量、高性能多模态文档解析模型,实现端到端的高速、高精度文档理解。 + +- **小模型,大能力**:模型参数不足 1B,却在解析精度上超越传统 72B 级别的视觉语言模型(VLM)。 +- **多功能合一**:单模型覆盖多语言识别、手写识别、版面分析、表格解析、公式识别、阅读顺序排序等核心任务。 +- **极致推理速度**:在单卡 NVIDIA 4090 上通过 `sglang` 加速,达到峰值吞吐量超过 10,000 token/s,轻松应对大规模文档处理需求。 +- **在线体验**:您可以在[MinerU.net](https://mineru.net/OpenSourceTools/Extractor)、[Hugging Face](https://huggingface.co/spaces/opendatalab/MinerU), 以及[ModelScope](https://www.modelscope.cn/studios/OpenDataLab/MinerU)体验我们的全新VLM模型 + +**不兼容变更说明** + +为提升整体架构合理性与长期可维护性,本版本包含部分不兼容的变更: + +- Python 包名从 `magic-pdf` 更改为 `mineru`,命令行工具也由 `magic-pdf` 改为 `mineru`,请同步更新脚本与调用命令。 +- 出于对系统模块化设计与生态一致性的考虑,MinerU 2.0 已不再内置 LibreOffice 文档转换模块。如需处理 Office 文档,建议通过独立部署的 LibreOffice 服务先行转换为 PDF 格式,再进行后续解析操作。 + +--- + +## 1.x 系列历史版本 + +### 1.3.12 (2025/05/24) + +增加ppocrv5模型的支持,将`ch_server`模型更新为`PP-OCRv5_rec_server`,`ch_lite`模型更新为`PP-OCRv5_rec_mobile`(需更新模型) + +- 在测试中,发现ppocrv5(server)对手写文档效果有一定提升,但在其余类别文档的精度略差于v4_server_doc,因此默认的ch模型保持不变,仍为`PP-OCRv4_server_rec_doc`。 +- 由于ppocrv5强化了手写场景和特殊字符的识别能力,因此您可以在日繁混合场景以及手写文档场景下手动选择使用ppocrv5模型 +- 您可通过lang参数`lang='ch_server'`(python api)或`--lang ch_server`(命令行)自行选择相应的模型: + - `ch` :`PP-OCRv4_rec_server_doc`(默认)(中英日繁混合/1.5w字典) + - `ch_server` :`PP-OCRv5_rec_server`(中英日繁混合+手写场景/1.8w字典) + - `ch_lite` :`PP-OCRv5_rec_mobile`(中英日繁混合+手写场景/1.8w字典) + - `ch_server_v4` :`PP-OCRv4_rec_server`(中英混合/6k字典) + - `ch_lite_v4` :`PP-OCRv4_rec_mobile`(中英混合/6k字典) + +增加手写文档的支持,通过优化layout对手写文本区域的识别,现已支持手写文档的解析 + +- 默认支持此功能,无需额外配置 +- 可以参考上述说明,手动选择ppocrv5模型以获得更好的手写文档解析效果 + +`huggingface`和`modelscope`的demo已更新为支持手写识别和ppocrv5模型的版本,可自行在线体验 + +### 1.3.10 
(2025/04/29) + +- 支持使用自定义公式标识符,可通过修改用户目录下的`magic-pdf.json`文件中的`latex-delimiter-config`项实现。 + +### 1.3.9 (2025/04/27) + +- 优化公式解析功能,提升公式渲染的成功率 + +### 1.3.8 (2025/04/23) + +`ocr`默认模型(`ch`)更新为`PP-OCRv4_server_rec_doc`(需更新模型) + +- `PP-OCRv4_server_rec_doc`是在`PP-OCRv4_server_rec`的基础上,在更多中文文档数据和PP-OCR训练数据的混合数据训练而成,增加了部分繁体字、日文、特殊字符的识别能力,可支持识别的字符为1.5万+,除文档相关的文字识别能力提升外,也同时提升了通用文字的识别能力。 +- [PP-OCRv4_server_rec_doc/PP-OCRv4_server_rec/PP-OCRv4_mobile_rec 性能对比](https://paddlepaddle.github.io/PaddleX/latest/module_usage/tutorials/ocr_modules/text_recognition.html#_3) +- 经验证,`PP-OCRv4_server_rec_doc`模型在`中英日繁`单种语言或多种语言混合场景均有明显精度提升,且速度与`PP-OCRv4_server_rec`相当,适合绝大部分场景使用。 +- `PP-OCRv4_server_rec_doc`在小部分纯英文场景可能会发生单词粘连问题,`PP-OCRv4_server_rec`则在此场景下表现更好,因此我们保留了`PP-OCRv4_server_rec`模型,用户可通过增加参数`lang='ch_server'`(python api)或`--lang ch_server`(命令行)调用。 + +### 1.3.7 (2025/04/22) + +- 修复表格解析模型初始化时lang参数失效的问题 +- 修复在`cpu`模式下ocr和表格解析速度大幅下降的问题 + +### 1.3.4 (2025/04/16) + +- 通过移除一些无用的块,小幅提升了ocr-det的速度 +- 修复部分情况下由footnote导致的页面内排序错误 + +### 1.3.2 (2025/04/12) + +- 修复了windows系统下,在python3.13环境安装时一些依赖包版本不兼容的问题 +- 优化批量推理时的内存占用 +- 优化旋转90度表格的解析效果 +- 优化财报样本中超大表格的解析效果 +- 修复了在未指定OCR语言时,英文文本区域偶尔出现的单词黏连问题(需要更新模型) + +### 1.3.1 (2025/04/08) + +修复了一些兼容问题 + +- 支持python 3.13 +- 为部分过时的linux系统(如centos7)做出最后适配,并不再保证后续版本的继续支持,[安装说明](https://github.com/opendatalab/MinerU/issues/1004) + +### 1.3.0 (2025/04/03) + +**安装与兼容性优化** + +- 通过移除layout中`layoutlmv3`的使用,解决了由`detectron2`导致的兼容问题 +- torch版本兼容扩展到2.2~2.6(2.5除外) +- cuda兼容支持11.8/12.4/12.6/12.8(cuda版本由torch决定),解决部分用户50系显卡与H系显卡的兼容问题 +- python兼容版本扩展到3.10~3.12,解决了在非3.10环境下安装时自动降级到0.6.1的问题 +- 优化离线部署流程,部署成功后不需要联网下载任何模型文件 + +**性能优化** + +- 通过支持多个pdf文件的batch处理([脚本样例](demo/batch_demo.py)),提升了批量小文件的解析速度 (与1.0.1版本相比,公式解析速度最高提升超过1400%,整体解析速度最高提升超过500%) +- 通过优化mfr模型的加载和使用,降低了显存占用并提升了解析速度(需重新执行[模型下载流程](docs/how_to_download_models_zh_cn.md)以获得模型文件的增量更新) +- 优化显存占用,最低仅需6GB即可运行本项目 +- 优化了在mps设备上的运行速度 + +**解析效果优化** + +- mfr模型更新到`unimernet(2503)`,解决多行公式中换行丢失的问题 + +**易用性优化** + +- 
通过使用`paddleocr2torch`,完全替代`paddle`框架以及`paddleocr`在项目中的使用,解决了`paddle`和`torch`的冲突问题,和由于`paddle`框架导致的线程不安全问题 +- 解析过程增加实时进度条显示,精准把握解析进度,让等待不再痛苦 + +### 1.2.1 (2025/03/03) + +修复了一些问题 + +- 修复在字母与数字的全角转半角操作时对标点符号的影响 +- 修复在某些情况下caption的匹配不准确问题 +- 修复在某些情况下的公式span丢失问题 + +### 1.2.0 (2025/02/24) + +这个版本我们修复了一些问题,提升了解析的效率与精度: + +**性能优化** + +- auto模式下pdf文档的分类速度提升 + +**解析优化** + +- 优化对包含水印文档的解析逻辑,显著提升包含水印文档的解析效果 +- 改进了单页内多个图像/表格与caption的匹配逻辑,提升了复杂布局下图文匹配的准确性 + +**问题修复** + +- 修复在某些情况下图片/表格span被填充进textblock导致的异常 +- 修复在某些情况下标题block为空的问题 + +### 1.1.0 (2025/01/22) + +在这个版本我们重点提升了解析的精度与效率: + +**模型能力升级**(需重新执行 [模型下载流程](https://github.com/opendatalab/MinerU/docs/how_to_download_models_zh_cn.md) 以获得模型文件的增量更新) + +- 布局识别模型升级到最新的 `doclayout_yolo(2501)` 模型,提升了layout识别精度 +- 公式解析模型升级到最新的 `unimernet(2501)` 模型,提升了公式识别精度 + +**性能优化** + +- 在配置满足一定条件(显存16GB+)的设备上,通过优化资源占用和重构处理流水线,整体解析速度提升50%以上 + +**解析效果优化** + +- 在线demo([mineru.net](https://mineru.net/OpenSourceTools/Extractor) / [huggingface](https://huggingface.co/spaces/opendatalab/MinerU) / [modelscope](https://www.modelscope.cn/studios/OpenDataLab/MinerU))上新增标题分级功能(测试版本,默认开启),支持对标题进行分级,提升文档结构化程度 + +### 1.0.1 (2025/01/10) + +这是我们的第一个正式版本,在这个版本中,我们通过大量重构带来了全新的API接口和更广泛的兼容性,以及全新的自动语言识别功能: + +**全新API接口** + +- 对于数据侧API,我们引入了Dataset类,旨在提供一个强大而灵活的数据处理框架。该框架当前支持包括图像(.jpg及.png)、PDF、Word(.doc及.docx)、以及PowerPoint(.ppt及.pptx)在内的多种文档格式,确保了从简单到复杂的数据处理任务都能得到有效的支持。 +- 针对用户侧API,我们将MinerU的处理流程精心设计为一系列可组合的Stage阶段。每个Stage代表了一个特定的处理步骤,用户可以根据自身需求自由地定义新的Stage,并通过创造性地组合这些阶段来定制专属的数据处理流程。 + +**更广泛的兼容性适配** + +- 通过优化依赖环境和配置项,确保在ARM架构的Linux系统上能够稳定高效运行。 +- 深度适配华为昇腾NPU加速,积极响应信创要求,提供自主可控的高性能计算能力,助力人工智能应用平台的国产化应用与发展。 [NPU加速教程](https://github.com/opendatalab/MinerU/docs/README_Ascend_NPU_Acceleration_zh_CN.md) + +**自动语言识别** + +- 通过引入全新的语言识别模型, 在文档解析中将 `lang` 配置为 `auto`,即可自动选择合适的OCR语言模型,提升扫描类文档解析的准确性。 + +--- + +## 0.x 系列历史版本 + +### 0.10.0 (2024/11/22) + +通过引入混合OCR文本提取能力: + +- 在公式密集、span区域不规范、部分文本使用图像表现等复杂文本分布场景下获得解析效果的显著提升 +- 同时具备文本模式内容提取准确、速度更快与OCR模式span/line区域识别更准的双重优势 + +### 
0.9.3 (2024/11/15) + +为表格识别功能接入了[RapidTable](https://github.com/RapidAI/RapidTable),单表解析速度提升10倍以上,准确率更高,显存占用更低 + +### 0.9.2 (2024/11/06) + +为表格识别功能接入了[StructTable-InternVL2-1B](https://huggingface.co/U4R/StructTable-InternVL2-1B)模型 + +### 0.9.0 (2024/10/31) + +这是我们进行了大量代码重构的全新版本,解决了众多问题,提升了性能,降低了硬件需求,并提供了更丰富的易用性: + +- 重构排序模块代码,使用 [layoutreader](https://github.com/ppaanngggg/layoutreader) 进行阅读顺序排序,确保在各种排版下都能实现极高准确率 +- 重构段落拼接模块,在跨栏、跨页、跨图、跨表情况下均能实现良好的段落拼接效果 +- 重构列表和目录识别功能,极大提升列表块和目录块识别的准确率及对应文本段落的解析效果 +- 重构图、表与描述性文本的匹配逻辑,大幅提升 caption 和 footnote 与图表的匹配准确率,并将描述性文本的丢失率降至接近0 +- 增加 OCR 的多语言支持,支持 84 种语言的检测与识别,语言支持列表详见 [OCR 语言支持列表](https://paddlepaddle.github.io/PaddleOCR/latest/ppocr/blog/multi_languages.html#5) +- 增加显存回收逻辑及其他显存优化措施,大幅降低显存使用需求。开启除表格加速外的全部加速功能(layout/公式/OCR)的显存需求从16GB降至8GB,开启全部加速功能的显存需求从24GB降至10GB +- 优化配置文件的功能开关,增加独立的公式检测开关,无需公式检测时可大幅提升速度和解析效果 +- 集成 [PDF-Extract-Kit 1.0](https://github.com/opendatalab/PDF-Extract-Kit) + - 加入自研的 `doclayout_yolo` 模型,在相近解析效果情况下比原方案提速10倍以上,可通过配置文件与 `layoutlmv3` 自由切换 + - 公式解析升级至 `unimernet 0.2.1`,在提升公式解析准确率的同时,大幅降低显存需求 + - 因 `PDF-Extract-Kit 1.0` 更换仓库,需要重新下载模型,步骤详见 [如何下载模型](https://github.com/opendatalab/MinerU/docs/how_to_download_models_zh_cn.md) + +### 0.8.1 (2024/09/27) + +修复了一些bug,同时提供了[在线demo](https://opendatalab.com/OpenSourceTools/Extractor/PDF/)的[本地化部署版本](https://github.com/opendatalab/MinerU/projects/web_demo/README_zh-CN.md)和[前端界面](https://github.com/opendatalab/MinerU/projects/web/README_zh-CN.md) + +### 0.8.0 (2024/09/09) + +支持Dockerfile快速部署,同时上线了huggingface、modelscope demo + +### 0.7.1 (2024/08/30) + +集成了paddle tablemaster表格识别功能 + +### 0.7.0b1 (2024/08/09) + +简化安装步骤提升易用性,加入表格识别功能 + +### 0.6.2b1 (2024/08/01) + +优化了依赖冲突问题和安装文档 + +### 首次开源 (2024/07/05) + +MinerU项目首次开源发布 diff --git a/docs/zh/reference/index.md b/docs/zh/reference/index.md new file mode 100644 index 0000000..a1b61fb --- /dev/null +++ b/docs/zh/reference/index.md @@ -0,0 +1,27 @@ +# 参考文档 + +本章节提供了 MinerU 项目的详细参考资料。在这里您可以找到技术规范、API 文档、输出文件格式说明以及版本历史记录。 + 
+## 目录 + +- [输出文件说明](./output_files.md) - 详细介绍所有输出文件及其格式 +- [更新日志](./changelog.md) - 版本更新历史和发布说明 + +## 文档概览 + +### 输出文件说明 + +理解 MinerU 生成的输出文件对于有效使用工具至关重要。输出文件文档提供了: + +- **可视化调试文件**:帮助您理解文档解析过程 +- **结构化数据文件**:包含详细的解析结果,可用于进一步处理 +- **文件格式规范**:每种输出文件类型的详细说明 + +### 更新日志 + +更新日志记录了 MinerU 的演进历程,包括: + +- **版本更新**:每个版本的新功能和改进 +- **错误修复**:每个版本中解决的问题 +- **重大变更**:可能影响您使用的重要变更 +- **功能弃用**:正在逐步淘汰的功能 diff --git a/docs/zh/reference/output_files.md b/docs/zh/reference/output_files.md new file mode 100644 index 0000000..2729305 --- /dev/null +++ b/docs/zh/reference/output_files.md @@ -0,0 +1,827 @@ +# MinerU 输出文件说明 + +## 概览 + +`mineru` 命令执行后,除了输出主要的 markdown 文件外,还会生成多个辅助文件用于调试、质检和进一步处理。这些文件包括: + +- **可视化调试文件**:帮助用户直观了解文档解析过程和结果 +- **结构化数据文件**:包含详细的解析数据,可用于二次开发 + +下面将详细介绍每个文件的作用和格式。 + +## 可视化调试文件 + +### 布局分析文件 (layout.pdf) + +**文件命名格式**:`{原文件名}_layout.pdf` + +**功能说明**: + +- 可视化展示每一页的布局分析结果 +- 每个检测框右上角的数字表示阅读顺序 +- 使用不同背景色块区分不同类型的内容块 + +**使用场景**: + +- 检查布局分析是否正确 +- 确认阅读顺序是否合理 +- 调试布局相关问题 + +![layout 页面示例](../images/layout_example.png) + +### 文本片段文件 (spans.pdf) + +> [!NOTE] +> 仅适用于 pipeline 后端 + +**文件命名格式**:`{原文件名}_spans.pdf` + +**功能说明**: + +- 根据 span 类型使用不同颜色线框标注页面内容 +- 用于质量检查和问题排查 + +**使用场景**: + +- 快速排查文本丢失问题 +- 检查行内公式识别情况 +- 验证文本分割准确性 + +![span 页面示例](../images/spans_example.png) + +## 结构化数据文件 + +> [!IMPORTANT] +> 2.5版本vlm后端的输出存在较大变化,与pipeline版本存在不兼容情况,如需基于结构化输出进行二次开发,请仔细阅读本文档内容。 + +### pipeline 后端 输出结果 + +#### 模型推理结果 (model.json) + +**文件命名格式**:`{原文件名}_model.json` + +##### 数据结构定义 + +```python +from pydantic import BaseModel, Field +from enum import IntEnum + +class CategoryType(IntEnum): + """内容类别枚举""" + title = 0 # 标题 + plain_text = 1 # 文本 + abandon = 2 # 包括页眉页脚页码和页面注释 + figure = 3 # 图片 + figure_caption = 4 # 图片描述 + table = 5 # 表格 + table_caption = 6 # 表格描述 + table_footnote = 7 # 表格注释 + isolate_formula = 8 # 行间公式 + formula_caption = 9 # 行间公式的标号 + embedding = 13 # 行内公式 + isolated = 14 # 行间公式 + text = 15 # OCR 识别结果 + +class PageInfo(BaseModel): + """页面信息""" + page_no: int = 
Field(description="页码序号,第一页的序号是 0", ge=0) + height: int = Field(description="页面高度", gt=0) + width: int = Field(description="页面宽度", ge=0) + +class ObjectInferenceResult(BaseModel): + """对象识别结果""" + category_id: CategoryType = Field(description="类别", ge=0) + poly: list[float] = Field(description="四边形坐标,格式为 [x0,y0,x1,y1,x2,y2,x3,y3]") + score: float = Field(description="推理结果的置信度") + latex: str | None = Field(description="LaTeX 解析结果", default=None) + html: str | None = Field(description="HTML 解析结果", default=None) + +class PageInferenceResults(BaseModel): + """页面推理结果""" + layout_dets: list[ObjectInferenceResult] = Field(description="页面识别结果") + page_info: PageInfo = Field(description="页面元信息") + +# 完整的推理结果 +inference_result: list[PageInferenceResults] = [] +``` + +##### 坐标系统说明 + +`poly` 坐标格式:`[x0, y0, x1, y1, x2, y2, x3, y3]` + +- 分别表示左上、右上、右下、左下四点的坐标 +- 坐标原点在页面左上角 + +![poly 坐标示意图](../images/poly.png) + +##### 示例数据 + +```json +[ + { + "layout_dets": [ + { + "category_id": 2, + "poly": [ + 99.1906967163086, + 100.3119125366211, + 730.3707885742188, + 100.3119125366211, + 730.3707885742188, + 245.81326293945312, + 99.1906967163086, + 245.81326293945312 + ], + "score": 0.9999997615814209 + } + ], + "page_info": { + "page_no": 0, + "height": 2339, + "width": 1654 + } + }, + { + "layout_dets": [ + { + "category_id": 5, + "poly": [ + 99.13092803955078, + 2210.680419921875, + 497.3183898925781, + 2210.680419921875, + 497.3183898925781, + 2264.78076171875, + 99.13092803955078, + 2264.78076171875 + ], + "score": 0.9999997019767761 + } + ], + "page_info": { + "page_no": 1, + "height": 2339, + "width": 1654 + } + } +] +``` + +#### 中间处理结果 (middle.json) + +**文件命名格式**:`{原文件名}_middle.json` + +##### 顶层结构 + +| 字段名 | 类型 | 说明 | +|--------|------|------| +| `pdf_info` | `list[dict]` | 每一页的解析结果数组 | +| `_backend` | `string` | 解析模式:`pipeline` 或 `vlm` | +| `_version_name` | `string` | MinerU 版本号 | + +##### 页面信息结构 (pdf_info) + +| 字段名 | 说明 | +|--------|------| +| `preproc_blocks` | PDF 
预处理后的未分段中间结果 | +| `page_idx` | 页码,从 0 开始 | +| `page_size` | 页面的宽度和高度 `[width, height]` | +| `images` | 图片块信息列表 | +| `tables` | 表格块信息列表 | +| `interline_equations` | 行间公式块信息列表 | +| `discarded_blocks` | 需要丢弃的块信息 | +| `para_blocks` | 分段后的内容块结果 | + +##### 块结构层次 + +``` +一级块 (table | image) +└── 二级块 + └── 行 (line) + └── 片段 (span) +``` + +##### 一级块字段 + +| 字段名 | 说明 | +|--------|------| +| `type` | 块类型:`table` 或 `image` | +| `bbox` | 块的矩形框坐标 `[x0, y0, x1, y1]` | +| `blocks` | 包含的二级块列表 | + +##### 二级块字段 + +| 字段名 | 说明 | +|--------|------| +| `type` | 块类型(详见下表) | +| `bbox` | 块的矩形框坐标 | +| `lines` | 包含的行信息列表 | + +##### 二级块类型 + +| 类型 | 说明 | +|------|------| +| `image_body` | 图像本体 | +| `image_caption` | 图像描述文本 | +| `image_footnote` | 图像脚注 | +| `table_body` | 表格本体 | +| `table_caption` | 表格描述文本 | +| `table_footnote` | 表格脚注 | +| `text` | 文本块 | +| `title` | 标题块 | +| `index` | 目录块 | +| `list` | 列表块 | +| `interline_equation` | 行间公式块 | + +##### 行和片段结构 + +**行 (line) 字段**: +- `bbox`:行的矩形框坐标 +- `spans`:包含的片段列表 + +**片段 (span) 字段**: +- `bbox`:片段的矩形框坐标 +- `type`:片段类型(`image`、`table`、`text`、`inline_equation`、`interline_equation`) +- `content` | `img_path`:文本内容或图片路径 + +##### 示例数据 + +```json +{ + "pdf_info": [ + { + "preproc_blocks": [ + { + "type": "text", + "bbox": [ + 52, + 61.956024169921875, + 294, + 82.99800872802734 + ], + "lines": [ + { + "bbox": [ + 52, + 61.956024169921875, + 294, + 72.0000228881836 + ], + "spans": [ + { + "bbox": [ + 54.0, + 61.956024169921875, + 296.2261657714844, + 72.0000228881836 + ], + "content": "dependent on the service headway and the reliability of the departure ", + "type": "text", + "score": 1.0 + } + ] + } + ] + } + ], + "layout_bboxes": [ + { + "layout_bbox": [ + 52, + 61, + 294, + 731 + ], + "layout_label": "V", + "sub_layout": [] + } + ], + "page_idx": 0, + "page_size": [ + 612.0, + 792.0 + ], + "_layout_tree": [], + "images": [], + "tables": [], + "interline_equations": [], + "discarded_blocks": [], + "para_blocks": [ + { + "type": "text", + "bbox": [ + 
52, + 61.956024169921875, + 294, + 82.99800872802734 + ], + "lines": [ + { + "bbox": [ + 52, + 61.956024169921875, + 294, + 72.0000228881836 + ], + "spans": [ + { + "bbox": [ + 54.0, + 61.956024169921875, + 296.2261657714844, + 72.0000228881836 + ], + "content": "dependent on the service headway and the reliability of the departure ", + "type": "text", + "score": 1.0 + } + ] + } + ] + } + ] + } + ], + "_backend": "pipeline", + "_version_name": "0.6.1" +} +``` + +#### 内容列表 (content_list.json) + +**文件命名格式**:`{原文件名}_content_list.json` + +##### 功能说明 + +这是一个简化版的 `middle.json`,按阅读顺序平铺存储所有可读内容块,去除了复杂的布局信息,便于后续处理。 + +##### 内容类型 + +| 类型 | 说明 | +|------|------| +| `image` | 图片 | +| `table` | 表格 | +| `text` | 文本/标题 | +| `equation` | 行间公式 | + +##### 文本层级标识 + +通过 `text_level` 字段区分文本层级: + +- 无 `text_level` 或 `text_level: 0`:正文文本 +- `text_level: 1`:一级标题 +- `text_level: 2`:二级标题 +- 以此类推... + +##### 通用字段 + +- 所有内容块都包含 `page_idx` 字段,表示所在页码(从 0 开始)。 +- 所有内容块都包含 `bbox` 字段,表示内容块的边界框坐标 `[x0, y0, x1, y1]` 映射在0-1000范围内的结果。 + +##### 示例数据 + +```json +[ + { + "type": "text", + "text": "The response of flow duration curves to afforestation ", + "text_level": 1, + "bbox": [ + 62, + 480, + 946, + 904 + ], + "page_idx": 0 + }, + { + "type": "image", + "img_path": "images/a8ecda1c69b27e4f79fce1589175a9d721cbdc1cf78b4cc06a015f3746f6b9d8.jpg", + "image_caption": [ + "Fig. 1. Annual flow duration curves of daily flows from Pine Creek, Australia, 1989–2000. 
" + ], + "image_footnote": [], + "bbox": [ + 62, + 480, + 946, + 904 + ], + "page_idx": 1 + }, + { + "type": "equation", + "img_path": "images/181ea56ef185060d04bf4e274685f3e072e922e7b839f093d482c29bf89b71e8.jpg", + "text": "$$\nQ _ { \\% } = f ( P ) + g ( T )\n$$", + "text_format": "latex", + "bbox": [ + 62, + 480, + 946, + 904 + ], + "page_idx": 2 + }, + { + "type": "table", + "img_path": "images/e3cb413394a475e555807ffdad913435940ec637873d673ee1b039e3bc3496d0.jpg", + "table_caption": [ + "Table 2 Significance of the rainfall and time terms " + ], + "table_footnote": [ + "indicates that the rainfall term was significant at the $5 \\%$ level, $T$ indicates that the time term was significant at the $5 \\%$ level, \\* represents significance at the $10 \\%$ level, and na denotes too few data points for meaningful analysis. " + ], + "table_body": "
SitePercentile
102030405060708090100
Traralgon CkPP,*PPP,P,P,P,PP
RedhillP,TP,T,***P.TP,*P*P**,*
Pine CkP,TP,TP,TP,TTTTnana
Stewarts Ck 5P,TP,TP,TP,TP.TP.TP,Tnanana
Glendhu 2PP,TP,*P,TP.TP,nsP,TP,TP,TP,T
Cathedral Peak 2P,TP,TP,TP,TP,T*,TP,TP,TP,TT
Cathedral Peak 3P.TP.TP,TP,TP,TTP,TP,TP,TT
Lambrechtsbos AP,TPPP,T*,T*,T*,T*,T*,TT
Lambrechtsbos BP,TP,TP,TP,TP,TP,TP,TP,TTT
BiesievleiP,TP.TP,TP,T*,T*,TTTP,TP,T
", + "bbox": [ + 62, + 480, + 946, + 904 + ], + "page_idx": 5 + } +] +``` + +### VLM 后端 输出结果 + +#### 模型推理结果 (model.json) + +**文件命名格式**:`{原文件名}_model.json` + +##### 文件格式说明 + +- 该文件为 VLM 模型的原始输出结果,包含两层嵌套list,外层表示页面,内层表示该页的内容块 +- 每个内容块都是一个dict,包含 `type`、`bbox`、`angle`、`content` 字段 + + +##### 支持的内容类型 + +```json +{ + "text": "文本", + "title": "标题", + "equation": "行间公式", + "image": "图片", + "image_caption": "图片描述", + "image_footnote": "图片脚注", + "table": "表格", + "table_caption": "表格描述", + "table_footnote": "表格脚注", + "phonetic": "拼音", + "code": "代码块", + "code_caption": "代码描述", + "ref_text": "参考文献", + "algorithm": "算法块", + "list": "列表", + "header": "页眉", + "footer": "页脚", + "page_number": "页码", + "aside_text": "装订线旁注", + "page_footnote": "页面脚注" +} +``` + +##### 坐标系统说明 + +`bbox` 坐标格式:`[x0, y0, x1, y1]` + +- 分别表示左上、右下两点的坐标 +- 坐标原点在页面左上角 +- 坐标为相对于原始页面尺寸的百分比,范围在0-1之间 + +##### 示例数据 + +```json +[ + [ + { + "type": "header", + "bbox": [ + 0.077, + 0.095, + 0.18, + 0.181 + ], + "angle": 0, + "score": null, + "block_tags": null, + "content": "ELSEVIER", + "format": null, + "content_tags": null + }, + { + "type": "title", + "bbox": [ + 0.157, + 0.228, + 0.833, + 0.253 + ], + "angle": 0, + "score": null, + "block_tags": null, + "content": "The response of flow duration curves to afforestation", + "format": null, + "content_tags": null + } + ] +] +``` + +#### 中间处理结果 (middle.json) + +**文件命名格式**:`{原文件名}_middle.json` + +##### 文件格式说明 +vlm 后端的 middle.json 文件结构与 pipeline 后端类似,但存在以下差异: + +- list变成二级block,增加`sub_type`字段区分list类型: + * `text`(文本类型) + * `ref_text`(引用类型) + +- 增加code类型block,code类型包含两种"sub_type": + * 分别是`code`和`algorithm` + * 至少有`code_body`, 可选`code_caption` + +- `discarded_blocks`内元素type增加以下类型: + * `header`(页眉) + * `footer`(页脚) + * `page_number`(页码) + * `aside_text`(装订线文本) + * `page_footnote`(脚注) +- 所有block增加`angle`字段,用来表示旋转角度,0,90,180,270 + + +##### 示例数据 +- list block 示例 + ```json + { + "bbox": [ + 174, + 155, + 818, + 333 + ], + "type": "list", + "angle": 0, + "index": 11, + 
"blocks": [ + { + "bbox": [ + 174, + 157, + 311, + 175 + ], + "type": "text", + "angle": 0, + "lines": [ + { + "bbox": [ + 174, + 157, + 311, + 175 + ], + "spans": [ + { + "bbox": [ + 174, + 157, + 311, + 175 + ], + "type": "text", + "content": "H.1 Introduction" + } + ] + } + ], + "index": 3 + }, + { + "bbox": [ + 175, + 182, + 464, + 229 + ], + "type": "text", + "angle": 0, + "lines": [ + { + "bbox": [ + 175, + 182, + 464, + 229 + ], + "spans": [ + { + "bbox": [ + 175, + 182, + 464, + 229 + ], + "type": "text", + "content": "H.2 Example: Divide by Zero without Exception Handling" + } + ] + } + ], + "index": 4 + } + ], + "sub_type": "text" + } + ``` +- code block 示例 + ```json + { + "type": "code", + "bbox": [ + 114, + 780, + 885, + 1231 + ], + "blocks": [ + { + "bbox": [ + 114, + 780, + 885, + 1231 + ], + "lines": [ + { + "bbox": [ + 114, + 780, + 885, + 1231 + ], + "spans": [ + { + "bbox": [ + 114, + 780, + 885, + 1231 + ], + "type": "text", + "content": "1 // Fig. H.1: DivideByZeroNoExceptionHandling.java \n2 // Integer division without exception handling. 
\n3 import java.util.Scanner; \n4 \n5 public class DivideByZeroNoExceptionHandling \n6 { \n7 // demonstrates throwing an exception when a divide-by-zero occurs \n8 public static int quotient( int numerator, int denominator ) \n9 { \n10 return numerator / denominator; // possible division by zero \n11 } // end method quotient \n12 \n13 public static void main(String[] args) \n14 { \n15 Scanner scanner = new Scanner(System.in); // scanner for input \n16 \n17 System.out.print(\"Please enter an integer numerator: \"); \n18 int numerator = scanner.nextInt(); \n19 System.out.print(\"Please enter an integer denominator: \"); \n20 int denominator = scanner.nextInt(); \n21" + } + ] + } + ], + "index": 17, + "angle": 0, + "type": "code_body" + }, + { + "bbox": [ + 867, + 160, + 1280, + 189 + ], + "lines": [ + { + "bbox": [ + 867, + 160, + 1280, + 189 + ], + "spans": [ + { + "bbox": [ + 867, + 160, + 1280, + 189 + ], + "type": "text", + "content": "Algorithm 1 Modules for MCTSteg" + } + ] + } + ], + "index": 19, + "angle": 0, + "type": "code_caption" + } + ], + "index": 17, + "sub_type": "code" + } + ``` + +#### 内容列表 (content_list.json) + +**文件命名格式**:`{原文件名}_content_list.json` + +##### 文件格式说明 +vlm 后端的 content_list.json 文件结构与 pipeline 后端类似,伴随本次middle.json的变化,做了以下调整: + +- 新增`code`类型,code类型包含两种"sub_type": + * 分别是`code`和`algorithm` + * 至少有`code_body`, 可选`code_caption` + +- 新增`list`类型,list类型包含两种"sub_type": + * `text` + * `ref_text` + +- 增加所有`discarded_blocks`的输出内容 + * `header` + * `footer` + * `page_number` + * `aside_text` + * `page_footnote` + +##### 示例数据 +- code 类型 content + ```json + { + "type": "code", + "sub_type": "algorithm", + "code_caption": [ + "Algorithm 1 Modules for MCTSteg" + ], + "code_body": "1: function GETCOORDINATE(d) \n2: $x \\gets d / l$ , $y \\gets d$ mod $l$ \n3: return $(x, y)$ \n4: end function \n5: function BESTCHILD(v) \n6: $C \\gets$ child set of $v$ \n7: $v' \\gets \\arg \\max_{c \\in C} \\mathrm{UCTScore}(c)$ \n8: $v'.n \\gets v'.n + 1$ \n9: return 
$v'$ \n10: end function \n11: function BACK PROPAGATE(v) \n12: Calculate $R$ using Equation 11 \n13: while $v$ is not a root node do \n14: $v.r \\gets v.r + R$ , $v \\gets v.p$ \n15: end while \n16: end function \n17: function RANDOMSEARCH(v) \n18: while $v$ is not a leaf node do \n19: Randomly select an untried action $a \\in A(v)$ \n20: Create a new node $v'$ \n21: $(x, y) \\gets \\mathrm{GETCOORDINATE}(v'.d)$ \n22: $v'.p \\gets v$ , $v'.d \\gets v.d + 1$ , $v'.\\Gamma \\gets v.\\Gamma$ \n23: $v'.\\gamma_{x,y} \\gets a$ \n24: if $a = -1$ then \n25: $v.lc \\gets v'$ \n26: else if $a = 0$ then \n27: $v.mc \\gets v'$ \n28: else \n29: $v.rc \\gets v'$ \n30: end if \n31: $v \\gets v'$ \n32: end while \n33: return $v$ \n34: end function \n35: function SEARCH(v) \n36: while $v$ is fully expanded do \n37: $v \\gets$ BESTCHILD(v) \n38: end while \n39: if $v$ is not a leaf node then \n40: $v \\gets$ RANDOMSEARCH(v) \n41: end if \n42: return $v$ \n43: end function", + "bbox": [ + 510, + 87, + 881, + 740 + ], + "page_idx": 0 + } + ``` +- list 类型 content + ```json + { + "type": "list", + "sub_type": "text", + "list_items": [ + "H.1 Introduction", + "H.2 Example: Divide by Zero without Exception Handling", + "H.3 Example: Divide by Zero with Exception Handling", + "H.4 Summary" + ], + "bbox": [ + 174, + 155, + 818, + 333 + ], + "page_idx": 0 + } + ``` +- discarded 类型 content + ```json + [{ + "type": "header", + "text": "Journal of Hydrology 310 (2005) 253-265", + "bbox": [ + 363, + 164, + 623, + 177 + ], + "page_idx": 0 + }, + { + "type": "page_footnote", + "text": "* Corresponding author. Address: Forest Science Centre, Department of Sustainability and Environment, P.O. Box 137, Heidelberg, Vic. 3084, Australia. 
Tel.: +61 3 9450 8719; fax: +61 3 9450 8644.", + "bbox": [ + 71, + 815, + 915, + 841 + ], + "page_idx": 0 + }] + ``` + + +## 总结 + +以上文件为 MinerU 的完整输出结果,用户可根据需要选择合适的文件进行后续处理: + +- **模型输出**(使用原始输出): + * model.json + +- **调试和验证**(使用可视化文件): + * layout.pdf + * spans.pdf + +- **内容提取**(使用简化文件): + * *.md + * content_list.json + +- **二次开发**(使用结构化文件): + * middle.json diff --git a/docs/zh/usage/acceleration_cards/AMD.md b/docs/zh/usage/acceleration_cards/AMD.md new file mode 100644 index 0000000..1e90e93 --- /dev/null +++ b/docs/zh/usage/acceleration_cards/AMD.md @@ -0,0 +1,365 @@ +## 基于Triton的ROCm 不同后端实现优化,基本实现vllm后端正常推理,以及pipeline后端中第一步layout用的DocLayout-YOLO + +**已有完整python vllm和mineru环境直接跳转第五步!!!** +**其他GPU执行问题可以参考,先prof查看定位找到哪个算子问题,然后triton后端实现即可** +测试了一下,基本和MinerU官网效果差不多,用AMD的人也不是很多,就在评论区分享给大家了 + +### 1.结果介绍 +**补充一个200页的PDF python编程书测试一下速度,可以到1.99it/s:** +Two Step Extraction: 100%|████████████████████████████████████████| 200/200 [01:40<00:00, 1.99it/s] + +**下面为之前14学术论文测试结果:** +7900xtx mineru-gradio --server-name 0.0.0.0 --server-port 7860 --enable-vllm-engine true 速度大概为**1.6-1.8s/it**,没有仔细测试,简单试了两个文档。第二种矩阵乘法代替原来的dots点乘可以进一步提速到1.3s/it,优化后的主要算子耗时在hipblast(这个没法提升了)和vllm triton后端,各占25%耗时吧,vllm triton后端这个只能等官方优化了。。。。 +doclayout-yolo的layout速度从原来的1.6it/s提高到15it/s,注意需要缓存一下输入的pdf尺寸后,triton必须要缓存尺寸没办法。主要是为了保留模型输入输出接口,最小代码改动。 +采用-b vlm-vllm-engine模式举个例子 + +--- +**测试结果为优化为5d矩阵乘代替原来的点积结果:** +2025-10-05 15:45:12.985 | INFO | mineru.backend.vlm.vlm_analyze:get_model:128 - get vllm-engine predictor cost: 18.45s +Adding requests: 100%|████████████████████████████████████████████████████████████████████████████████| 14/14 [00:01<00:00, 12.20it/s] +Processed prompts: 100%|█████████████████████| 14/14 [00:08<00:00, 1.56it/s, est. 
speed input: 2174.18 toks/s, output: 791.87 toks/s] +Adding requests: 100%|█████████████████████████████████████████████████████████████████████████████| 278/278 [00:00<00:00, 323.03it/s] +Processed prompts: 100%|██████████████████| 278/278 [00:07<00:00, 37.63it/s, est. speed input: 5264.66 toks/s, output: 2733.31 toks/s] + +mineru-gradio --server-name 0.0.0.0 --server-port 7860 --enable-vllm-engine true测试: +2025-10-05 15:46:55.953 | WARNING | mineru.cli.common:convert_pdf_bytes_to_bytes_by_pypdfium2:54 - end_page_id is out of range, use pdf_docs length +Two Step Extraction: 100%|████████████████████████████████████████████████████████████████████████████| 14/14 [00:18<00:00, 1.30s/it] + +--- + +### 2.原因介绍 +AMD RDNA使用vllm后端有严重的性能问题,原因是因为vllm的**qwen2_vl.py**中有一个算子在rocm kernel上没有对应的实现,导致性能出现严重的卷积计算回退,一次执行花了12s,。。。。。。。。一言难尽。即**MIOpen 库中缺少模型中特定 Conv3d(bfloat16) 的优化内核**。 +DocLayout-YOLO的**g2l_crm.py**空洞卷积也是这个问题,专业的CDNA MI210也没解决这个问题 +正好一起处理了。 + +--- + +### 3.环境介绍 +System: Ubuntu 24.04.3 Kernel: Linux 6.14.0-33-generic ROCm version: 7.0.1 +python环境: +python 3.12 +pytorch-triton-rocm 3.5.0+gitbbb06c03 +torch 2.10.0.dev20251001+rocm7.0 +torchvision 0.25.0.dev20251003+rocm7.0 +vllm 0.11.0rc2.dev198+g736fbf4c8.rocm701 +不同版本无所谓,处理方法是一样的。 + +--- + +### 4.前置环境安装 +``` +uv venv --python python3.12 +source .venv/bin/activate +uv pip install --pre torch torchvision -i https://pypi.tuna.tsinghua.edu.cn/simple/ --extra-index-url https://download.pytorch.org/whl/nightly/rocm7.0 +uv pip install pip +# 避免覆盖我们本地的pytorch,改用pip而没有继续使用uv pip +pip install -U "mineru[core]" -i https://pypi.mirrors.ustc.edu.cn/simple/ +``` +vllm 安装参考官方手册[Vllm](https://docs.vllm.com.cn/en/latest/getting_started/installation/gpu.html#amd-rocm) +``` +#手动安装aiter,vllm,amd-smi等,自行找一个位置clone,然后进入该目录吧 +git clone --recursive https://github.com/ROCm/aiter.git +cd aiter +git submodule sync; git submodule update --init --recursive +python setup.py develop +cd .. 
+git clone https://github.com/vllm-project/vllm.git +cd vllm/ +cp -r /opt/rocm/share/amd_smi ~/Pytorch/vllm/ +pip install amd_smi/ +pip install --upgrade numba \ + scipy \ + huggingface-hub[cli,hf_transfer] \ + setuptools_scm +pip install -r requirements/rocm.txt +export PYTORCH_ROCM_ARCH="gfx1100" #根据自己的GPU架构 rocminfo | grep gfx +python setup.py develop +``` +--- + +### 5.vllm中关键triton算子添加 +#### 这里我给出两种解决方法,第一种解决方法就是前面提到的优化到1.5到1.8s/it,第二种方法有手动优化算子到矩阵乘法,7900xtx肯定适用,大概1.3s/it,其他AMD GPU相对方案一也有提速,但是不一定是最佳速度实现,里面的手动部分可能需要微调。 +**注意pip把triton 后端的flash_attn卸载了,搞了半天各种尝试还是报错,问题比较大,直接不用就行了** +``` +#定位自己vllm位置XXX +pip show vllm +``` +**关键更改** +XXX/vllm/model_executor/models/qwen2_vl.py文件: +**1.qwen2_vl.py文件33行下增加from .qwen2_vl_vision_kernels import triton_conv3d_patchify** +``` +from collections.abc import Iterable, Mapping, Sequence +from functools import partial +from typing import Annotated, Any, Callable, Literal, Optional, Union + +import torch +import torch.nn as nn +import torch.nn.functional as F +from .qwen2_vl_vision_kernels import triton_conv3d_patchify +``` +**接下来分为方案一(2.1和3.1)和方案二(2.2和3.2),选取一种实现即可** + +--- +**方案1** +**2.1qwen2_vl.py文件498行class Qwen2VisionPatchEmbed(nn.Module),PS.就是这玩意AMD没有现成的内核算子导致回退** +``` +class Qwen2VisionPatchEmbed(nn.Module): + + def __init__( + self, + patch_size: int = 14, + temporal_patch_size: int = 2, + in_channels: int = 3, + embed_dim: int = 1152, + ) -> None: + super().__init__() + self.patch_size = patch_size + self.temporal_patch_size = temporal_patch_size + self.embed_dim = embed_dim + + kernel_size = (temporal_patch_size, patch_size, patch_size) + self.proj = nn.Conv3d(in_channels, + embed_dim, + kernel_size=kernel_size, + stride=kernel_size, + bias=False) + def forward(self, x: torch.Tensor) -> torch.Tensor: + L, C = x.shape + x_reshaped = x.view(L, -1, self.temporal_patch_size, self.patch_size, + self.patch_size) + + # Call your custom Triton kernel instead of self.proj + x_out = triton_conv3d_patchify(x_reshaped, 
self.proj.weight) + + # The output of our kernel is already the correct shape [L, embed_dim] + return x_out +``` +**3.1XXX/vllm/model_executor/models/目录下创建qwen2_vl_vision_kernels.py文件,用triton实现** +``` +import torch +from vllm.triton_utils import tl, triton + +@triton.jit +def _conv3d_patchify_kernel( + # Pointers to tensors + X, W, Y, + # Tensor dimensions + N, C_in, D_in, H_in, W_in, + C_out, KD, KH, KW, + # Stride and padding for memory access + stride_xn, stride_xc, stride_xd, stride_xh, stride_xw, + stride_wn, stride_wc, stride_wd, stride_wh, stride_ww, + stride_yn, stride_yc, + # Triton-specific metaparameters + BLOCK_SIZE: tl.constexpr, +): + """ + Triton kernel for a non-overlapping 3D patching convolution. + Each kernel instance computes one output value for one patch. + """ + # Get the program IDs for the N (patch) and C_out (output channel) dimensions + pid_n = tl.program_id(0) # The index of the patch we are processing + pid_cout = tl.program_id(1) # The index of the output channel we are computing + + # --- Calculate memory pointers --- + # Pointer to the start of the current input patch + x_ptr = X + (pid_n * stride_xn) + # Pointer to the start of the current filter (weight) + w_ptr = W + (pid_cout * stride_wn) + # Pointer to where the output will be stored + y_ptr = Y + (pid_n * stride_yn + pid_cout * stride_yc) + + # --- Perform the convolution (element-wise product and sum) --- + # This is a dot product between the flattened patch and the flattened filter. 
+ accumulator = tl.zeros((BLOCK_SIZE,), dtype=tl.float32) + + # Iterate over the elements of the patch/filter + for c_offset in range(0, C_in): + for d_offset in range(0, KD): + for h_offset in range(0, KH): + # Unrolled loop for the innermost dimension (width) for performance + for w_offset in range(0, KW, BLOCK_SIZE): + # Create masks to handle cases where KW is not a multiple of BLOCK_SIZE + w_range = w_offset + tl.arange(0, BLOCK_SIZE) + w_mask = w_range < KW + + # Calculate offsets to load data + patch_offset = (c_offset * stride_xc + d_offset * stride_xd + + h_offset * stride_xh + w_range * stride_xw) + filter_offset = (c_offset * stride_wc + d_offset * stride_wd + + h_offset * stride_wh + w_range * stride_ww) + + # Load patch and filter data, applying masks + patch_vals = tl.load(x_ptr + patch_offset, mask=w_mask, other=0.0) + filter_vals = tl.load(w_ptr + filter_offset, mask=w_mask, other=0.0) + + # Multiply and accumulate + accumulator += patch_vals.to(tl.float32) * filter_vals.to(tl.float32) + + # Sum the accumulator block and store the single output value + output_val = tl.sum(accumulator, axis=0) + tl.store(y_ptr, output_val) + + +def triton_conv3d_patchify(x: torch.Tensor, weight: torch.Tensor) -> torch.Tensor: + """ + Python wrapper for the 3D patching convolution Triton kernel. 
+ """ + # Get tensor dimensions + N, C_in, D_in, H_in, W_in = x.shape + C_out, _, KD, KH, KW = weight.shape + + # Create the output tensor + # The output of this specific conv is (N, C_out, 1, 1, 1), which we squeeze + Y = torch.empty((N, C_out), dtype=x.dtype, device=x.device) + + # Define the grid for launching the Triton kernel + # Each kernel instance handles one patch (N) for one output channel (C_out) + grid = (N, C_out) + + # Launch the kernel + # We pass all strides to make the kernel flexible + _conv3d_patchify_kernel[grid]( + x, weight, Y, + N, C_in, D_in, H_in, W_in, + C_out, KD, KH, KW, + x.stride(0), x.stride(1), x.stride(2), x.stride(3), x.stride(4), + weight.stride(0), weight.stride(1), weight.stride(2), weight.stride(3), weight.stride(4), + Y.stride(0), Y.stride(1), + BLOCK_SIZE=16, # A reasonable default, can be tuned + ) + + return Y +``` +--- +**方案2** +**2.2qwen2_vl.py文件498行class Qwen2VisionPatchEmbed(nn.Module)函数,PS.就是这玩意AMD没有现成的内核算子导致回退,这里我们直接5D张量一步到位,改为矩阵乘法** +``` +class Qwen2VisionPatchEmbed(nn.Module): + + def __init__( + self, + patch_size: int = 14, + temporal_patch_size: int = 2, + in_channels: int = 3, + embed_dim: int = 1152, + ) -> None: + super().__init__() + self.patch_size = patch_size + self.temporal_patch_size = temporal_patch_size + self.embed_dim = embed_dim + + kernel_size = (temporal_patch_size, patch_size, patch_size) + + self.proj = nn.Conv3d(in_channels, + embed_dim, + kernel_size=kernel_size, + stride=kernel_size, + bias=False) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + L, C = x.shape + x_reshaped_5d = x.view(L, -1, self.temporal_patch_size, self.patch_size, + self.patch_size) + + return triton_conv3d_patchify(x_reshaped_5d, self.proj.weight) +``` +**3.2XXX/vllm/model_executor/models/目录下创建qwen2_vl_vision_kernels.py文件,用triton实现** +``` +import torch +from vllm.triton_utils import tl, triton + +@triton.jit +def _conv_gemm_kernel( + A, B, C, M, N, K, + stride_am, stride_ak, + stride_bk, stride_bn, + stride_cm, 
stride_cn, + BLOCK_M: tl.constexpr, BLOCK_N: tl.constexpr, BLOCK_K: tl.constexpr, +): + pid_m = tl.program_id(0) + pid_n = tl.program_id(1) + offs_m = pid_m * BLOCK_M + tl.arange(0, BLOCK_M) + offs_n = pid_n * BLOCK_N + tl.arange(0, BLOCK_N) + offs_k = tl.arange(0, BLOCK_K) + a_ptrs = A + (offs_m[:, None] * stride_am + offs_k[None, :] * stride_ak) + b_ptrs = B + (offs_k[:, None] * stride_bk + offs_n[None, :] * stride_bn) + accumulator = tl.zeros((BLOCK_M, BLOCK_N), dtype=tl.float32) + for k in range(0, K, BLOCK_K): + a = tl.load(a_ptrs, mask=(offs_m[:, None] < M) & (offs_k[None, :] < K), other=0.0) + b = tl.load(b_ptrs, mask=(offs_k[:, None] < K) & (offs_n[None, :] < N), other=0.0) + accumulator += tl.dot(a, b) + a_ptrs += BLOCK_K * stride_ak + b_ptrs += BLOCK_K * stride_bk + offs_k += BLOCK_K + c = accumulator.to(C.dtype.element_ty) + offs_cm = pid_m * BLOCK_M + tl.arange(0, BLOCK_M) + offs_cn = pid_n * BLOCK_N + tl.arange(0, BLOCK_N) + c_ptrs = C + stride_cm * offs_cm[:, None] + stride_cn * offs_cn[None, :] + c_mask = (offs_cm[:, None] < M) & (offs_cn[None, :] < N) + tl.store(c_ptrs, c, mask=c_mask) + +def triton_conv3d_patchify(x_5d: torch.Tensor, weight_5d: torch.Tensor) -> torch.Tensor: + N_patches, _, _, _, _ = x_5d.shape + C_out, _, _, _, _ = weight_5d.shape + A = x_5d.view(N_patches, -1) + B = weight_5d.view(C_out, -1).transpose(0, 1).contiguous() + M, K = A.shape + _K, N = B.shape + assert K == _K + C = torch.empty((M, N), device=A.device, dtype=A.dtype) + + # --- 针对7900xtx的手动调优配置,其他GPU的最优组合可能需要自行寻找,AMD的autotune效果就是没有效果 --- + best_config = { + 'BLOCK_M': 128, + 'BLOCK_N': 128, + 'BLOCK_K': 32, + } + num_stages = 4 + num_warps = 8 + + grid = (triton.cdiv(M, best_config['BLOCK_M']), + triton.cdiv(N, best_config['BLOCK_N'])) + + _conv_gemm_kernel[grid]( + A, B, C, + M, N, K, + A.stride(0), A.stride(1), + B.stride(0), B.stride(1), + C.stride(0), C.stride(1), + **best_config, + num_stages=num_stages, + num_warps=num_warps + ) + + return C +``` +--- 
+**4.关闭终端后再次使用mineru-gradio会报一个Lora错误,修改代码跳过它** +``` +pip show mineru_vl_utils +``` + +打开该文件XXX/mineru_vl_utils/vlm_client/vllm_async_engine_client.py修改第58行self.tokenizer = vllm_async_llm.tokenizer.get_lora_tokenizer()为: +``` + try: + self.tokenizer = vllm_async_llm.tokenizer.get_lora_tokenizer() + except AttributeError: + # 如果没有 get_lora_tokenizer 方法,直接使用原始 tokenizer + self.tokenizer = vllm_async_llm.tokenizer +``` + +**最后整两个环境变量后愉快玩耍即可** +``` +export MINERU_MODEL_SOURCE=modelscope +export TORCH_ROCM_AOTRITON_ENABLE_EXPERIMENTAL=1 +``` +--- + +### 6.vllm后端已经没有问题,下面是pipeline 中layout用的doclayout-yolo模型空洞卷积问题 +### 我在 [DocLayout-YOLO](https://github.com/opendatalab/DocLayout-YOLO/issues/120#issuecomment-3368144275) 下做了一个回答,因此 pipeline 的空洞卷积问题不在这里赘述,直接点击链接查看即可。 +查看自己doclayout-yolo安装位置如下,然后进入修改链接中回复介绍的文件即可 +``` +pip show doclayout-yolo +``` + diff --git a/docs/zh/usage/acceleration_cards/Ascend.md b/docs/zh/usage/acceleration_cards/Ascend.md new file mode 100644 index 0000000..03bf030 --- /dev/null +++ b/docs/zh/usage/acceleration_cards/Ascend.md @@ -0,0 +1,174 @@ +## 1. 测试平台 +以下为本指南测试使用的平台信息,供参考: +``` +os: CTyunOS 22.06 +cpu: Kunpeng-920 (aarch64) +npu: Ascend 910B2 +driver: 23.0.3 +docker: 20.10.12 +``` + +## 2. 
环境准备 + +>[!NOTE] +>Ascend加速卡支持使用`vllm`或`lmdeploy`进行VLM模型推理加速。请根据实际需求选择安装和使用其中之一: + +### 2.1 使用 Dockerfile 构建镜像 (vllm) +> [!TIP] +> ascend-vllm支持设备如下: +> +> - Atlas A2 training series (Atlas 800T A2, Atlas 900 A2 PoD, Atlas 200T A2 Box16, Atlas 300T A2) +> - Atlas 800I A2 inference series (Atlas 800I A2) +> - Atlas A3 training series (Atlas 800T A3, Atlas 900 A3 SuperPoD, Atlas 9000 A3 SuperPoD) +> - Atlas 800I A3 inference series (Atlas 800I A3) +> - [Experimental] Atlas 300I inference series (Atlas 300I Duo) +> +> Dockerfile文件第三行为ascend-vllm基础镜像信息,默认tag为A2适配的版本,例如 `v0.11.0` +> +> - 如需使用A3适配的版本,请将第三行的tag修改为 `v0.11.0-a3`,然后再执行build操作。 +> - 如需使用Atlas 300I Duo适配的版本,请将第三行的tag修改为 `v0.10.0rc1-310p`,然后再执行build操作。 + + +```bash +wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/docker/china/npu.Dockerfile +docker build --network=host -t mineru:npu-vllm-latest -f npu.Dockerfile . +``` + +### 2.2 使用 Dockerfile 构建镜像 (lmdeploy) + +> [!TIP] +> ascend-lmdeploy支持设备如下: +> +> - Atlas A2 training series (Atlas 800T A2, Atlas 900 A2 PoD, Atlas 200T A2 Box16, Atlas 300T A2) +> - Atlas 800I A2 inference series (Atlas 800I A2) +> +> 如果您的设备为Atlas A3系列或Atlas 300I Duo系列,请使用vllm版本的镜像。 + +```bash +wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/docker/china/npu.Dockerfile +# 将基础镜像从 vllm 切换为 lmdeploy +sed -i '3s/^/# /' npu.Dockerfile && sed -i '5s/^# //' npu.Dockerfile +docker build --network=host -t mineru:npu-lmdeploy-latest -f npu.Dockerfile . +``` + +## 3. 
启动 Docker 容器 + +```bash +docker run -u root --name mineru_docker --privileged=true \ + --ipc=host \ + --network=host \ + --device=/dev/davinci0 \ + --device=/dev/davinci1 \ + --device=/dev/davinci2 \ + --device=/dev/davinci3 \ + --device=/dev/davinci4 \ + --device=/dev/davinci5 \ + --device=/dev/davinci6 \ + --device=/dev/davinci7 \ + --device=/dev/davinci_manager \ + --device=/dev/devmm_svm \ + --device=/dev/hisi_hdc \ + -v /var/log/npu/:/usr/slog \ + -v /usr/local/dcmi:/usr/local/dcmi \ + -v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \ + -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ + -e VLLM_WORKER_MULTIPROC_METHOD=spawn \ + -e MINERU_MODEL_SOURCE=local \ + -e MINERU_LMDEPLOY_DEVICE=ascend \ + -it mineru:npu-vllm-latest \ + /bin/bash +``` + +>[!TIP] +> 请根据实际情况选择使用`vllm`或`lmdeploy`版本的镜像,如需使用lmdeploy,替换上述命令中的`mineru:npu-vllm-latest`为`mineru:npu-lmdeploy-latest`即可。 + +执行该命令后,您将进入到Docker容器的交互式终端,您可以直接在容器内运行MinerU相关命令来使用MinerU的功能。 +您也可以直接通过替换`/bin/bash`为服务启动命令来启动MinerU服务,详细说明请参考[通过命令启动服务](https://opendatalab.github.io/MinerU/zh/usage/quick_usage/#apiwebuihttp-clientserver)。 + +>[!NOTE] +> 由于310p加速卡不支持bf16精度,因此在使用该加速卡时,执行任意与`vllm`相关命令需追加`--enforce-eager --dtype float16`参数。 + +## 4. 注意事项 + +不同环境下,MinerU对Ascend加速卡的支持情况如下表所示: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
使用场景容器环境
vllmlmdeploy
命令行工具(mineru)pipeline🟢🟢
<vlm/hybrid>-auto-engine🟢🟢
<vlm/hybrid>-http-client🟢🟢
fastapi服务(mineru-api)pipeline🟢🟢
<vlm/hybrid>-auto-engine🟢🟢
<vlm/hybrid>-http-client🟢🟢
gradio界面(mineru-gradio)pipeline🟢🟢
<vlm/hybrid>-auto-engine🟢🟢
<vlm/hybrid>-http-client🟢🟢
openai-server服务(mineru-openai-server)🟢🟢
数据并行 (--data-parallel-size/--dp)🟢🔴
+ +注: +🟢: 支持,运行较稳定,精度与Nvidia GPU基本一致 +🟡: 支持但较不稳定,在某些场景下可能出现异常,或精度存在一定差异 +🔴: 不支持,无法运行,或精度存在较大差异 + +>[!TIP] +>NPU加速卡指定可用加速卡的方式与NVIDIA GPU类似,请参考[ASCEND_RT_VISIBLE_DEVICES](https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/850alpha001/maintenref/envvar/envref_07_0028.html) \ No newline at end of file diff --git a/docs/zh/usage/acceleration_cards/Cambricon.md b/docs/zh/usage/acceleration_cards/Cambricon.md new file mode 100644 index 0000000..1918c41 --- /dev/null +++ b/docs/zh/usage/acceleration_cards/Cambricon.md @@ -0,0 +1,253 @@ +# MinerU +## 1. 环境准备 +容器启动方式见第3.2节 +### 1.1 获取代码 +``` +git clone https://github.com/opendatalab/MinerU.git +git -C MinerU checkout fa1149cd4abf9db5e0f13e4e074cdb568be189f4 +``` +### 1.2 安装依赖 +``` +source /torch/venv3/pytorch_infer/bin/activate +pip install accelerate==1.11.0 doclayout_yolo==0.0.4 thop==0.1.1.post2209072238 ultralytics-thop==2.0.18 ultralytics==8.3.228 +# requirements_check.txt具体内容在下面 +pip install -r requirements_check.txt +cd MinerU +pip install -e .[core] --no-deps +``` +requirements_check.txt +``` +# triton==3.0.0+mlu1.3.1 +# torch==2.5.0+cpu +# torchvision==0.20.0+cpu + + +# === 1. 已安装且版本相同 === +# (这些包已满足要求, 无需操作) + + +# === 2. 已安装但版本不同 === +# (运行 pip install -r 将强制更新到左侧的目标版本) +# accelerate==1.11.0 # 0.33.0 +beautifulsoup4==4.14.2 # 4.12.3 +cffi==2.0.0 # 1.17.1 +huggingface-hub==0.36.0 # 0.25.2 +jiter==0.12.0 # 0.8.2 +openai==2.8.0 # 1.59.7 +pillow==11.3.0 # 10.4.0 +sympy==1.14.0 # 1.13.1 +tokenizers==0.22.1 # 0.21.0 +# torch==2.9.1 # 2.5.0+cpu +# torchvision==0.24.1 # 0.20.0+cpu +transformers==4.57.1 # 4.48.0 +# triton==3.5.1 # 3.0.0+mlu1.3.1 +typing-extensions==4.15.0 # 4.12.2 + +# === 3. 
未安装 === +# (运行 pip install -r 将安装这些包) +aiofiles==24.1.0 +albucore==0.0.24 +albumentations==2.0.8 +antlr4-python3-runtime==4.9.3 +brotli==1.2.0 +coloredlogs==15.0.1 +colorlog==6.10.1 +cryptography==46.0.3 +# doclayout_yolo==0.0.4 +fast-langdetect==0.2.5 +fasttext-predict==0.9.2.4 +ffmpy==1.0.0 +flatbuffers==25.9.23 +ftfy==6.3.1 +gradio-client==1.13.3 +gradio-pdf==0.0.22 +gradio==5.49.1 +groovy==0.1.2 +hf-xet==1.2.0 +httpx-retries==0.4.5 +humanfriendly==10.0 +imageio==2.37.2 +json-repair==0.53.0 +magika==0.6.3 +markdown-it-py==4.0.0 +mdurl==0.1.2 +mineru-vl-utils==0.1.15 +mineru==2.6.4 +modelscope==1.31.0 +# nvidia-cublas-cu12==12.8.4.1 +# nvidia-cuda-cupti-cu12==12.8.90 +# nvidia-cuda-nvrtc-cu12==12.8.93 +# nvidia-cuda-runtime-cu12==12.8.90 +# nvidia-cudnn-cu12==9.10.2.21 +# nvidia-cufft-cu12==11.3.3.83 +# nvidia-cufile-cu12==1.13.1.3 +# nvidia-curand-cu12==10.3.9.90 +# nvidia-cusolver-cu12==11.7.3.90 +# nvidia-cusparse-cu12==12.5.8.93 +# nvidia-cusparselt-cu12==0.7.1 +# nvidia-nccl-cu12==2.27.5 +# nvidia-nvjitlink-cu12==12.8.93 +# nvidia-nvshmem-cu12==3.3.20 +# nvidia-nvtx-cu12==12.8.90 +omegaconf==2.3.0 +onnxruntime==1.23.2 +orjson==3.11.4 +pdfminer.six==20250506 +pdftext==0.6.3 +polars-runtime-32==1.35.2 +polars==1.35.2 +pyclipper==1.3.0.post6 +pydantic-settings==2.12.0 +pydub==0.25.1 +pypdf==6.2.0 +pypdfium2==4.30.0 +python-multipart==0.0.20 +reportlab==4.4.4 +rich==14.2.0 +robust-downloader==0.0.2 +ruff==0.14.5 +safehttpx==0.1.7 +scikit-image==0.25.2 +seaborn==0.13.2 +semantic-version==2.10.0 +shapely==2.1.2 +shellingham==1.5.4 +simsimd==6.5.3 +stringzilla==4.2.3 +# thop==0.1.1.post2209072238 +tifffile==2025.5.10 +typer==0.20.0 +typing-inspection==0.4.2 +# ultralytics-thop==2.0.18 +# ultralytics==8.3.228 +``` +### 1.3 修改代码 +/raid_data/home/yqk/mineru-251114/MinerU/mineru/backend/pipeline/pipeline_analyze.py, line 1 +添加代码 +``` +# 添加MLU支持 +import torch_mlu.utils.gpu_migration +# 高版本镜像为 +# import torch.mlu.utils.gpu_migration +``` + +## 2. 
使用方法 +``` +export HF_ENDPOINT=https://hf-mirror.com +mineru-api --host 0.0.0.0 --port 8009 +``` + +## 3. 其他 + +### 3.1 Dify插件配置问题 +给Dify的MinerU插件使用时,需将Dify的.env文件中FILES_URL设置为http://{ip}:{dify的网页访问端口}。 +根据网上找到的很多回答可能是要暴露5001,并将FILES_URL设置为http://{ip}:5001,并暴露5001端口,但其实设置为dify的网页访问端口即可。 + +### 3.2 容器启动方式 + +``` +export MY_CONTAINER="[容器名称]" +num=`docker ps -a|grep "$MY_CONTAINER" | wc -l` +echo $num +echo $MY_CONTAINER +if [ 0 -eq $num ];then +docker run -d \ + --privileged \ + --pid=host \ + --net=host \ + --shm-size 64g \ + --device /dev/cambricon_dev0 \ + --device /dev/cambricon_ipcm0 \ + --device /dev/cambricon_ctl \ + --name $MY_CONTAINER \ + -v [/path/to/your/data:/path/to/your/data] \ + -v /usr/bin/cnmon:/usr/bin/cnmon \ + [镜像名称] \ + sleep infinity +docker exec -ti $MY_CONTAINER /bin/bash +else + docker start $MY_CONTAINER + docker exec -ti $MY_CONTAINER /bin/bash +fi +``` + +### 3.3 将上面的过程进行打包 + +准备好前面的requirements_check.txt + +Dockerfile + +``` +# 1. 使用指定的基础镜像 +FROM cambricon-base/pytorch:v25.01-torch2.5.0-torchmlu1.24.1-ubuntu22.04-py310 + +# 2. 设置环境变量 +ENV HF_ENDPOINT=https://hf-mirror.com + +# 3. 定义 venv_pip 路径以便复用 +# 基础镜像中的虚拟环境路径 +ARG VENV_PIP=/torch/venv3/pytorch_infer/bin/pip + +# 4. 设置工作目录 +WORKDIR /app + +# 5. 安装 git (基础镜像可能不包含) +RUN apt-get update && apt-get install -y git && \ + rm -rf /var/lib/apt/lists/* + +# 6. 复制 requirements_check.txt 到镜像中 +# (这个文件需要您在宿主机上和 Dockerfile 放在同一目录下) +COPY requirements_check.txt . + +# 7. 步骤 1.1 & 1.2: 获取代码并安装所有依赖 +# 在一个 RUN 层中执行所有安装,以优化镜像大小 +RUN \ + # 1.1 获取代码 + echo "Cloning MinerU repository..." && \ + git clone https://gh-proxy.org/https://github.com/opendatalab/MinerU.git && \ + cd MinerU && \ + git checkout fa1149cd4abf9db5e0f13e4e074cdb568be189f4 && \ + cd .. && \ + \ + # 1.2 安装依赖 + # 第1个pip install (来自您的步骤) + echo "Installing initial dependencies..." 
&& \ + ${VENV_PIP} install accelerate==1.11.0 doclayout_yolo==0.0.4 thop==0.1.1.post2209072238 ultralytics-thop==2.0.18 ultralytics==8.3.228 && \ + \ + # 第2个pip install (来自 requirements_check.txt) + echo "Installing dependencies from requirements_check.txt..." && \ + # 注意:基础镜像已包含 torch 和 triton,requirements_check.txt 中的注释行会被 pip 自动忽略 + ${VENV_PIP} install -r requirements_check.txt && \ + \ + # 第3个pip install (本地安装 MinerU) + echo "Installing MinerU in editable mode..." && \ + cd MinerU && \ + ${VENV_PIP} install -e .[core] --no-deps + +# 8. 步骤 1.3: 修改代码 +# 将 MLU 支持代码添加到指定文件的开头 +RUN echo "Applying MLU patch to pipeline_analyze.py..." && \ + sed -i '1i# 添加MLU支持\nimport torch_mlu.utils.gpu_migration\n# 高版本镜像为\n# import torch.mlu.utils.gpu_migration\n' \ + /app/MinerU/mineru/backend/pipeline/pipeline_analyze.py +``` + +该镜像的启动 + +``` +docker run -d --restart=always \ + --privileged \ + --pid=host \ + --net=host \ + --shm-size 64g \ + --device /dev/cambricon_dev0 \ + --device /dev/cambricon_ipcm0 \ + --device /dev/cambricon_ctl \ + --name mineru_service \ + mineru-mlu:latest \ + /torch/venv3/pytorch_infer/bin/python /app/MinerU/mineru/cli/fast_api.py --host 0.0.0.0 --port 8009 +``` + + + + + diff --git a/docs/zh/usage/acceleration_cards/Hygon.md b/docs/zh/usage/acceleration_cards/Hygon.md new file mode 100644 index 0000000..9b3e2da --- /dev/null +++ b/docs/zh/usage/acceleration_cards/Hygon.md @@ -0,0 +1,115 @@ +## 1. 测试平台 +以下为本指南测试使用的平台信息,供参考: +``` +os: Ubuntu 22.04.3 LTS +cpu: Hygon Hygon C86-4G(x86-64) +dcu: BW200 +driver: 6.3.13-V1.12.0a +docker: 20.10.24 +``` + +## 2. 环境准备 + +### 2.1 使用 Dockerfile 构建镜像 + +```bash +wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/docker/china/dcu.Dockerfile +docker build --network=host -t mineru:dcu-vllm-latest -f dcu.Dockerfile . +``` + + +## 3. 
启动 Docker 容器 + +```bash +docker run -u root --name mineru_docker \ + --network=host \ + --ipc=host \ + --shm-size=16G \ + --device=/dev/kfd \ + --device=/dev/mkfd \ + --device=/dev/dri \ + -v /opt/hyhal:/opt/hyhal \ + --group-add video \ + --cap-add=SYS_PTRACE \ + --security-opt seccomp=unconfined \ + -e MINERU_MODEL_SOURCE=local \ + -it mineru:dcu-vllm-latest \ + /bin/bash +``` + +执行该命令后,您将进入到Docker容器的交互式终端,您可以直接在容器内运行MinerU相关命令来使用MinerU的功能。 +您也可以直接通过替换`/bin/bash`为服务启动命令来启动MinerU服务,详细说明请参考[通过命令启动服务](https://opendatalab.github.io/MinerU/zh/usage/quick_usage/#apiwebuihttp-clientserver)。 + + +## 4. 注意事项 + +不同环境下,MinerU对Hygon加速卡的支持情况如下表所示: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
使用场景容器环境
vllm
命令行工具(mineru)pipeline🟢
<vlm/hybrid>-auto-engine🟢
<vlm/hybrid>-http-client🟢
fastapi服务(mineru-api)pipeline🟢
<vlm/hybrid>-auto-engine🟢
<vlm/hybrid>-http-client🟢
gradio界面(mineru-gradio)pipeline🟢
<vlm/hybrid>-auto-engine🟢
<vlm/hybrid>-http-client🟢
openai-server服务(mineru-openai-server)🟢
数据并行 (--data-parallel-size)🟢
+ +注: +🟢: 支持,运行较稳定,精度与Nvidia GPU基本一致 +🟡: 支持但较不稳定,在某些场景下可能出现异常,或精度存在一定差异 +🔴: 不支持,无法运行,或精度存在较大差异 + +>[!TIP] +>DCU加速卡指定可用加速卡的方式与AMD GPU类似,请参考[GPU isolation techniques](https://rocm.docs.amd.com/en/docs-6.2.4/conceptual/gpu-isolation.html) \ No newline at end of file diff --git a/docs/zh/usage/acceleration_cards/METAX.md b/docs/zh/usage/acceleration_cards/METAX.md new file mode 100644 index 0000000..95c46ee --- /dev/null +++ b/docs/zh/usage/acceleration_cards/METAX.md @@ -0,0 +1,151 @@ +## 1. 测试平台 +以下为本指南测试使用的平台信息,供参考: +``` +os: Ubuntu 22.04 +cpu: INTEL x86_64 +gpu: C500 +driver: 2.12.13 +docker: 28.1.1 +``` + +## 2. 环境准备 + +>[!NOTE] +>maca加速卡支持使用`vllm`或`lmdeploy`进行VLM模型推理加速。请根据实际需求选择安装和使用其中之一: + +### 2.1 使用metax官方镜像作为基础镜像构建vllm环境镜像 + +1. 从metax官方仓库拉取基础镜像 + - 1.1 镜像获取地址:[https://developer.metax-tech.com/softnova/docker](https://developer.metax-tech.com/softnova/docker) + - 1.2 在镜像网站选择`AI`分类,软件包类型选择`vllm`,操作系统选择`ubuntu` + - 1.3 找到`vllm:maca.ai3.1.0.7-torch2.6-py310-ubuntu22.04-amd64`镜像,复制拉取命令并在本地终端执行 +2. 使用 Dockerfile 构建镜像 (vllm) + ```bash + wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/docker/china/maca.Dockerfile + docker build --network=host -t mineru:maca-vllm-latest -f maca.Dockerfile . + ``` + + +### 2.2 使用 Dockerfile 构建镜像 (lmdeploy) + +```bash +wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/docker/china/maca.Dockerfile +# 将基础镜像从 vllm 切换为 lmdeploy +sed -i '3s/^/# /' maca.Dockerfile && sed -i '5s/^# //' maca.Dockerfile +docker build --network=host -t mineru:maca-lmdeploy-latest -f maca.Dockerfile . +``` + +## 3. 
启动 Docker 容器 + +```bash +docker run --ipc host \ + --cap-add SYS_PTRACE \ + --privileged=true \ + --device=/dev/mem \ + --device=/dev/dri \ + --device=/dev/mxcd \ + --device=/dev/infiniband \ + --group-add video \ + --network=host \ + --shm-size '100gb' \ + --ulimit memlock=-1 \ + --security-opt seccomp=unconfined \ + --security-opt apparmor=unconfined \ + --name mineru_docker \ + -v /datapool:/datapool \ + -e MINERU_MODEL_SOURCE=local \ + -e MINERU_LMDEPLOY_DEVICE=maca \ + -it mineru:maca-vllm-latest \ + /bin/bash +``` + +>[!TIP] +> 请根据实际情况选择使用`vllm`或`lmdeploy`版本的镜像,如需使用lmdeploy,替换上述命令中的`mineru:maca-vllm-latest`为`mineru:maca-lmdeploy-latest`即可。 + +执行该命令后,您将进入到Docker容器的交互式终端,您可以直接在容器内运行MinerU相关命令来使用MinerU的功能。 +您也可以直接通过替换`/bin/bash`为服务启动命令来启动MinerU服务,详细说明请参考[通过命令启动服务](https://opendatalab.github.io/MinerU/zh/usage/quick_usage/#apiwebuihttp-clientserver)。 + +## 4. 注意事项 + +不同环境下,MinerU对maca加速卡的支持情况如下表所示: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
使用场景容器环境
vllmlmdeploy
命令行工具(mineru)pipeline🟢🟢
<vlm/hybrid>-auto-engine🟢🟢
<vlm/hybrid>-http-client🟢🟢
fastapi服务(mineru-api)pipeline🟢🟢
<vlm/hybrid>-auto-engine🟢🟢
<vlm/hybrid>-http-client🟢🟢
gradio界面(mineru-gradio)pipeline🟢🟢
<vlm/hybrid>-auto-engine🟢🟢
<vlm/hybrid>-http-client🟢🟢
openai-server服务(mineru-openai-server)🟢🟢
数据并行 (--data-parallel-size/--dp)🔴🔴
+ +注: +🟢: 支持,运行较稳定,精度与Nvidia GPU基本一致 +🟡: 支持但较不稳定,在某些场景下可能出现异常,或精度存在一定差异 +🔴: 不支持,无法运行,或精度存在较大差异 + +>[!TIP] +>MACA加速卡指定可用加速卡的方式与NVIDIA GPU类似,请参考[使用指定GPU设备](https://opendatalab.github.io/MinerU/zh/usage/advanced_cli_parameters/#cuda_visible_devices)章节说明。 \ No newline at end of file diff --git a/docs/zh/usage/acceleration_cards/THead.md b/docs/zh/usage/acceleration_cards/THead.md new file mode 100644 index 0000000..e1ca16e --- /dev/null +++ b/docs/zh/usage/acceleration_cards/THead.md @@ -0,0 +1,142 @@ +## 1. 测试平台 +以下为本指南测试使用的平台信息,供参考: +``` +os: Ubuntu 22.04 +cpu: INTEL x86_64 +ppu: ZW810E +driver: 1.4.0 +docker: 26.1.4 +``` + +## 2. 环境准备 + +>[!NOTE] +>ppu加速卡支持使用`vllm`或`lmdeploy`进行VLM模型推理加速。请根据实际需求选择安装和使用其中之一: + +### 2.1 使用 Dockerfile 构建镜像 (vllm) + +```bash +wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/docker/china/ppu.Dockerfile +docker build --network=host -t mineru:ppu-vllm-latest -f ppu.Dockerfile . +``` + +### 2.2 使用 Dockerfile 构建镜像 (lmdeploy) + +```bash +wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/docker/china/ppu.Dockerfile +# 将基础镜像从 vllm 切换为 lmdeploy +sed -i '3s/^/# /' ppu.Dockerfile && sed -i '5s/^# //' ppu.Dockerfile +docker build --network=host -t mineru:ppu-lmdeploy-latest -f ppu.Dockerfile . +``` + + +## 3. 
启动 Docker 容器 + +```bash +docker run --privileged=true \ + --name mineru_docker \ + --device=/dev/alixpu \ + --device=/dev/alixpu_ctl \ + --ipc=host \ + --network=host \ + --ulimit memlock=-1 \ + --ulimit stack=67108864 \ + --shm-size=500g \ + -v /mnt:/mnt \ + -v /datapool:/datapool \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -e MINERU_MODEL_SOURCE=local \ + -it mineru:ppu-vllm-latest \ + /bin/bash +``` + +>[!TIP] +> 请根据实际情况选择使用`vllm`或`lmdeploy`版本的镜像,如需使用lmdeploy,替换上述命令中的`mineru:ppu-vllm-latest`为`mineru:ppu-lmdeploy-latest`即可。 + +执行该命令后,您将进入到Docker容器的交互式终端,您可以直接在容器内运行MinerU相关命令来使用MinerU的功能。 +您也可以直接通过替换`/bin/bash`为服务启动命令来启动MinerU服务,详细说明请参考[通过命令启动服务](https://opendatalab.github.io/MinerU/zh/usage/quick_usage/#apiwebuihttp-clientserver)。 + +## 4. 注意事项 + +不同环境下,MinerU对ppu加速卡的支持情况如下表所示: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
使用场景容器环境
vllmlmdeploy
命令行工具(mineru)pipeline🟢🟢
<vlm/hybrid>-auto-engine🟢🟢
<vlm/hybrid>-http-client🟢🟢
fastapi服务(mineru-api)pipeline🟢🟢
<vlm/hybrid>-auto-engine🟢🟢
<vlm/hybrid>-http-client🟢🟢
gradio界面(mineru-gradio)pipeline🟢🟢
<vlm/hybrid>-auto-engine🟢🟢
<vlm/hybrid>-http-client🟢🟢
openai-server服务(mineru-openai-server)🟢🟢
数据并行 (--data-parallel-size/--dp)🟡🔴
+ +注: +🟢: 支持,运行较稳定,精度与Nvidia GPU基本一致 +🟡: 支持但较不稳定,在某些场景下可能出现异常,或精度存在一定差异 +🔴: 不支持,无法运行,或精度存在较大差异 + +>[!TIP] +>PPU加速卡指定可用加速卡的方式与NVIDIA GPU类似,请参考[使用指定GPU设备](https://opendatalab.github.io/MinerU/zh/usage/advanced_cli_parameters/#cuda_visible_devices)章节说明。 \ No newline at end of file diff --git a/docs/zh/usage/acceleration_cards/Tecorigin.md b/docs/zh/usage/acceleration_cards/Tecorigin.md new file mode 100644 index 0000000..e969424 --- /dev/null +++ b/docs/zh/usage/acceleration_cards/Tecorigin.md @@ -0,0 +1,73 @@ +# TECO适配 + +## 快速开始 +使用本工具执行推理的主要流程如下: +1. 基础环境安装:介绍推理前需要完成的基础环境检查和安装。 +2. 构建Docker环境:介绍如何使用Dockerfile创建模型推理时所需的Docker环境。 +3. 启动推理:介绍如何启动推理。 + +### 1 基础环境安装 +请参考[Teco用户手册的安装准备章节](http://docs.tecorigin.com/release/torch_2.4/v2.2.0/#fc980a30f1125aa88bad4246ff0cedcc),完成推理前的基础环境检查和安装。 + +### 2 构建docker +#### 2.1 执行以下命令,下载Docker镜像至本地(Docker镜像包:pytorch-3.0.0-torch_sdaa3.0.0.tar) + + wget 镜像下载链接(链接获取请联系太初内部人员) + +#### 2.2 校验Docker镜像包,执行以下命令,确认生成的MD5码与官方MD5码b2a7f60508c0d199a99b8b6b35da3954一致: + + md5sum pytorch-3.0.0-torch_sdaa3.0.0.tar + +#### 2.3 执行以下命令,导入Docker镜像 + + docker load < pytorch-3.0.0-torch_sdaa3.0.0.tar + +#### 2.4 执行以下命令,构建名为MinerU的Docker容器 + + docker run -itd --name="MinerU" --net=host --device=/dev/tcaicard0 --device=/dev/tcaicard1 --device=/dev/tcaicard2 --device=/dev/tcaicard3 --cap-add SYS_PTRACE --cap-add SYS_ADMIN --shm-size 64g jfrog.tecorigin.net/tecotp-docker/release/ubuntu22.04/x86_64/pytorch:3.0.0-torch_sdaa3.0.0 /bin/bash + +#### 2.5 执行以下命令,进入名称为MinerU的Docker容器。 + + docker exec -it MinerU bash + + +### 3 执行以下命令安装MinerU +- 安装前的准备 + ``` + cd <MinerU_path> + pip install --upgrade pip + pip install uv + ``` +- 由于镜像中安装了torch,并且不需要安装nvidia-nccl-cu12、nvidia-cudnn-cu12等包,因此需要注释掉一部分安装依赖。 +- 请注释掉<MinerU_path>/pyproject.toml文件中所有的"doclayout_yolo==0.0.4"依赖,并且将torch开头的包也注释掉。 +- 执行以下命令安装MinerU + ``` + uv pip install -e .[core] + ``` +- 下载安装doclayout_yolo==0.0.4 + ``` + pip install doclayout_yolo==0.0.4 --no-deps + ``` +- 
下载安装其他包(doclayout_yolo==0.0.4的依赖) + ``` + pip 
install albumentations py-cpuinfo seaborn thop numpy==1.24.4 + ``` +- 由于部分张量内部内存分布不连续,需要修改如下两个文件 + <ultralytics_path>/ultralytics/utils/tal.py(330行左右,将view --> reshape) + <doclayout_yolo_path>/doclayout_yolo/utils/tal.py(375行左右,将view --> reshape) +### 4 执行推理 +- 开启sdaa环境 + ``` + export TORCH_SDAA_AUTOLOAD=cuda_migrate + ``` +- 首次运行推理命令前请添加以下环境变量以下载模型权重 + ``` + export HF_ENDPOINT=https://hf-mirror.com + ``` +- 运行以下命令执行推理 + ``` + mineru -p 'input_path' -o 'output_path' --lang 'model_name' + ``` +其中model_name可从'ch', 'ch_server', 'ch_lite', 'en', 'korean', 'japan', 'chinese_cht', 'ta', 'te', 'ka', 'latin', 'arabic', 'east_slavic', 'cyrillic', 'devanagari'选择 +### 5 适配用到的软件栈版本列表 +使用v3.0.0软件栈版本适配,获取方式联系太初内部人员 \ No newline at end of file diff --git a/docs/zh/usage/acceleration_cards/VastAI.md b/docs/zh/usage/acceleration_cards/VastAI.md new file mode 100644 index 0000000..8493651 --- /dev/null +++ b/docs/zh/usage/acceleration_cards/VastAI.md @@ -0,0 +1,223 @@ +## 1. 瀚博半导体 + +![vastaitech](https://github.com/Vastai/VastModelZOO/blob/main/images/index/logo.png?raw=true) + +- 官方网址:https://www.vastaitech.com +- 模型中心:https://github.com/Vastai/VastModelZOO + + +## 2. 测试平台 + +- 以下为本指南测试使用的平台信息,供参考 + ``` + os: Ubuntu-22.04.3-LTS-x86_64 + cpu: Hygon C86-4G + gpu: VA16 / VA1L / VA10L + torch: 2.8.0+cpu + torch-vacc: 1.3.3.777 + vllm: 0.11.1.dev0+gb8b302cde.d20251030.cpu + vllm-vacc: 0.11.0.777 + driver: 00.25.12.30 d3_3_v2_9_a3_1 a76bf37 20251230 + docker: 28.1.1 + ``` + +## 3. 
环境准备 + +- 获取vllm_vacc基础镜像 + ```bash + sudo docker pull harbor.vastaitech.com/ai_deliver/vllm_vacc:VVI-25.12.SP2 + ``` + +- 启动容器 + ```bash + sudo docker run -it \ + --privileged=true \ + --shm-size=256g \ + --name vllm_service \ + --ipc=host \ + --network=host \ + harbor.vastaitech.com/ai_deliver/vllm_vacc:VVI-25.12.SP2 bash + ``` + +- 安装MinerU + + - 参考官方文档安装:[README_zh-CN.md#安装-mineru](https://github.com/opendatalab/MinerU/blob/master/README_zh-CN.md#安装-mineru) + + ```bash + # 进入容器 + # sudo docker exec -it vllm_service bash + + # 可选pypi源 + # https://mirrors.163.com/pypi/simple/ + # https://mirrors.aliyun.com/pypi/simple/ + # https://pypi.mirrors.ustc.edu.cn/simple/ + # https://pypi.tuna.tsinghua.edu.cn/simple/ + # https://mirror.baidu.com/pypi/simple + + # 通过源码安装MinerU + git clone https://github.com/opendatalab/MinerU.git && cd MinerU + git checkout 8c4b3ef3a20b11ddac9903f25124d24ea82639b5 + pip install -e .[core] -i https://mirrors.aliyun.com/pypi/simple + + # 或使用pip安装MinerU + pip install -U "mineru[core]==2.7.0" -i https://mirrors.aliyun.com/pypi/simple + ``` + +> [!NOTE] +> - `vllm_vacc`基础镜像内已包含`torch/vllm`等相关依赖 +> - 截至`2025/12/31`,`VastAI`已支持`MinerU`至最新版本`2.7.0`,`master分支8c4b3ef3` +> - 和`NVIDIA`硬件下`CUDA_VISIBLE_DEVICES`类似;在`VastAI`硬件中可以使用`VACC_VISIBLE_DEVICES`指定`可见计算卡ID`,如`-e VACC_VISIBLE_DEVICES=0,1,2,3` +> - 需指定适当的`--shm-size`共享内存 + +## 4. 
MinerU功能 + +> [!NOTE] +> - `VastAI`加速卡仅支持使用`vlm-auto-engine`和`vlm-http-client`形式进行`VLM`模型推理加速 + +- 进入容器 + ```bash + sudo docker exec -it vllm_service bash + ``` + +- 使用MinerU + + - 模型准备,参考官方介绍:[model_source.md](https://github.com/opendatalab/MinerU/blob/master/docs/zh/usage/model_source.md) + + - 方式一:`vlm-auto-engine` + + ```bash + export MINERU_MODEL_SOURCE=modelscope + + # step1, 以`vlm-auto-engine`方式启动MinerU解析任务 + mineru -p image.png \ + -o ./output \ + -b vlm-auto-engine \ + --http-timeout 1200 \ + --tensor-parallel-size 2 \ + --enforce_eager \ + --trust-remote-code \ + --max-model-len 16384 + ``` + + - 方式二:`vlm-http-client` + + ```bash + # step1, 启动vLLM API server + vllm serve /root/.cache/modelscope/hub/models/OpenDataLab/MinerU2.5-2509-1.2B \ + --tensor-parallel-size 2 \ + --trust-remote-code \ + --enforce_eager \ + --port 8090 \ + --max-model-len 16384 \ + --served-model-name MinerU2.5-2509-1.2B + + # step2,以`vlm-http-client`方式启动MinerU解析任务 + mineru -p demo/pdfs/demo1.pdf \ + -o ./output \ + -b vlm-http-client \ + -u http://127.0.0.1:8090 \ + --http-timeout 1200 + ``` + + +> [!NOTE] +> - 注意在执行任意与`vllm`相关命令需追加`--enforce_eager`参数 + + +## 5. 注意事项 + +`VastAI`加速卡对`MinerU`的支持情况如下表所示: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
使用场景支持情况
命令行工具(mineru)pipeline🔴
hybrid-http-client🔴
hybrid-auto-engine🔴
vlm-auto-engine🟢
vlm-http-client🟢
fastapi服务(mineru-api)pipeline🔴
hybrid-http-client🔴
hybrid-auto-engine🔴
vlm-auto-engine🟢
vlm-http-client🟢
gradio界面(mineru-gradio)pipeline🔴
hybrid-http-client🔴
hybrid-auto-engine🔴
vlm-auto-engine🟢
vlm-http-client🟢
openai-server服务(mineru-openai-server)🟢
Tensor并行 (--tensor-parallel-size)🟢
数据并行 (--data-parallel-size)🔴
+ + +> [!NOTE] +> - 🟢: 支持,运行较稳定,精度与NVIDIA GPU基本一致 +> - 🟡: 支持但较不稳定,在某些场景下可能出现异常,或精度存在一定差异 +> - 🔴: 不支持,无法运行,或精度存在较大差异 +> - `vlm-auto-engine`:VastAI仅支持vLLM后端 \ No newline at end of file diff --git a/docs/zh/usage/advanced_cli_parameters.md b/docs/zh/usage/advanced_cli_parameters.md new file mode 100644 index 0000000..5c94ee1 --- /dev/null +++ b/docs/zh/usage/advanced_cli_parameters.md @@ -0,0 +1,54 @@ +# 命令行参数进阶 + +## 推理引擎参数透传 + +### vllm 加速参数优化 +> [!TIP] +> 如果您已经可以正常使用vllm对vlm模型进行加速推理,但仍然希望进一步提升推理速度,可以尝试以下参数: +> +> - 如果您有多张显卡,可以使用vllm的多卡并行模式来增加吞吐量:`--data-parallel-size 2` + +### 参数传递说明 +> [!TIP] +> - 所有vllm/lmdeploy官方支持的参数都可以通过命令行参数传递给 MinerU,包括以下命令:`mineru`、`mineru-openai-server`、`mineru-gradio`、`mineru-api` +> - 如果您想了解更多有关`vllm`的参数使用方法,请参考 [vllm官方文档](https://docs.vllm.ai/en/latest/cli/serve.html) +> - 如果您想了解更多有关`lmdeploy`的参数使用方法,请参考 [lmdeploy官方文档](https://lmdeploy.readthedocs.io/en/latest/llm/api_server.html) + +## GPU 设备选择与配置 + +### CUDA_VISIBLE_DEVICES 基本用法 +> [!TIP] +> - 任何情况下,您都可以通过在命令行的开头添加`CUDA_VISIBLE_DEVICES` 环境变量来指定可见的 GPU 设备: +> ```bash +> CUDA_VISIBLE_DEVICES=1 mineru -p <input_path> -o <output_path> +> ``` +> - 这种指定方式对所有的命令行调用都有效,包括 `mineru`、`mineru-openai-server`、`mineru-gradio` 和 `mineru-api`,且对`pipeline`、`vlm`后端均适用。 + +### 常见设备配置示例 +> [!TIP] +> 以下是一些常见的 `CUDA_VISIBLE_DEVICES` 设置示例: +> ```bash +> CUDA_VISIBLE_DEVICES=1 # Only device 1 will be seen +> CUDA_VISIBLE_DEVICES=0,1 # Devices 0 and 1 will be visible +> CUDA_VISIBLE_DEVICES="0,1" # Same as above, quotation marks are optional +> CUDA_VISIBLE_DEVICES=0,2,3 # Devices 0, 2, 3 will be visible; device 1 is masked +> CUDA_VISIBLE_DEVICES="" # No GPU will be visible +> ``` + +## 实际应用场景 + +> [!TIP] +> 以下是一些可能的使用场景: +> +> - 如果您有多张显卡,需要指定卡0和卡1,并使用多卡并行来启动`openai-server`,可以使用以下命令: +> ```bash +> CUDA_VISIBLE_DEVICES=0,1 mineru-openai-server --engine vllm --port 30000 --data-parallel-size 2 +> ``` +> +> - 如果您有多张显卡,需要在卡0和卡1上启动两个`fastapi`服务,并分别监听不同的端口,可以使用以下命令: +> ```bash +> # 在终端1中 +> 
CUDA_VISIBLE_DEVICES=0 mineru-api --host 
127.0.0.1 --port 8000 +> # 在终端2中 +> CUDA_VISIBLE_DEVICES=1 mineru-api --host 127.0.0.1 --port 8001 +> ``` diff --git a/docs/zh/usage/cli_tools.md b/docs/zh/usage/cli_tools.md new file mode 100644 index 0000000..885f850 --- /dev/null +++ b/docs/zh/usage/cli_tools.md @@ -0,0 +1,122 @@ +# 命令行工具使用说明 + +## 查看帮助信息 +要查看 MinerU 命令行工具的帮助信息,可以使用 `--help` 参数。以下是各个命令行工具的帮助信息示例: +```bash +mineru --help +Usage: mineru [OPTIONS] + +Options: + -v, --version 显示版本并退出 + -p, --path PATH 输入文件路径或目录(必填) + -o, --output PATH 输出目录(必填) + -m, --method [auto|txt|ocr] 解析方法:auto(默认)、txt、ocr(仅用于 pipeline 与 hybrid* 后端) + -b, --backend [pipeline|hybrid-auto-engine|hybrid-http-client|vlm-auto-engine|vlm-http-client] + 解析后端(默认为 hybrid-auto-engine) + -l, --lang [ch|ch_server|ch_lite|en|korean|japan|chinese_cht|ta|te|ka|th|el|latin|arabic|east_slavic|cyrillic|devanagari] + 指定文档语言(可提升 OCR 准确率,仅用于 pipeline 与 hybrid* 后端) + -u, --url TEXT 当使用 http-client 时,需指定服务地址 + -s, --start INTEGER 开始解析的页码(从 0 开始) + -e, --end INTEGER 结束解析的页码(从 0 开始) + -f, --formula BOOLEAN 是否启用公式解析(默认开启) + -t, --table BOOLEAN 是否启用表格解析(默认开启) + -d, --device TEXT 推理设备(如 cpu/cuda/cuda:0/npu/mps,仅 pipeline 后端) + --vram INTEGER 单进程最大 GPU 显存占用(GB)(仅 pipeline 后端) + --source [huggingface|modelscope|local] + 模型来源,默认 huggingface + --help 显示帮助信息 +``` +```bash +mineru-api --help +Usage: mineru-api [OPTIONS] + +Options: + --host TEXT 服务器主机地址(默认:127.0.0.1) + --port INTEGER 服务器端口(默认:8000) + --reload 启用自动重载(开发模式) + --help 显示此帮助信息并退出 +``` +```bash +mineru-gradio --help +Usage: mineru-gradio [OPTIONS] + +Options: + --enable-example BOOLEAN 启用示例文件输入(需要将示例文件放置在当前 + 执行命令目录下的 `example` 文件夹中) + --enable-http-client BOOLEAN 在后端选项中启用 HTTP 客户端选项 + --enable-api BOOLEAN 启用 Gradio API 以提供应用程序服务 + --max-convert-pages INTEGER 设置从 PDF 转换为 Markdown 的最大页数 + --server-name TEXT 设置 Gradio 应用程序的服务器主机名 + --server-port INTEGER 设置 Gradio 应用程序的服务器端口 + --latex-delimiters-type [a|b|all] + 设置在 Markdown 渲染中使用的 LaTeX 分隔符类型 + ('a' 表示 '$' 类型,'b' 表示 '()[]' 类型, + 'all' 表示两种类型都使用) + 
--help 显示此帮助信息并退出 +``` + +## 环境变量说明 + +MinerU命令行工具的某些参数存在相同功能的环境变量配置,通常环境变量配置的优先级高于命令行参数,且在所有命令行工具中都生效。 +以下是常用的环境变量及其说明: + +- `MINERU_DEVICE_MODE`: + * 用于指定推理设备 + * 支持`cpu/cuda/cuda:0/npu/mps`等设备类型 + * 仅对`pipeline`后端生效。 + +- `MINERU_VIRTUAL_VRAM_SIZE`: + * 用于指定单进程最大 GPU 显存占用(GB) + * 仅对`pipeline`后端生效。 + +- `MINERU_MODEL_SOURCE`: + * 用于指定模型来源 + * 支持`huggingface/modelscope/local` + * 默认为`huggingface`可通过环境变量切换为`modelscope`或使用本地模型。 + +- `MINERU_TOOLS_CONFIG_JSON`: + * 用于指定配置文件路径 + * 默认为用户目录下的`mineru.json`,可通过环境变量指定其他配置文件路径。 + +- `MINERU_FORMULA_ENABLE`: + * 用于启用公式解析 + * 默认为`true`,可通过环境变量设置为`false`来禁用公式解析。 + +- `MINERU_FORMULA_CH_SUPPORT`: + * 用于启用中文公式解析优化(实验性功能) + * 默认为`false`,可通过环境变量设置为`true`来启用中文公式解析优化。 + * 仅对`pipeline`后端生效。 + +- `MINERU_TABLE_ENABLE`: + * 用于启用表格解析 + * 默认为`true`,可通过环境变量设置为`false`来禁用表格解析。 + +- `MINERU_TABLE_MERGE_ENABLE`: + * 用于启用表格合并功能 + * 默认为`true`,可通过环境变量设置为`false`来禁用表格合并功能。 + +- `MINERU_PDF_RENDER_TIMEOUT`: + * 用于设置将PDF渲染为图片的超时时间(秒) + * 默认为`300`秒,可通过环境变量设置为其他值以调整渲染图片的超时时间。 + +- `MINERU_INTRA_OP_NUM_THREADS`: + * 用于设置onnx模型的intra_op线程数,影响单个算子的计算速度 + * 默认为`-1`(自动选择),可通过环境变量设置为其他值以调整线程数。 + +- `MINERU_INTER_OP_NUM_THREADS`: + * 用于设置onnx模型的inter_op线程数,影响多个算子的并行执行 + * 默认为`-1`(自动选择),可通过环境变量设置为其他值以调整线程数。 + +- `MINERU_HYBRID_BATCH_RATIO`: + * 用于设置 hybrid-* 后端中 小模型处理的batch倍率 + * 在hybrid-http-client中较为常用,可以通过控制小模型的batch倍率来调整单个客户端的显存占用量 + * 单个client端显存大小 | MINERU_HYBRID_BATCH_RATIO + ------------------|------------------------ + <= 6 GB | 8 + <= 4.5 GB | 4 + <= 3 GB | 2 + <= 2.5 GB | 1 + +- `MINERU_HYBRID_FORCE_PIPELINE_ENABLE`: + * 用于强制将 hybrid-* 后端中的 文本提取部分使用 小模型 进行处理 + * 默认为`false`,可通过环境变量设置为`true`来启用该功能,从而在某些极端情况下减少幻觉的发生。 \ No newline at end of file diff --git a/docs/zh/usage/index.md b/docs/zh/usage/index.md new file mode 100644 index 0000000..c4670bc --- /dev/null +++ b/docs/zh/usage/index.md @@ -0,0 +1,37 @@ +# 使用指南 + +本章节提供了项目的完整使用说明。我们将通过以下几个部分,帮助您从基础到进阶逐步掌握项目的使用方法: + +## 目录 +- 本地部署 + * [基础使用](./quick_usage.md) - 快速上手和基本使用 + * 
[模型源配置](./model_source.md) - 模型源的详细配置说明 + * [命令行工具](./cli_tools.md) - 命令行工具的详细参数说明 + * [命令行进阶参数](./advanced_cli_parameters.md) - 一些适配命令行工具的进阶参数说明 +- 其他加速卡适配(🚀官方支持/❤️社区贡献) + * [昇腾 Ascend](acceleration_cards/Ascend.md) 🚀 + * [平头哥 T-Head](acceleration_cards/THead.md) 🚀 + * [沐曦 METAX](acceleration_cards/METAX.md) 🚀 + * [海光 Hygon](acceleration_cards/Hygon.md) 🚀 + * [AMD](acceleration_cards/AMD.md) [#3662](https://github.com/opendatalab/MinerU/discussions/3662) ❤️ + * [太初元碁 Tecorigin](acceleration_cards/Tecorigin.md) [#3767](https://github.com/opendatalab/MinerU/pull/3767) ❤️ + * [寒武纪 Cambricon](acceleration_cards/Cambricon.md) [#4004](https://github.com/opendatalab/MinerU/discussions/4004) ❤️ + * [瀚博 VastAI](acceleration_cards/VastAI.md) [#4237](https://github.com/opendatalab/MinerU/discussions/4237) ❤️ +- 插件与生态 + * [Cherry Studio](plugin/Cherry_Studio.md) + * [Sider](plugin/Sider.md) + * [Dify](plugin/Dify.md) + * [n8n](plugin/n8n.md) + * [Coze](plugin/Coze.md) + * [FastGPT](plugin/FastGPT.md) + * [ModelWhale](plugin/ModelWhale.md) + * [DingTalk](plugin/DingTalk.md) + * [DataFlow](plugin/DataFlow.md) + * [BISHENG](plugin/BISHENG.md) + * [RagFlow](plugin/RagFlow.md) + +## 开始使用 + +建议按照上述顺序阅读文档,这样可以帮助您更好地理解和使用项目功能。 + +如果您在使用过程中遇到问题,请查看 [FAQ](../faq/index.md) \ No newline at end of file diff --git a/docs/zh/usage/model_source.md b/docs/zh/usage/model_source.md new file mode 100644 index 0000000..5570ada --- /dev/null +++ b/docs/zh/usage/model_source.md @@ -0,0 +1,56 @@ +# 模型源说明 + +MinerU使用 `HuggingFace` 和 `ModelScope` 作为模型仓库,用户可以根据需要切换模型源或使用本地模型。 + +- `HuggingFace` 是默认的模型源,在全球范围内提供了优异的加载速度和极高的稳定性。 +- `ModelScope` 是中国大陆地区用户的最佳选择,提供了无缝兼容的SDK模块,适用于无法访问`HuggingFace`的用户。 + +## 模型源的切换方法 + +### 通过命令行参数切换 +目前仅`mineru`命令行工具支持通过命令行参数切换模型源,其他命令行工具如`mineru-api`、`mineru-gradio`等暂不支持。 +```bash +mineru -p <input_path> -o <output_path> --source modelscope +``` + +### 通过环境变量切换 +在任何情况下可以通过设置环境变量来切换模型源,这适用于所有命令行工具和API调用。 +```bash +export MINERU_MODEL_SOURCE=modelscope +``` +或 +```python 
+import os 
+os.environ["MINERU_MODEL_SOURCE"] = "modelscope" +``` +>[!TIP] +> 通过环境变量设置的模型源会在当前终端会话中生效,直到终端关闭或环境变量被修改。且优先级高于命令行参数,如同时设置了命令行参数和环境变量,命令行参数将被忽略。 + + +## 使用本地模型 + +### 1. 下载模型到本地 +```bash +mineru-models-download --help +``` +或使用交互式命令行工具选择模型下载: +```bash +mineru-models-download +``` +> [!NOTE] +>- 下载完成后,模型路径会在当前终端窗口输出,并自动写入用户目录下的 `mineru.json`。 +>- 您也可以通过将[配置模板文件](https://github.com/opendatalab/MinerU/blob/master/mineru.template.json)复制到用户目录下并重命名为 `mineru.json` 来创建配置文件。 +>- 模型下载到本地后,您可以自由移动模型文件夹到其他位置,同时需要在 `mineru.json` 中更新模型路径。 +>- 如您将模型文件夹部署到其他服务器上,请确保将 `mineru.json`文件一同移动到新设备的用户目录中并正确配置模型路径。 +>- 如您需要更新模型文件,可以再次运行 `mineru-models-download` 命令,模型更新暂不支持自定义路径,如您没有移动本地模型文件夹,模型文件会增量更新;如您移动了模型文件夹,模型文件会重新下载到默认位置并更新`mineru.json`。 + +### 2. 使用本地模型进行解析 + +```bash +mineru -p <input_path> -o <output_path> --source local +``` +或通过环境变量启用: +```bash +export MINERU_MODEL_SOURCE=local +mineru -p <input_path> -o <output_path> +``` \ No newline at end of file diff --git a/docs/zh/usage/plugin/BISHENG.md b/docs/zh/usage/plugin/BISHENG.md new file mode 100644 index 0000000..dc89386 --- /dev/null +++ b/docs/zh/usage/plugin/BISHENG.md @@ -0,0 +1,11 @@ +# BISHENG 简介 + +BISHENG毕昇 是一款开源 LLM应用开发平台,主攻企业场景,已有大量行业头部组织及世界500强企业在使用。“毕昇”是活字印刷术的发明人,活字印刷术为人类知识的传递起到了巨大的推动作用。BISHENG毕昇团队希望“BISHENG毕昇”同样能够为智能应用的广泛落地提供有力支撑。 + +![](../../../assets/Images/BISHENG_01.png) + + +- 官网地址:https://bisheng.dataelem.com/ +- MinerU 在BISHENG毕昇 项目中的插件项目:https://github.com/dataelement/bisheng/pulls + +特别鸣谢 [@pzc163](https://github.com/pzc163) \ No newline at end of file diff --git a/docs/zh/usage/plugin/Cherry_Studio.md b/docs/zh/usage/plugin/Cherry_Studio.md new file mode 100644 index 0000000..5713a1c --- /dev/null +++ b/docs/zh/usage/plugin/Cherry_Studio.md @@ -0,0 +1,238 @@ +# Cherry Studio 简介 + +Cherry Studio 是一款功能强大的多模型 AI 客户端软件,支持 Windows、macOS 和 Linux 等多平台运行,集成了 OpenAI、DeepSeek、Gemini、Anthropic 等主流 AI 云服务,同时支持本地模型运行,用户可以灵活切换不同的AI模型。 + +目前,MinerU 强大的文档解析能力已深度集成到 Cherry Studio 的知识库与对话交互中,为用户带来更便捷的文档处理与信息获取体验。 + 
+![img](../../../assets/images/Cherry_Studio_1.png) + +- 
Cherry Studio 官网地址:https://www.cherry-ai.com/ + + +# MinerU 在 Cherry Studio 中的使用方法 + +## 进入 Cherry Studio 设置 + +a. 打开 Cherry Studio 应用程序 + +b. 点击左下角的"设置"按钮,进入设置页面 + +c. 在左侧菜单中,选择"MCP 服务器" + +在右侧的 MCP 服务器配置界面中,您可以看到已有的 MCP 服务器列表。点击右上角的"添加服务器"按钮来创建新的 MCP 服务,或者点击现有服务来编辑配置。 + +## 添加 MinerU-MCP 配置 + +点击"添加服务器"后,您将看到一个配置表单。请按以下步骤填写: + +**a. 名称**:输入"MinerU-MCP"或您喜欢的其他名称 + +**b. 描述**:可选,如"文档转换为Markdown工具" + +**c. 类型**:选择"标准输入/输出(stdio)" + +**d. 命令**:输入 uvx + +**e. 参数**:输入 mineru-mcp + +**f. 环境变量**:添加以下环境变量 + +```Plain +MINERU_API_BASE=https://mineru.net +MINERU_API_KEY=您的API密钥 +OUTPUT_DIR=./downloads +USE_LOCAL_API=false +LOCAL_MINERU_API_BASE=http://localhost:8888 +``` + +使用 *`uvx`* 命令可以自动处理 mineru-mcp 的安装和运行,**无需预先手动安装 mineru-mcp 包**。这是最简单的配置方式。 + +## 保存配置 + +确认无误后,点击界面右上角的"保存"按钮完成配置。保存后,MCP 服务器列表中会显示您刚刚添加的 MinerU-MCP 服务。 + +![img](../../../assets/images/Cherry_Studio_2.png) + +![img](../../../assets/images/Cherry_Studio_3.png) + +## 使用 Cherry Studio 中的 MinerU MCP + +一旦配置完成,您可以在 Cherry Studio 中的对话中使用 MinerU MCP 工具。在 Cherry Studio 中,您可以使用如下提示让模型调用 MinerU MCP 工具。模型会自动识别任务并调用相应的工具。 + +## 示例 1: 使用 URL 转换文档 + +**用户输入:** + +```Plain +请使用 MinerU MCP 将以下 URL 的 PDF 文档转换为 Markdown 格式:https://example.com/sample.pdf +``` + +**模型将执行的步骤:** + +模型识别这是文档转换任务,并调用 *`parse_documents`* 工具,参数为: + +```Plain +{"file_sources": "https://example.com/sample.pdf"} +``` + +工具处理完成后,模型会告知您转换结果。 + +![img](../../../assets/images/Cherry_Studio_4.png) + +## 示例 2: 转换本地文档 + +**用户输入:** + +```Plain +请使用 MinerU-MCP 将本地的 D://sample.pdf 文件转换为 Markdown 格式 +``` + +**模型将执行的步骤:** + +模型识别这是本地文档转换任务,调用 `parse_documents` 工具,参数为: + +```Plain +{"file_sources": "D://sample.pdf"} +``` + +![img](../../../assets/images/Cherry_Studio_5.png) + +## 示例 3: 启用 OCR 处理扫描文档 + +**用户输入:** + +```Plain +请使用 MinerU-MCP 将以下 URL 的扫描版 PDF 文档转换为 Markdown 格式,并启用 OCR 功能: +https://example.com/scanned.pdf +``` + +**模型将执行的步骤:** + +模型识别这是需要 OCR 处理的文档转换任务,调用 `parse_documents` 工具,并启用 OCR 参数: + +```Plain +{"file_sources": 
"https://example.com/scanned.pdf","enable_ocr": true} +``` + +![img](../../../assets/images/Cherry_Studio_6.png) + +## 示例 4: 完整对话流程 + +以下是一个完整的对话流程示例: + +**用户:** + +```Plain +我有一份学术论文的 PDF,网址是 https://arxiv.org/pdf/2303.08774.pdf,能帮我转换成 Markdown 格式吗? +``` + +**模型:** + +```Plain +我可以帮您将这份学术论文转换为 Markdown 格式。我将使用 MinerU-MCP 工具来处理这个任务。 +[模型调用 parse_documents 工具,处理过程中...] +论文已成功转换为 Markdown 格式。转换结果如下: +# The Capacity of Diffusion Models to Memorize and Generate Training Data +## Abstract +Recent diffusion models can generate high-quality images that are nearly indistinguishable from real ones... +[显示论文内容...] +``` + +![img](../../../assets/images/Cherry_Studio_7.png) + +## 工具参数详解 + +在使用过程中,模型会根据您的指令自动选择合适的工具和参数。以下是主要工具的参数说明: + +### ● *parse_documents* 工具参数 + +![img](../../../assets/images/Cherry_Studio_8.png) + +### ● *get_ocr_languages* 工具参数 + +无需参数,用于获取OCR支持的语言列表。 + +## 高级用法 + +### 指定语言和页码范围 + +**用户输入:** + +```Plain +请使用 MinerU MCP 将以下 URL 的文档转换为 Markdown 格式,只处理第 5-10 页,并指定语言为中文:https://example.com/document.pdf +``` + +模型会使用 *`parse_documents`* 工具,并设置 *`language`* 参数为 "ch",*`page_ranges`* 参数为 "5-10"。 + +### 批量处理多个文档 + +**用户输入:** + +```Plain +请使用 MinerU-MCP 将以下多个 URL 的文档转换为 Markdown 格式: +https://example.com/doc1.pdf +https://example.com/doc2.pdf +https://example.com/doc3.pdf +``` + +模型会调用 *`parse_documents`* 工具,并将多个 URL 以逗号分隔传入 *`file_sources`* 参数。 + +## 注意事项 + +● 当设置 *`USE_LOCAL_API=true`* 时,使用本地配置的API进行解析 + +● 当设置 *`USE_LOCAL_API=false`* 时,会使用 MinerU 官网的API进行解析 + +● 处理大型文档可能需要较长时间,请耐心等待 + +● 如果遇到超时问题,请考虑分批处理文档或使用本地API模式 + +## 常见问题与解决方案 + +### 无法启动 MCP 服务 + +**问题**:运行 *`uv run -m mineru.cli`*` `时报错。 + +**解决方案**: + +● 确保已激活虚拟环境 + +● 检查是否已安装所有依赖 + +● 尝试使用 *`python -m mineru.cli`*` `命令替代 + +### 文件转换失败 + +**问题**:文件上传成功但转换失败。 + +**解决方案**: + +● 检查文件格式是否受支持 + +● 确认API密钥是否正确 + +● 查看MCP服务日志获取详细错误信息 + +### 文件路径问题 + +**问题**:使用 `parse_documents` 工具处理本地文件时报找不到文件错误。 + +**解决方案**:请确保使用绝对路径,或者相对于服务器运行目录的正确相对路径。 + +### MCP 服务调用超时问题 + +**问题**:调用 *`parse_documents`* 工具时出现 *`Error calling 
tool 'parse_documents': MCP error -32001: Request timed out`* 错误。 + +**解决方案**:这个问题常见于处理大型文档或网络不稳定的情况。在某些 MCP 客户端(如 Cursor)中,超时后可能导致无法再次调用 MCP 服务,需要重启客户端。最新版本的 Cursor 中可能会显示正在调用 MCP,但实际上没有真正调用成功。建议: + +**● 等待官方修复**:这是Cursor客户端的已知问题,建议等待Cursor官方修复 + +**● 处理小文件**:尽量只处理少量小文件,避免处理大型文档导致超时 + +**● 分批处理**:将多个文件分成多次请求处理,每次只处理一两个文件 + +● 增加超时时间设置(如果客户端支持) + +● 对于超时后无法再次调用的问题,需要重启 MCP 客户端 + +● 如果反复出现超时,请检查网络连接或考虑使用本地 API 模式 \ No newline at end of file diff --git a/docs/zh/usage/plugin/Coze.md b/docs/zh/usage/plugin/Coze.md new file mode 100644 index 0000000..5ddfcae --- /dev/null +++ b/docs/zh/usage/plugin/Coze.md @@ -0,0 +1,92 @@ +# Coze 简介 + +Coze(中文版名称:扣子) 是字节跳动推出的零代码 AI 应用开发平台。无论用户是否有编程经验,都可以通过该平台快速创建各种类型的聊天机器人、智能体、AI 应用和插件,并将其部署在社交平台和即时聊天应用程序中。 + +目前,MinerU 插件已在 Coze 插件商店上线,通过其强大的文档解析能力,为用户搭建智能体与工作流提供文档解析能力,加快用户 AI 应用的开发。 + +![img](../../../assets/images/coze_0.png) + +- 扣子官网地址:https://www.coze.cn/ +- MinerU 扣子插件下载地址:https://www.coze.cn/store/plugin/7527957359730360354 + +# MinerU 在 Coze 中的使用方法 + +## **Coze:集成应用** + +- 进入 https://www.coze.cn/home coze 开发平台 + +## 智能体 + +### 工作空间 -> 项目开发 -> 创建 -> 创建智能体 -> 创建 -> 输入项目名 + +![img](../../../assets/images/Coze_1.png) + +![img](../../../assets/images/Coze_2.png) + +### 插件配置 -> 添加 `插件` -> 搜索 `MinerU` + +![img](../../../assets/images/Coze_3.png) + +### 添加 `parse_file` 工具(在线版) + +![img](../../../assets/images/Coze_4.png) + +### 选择 `MinerU` 插件 -> 编辑参数 -> 填写 api key + +![img](../../../assets/images/Coze_5.png) + +![img](../../../assets/images/Coze_6.png) + +> 记得关闭 url 和 token 显示 + +### 调试 `智能体` + +![img](../../../assets/images/Coze_7.png) + +## 工作流 + +> 用工作流的方式使用 minerU + +### 工作流 -> 创建工作流 + +![img](../../../assets/images/Coze_8.png) + +![img](../../../assets/images/Coze_9.png) + +### 工作流插件配置 -> 添加 `插件` -> 搜索 `MinerU` -> 添加 + +![img](../../../assets/images/Coze_10.png) + +![img](../../../assets/images/Coze_11.png) + +### 选择`MinerU` 插件 -> 编辑参数 -> 填写 api key + +![img](../../../assets/images/Coze_12.png) + +### 选择开始节点 -> 配置 `input` 
类型为文件类型 -> 连接到 `mineru` 节点 + +![img](../../../assets/images/Coze_13.png) + +![img](../../../assets/images/Coze_14.png) + +### 选择结束节点 -> 连接到 `mineru` 节点 -> 配置 `output` 输出为 `mineru` 节点的 `parse_file.text` + +![img](../../../assets/images/Coze_15.png) + +![img](../../../assets/images/Coze_16.png) + +### 上传文件 -> 试运行 + +![img](../../../assets/images/Coze_17.png) + +![img](../../../assets/images/Coze_18.png) + +### 发布 -> 添加到当前智能体 + +![img](../../../assets/images/Coze_19.png) + +![img](../../../assets/images/Coze_20.png) + +### 移除 `mineru` 插件 -> 调试 + +![img](../../../assets/images/Coze_21.png) \ No newline at end of file diff --git a/docs/zh/usage/plugin/DataFlow.md b/docs/zh/usage/plugin/DataFlow.md new file mode 100644 index 0000000..c0d5743 --- /dev/null +++ b/docs/zh/usage/plugin/DataFlow.md @@ -0,0 +1,11 @@ +# 元枢智汇 ADP 智能数据平台 简介 + +元枢智汇 ADP 智能数据平台基于自研 AI 数据库和 DataFlow数据准备框架打造,旨在帮助企业高效管理、检索、处理海量数据,并通过体系化、自动化数据治理降低模型/智能体训练的专业门槛,帮助企业结合业务场景发挥私有数据的价值,真正落地AI应用。 + +目前,MinerU 已深度集成于元枢智汇 ADP 智能数据平台的 DataFlow 模块中,其数据解析服务由文档语料提取引擎 MinerU 提供支持。 + +![](../../../assets/images/DataFLow_01.png) +![](../../../assets/images/DataFLow_02.png) + +- 官网地址:https://adp.originhub.tech/agent +- Miner fastGPT 插件下载地址:https://cloud.fastgpt.io/dashboard/systemPlugin?type=productivity \ No newline at end of file diff --git a/docs/zh/usage/plugin/Dify.md b/docs/zh/usage/plugin/Dify.md new file mode 100644 index 0000000..d8b7bee --- /dev/null +++ b/docs/zh/usage/plugin/Dify.md @@ -0,0 +1,171 @@ +# Dify 简介 + +**Dify** 是一个开源的大语言模型(LLM)应用开发平台,旨在简化和加速生成式 AI 应用的创建和部署。它结合了后端即服务(BaaS)和 LLMOps 的理念,为开发者提供了用户友好的界面和强大的工具,有效降低了 AI 应用开发的门槛。 + +目前 MinerU 与 Dify 联合研发的 MinerU 插件已在 Dify 市场上架,帮助用户搭建工作流,提供文档解析的工作。 + +![img](../../../assets/images/Dify_2.png) + +- Dify 官网地址:https://dify.ai/zh +- MinerU Dify 插件下载地址:https://marketplace.dify.ai/plugins/langgenius/mineru + +# MinerU 在 Dify 中的使用方法 + +## 一、**新版MinerU Dify插件亮点 (v0.4.0)** + +- **完美适配MinerU2**:全面兼容MinerU2的最新功能,释放顶尖的文档解析能力。 +- **超高灵活性**:同时支持官方在线API和本地化部署的API(并向下兼容 
1.x 版本)。 +- **赋能工作流**:让Dify的Agent拥有强大的文档“读写”能力,轻松处理复杂任务。 + + +## **二、实战演练:两个案例带你快速上手** + +空谈不如实战。下面我们通过两个典型场景,向你展示新版插件的强大之处。 + +### 准备 + +1. 在Dify插件页面安装MinerU插件(私有化部署的Dify同理) + + +2. 填写API URL等信息 + +![img](../../../assets/images/Dify_3.png) + +使用官方API时令牌(Token)必须提供👆,使用本地部署API时令牌可不填写👇 + +![img](../../../assets/images/Dify_4.png) + +### **案例一:解析单文件,搭建Chat PDF应用** + +想借助AI与你的文档对话吗?跟着下面几步,轻松实现 + +#### 第一步:创建空白应用,选择“Chatflow” + +输入应用名称与描述 + +![img](../../../assets/images/Dify_5.png) + +#### 第二步:创建的初始模板中,选择“开始”节点 + +字段类型选为单文件,填写变量名称(此处填为input_file),支持文档类型选为文档与图片 + +![img](../../../assets/images/Dify_6.png) + +#### 第三步:添加工具节点——MinerU插件来解析上一步开始节点上传的文件 + +![img](../../../assets/images/Dify_7.png) + +#### 第四步:设置MinerU的输入变量,选择上一步开始节点添加的 `input_file` + +![img](../../../assets/images/Dify_8.png) + +#### 第五步:配置LLM模型 + +选择“LLM”节点后,如果没有模型可用,需要单独在插件市场安装(这里使用 Deepseek作为示例) + +“上下文”选择MinerU的输出变量 `text`(MinerU解析文档后的markdown格式) + +![img](../../../assets/images/Dify_9.png) + +在“SYSTEM”区域根据实际需求填写提示词,可如图填写“在Parse File `text`中提取用户的问题答案” + +![img](../../../assets/images/Dify_10.png) + +#### 第六步:预览,上传文件并提问机器人关于文档的内容 + +至此一个简单的文档问答应用Chat PDF搭建完成,点击“预览”,查看效果如何👇 + +![img](../../../assets/images/Dify_11.png) + +结果如下: + +![img](../../../assets/images/Dify_12.png) + +#### **第七步:发布与测试** + +保存并发布你的应用。现在,上传一份PDF或图片,你就可以和它自由对话了! 
+ +![img](../../../assets/images/Dify_13.png) + +### **案例二:自动化批量处理文档,并上传至云端S3** + +需要处理大量文档并归档?MinerU 插件同样能胜任 + +#### 第一步:安装 botos3 插件 + +![img](../../../assets/images/Dify_14.png) + +#### 第二步:配置 S3 bucket + +![img](../../../assets/images/Dify_15.png) + +#### 第三步:创建工作流 + +选择字段类型为“文件列表”,填写变量名称(此处填为input_files),支持的文档类型选为文档与图片 + +![img](../../../assets/images/Dify_16.png) + +#### 第四步:添加“迭代” + +在“开始”节点后添加“迭代”,并配置迭代内的MinerU节点,设置迭代的输入为上一步开始节点的`upload_files`,输出节点暂时不填写,再整个迭代配置完成后选择MinerU节点Parse File的`full_zip_url` + +![img](../../../assets/images/Dify_17.png) + +将MinerU的输入参数file选择为迭代器的 `item` + +![img](../../../assets/images/Dify_18.png) + +![img](../../../assets/images/Dify_19.png) + +#### 第五步:增加中间节点“代码执行”来转换MinerU的解析结果 + +**输入变量(变量名称需与代码定义一致)** + +- **text:**选择MinerU Parse File的输出变量`text` +- **uploadFiles:**选择“开始”节点的文件列表`upload_files`,用来根据迭代的index索引下标找到对应的原始文件名 +- **index:**迭代的下标索引,选择迭代器的`index` + +**输出变量(变量名称需与代码定义一致)** + +- **fileName:**String +- **base64:**String + +![img](../../../assets/images/Dify_20.png) + +代码选择JavaScript,编写转换代码: + +暂时无法在飞书文档外展示此内容 + +以下为Python版本: + +暂时无法在飞书文档外展示此内容 + +#### 第六步:配置 Botos3 插件来上传内容 + +添加工具节点Botos3,选择“通过s3上传base64” + +![img](../../../assets/images/Dify_21.png) + +文件base64选择代码执行(图中为**转换MINERU MD文本**)输出的base64字段 + +![img](../../../assets/images/Dify_22.png) + +S3对象key,S3 对象key填写文件存储的路径,在 botos3 插件配置界面已经填写了 bucket 名称,这里只需要填写在bucket下存储的目录即可。选择代码执行**(图中为转换MINERU MD文本)**的`fileName` + +![img](../../../assets/images/Dify_23.png) + +#### 第七步:预览效果 + +连接结束节点,至此,一个简单的上传到s3的工作流配置完成,点击“运行”看看效果👇: + +![img](../../../assets/images/Dify_24.png) + +![img](../../../assets/images/Dify_25.png) + +#### 第八步:Vis3查看文档 + +运行结束,可通过[vis3](https://github.com/opendatalab/Vis3?tab=readme-ov-file#features)来查看S3桶内是否已上传解析后的md文件,Vis3使用可参考 + +[新工具开源!Vis3大模型数据可视化利器:填 AK/SK 直接预览 S3 数据,JSON/视频/图片秒开!本地文件也可用](https://mp.weixin.qq.com/s/p3rH4EaoJB-AK7RWeDvOhg) + +![img](../../../assets/images/Dify_26.png) \ No newline at end of file diff --git 
a/docs/zh/usage/plugin/DingTalk.md b/docs/zh/usage/plugin/DingTalk.md new file mode 100644 index 0000000..18c0326 --- /dev/null +++ b/docs/zh/usage/plugin/DingTalk.md @@ -0,0 +1,12 @@ +# 钉钉简介 + +钉钉(DingTalk)是阿里巴巴集团打造的企业级智能移动办公平台,是数字经济时代的企业组织协同办公和应用开发平台。钉钉整合了 IM 即时沟通、钉钉文档、钉闪会、钉盘、Teambition、OA审批、智能人事、钉工牌、工作台等功能,旨在实现简单、高效、安全、智能的数字化工作方式。它支持企业组织数字化和业务数字化,覆盖“人、财、物、事、产、供、销、存”的全链路管理。 + +通过钉钉开放平台上的SaaS软件,企业可低成本搭建数字化应用,整合所有数字化系统。此外,钉钉提供超过2000个API接口,为企业数字化转型提供开放兼容环境。不会代码的用户也可利用低代码工具构建CRM、ERP、OA、项目管理、进销存等系统。 + +目前,钉钉文档、AI 表格等产品此前已深度集成 MinerU 能力,并通过开放平台向生态开发者开放文档解析功能,为 DLU 的联合研发提供了扎实的技术与场景基础。 + +![](../../../assets/images/DingTalk_01.png) + + +- 钉钉官网:https://www.dingtalk.com/ \ No newline at end of file diff --git a/docs/zh/usage/plugin/FastGPT.md b/docs/zh/usage/plugin/FastGPT.md new file mode 100644 index 0000000..cf32e8c --- /dev/null +++ b/docs/zh/usage/plugin/FastGPT.md @@ -0,0 +1,13 @@ +# FastGPT 简介 + +FastGPT 是一个基于 LLM 大语言模型的知识库问答系统,将智能对话与可视化编排完美结合,让 AI 应用开发变得简单自然。无论您是开发者还是业务人员,都能轻松打造专属的 AI 应用。 + +目前,MinerU 插件已在 FastGPT 系统插件中上线,通过其强大的文档解析能力,为用户搭建智能体与工作流提供文档解析能力,加快用户 AI 应用的开发。 + + +![img](../../../assets/images/FastGPT_01.png) + +![img](../../../assets/images/FastGPT_02.png) + +- 官网地址:https://fastgpt.cn +- MinerU FastGPT 插件下载地址:https://cloud.fastgpt.io/dashboard/systemPlugin?type=productivity \ No newline at end of file diff --git a/docs/zh/usage/plugin/ModelWhale.md b/docs/zh/usage/plugin/ModelWhale.md new file mode 100644 index 0000000..c3aaf1d --- /dev/null +++ b/docs/zh/usage/plugin/ModelWhale.md @@ -0,0 +1,18 @@ +# ModelWhale 简介 + +ModelWhale是一款高效率的数据科学云端协作工具,为数据工作者提供了即开即用的云端分析环境,Jupyter Notebook 交互式和Canvas 拖拽式两种分析界面,帮助科研者、教育工作者解决底层工程繁复、数据难以安全应用、成果流转复现困难等问题。基于不同使用场景,ModelWhale 为用户提供三个产品版本,分别是基础版、专业版、团队版。 + +目前,MinerU 插件已在 ModelWhale 工作中,通过其强大的文档解析能力,为用户搭建智能体与工作流提供文档解析能力,加快用户 AI 应用的开发。 + +images/DingTalk_01.png + + + +![](../../../assets/images/ModelWhale_01.png) + +![](../../../assets/images/ModelWhale_02.png) + + + +- ModelWhale 
官网:https://www.modelwhale.com/pricing?scroll=1 +- MinerU 在ModelWhale 的使用地址:https://www.heywhale.com/org/7b38d/workspace/iframe?url=https://www.heywhale.com/api/model/services/68089d360b1519a862ccb9b4/app/ diff --git a/docs/zh/usage/plugin/RagFlow.md b/docs/zh/usage/plugin/RagFlow.md new file mode 100644 index 0000000..6db7f31 --- /dev/null +++ b/docs/zh/usage/plugin/RagFlow.md @@ -0,0 +1,84 @@ +## RAGFlow + +RAGFlow 是一款开源 RAG(Retrieval-Augmented Generation)引擎与应用平台,深度融合了深度文档理解、自动化 RAG 工作流与大模型调用,打通了复杂数据处理、知识检索、增强生成的全流程,旨在为企业及开发者提供一站式智能问答开发服务,并支持各类复杂场景下大模型的构建与应用落地。 + +目前,MinerU 已深度集成至 RAGFlow 知识库在线版本,作为内置 PDF 文档解析器,为用户知识库搭建提供专业、可靠的文档解析支持。本地部署版本部署使用方式详见下方使用教程。 + +使用可访问:https://demo.ragflow.io/ + +![img](../../../assets/images/RagFlow_01.png) + +## 使用教程:如何在 RAGFlow 中使用 MinerU + +### 一、安装配置 + +首先,我们建议您通过 docker 的形式在本地部署 RagFlow 以方便使用 MinerU 插件作为解析工具。在安装完 RagFlow 后执行: + +1. **版本检查:** + + 确保你的RAGFlow版本 >= `v0.21.1`。 + +2. **更新 .env 文件:** + + 为了确保服务能被平稳修改,建议先在 `cmd` 运行 `docker compose down` 停掉服务。 + + 打开 `.env` 文件,在文件的末尾,添加这两行代码,保存文件。 + + ```Python + HF_ENDPOINT=https://hf-mirror.com + MINERU_EXECUTABLE=/ragflow/uv_tools/.venv/bin/mineru + ``` + +3. **启动并进入容器:** + + 在 `cmd` 中,重新启动服务:`docker compose up -d` + + 等待服务全部 `Running` 或 `Healthy` 后,运行以下命令进入RAGFlow的核心容器: + + ```Bash + docker compose exec ragflow-cpu bash + ``` + + (你的命令行提示符会从 `C:\...>` 变为 `root@...`) + +4. **在容器内下载 MinerU 模型:** + + 在容器内部,依次运行以下 5 条命令 + + ```Bash + mkdir uv_tools + cd uv_tools + uv venv .venv + source .venv/bin/activate + uv pip install -U "mineru[core]" -i https://mirrors.aliyun.com/pypi/simple + ``` + +5. **退出并重启:** + + 安装完成后,输入 `exit` 并按回车。 + + 运行重启命令,让 RAGFlow 加载刚装好的 MinerU + + ```Bash + docker compose restart ragflow-cpu + ``` + +### 二、使用入口 + +在本地部署完毕后,要启用 MinerU,您需要在进入 RagFlow 特定知识库的配置页面并选择 MinerU 作为默认的 PDF 解析器。(注:RagFlow 在线版中已经内置了 MinerU 插件为您提供了高级的 PDF 文件解析能力,使用方式与此一致。) + +**入口和配置步骤:** + +1. **进入知识库配置:** + 1. 首先,在您的知识库管理界面,选择您需要配置的特定知识库(例如图示中的 "content" 知识库)。 + 2. 
在知识库详情页面的左侧导航栏中,点击【**配置**】选项卡。 +2. **定位 PDF 解析器设置:** + 1. 向下滚动页面,找到“**Ingestion pipeline**”(摄取管道)设置部分。 + 2. 在此部分中,您会看到一个名为【**PDF解析器**】(PDF Parser)的选项。 +3. **选择 MinerU:** + 1. 点击【PDF解析器】旁边的下拉菜单。 + 2. 从可用选项中,选择【**MinerU**】。 +4. **保存修改:** + 1. 完成选择后,请务必点击页面底部的【**保存**】按钮,以使更改生效。 + +![img](../../../assets/images/RagFlow_02.png) \ No newline at end of file diff --git a/docs/zh/usage/plugin/Sider.md b/docs/zh/usage/plugin/Sider.md new file mode 100644 index 0000000..b5f2273 --- /dev/null +++ b/docs/zh/usage/plugin/Sider.md @@ -0,0 +1,10 @@ +# Sider 简介 + +Sider 是一款浏览器侧边栏类的 AI 助手扩展,主要在网页右侧开启一个“随处可用”的智能面板,将对话式 AI(如 GPT、Claude、Gemini 等)带到你正在浏览的任何页面中。它的核心定位是:提升阅读、写作、翻译、检索与总结效率,并与网页内容深度联动。 + +目前,Sider在 Wisebase 模块中深度集成了 MinerU 的相关功能。该模块是一个由AI驱动的知识库,您可以通过上传 PDF 等各类型文件,构建个人图书馆以实现高效的知识管理,MinerU 可以帮助您更好地解析此类文件,精准地提取文件中的信息。 + +![img](../../../assets/images/Sider_1.png) + +- Sider 官网地址:https://sider.ai/zh-CN/chat +- 使用集成 MinerU 相关功能的 Sider 地址:https://sider.ai/zh-CN/wisebase \ No newline at end of file diff --git a/docs/zh/usage/plugin/n8n.md b/docs/zh/usage/plugin/n8n.md new file mode 100644 index 0000000..fa5bb2b --- /dev/null +++ b/docs/zh/usage/plugin/n8n.md @@ -0,0 +1,54 @@ +# n8n 简介 + +**n8n** 是一款以低代码(Low-code)、工作流自动化为核心的应用开发平台,许多企业都借助于其灵活的节点(Node)配置,实现业务流程的自动化执行。它通过可视化界面和代码扩展能力,帮助用户连接各种应用程序和服务,构建复杂的自动化流程,降低用户使用门槛。 + +目前,MinerU 已将其强大的文档解析能力封装为 n8n 节点,用户在搭建工作流时,可以更加便捷地处理复杂的文档解析任务。 + +![img](../../../assets/images/n8n_0.png) + +- n8n 官网地址:https://n8n.io/ +- MinerU n8n 插件下载地址:https://www.npmjs.com/package/n8n-nodes-mineru + +# MinerU 在 n8n 中的使用方法 + +## step1 进入社区node安装界面 + +![img](../../../assets/images/n8n_1.png) + +## step2 安装 n8n-nodes-mineru 节点 + +![img](../../../assets/images/n8n_2.png) + +## step3 新建工作流,添加 n8n-nodes-mineru 节点,并设置 api key + +![img](../../../assets/images/n8n_3.png) + +![img](../../../assets/images/n8n_4.png) + +![img](../../../assets/images/n8n_5.png) + +![img](../../../assets/images/n8n_6.png) + +### n8n使用节点文档 + +https://www.npmjs.com/package/n8n-nodes-mineru + +### 
**在工作流内集成解压功能** + +#### 导入 json 模板 + +暂时无法在飞书文档外展示此内容 + +![img](../../../assets/images/n8n_7.png) + +### 配置 凭证和文档url + +![img](../../../assets/images/n8n_8.png) + +### 根据各自的需求配置所需的输出 + +![img](../../../assets/images/n8n_9.png) + +### 调试 + +![img](../../../assets/images/n8n_10.png) \ No newline at end of file diff --git a/docs/zh/usage/quick_usage.md b/docs/zh/usage/quick_usage.md new file mode 100644 index 0000000..835a9c4 --- /dev/null +++ b/docs/zh/usage/quick_usage.md @@ -0,0 +1,99 @@ +# 使用 MinerU + +## 快速配置模型源 +MinerU默认使用`huggingface`作为模型源,若用户网络无法访问`huggingface`,可以通过环境变量便捷地切换模型源为`modelscope`: +```bash +export MINERU_MODEL_SOURCE=modelscope +``` +有关模型源配置和自定义本地模型路径的更多信息,请参考文档中的[模型源说明](./model_source.md)。 + +## 通过命令行快速使用 +MinerU内置了命令行工具,用户可以通过命令行快速使用MinerU进行PDF解析: +```bash +mineru -p <input_path> -o <output_path> +``` +> [!TIP] +> - `<input_path>`:本地 PDF/图片 文件或目录 +> - `<output_path>`:输出目录 +> +> 更多关于输出文件的信息,请参考[输出文件说明](../reference/output_files.md)。 + +> [!NOTE] +> 命令行工具会在Linux和macOS系统自动尝试cuda/mps加速。Windows用户如需使用cuda加速, +> 请前往 [Pytorch官网](https://pytorch.org/get-started/locally/) 选择适合自己cuda版本的命令安装支持加速的`torch`和`torchvision`。 + +如果需要通过自定义参数调整解析选项,您也可以在文档中查看更详细的[命令行工具使用说明](./cli_tools.md)。 + +## 通过api、webui、http-client/server进阶使用 + +- 通过python api直接调用:[Python 调用示例](https://github.com/opendatalab/MinerU/blob/master/demo/demo.py) +- 通过fast api方式调用: + ```bash + mineru-api --host 0.0.0.0 --port 8000 + ``` + >[!TIP] + >在浏览器中访问 `http://127.0.0.1:8000/docs` 查看API文档。 +- 启动gradio webui 可视化前端: + ```bash + mineru-gradio --server-name 0.0.0.0 --server-port 7860 + ``` + >[!TIP] + > + >- 在浏览器中访问 `http://127.0.0.1:7860` 使用 Gradio WebUI。 + +- 使用`http-client/server`方式调用: + ```bash + # 启动openai兼容服务器(需要安装vllm或lmdeploy环境) + mineru-openai-server --port 30000 + ``` + >[!TIP] + >在另一个终端中通过http client连接openai server + > ```bash + > mineru -p <input_path> -o <output_path> -b hybrid-http-client -u http://127.0.0.1:30000 + > ``` + +> [!NOTE] +> 所有`vllm/lmdeploy`官方支持的参数都可以通过命令行参数传递给 MinerU,包括以下命令:`mineru`、`mineru-openai-server`、`mineru-gradio`、`mineru-api`, +> 
/*
 * Preserved verbatim: non-code text that shares these source lines with
 * utils.js (the tail of docs/zh/usage/quick_usage.md followed by the patch
 * header for lib/bindings/utils.js). It is kept so that no content is lost.
 *
 * 我们整理了一些`vllm/lmdeploy`使用中的常用参数和使用方法,可以在文档[命令行进阶参数](./advanced_cli_parameters.md)中获取。 + +## 基于配置文件扩展 MinerU 功能 + +MinerU 现已实现开箱即用,但也支持通过配置文件扩展功能。您可通过编辑用户目录下的 `mineru.json` 文件,添加自定义配置。 + +>[!IMPORTANT] +>`mineru.json` 文件会在您使用内置模型下载命令 `mineru-models-download` 时自动生成,也可以通过将[配置模板文件](https://github.com/opendatalab/MinerU/blob/master/mineru.template.json)复制到用户目录下并重命名为 `mineru.json` 来创建。 + +以下是一些可用的配置选项: + +- `latex-delimiter-config`: + * 用于配置 LaTeX 公式的分隔符 + * 默认为`$`符号,可根据需要修改为其他符号或字符串。 + +- `llm-aided-config`: + * 用于配置 LLM 辅助标题分级的相关参数,兼容所有支持`openai协议`的 LLM 模型 + * 默认使用`阿里云百炼`的`qwen3-next-80b-a3b-instruct`模型 + * 您需要自行配置 API 密钥并将`enable`设置为`true`来启用此功能 + * 如果您的api供应商不支持`enable_thinking`参数,请手动将该参数删除 + * 例如,在您的配置文件中,`llm-aided-config` 部分可能如下所示: + ```json + "llm-aided-config": { + "api_key": "your_api_key", + "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1", + "model": "qwen3-next-80b-a3b-instruct", + "enable_thinking": false, + "enable": false + } + ``` + * 要移除`enable_thinking`参数,只需删除包含`"enable_thinking": false`的那一行,结果如下: + ```json + "llm-aided-config": { + "api_key": "your_api_key", + "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1", + "model": "qwen3-next-80b-a3b-instruct", + "enable": false + } + ``` + +- `models-dir`: + * 用于指定本地模型存储目录,请为`pipeline`和`vlm`后端分别指定模型目录, + * 指定目录后您可通过配置环境变量`export MINERU_MODEL_SOURCE=local`来使用本地模型。 diff --git a/lib/bindings/utils.js b/lib/bindings/utils.js new file mode 100644 index 0000000..088effe --- /dev/null +++ b/lib/bindings/utils.js @@ -0,0 +1,189 @@
 */

/**
 * Highlights the neighbourhood of the first selected node and greys out
 * every other node.
 *
 * With a non-empty selection:
 *   1. every node is dimmed and its label is stashed in `hiddenLabel`;
 *   2. the nodes connected to the first-degree neighbours get a lighter grey
 *      and their label back;
 *   3. the first-degree neighbours, then the selected node, get their entry
 *      from `nodeColors` and their label back.
 * With an empty selection, and only while `highlightActive` is true, every
 * node gets its `nodeColors` entry and its label back.
 * In all cases every node is then pushed through `nodes.update`.
 *
 * Uses names this file does not define (`nodes`, `network`, `nodeColors`,
 * `highlightActive`, `allNodes`) - presumably globals declared by the page
 * that loads this script; confirm against the host template.
 *
 * @param {{nodes: Array}} params - Selection; only `params.nodes[0]` is used.
 * @returns {undefined}
 */
function neighbourhoodHighlight(params) {
  // Deliberately not declared here: `allNodes` is shared state owned by the
  // surrounding page, exactly as in the original implementation.
  allNodes = nodes.get({ returnType: "Object" });

  // Gives a node back the label that was stashed while it was dimmed.
  const restoreLabel = (node) => {
    if (node.hiddenLabel !== undefined) {
      node.label = node.hiddenLabel;
      node.hiddenLabel = undefined;
    }
  };

  // Colours one node and restores its label. Ids that are missing from the
  // data set are skipped instead of raising a TypeError.
  const paint = (nodeId, color) => {
    const node = allNodes[nodeId];
    if (node === undefined) {
      return;
    }
    node.color = color;
    restoreLabel(node);
  };

  if (params.nodes.length > 0) {
    highlightActive = true;
    const selectedNode = params.nodes[0];
    const degrees = 2;

    // Mark all nodes as hard to read.
    for (const nodeId of Object.keys(allNodes)) {
      const node = allNodes[nodeId];
      node.color = "rgba(200,200,200,0.5)";
      if (node.hiddenLabel === undefined) {
        node.hiddenLabel = node.label;
        node.label = undefined;
      }
    }

    const connectedNodes = network.getConnectedNodes(selectedNode);
    let allConnectedNodes = [];

    // Collect the neighbours of the first-degree neighbours.
    for (let degree = 1; degree < degrees; degree++) {
      for (const neighbourId of connectedNodes) {
        allConnectedNodes = allConnectedNodes.concat(
          network.getConnectedNodes(neighbourId)
        );
      }
    }

    // Order matters: later passes overwrite the colour set by earlier ones.
    for (const nodeId of allConnectedNodes) {
      paint(nodeId, "rgba(150,150,150,0.75)");
    }
    for (const nodeId of connectedNodes) {
      paint(nodeId, nodeColors[nodeId]);
    }
    paint(selectedNode, nodeColors[selectedNode]);
  } else if (highlightActive === true) {
    // Reset all nodes.
    for (const nodeId of Object.keys(allNodes)) {
      paint(nodeId, nodeColors[nodeId]);
    }
    highlightActive = false;
  }

  // The original had two identical branches here; every path pushes the full
  // node list back to the data set.
  nodes.update(Object.keys(allNodes).map((nodeId) => allNodes[nodeId]));
}

/**
 * Shows only the given nodes and hides every other node.
 *
 * With a non-empty selection every node is hidden and its label stashed in
 * `savedLabel`; each selected node is then un-hidden and gets its label back.
 * With an empty selection, and only while `filterActive` is true, every node
 * is un-hidden and its label restored. In all cases every node is pushed
 * through `nodes.update`.
 *
 * Uses `nodes`, `filterActive` and `allNodes`, which this file does not
 * define - presumably globals of the hosting page.
 *
 * @param {{nodes: Array}} params - Ids of the nodes that stay visible.
 * @returns {undefined}
 */
function filterHighlight(params) {
  allNodes = nodes.get({ returnType: "Object" });

  // Un-hides a node and gives it back the label stashed in `savedLabel`.
  const reveal = (node) => {
    node.hidden = false;
    if (node.savedLabel !== undefined) {
      node.label = node.savedLabel;
      node.savedLabel = undefined;
    }
  };

  if (params.nodes.length > 0) {
    filterActive = true;

    // Hide all nodes and save their labels.
    for (const nodeId of Object.keys(allNodes)) {
      const node = allNodes[nodeId];
      node.hidden = true;
      if (node.savedLabel === undefined) {
        node.savedLabel = node.label;
        node.label = undefined;
      }
    }

    for (const nodeId of params.nodes) {
      const node = allNodes[nodeId];
      // An id that is not in the data set is skipped instead of throwing.
      if (node !== undefined) {
        reveal(node);
      }
    }
  } else if (filterActive === true) {
    // Reset all nodes.
    for (const nodeId of Object.keys(allNodes)) {
      reveal(allNodes[nodeId]);
    }
    filterActive = false;
  }

  nodes.update(Object.keys(allNodes).map((nodeId) => allNodes[nodeId]));
}

/**
 * Selects the given nodes on `network` and applies neighbourhood highlighting.
 *
 * @param {Array} nodes - Node ids; shadows the outer `nodes` inside this function.
 * @returns {Array} The same array that was passed in.
 */
function selectNode(nodes) {
  network.selectNodes(nodes);
  neighbourhoodHighlight({ nodes });
  return nodes;
}

/**
 * Selects the given nodes on `network` and hides every other node.
 *
 * @param {Array} nodes - Node ids; shadows the outer `nodes` inside this function.
 * @returns {Array} The same array that was passed in.
 */
function selectNodes(nodes) {
  network.selectNodes(nodes);
  filterHighlight({ nodes });
  return nodes;
}

/**
 * Selects the nodes that match a property filter.
 *
 * For `filter.item === 'node'` a node matches when the string form of its
 * `filter.property` value is contained in `filter.value`. For
 * `filter.item === 'edge'` both end points (`from`, `to`) of every matching
 * edge are selected. The matches are handed to `selectNodes`.
 *
 * Changes from the previous version: a property whose value is falsy but
 * present (0, false, "") is now compared instead of being skipped, and each
 * node id is passed on only once.
 *
 * @param {{item: string, property: string, value: (Array|string)}} filter
 * @returns {undefined}
 */
function highlightFilter(filter) {
  const selectedProp = filter['property'];

  // Only null/undefined count as "property missing".
  const matches = (item) => {
    const value = item[selectedProp];
    return value != null && filter['value'].includes(String(value));
  };

  // A Set keeps first-seen order and drops ids shared by several edges.
  const selectedNodes = new Set();

  if (filter['item'] === 'node') {
    const nodeMap = nodes.get({ returnType: "Object" });
    for (const nodeId of Object.keys(nodeMap)) {
      if (matches(nodeMap[nodeId])) {
        selectedNodes.add(nodeId);
      }
    }
  } else if (filter['item'] === 'edge') {
    // NOTE(review): spelled 'object' here but "Object" for nodes - confirm
    // which spelling the data set expects. Object.keys copes with both an
    // array and an id-keyed object.
    const allEdges = edges.get({returnType: 'object'});
    for (const edgeKey of Object.keys(allEdges)) {
      const edge = allEdges[edgeKey];
      if (matches(edge)) {
        selectedNodes.add(edge['from']);
        selectedNodes.add(edge['to']);
      }
    }
  }

  selectNodes([...selectedNodes]);
}
// Preserved verbatim (patch text that trailed the code): \ No newline at end of file diff --git
a/lib/tom-select/tom-select.complete.min.js b/lib/tom-select/tom-select.complete.min.js new file mode 100644 index 0000000..e2e0211 --- /dev/null +++ b/lib/tom-select/tom-select.complete.min.js @@ -0,0 +1,356 @@ +/** +* Tom Select v2.0.0-rc.4 +* Licensed under the Apache License, Version 2.0 (the "License"); +*/ +!function(e,t){"object"==typeof exports&&"undefined"!=typeof module?module.exports=t():"function"==typeof define&&define.amd?define(t):(e="undefined"!=typeof globalThis?globalThis:e||self).TomSelect=t()}(this,(function(){"use strict" +function e(e,t){e.split(/\s+/).forEach((e=>{t(e)}))}class t{constructor(){this._events={}}on(t,i){e(t,(e=>{this._events[e]=this._events[e]||[],this._events[e].push(i)}))}off(t,i){var s=arguments.length +0!==s?e(t,(e=>{if(1===s)return delete this._events[e] +e in this._events!=!1&&this._events[e].splice(this._events[e].indexOf(i),1)})):this._events={}}trigger(t,...i){var s=this +e(t,(e=>{if(e in s._events!=!1)for(let t of s._events[e])t.apply(s,i)}))}}var i +const s="[̀-ͯ·ʾ]",n=new RegExp(s,"g") +var o +const r={"æ":"ae","ⱥ":"a","ø":"o"},l=new RegExp(Object.keys(r).join("|"),"g"),a=[[67,67],[160,160],[192,438],[452,652],[961,961],[1019,1019],[1083,1083],[1281,1289],[1984,1984],[5095,5095],[7429,7441],[7545,7549],[7680,7935],[8580,8580],[9398,9449],[11360,11391],[42792,42793],[42802,42851],[42873,42897],[42912,42922],[64256,64260],[65313,65338],[65345,65370]],c=e=>e.normalize("NFKD").replace(n,"").toLowerCase().replace(l,(function(e){return r[e]})),d=(e,t="|")=>{if(1==e.length)return e[0] +var i=1 +return e.forEach((e=>{i=Math.max(i,e.length)})),1==i?"["+e.join("")+"]":"(?:"+e.join(t)+")"},p=e=>{if(1===e.length)return[[e]] +var t=[] +return p(e.substring(1)).forEach((function(i){var s=i.slice(0) +s[0]=e.charAt(0)+s[0],t.push(s),(s=i.slice(0)).unshift(e.charAt(0)),t.push(s)})),t},u=e=>{void 0===o&&(o=(()=>{var e={} +a.forEach((t=>{for(let s=t[0];s<=t[1];s++){let t=String.fromCharCode(s),n=c(t) +if(n!=t.toLowerCase()){n in 
e||(e[n]=[n]) +var i=new RegExp(d(e[n]),"iu") +t.match(i)||e[n].push(t)}}})) +var t=Object.keys(e) +t=t.sort(((e,t)=>t.length-e.length)),i=new RegExp("("+d(t)+"[̀-ͯ·ʾ]*)","g") +var s={} +return t.sort(((e,t)=>e.length-t.length)).forEach((t=>{var i=p(t).map((t=>(t=t.map((t=>e.hasOwnProperty(t)?d(e[t]):t)),d(t,"")))) +s[t]=d(i)})),s})()) +return e.normalize("NFKD").toLowerCase().split(i).map((e=>{if(""==e)return"" +const t=c(e) +if(o.hasOwnProperty(t))return o[t] +const i=e.normalize("NFC") +return i!=e?d([e,i]):e})).join("")},h=(e,t)=>{if(e)return e[t]},g=(e,t)=>{if(e){for(var i,s=t.split(".");(i=s.shift())&&(e=e[i]););return e}},f=(e,t,i)=>{var s,n +return e?-1===(n=(e+="").search(t.regex))?0:(s=t.string.length/e.length,0===n&&(s+=.5),s*i):0},v=e=>(e+"").replace(/([\$\(-\+\.\?\[-\^\{-\}])/g,"\\$1"),m=(e,t)=>{var i=e[t] +if("function"==typeof i)return i +i&&!Array.isArray(i)&&(e[t]=[i])},y=(e,t)=>{if(Array.isArray(e))e.forEach(t) +else for(var i in e)e.hasOwnProperty(i)&&t(e[i],i)},O=(e,t)=>"number"==typeof e&&"number"==typeof t?e>t?1:e(t=c(t+"").toLowerCase())?1:t>e?-1:0 +class b{constructor(e,t){this.items=e,this.settings=t||{diacritics:!0}}tokenize(e,t,i){if(!e||!e.length)return[] +const s=[],n=e.split(/\s+/) +var o +return i&&(o=new RegExp("^("+Object.keys(i).map(v).join("|")+"):(.*)$")),n.forEach((e=>{let i,n=null,r=null +o&&(i=e.match(o))&&(n=i[1],e=i[2]),e.length>0&&(r=v(e),this.settings.diacritics&&(r=u(r)),t&&(r="\\b"+r)),s.push({string:e,regex:r?new RegExp(r,"iu"):null,field:n})})),s}getScoreFunction(e,t){var i=this.prepareSearch(e,t) +return this._getScoreFunction(i)}_getScoreFunction(e){const t=e.tokens,i=t.length +if(!i)return function(){return 0} +const s=e.options.fields,n=e.weights,o=s.length,r=e.getAttrFn +if(!o)return function(){return 1} +const l=1===o?function(e,t){const i=s[0].field +return f(r(t,i),e,n[i])}:function(e,t){var i=0 +if(e.field){const s=r(t,e.field) +!e.regex&&s?i+=1/o:i+=f(s,e,1)}else y(n,((s,n)=>{i+=f(r(t,n),e,s)})) +return i/o} 
+return 1===i?function(e){return l(t[0],e)}:"and"===e.options.conjunction?function(e){for(var s,n=0,o=0;n{s+=l(t,e)})),s/i}}getSortFunction(e,t){var i=this.prepareSearch(e,t) +return this._getSortFunction(i)}_getSortFunction(e){var t,i,s +const n=this,o=e.options,r=!e.query&&o.sort_empty?o.sort_empty:o.sort,l=[],a=[] +if("function"==typeof r)return r.bind(this) +const c=function(t,i){return"$score"===t?i.score:e.getAttrFn(n.items[i.id],t)} +if(r)for(t=0,i=r.length;t{"string"==typeof t&&(t={field:t,weight:1}),e.push(t),i[t.field]="weight"in t?t.weight:1})),s.fields=e}return{options:s,query:e.toLowerCase().trim(),tokens:this.tokenize(e,s.respect_word_boundaries,i),total:0,items:[],weights:i,getAttrFn:s.nesting?g:h}}search(e,t){var i,s,n=this +s=this.prepareSearch(e,t),t=s.options,e=s.query +const o=t.score||n._getScoreFunction(s) +e.length?y(n.items,((e,n)=>{i=o(e),(!1===t.filter||i>0)&&s.items.push({score:i,id:n})})):y(n.items,((e,t)=>{s.items.push({score:1,id:t})})) +const r=n._getSortFunction(s) +return r&&s.items.sort(r),s.total=s.items.length,"number"==typeof t.limit&&(s.items=s.items.slice(0,t.limit)),s}}const w=e=>{if(e.jquery)return e[0] +if(e instanceof HTMLElement)return e +if(e.indexOf("<")>-1){let t=document.createElement("div") +return t.innerHTML=e.trim(),t.firstChild}return document.querySelector(e)},_=(e,t)=>{var i=document.createEvent("HTMLEvents") +i.initEvent(t,!0,!1),e.dispatchEvent(i)},I=(e,t)=>{Object.assign(e.style,t)},C=(e,...t)=>{var i=A(t);(e=x(e)).map((e=>{i.map((t=>{e.classList.add(t)}))}))},S=(e,...t)=>{var i=A(t);(e=x(e)).map((e=>{i.map((t=>{e.classList.remove(t)}))}))},A=e=>{var t=[] +return y(e,(e=>{"string"==typeof e&&(e=e.trim().split(/[\11\12\14\15\40]/)),Array.isArray(e)&&(t=t.concat(e))})),t.filter(Boolean)},x=e=>(Array.isArray(e)||(e=[e]),e),k=(e,t,i)=>{if(!i||i.contains(e))for(;e&&e.matches;){if(e.matches(t))return e +e=e.parentNode}},F=(e,t=0)=>t>0?e[e.length-1]:e[0],L=(e,t)=>{if(!e)return-1 +t=t||e.nodeName +for(var 
i=0;e=e.previousElementSibling;)e.matches(t)&&i++ +return i},P=(e,t)=>{y(t,((t,i)=>{null==t?e.removeAttribute(i):e.setAttribute(i,""+t)}))},E=(e,t)=>{e.parentNode&&e.parentNode.replaceChild(t,e)},T=(e,t)=>{if(null===t)return +if("string"==typeof t){if(!t.length)return +t=new RegExp(t,"i")}const i=e=>3===e.nodeType?(e=>{var i=e.data.match(t) +if(i&&e.data.length>0){var s=document.createElement("span") +s.className="highlight" +var n=e.splitText(i.index) +n.splitText(i[0].length) +var o=n.cloneNode(!0) +return s.appendChild(o),E(n,s),1}return 0})(e):((e=>{if(1===e.nodeType&&e.childNodes&&!/(script|style)/i.test(e.tagName)&&("highlight"!==e.className||"SPAN"!==e.tagName))for(var t=0;t0},render:{}} +const q=e=>null==e?null:D(e),D=e=>"boolean"==typeof e?e?"1":"0":e+"",N=e=>(e+"").replace(/&/g,"&").replace(//g,">").replace(/"/g,"""),z=(e,t)=>{var i +return function(s,n){var o=this +i&&(o.loading=Math.max(o.loading-1,0),clearTimeout(i)),i=setTimeout((function(){i=null,o.loadedSearches[s]=!0,e.call(o,s,n)}),t)}},R=(e,t,i)=>{var s,n=e.trigger,o={} +for(s in e.trigger=function(){var i=arguments[0] +if(-1===t.indexOf(i))return n.apply(e,arguments) +o[i]=arguments},i.apply(e,[]),e.trigger=n,o)n.apply(e,o[s])},H=(e,t=!1)=>{e&&(e.preventDefault(),t&&e.stopPropagation())},B=(e,t,i,s)=>{e.addEventListener(t,i,s)},K=(e,t)=>!!t&&(!!t[e]&&1===(t.altKey?1:0)+(t.ctrlKey?1:0)+(t.shiftKey?1:0)+(t.metaKey?1:0)),M=(e,t)=>{const i=e.getAttribute("id") +return i||(e.setAttribute("id",t),t)},Q=e=>e.replace(/[\\"']/g,"\\$&"),G=(e,t)=>{t&&e.append(t)} +function U(e,t){var i=Object.assign({},j,t),s=i.dataAttr,n=i.labelField,o=i.valueField,r=i.disabledField,l=i.optgroupField,a=i.optgroupLabelField,c=i.optgroupValueField,d=e.tagName.toLowerCase(),p=e.getAttribute("placeholder")||e.getAttribute("data-placeholder") +if(!p&&!i.allowEmptyOption){let t=e.querySelector('option[value=""]') +t&&(p=t.textContent)}var u,h,g,f,v,m,O={placeholder:p,options:[],optgroups:[],items:[],maxItems:null} 
+return"select"===d?(h=O.options,g={},f=1,v=e=>{var t=Object.assign({},e.dataset),i=s&&t[s] +return"string"==typeof i&&i.length&&(t=Object.assign(t,JSON.parse(i))),t},m=(e,t)=>{var s=q(e.value) +if(null!=s&&(s||i.allowEmptyOption)){if(g.hasOwnProperty(s)){if(t){var a=g[s][l] +a?Array.isArray(a)?a.push(t):g[s][l]=[a,t]:g[s][l]=t}}else{var c=v(e) +c[n]=c[n]||e.textContent,c[o]=c[o]||s,c[r]=c[r]||e.disabled,c[l]=c[l]||t,c.$option=e,g[s]=c,h.push(c)}e.selected&&O.items.push(s)}},O.maxItems=e.hasAttribute("multiple")?null:1,y(e.children,(e=>{var t,i,s +"optgroup"===(u=e.tagName.toLowerCase())?((s=v(t=e))[a]=s[a]||t.getAttribute("label")||"",s[c]=s[c]||f++,s[r]=s[r]||t.disabled,O.optgroups.push(s),i=s[c],y(t.children,(e=>{m(e,i)}))):"option"===u&&m(e)}))):(()=>{const t=e.getAttribute(s) +if(t)O.options=JSON.parse(t),y(O.options,(e=>{O.items.push(e[o])})) +else{var r=e.value.trim()||"" +if(!i.allowEmptyOption&&!r.length)return +const t=r.split(i.delimiter) +y(t,(e=>{const t={} +t[n]=e,t[o]=e,O.options.push(t)})),O.items=t}})(),Object.assign({},j,O,t)}var W=0 +class J extends(function(e){return e.plugins={},class extends e{constructor(...e){super(...e),this.plugins={names:[],settings:{},requested:{},loaded:{}}}static define(t,i){e.plugins[t]={name:t,fn:i}}initializePlugins(e){var t,i +const s=this,n=[] +if(Array.isArray(e))e.forEach((e=>{"string"==typeof e?n.push(e):(s.plugins.settings[e.name]=e.options,n.push(e.name))})) +else if(e)for(t in e)e.hasOwnProperty(t)&&(s.plugins.settings[t]=e[t],n.push(t)) +for(;i=n.shift();)s.require(i)}loadPlugin(t){var i=this,s=i.plugins,n=e.plugins[t] +if(!e.plugins.hasOwnProperty(t))throw new Error('Unable to find "'+t+'" plugin') +s.requested[t]=!0,s.loaded[t]=n.fn.apply(i,[i.plugins.settings[t]||{}]),s.names.push(t)}require(e){var t=this,i=t.plugins +if(!t.plugins.loaded.hasOwnProperty(e)){if(i.requested[e])throw new Error('Plugin has circular dependency ("'+e+'")') +t.loadPlugin(e)}return i.loaded[e]}}}(t)){constructor(e,t){var i 
+super(),this.order=0,this.isOpen=!1,this.isDisabled=!1,this.isInvalid=!1,this.isValid=!0,this.isLocked=!1,this.isFocused=!1,this.isInputHidden=!1,this.isSetup=!1,this.ignoreFocus=!1,this.hasOptions=!1,this.lastValue="",this.caretPos=0,this.loading=0,this.loadedSearches={},this.activeOption=null,this.activeItems=[],this.optgroups={},this.options={},this.userOptions={},this.items=[],W++ +var s=w(e) +if(s.tomselect)throw new Error("Tom Select already initialized on this element") +s.tomselect=this,i=(window.getComputedStyle&&window.getComputedStyle(s,null)).getPropertyValue("direction") +const n=U(s,t) +this.settings=n,this.input=s,this.tabIndex=s.tabIndex||0,this.is_select_tag="select"===s.tagName.toLowerCase(),this.rtl=/rtl/i.test(i),this.inputId=M(s,"tomselect-"+W),this.isRequired=s.required,this.sifter=new b(this.options,{diacritics:n.diacritics}),n.mode=n.mode||(1===n.maxItems?"single":"multi"),"boolean"!=typeof n.hideSelected&&(n.hideSelected="multi"===n.mode),"boolean"!=typeof n.hidePlaceholder&&(n.hidePlaceholder="multi"!==n.mode) +var o=n.createFilter +"function"!=typeof o&&("string"==typeof o&&(o=new RegExp(o)),o instanceof RegExp?n.createFilter=e=>o.test(e):n.createFilter=()=>!0),this.initializePlugins(n.plugins),this.setupCallbacks(),this.setupTemplates() +const r=w("
"),l=w("
"),a=this._render("dropdown"),c=w('
'),d=this.input.getAttribute("class")||"",p=n.mode +var u +if(C(r,n.wrapperClass,d,p),C(l,n.controlClass),G(r,l),C(a,n.dropdownClass,p),n.copyClassesToDropdown&&C(a,d),C(c,n.dropdownContentClass),G(a,c),w(n.dropdownParent||r).appendChild(a),n.hasOwnProperty("controlInput"))n.controlInput?(u=w(n.controlInput),this.focus_node=u):(u=w(""),this.focus_node=l) +else{u=w('') +y(["autocorrect","autocapitalize","autocomplete"],(e=>{s.getAttribute(e)&&P(u,{[e]:s.getAttribute(e)})})),u.tabIndex=-1,l.appendChild(u),this.focus_node=u}this.wrapper=r,this.dropdown=a,this.dropdown_content=c,this.control=l,this.control_input=u,this.setup()}setup(){const e=this,t=e.settings,i=e.control_input,s=e.dropdown,n=e.dropdown_content,o=e.wrapper,r=e.control,l=e.input,a=e.focus_node,c={passive:!0},d=e.inputId+"-ts-dropdown" +P(n,{id:d}),P(a,{role:"combobox","aria-haspopup":"listbox","aria-expanded":"false","aria-controls":d}) +const p=M(a,e.inputId+"-ts-control"),u="label[for='"+(e=>e.replace(/['"\\]/g,"\\$&"))(e.inputId)+"']",h=document.querySelector(u),g=e.focus.bind(e) +if(h){B(h,"click",g),P(h,{for:p}) +const t=M(h,e.inputId+"-ts-label") +P(a,{"aria-labelledby":t}),P(n,{"aria-labelledby":t})}if(o.style.width=l.style.width,e.plugins.names.length){const t="plugin-"+e.plugins.names.join(" plugin-") +C([o,s],t)}(null===t.maxItems||t.maxItems>1)&&e.is_select_tag&&P(l,{multiple:"multiple"}),e.settings.placeholder&&P(i,{placeholder:t.placeholder}),!e.settings.splitOn&&e.settings.delimiter&&(e.settings.splitOn=new RegExp("\\s*"+v(e.settings.delimiter)+"+\\s*")),t.load&&t.loadThrottle&&(t.load=z(t.load,t.loadThrottle)),e.control_input.type=l.type,B(s,"click",(t=>{const i=k(t.target,"[data-selectable]") +i&&(e.onOptionSelect(t,i),H(t,!0))})),B(r,"click",(t=>{var s=k(t.target,"[data-ts-item]",r) 
+s&&e.onItemSelect(t,s)?H(t,!0):""==i.value&&(e.onClick(),H(t,!0))})),B(i,"mousedown",(e=>{""!==i.value&&e.stopPropagation()})),B(a,"keydown",(t=>e.onKeyDown(t))),B(i,"keypress",(t=>e.onKeyPress(t))),B(i,"input",(t=>e.onInput(t))),B(a,"resize",(()=>e.positionDropdown()),c),B(a,"blur",(t=>e.onBlur(t))),B(a,"focus",(t=>e.onFocus(t))),B(a,"paste",(t=>e.onPaste(t))) +const f=t=>{const i=t.composedPath()[0] +if(!o.contains(i)&&!s.contains(i))return e.isFocused&&e.blur(),void e.inputState() +H(t,!0)} +var m=()=>{e.isOpen&&e.positionDropdown()} +B(document,"mousedown",f),B(window,"scroll",m,c),B(window,"resize",m,c),this._destroy=()=>{document.removeEventListener("mousedown",f),window.removeEventListener("sroll",m),window.removeEventListener("resize",m),h&&h.removeEventListener("click",g)},this.revertSettings={innerHTML:l.innerHTML,tabIndex:l.tabIndex},l.tabIndex=-1,l.insertAdjacentElement("afterend",e.wrapper),e.sync(!1),t.items=[],delete t.optgroups,delete t.options,B(l,"invalid",(t=>{e.isValid&&(e.isValid=!1,e.isInvalid=!0,e.refreshState())})),e.updateOriginalInput(),e.refreshItems(),e.close(!1),e.inputState(),e.isSetup=!0,l.disabled?e.disable():e.enable(),e.on("change",this.onChange),C(l,"tomselected","ts-hidden-accessible"),e.trigger("initialize"),!0===t.preload&&e.preload()}setupOptions(e=[],t=[]){this.addOptions(e),y(t,(e=>{this.registerOptionGroup(e)}))}setupTemplates(){var e=this,t=e.settings.labelField,i=e.settings.optgroupLabelField,s={optgroup:e=>{let t=document.createElement("div") +return t.className="optgroup",t.appendChild(e.options),t},optgroup_header:(e,t)=>'
'+t(e[i])+"
",option:(e,i)=>"
"+i(e[t])+"
",item:(e,i)=>"
"+i(e[t])+"
",option_create:(e,t)=>'
Add '+t(e.input)+"
",no_results:()=>'
No results found
',loading:()=>'
',not_loading:()=>{},dropdown:()=>"
"} +e.settings.render=Object.assign({},s,e.settings.render)}setupCallbacks(){var e,t,i={initialize:"onInitialize",change:"onChange",item_add:"onItemAdd",item_remove:"onItemRemove",item_select:"onItemSelect",clear:"onClear",option_add:"onOptionAdd",option_remove:"onOptionRemove",option_clear:"onOptionClear",optgroup_add:"onOptionGroupAdd",optgroup_remove:"onOptionGroupRemove",optgroup_clear:"onOptionGroupClear",dropdown_open:"onDropdownOpen",dropdown_close:"onDropdownClose",type:"onType",load:"onLoad",focus:"onFocus",blur:"onBlur"} +for(e in i)(t=this.settings[i[e]])&&this.on(e,t)}sync(e=!0){const t=this,i=e?U(t.input,{delimiter:t.settings.delimiter}):t.settings +t.setupOptions(i.options,i.optgroups),t.setValue(i.items,!0),t.lastQuery=null}onClick(){var e=this +if(e.activeItems.length>0)return e.clearActiveItems(),void e.focus() +e.isFocused&&e.isOpen?e.blur():e.focus()}onMouseDown(){}onChange(){_(this.input,"input"),_(this.input,"change")}onPaste(e){var t=this +t.isFull()||t.isInputHidden||t.isLocked?H(e):t.settings.splitOn&&setTimeout((()=>{var e=t.inputValue() +if(e.match(t.settings.splitOn)){var i=e.trim().split(t.settings.splitOn) +y(i,(e=>{t.createItem(e)}))}}),0)}onKeyPress(e){var t=this +if(!t.isLocked){var i=String.fromCharCode(e.keyCode||e.which) +return t.settings.create&&"multi"===t.settings.mode&&i===t.settings.delimiter?(t.createItem(),void H(e)):void 0}H(e)}onKeyDown(e){var t=this +if(t.isLocked)9!==e.keyCode&&H(e) +else{switch(e.keyCode){case 65:if(K(V,e))return H(e),void t.selectAll() +break +case 27:return t.isOpen&&(H(e,!0),t.close()),void t.clearActiveItems() +case 40:if(!t.isOpen&&t.hasOptions)t.open() +else if(t.activeOption){let e=t.getAdjacent(t.activeOption,1) +e&&t.setActiveOption(e)}return void H(e) +case 38:if(t.activeOption){let e=t.getAdjacent(t.activeOption,-1) +e&&t.setActiveOption(e)}return void H(e) +case 13:return void(t.isOpen&&t.activeOption?(t.onOptionSelect(e,t.activeOption),H(e)):t.settings.create&&t.createItem()&&H(e)) +case 
37:return void t.advanceSelection(-1,e) +case 39:return void t.advanceSelection(1,e) +case 9:return void(t.settings.selectOnTab&&(t.isOpen&&t.activeOption&&(t.onOptionSelect(e,t.activeOption),H(e)),t.settings.create&&t.createItem()&&H(e))) +case 8:case 46:return void t.deleteSelection(e)}t.isInputHidden&&!K(V,e)&&H(e)}}onInput(e){var t=this +if(!t.isLocked){var i=t.inputValue() +t.lastValue!==i&&(t.lastValue=i,t.settings.shouldLoad.call(t,i)&&t.load(i),t.refreshOptions(),t.trigger("type",i))}}onFocus(e){var t=this,i=t.isFocused +if(t.isDisabled)return t.blur(),void H(e) +t.ignoreFocus||(t.isFocused=!0,"focus"===t.settings.preload&&t.preload(),i||t.trigger("focus"),t.activeItems.length||(t.showInput(),t.refreshOptions(!!t.settings.openOnFocus)),t.refreshState())}onBlur(e){if(!1!==document.hasFocus()){var t=this +if(t.isFocused){t.isFocused=!1,t.ignoreFocus=!1 +var i=()=>{t.close(),t.setActiveItem(),t.setCaret(t.items.length),t.trigger("blur")} +t.settings.create&&t.settings.createOnBlur?t.createItem(null,!1,i):i()}}}onOptionSelect(e,t){var i,s=this +t&&(t.parentElement&&t.parentElement.matches("[data-disabled]")||(t.classList.contains("create")?s.createItem(null,!0,(()=>{s.settings.closeAfterSelect&&s.close()})):void 0!==(i=t.dataset.value)&&(s.lastQuery=null,s.addItem(i),s.settings.closeAfterSelect&&s.close(),!s.settings.hideSelected&&e.type&&/click/.test(e.type)&&s.setActiveOption(t))))}onItemSelect(e,t){var i=this +return!i.isLocked&&"multi"===i.settings.mode&&(H(e),i.setActiveItem(t,e),!0)}canLoad(e){return!!this.settings.load&&!this.loadedSearches.hasOwnProperty(e)}load(e){const t=this +if(!t.canLoad(e))return +C(t.wrapper,t.settings.loadingClass),t.loading++ +const i=t.loadCallback.bind(t) +t.settings.load.call(t,e,i)}loadCallback(e,t){const i=this 
+i.loading=Math.max(i.loading-1,0),i.lastQuery=null,i.clearActiveOption(),i.setupOptions(e,t),i.refreshOptions(i.isFocused&&!i.isInputHidden),i.loading||S(i.wrapper,i.settings.loadingClass),i.trigger("load",e,t)}preload(){var e=this.wrapper.classList +e.contains("preloaded")||(e.add("preloaded"),this.load(""))}setTextboxValue(e=""){var t=this.control_input +t.value!==e&&(t.value=e,_(t,"update"),this.lastValue=e)}getValue(){return this.is_select_tag&&this.input.hasAttribute("multiple")?this.items:this.items.join(this.settings.delimiter)}setValue(e,t){R(this,t?[]:["change"],(()=>{this.clear(t),this.addItems(e,t)}))}setMaxItems(e){0===e&&(e=null),this.settings.maxItems=e,this.refreshState()}setActiveItem(e,t){var i,s,n,o,r,l,a=this +if("single"!==a.settings.mode){if(!e)return a.clearActiveItems(),void(a.isFocused&&a.showInput()) +if("click"===(i=t&&t.type.toLowerCase())&&K("shiftKey",t)&&a.activeItems.length){for(l=a.getLastActive(),(n=Array.prototype.indexOf.call(a.control.children,l))>(o=Array.prototype.indexOf.call(a.control.children,e))&&(r=n,n=o,o=r),s=n;s<=o;s++)e=a.control.children[s],-1===a.activeItems.indexOf(e)&&a.setActiveItemClass(e) +H(t)}else"click"===i&&K(V,t)||"keydown"===i&&K("shiftKey",t)?e.classList.contains("active")?a.removeActiveItem(e):a.setActiveItemClass(e):(a.clearActiveItems(),a.setActiveItemClass(e)) +a.hideInput(),a.isFocused||a.focus()}}setActiveItemClass(e){const t=this,i=t.control.querySelector(".last-active") +i&&S(i,"last-active"),C(e,"active last-active"),t.trigger("item_select",e),-1==t.activeItems.indexOf(e)&&t.activeItems.push(e)}removeActiveItem(e){var t=this.activeItems.indexOf(e) 
+this.activeItems.splice(t,1),S(e,"active")}clearActiveItems(){S(this.activeItems,"active"),this.activeItems=[]}setActiveOption(e){e!==this.activeOption&&(this.clearActiveOption(),e&&(this.activeOption=e,P(this.focus_node,{"aria-activedescendant":e.getAttribute("id")}),P(e,{"aria-selected":"true"}),C(e,"active"),this.scrollToOption(e)))}scrollToOption(e,t){if(!e)return +const i=this.dropdown_content,s=i.clientHeight,n=i.scrollTop||0,o=e.offsetHeight,r=e.getBoundingClientRect().top-i.getBoundingClientRect().top+n +r+o>s+n?this.scroll(r-s+o,t):r0||!e.isFocused&&e.settings.hidePlaceholder&&e.items.length>0?(e.setTextboxValue(),e.isInputHidden=!0):(e.settings.hidePlaceholder&&e.items.length>0&&P(e.control_input,{placeholder:""}),e.isInputHidden=!1),e.wrapper.classList.toggle("input-hidden",e.isInputHidden))}hideInput(){this.inputState()}showInput(){this.inputState()}inputValue(){return this.control_input.value.trim()}focus(){var e=this +e.isDisabled||(e.ignoreFocus=!0,e.control_input.offsetWidth?e.control_input.focus():e.focus_node.focus(),setTimeout((()=>{e.ignoreFocus=!1,e.onFocus()}),0))}blur(){this.focus_node.blur(),this.onBlur()}getScoreFunction(e){return this.sifter.getScoreFunction(e,this.getSearchOptions())}getSearchOptions(){var e=this.settings,t=e.sortField +return"string"==typeof e.sortField&&(t=[{field:e.sortField}]),{fields:e.searchField,conjunction:e.searchConjunction,sort:t,nesting:e.nesting}}search(e){var t,i,s,n=this,o=this.getSearchOptions() +if(n.settings.score&&"function"!=typeof(s=n.settings.score.call(n,e)))throw new Error('Tom Select "score" setting must be a function that returns a function') +if(e!==n.lastQuery?(n.lastQuery=e,i=n.sifter.search(e,Object.assign(o,{score:s})),n.currentResults=i):i=Object.assign({},n.currentResults),n.settings.hideSelected)for(t=i.items.length-1;t>=0;t--){let e=q(i.items[t].id) +e&&-1!==n.items.indexOf(e)&&i.items.splice(t,1)}return i}refreshOptions(e=!0){var t,i,s,n,o,r,l,a,c,d,p +const u={},h=[] +var 
g,f=this,v=f.inputValue(),m=f.search(v),O=f.activeOption,b=f.settings.shouldOpen||!1,w=f.dropdown_content +for(O&&(c=O.dataset.value,d=O.closest("[data-group]")),n=m.items.length,"number"==typeof f.settings.maxOptions&&(n=Math.min(n,f.settings.maxOptions)),n>0&&(b=!0),t=0;t0&&(l=l.cloneNode(!0),P(l,{id:n.$id+"-clone-"+i,"aria-selected":null}),l.classList.add("ts-cloned"),S(l,"active")),c==e&&d&&d.dataset.group===o&&(O=l),u[o].appendChild(l)}this.settings.lockOptgroupOrder&&h.sort(((e,t)=>(f.optgroups[e]&&f.optgroups[e].$order||0)-(f.optgroups[t]&&f.optgroups[t].$order||0))),l=document.createDocumentFragment(),y(h,(e=>{if(f.optgroups.hasOwnProperty(e)&&u[e].children.length){let t=document.createDocumentFragment(),i=f.render("optgroup_header",f.optgroups[e]) +G(t,i),G(t,u[e]) +let s=f.render("optgroup",{group:f.optgroups[e],options:t}) +G(l,s)}else G(l,u[e])})),w.innerHTML="",G(w,l),f.settings.highlight&&(g=w.querySelectorAll("span.highlight"),Array.prototype.forEach.call(g,(function(e){var t=e.parentNode +t.replaceChild(e.firstChild,e),t.normalize()})),m.query.length&&m.tokens.length&&y(m.tokens,(e=>{T(w,e.regex)}))) +var _=e=>{let t=f.render(e,{input:v}) +return t&&(b=!0,w.insertBefore(t,w.firstChild)),t} +if(f.loading?_("loading"):f.settings.shouldLoad.call(f,v)?0===m.items.length&&_("no_results"):_("not_loading"),(a=f.canCreate(v))&&(p=_("option_create")),f.hasOptions=m.items.length>0||a,b){if(m.items.length>0){if(!w.contains(O)&&"single"===f.settings.mode&&f.items.length&&(O=f.getOption(f.items[0])),!w.contains(O)){let e=0 +p&&!f.settings.addPrecedence&&(e=1),O=f.selectable()[e]}}else p&&(O=p) +e&&!f.isOpen&&(f.open(),f.scrollToOption(O,"auto")),f.setActiveOption(O)}else f.clearActiveOption(),e&&f.isOpen&&f.close(!1)}selectable(){return this.dropdown_content.querySelectorAll("[data-selectable]")}addOption(e,t=!1){const i=this +if(Array.isArray(e))return i.addOptions(e,t),!1 +const s=q(e[i.settings.valueField]) +return 
null!==s&&!i.options.hasOwnProperty(s)&&(e.$order=e.$order||++i.order,e.$id=i.inputId+"-opt-"+e.$order,i.options[s]=e,i.lastQuery=null,t&&(i.userOptions[s]=t,i.trigger("option_add",s,e)),s)}addOptions(e,t=!1){y(e,(e=>{this.addOption(e,t)}))}registerOption(e){return this.addOption(e)}registerOptionGroup(e){var t=q(e[this.settings.optgroupValueField]) +return null!==t&&(e.$order=e.$order||++this.order,this.optgroups[t]=e,t)}addOptionGroup(e,t){var i +t[this.settings.optgroupValueField]=e,(i=this.registerOptionGroup(t))&&this.trigger("optgroup_add",i,t)}removeOptionGroup(e){this.optgroups.hasOwnProperty(e)&&(delete this.optgroups[e],this.clearCache(),this.trigger("optgroup_remove",e))}clearOptionGroups(){this.optgroups={},this.clearCache(),this.trigger("optgroup_clear")}updateOption(e,t){const i=this +var s,n +const o=q(e),r=q(t[i.settings.valueField]) +if(null===o)return +if(!i.options.hasOwnProperty(o))return +if("string"!=typeof r)throw new Error("Value must be set in option data") +const l=i.getOption(o),a=i.getItem(o) +if(t.$order=t.$order||i.options[o].$order,delete i.options[o],i.uncacheValue(r),i.options[r]=t,l){if(i.dropdown_content.contains(l)){const e=i._render("option",t) +E(l,e),i.activeOption===l&&i.setActiveOption(e)}l.remove()}a&&(-1!==(n=i.items.indexOf(o))&&i.items.splice(n,1,r),s=i._render("item",t),a.classList.contains("active")&&C(s,"active"),E(a,s)),i.lastQuery=null}removeOption(e,t){const i=this +e=D(e),i.uncacheValue(e),delete i.userOptions[e],delete i.options[e],i.lastQuery=null,i.trigger("option_remove",e),i.removeItem(e,t)}clearOptions(){this.loadedSearches={},this.userOptions={},this.clearCache() +var e={} +y(this.options,((t,i)=>{this.items.indexOf(i)>=0&&(e[i]=this.options[i])})),this.options=this.sifter.items=e,this.lastQuery=null,this.trigger("option_clear")}getOption(e,t=!1){const i=q(e) +if(null!==i&&this.options.hasOwnProperty(i)){const e=this.options[i] +if(e.$div)return e.$div +if(t)return this._render("option",e)}return 
null}getAdjacent(e,t,i="option"){var s +if(!e)return null +s="item"==i?this.controlChildren():this.dropdown_content.querySelectorAll("[data-selectable]") +for(let i=0;i0?s[i+1]:s[i-1] +return null}getItem(e){if("object"==typeof e)return e +var t=q(e) +return null!==t?this.control.querySelector(`[data-value="${Q(t)}"]`):null}addItems(e,t){var i=this,s=Array.isArray(e)?e:[e] +for(let e=0,n=(s=s.filter((e=>-1===i.items.indexOf(e)))).length;e{var i,s +const n=this,o=n.settings.mode,r=q(e) +if((!r||-1===n.items.indexOf(r)||("single"===o&&n.close(),"single"!==o&&n.settings.duplicates))&&null!==r&&n.options.hasOwnProperty(r)&&("single"===o&&n.clear(t),"multi"!==o||!n.isFull())){if(i=n._render("item",n.options[r]),n.control.contains(i)&&(i=i.cloneNode(!0)),s=n.isFull(),n.items.splice(n.caretPos,0,r),n.insertAtCaret(i),n.isSetup){if(!n.isPending&&n.settings.hideSelected){let e=n.getOption(r),t=n.getAdjacent(e,1) +t&&n.setActiveOption(t)}n.isPending||n.refreshOptions(n.isFocused&&"single"!==o),0!=n.settings.closeAfterSelect&&n.isFull()?n.close():n.isPending||n.positionDropdown(),n.trigger("item_add",r,i),n.isPending||n.updateOriginalInput({silent:t})}(!n.isPending||!s&&n.isFull())&&(n.inputState(),n.refreshState())}}))}removeItem(e=null,t){const i=this +if(!(e=i.getItem(e)))return +var s,n +const o=e.dataset.value +s=L(e),e.remove(),e.classList.contains("active")&&(n=i.activeItems.indexOf(e),i.activeItems.splice(n,1),S(e,"active")),i.items.splice(s,1),i.lastQuery=null,!i.settings.persist&&i.userOptions.hasOwnProperty(o)&&i.removeOption(o,t),s{})){var s,n=this,o=n.caretPos +if(e=e||n.inputValue(),!n.canCreate(e))return i(),!1 +n.lock() +var r=!1,l=e=>{if(n.unlock(),!e||"object"!=typeof e)return i() +var s=q(e[n.settings.valueField]) +if("string"!=typeof s)return i() +n.setTextboxValue(),n.addOption(e,!0),n.setCaret(o),n.addItem(s),n.refreshOptions(t&&"single"!==n.settings.mode),i(e),r=!0} +return s="function"==typeof 
n.settings.create?n.settings.create.call(this,e,l):{[n.settings.labelField]:e,[n.settings.valueField]:e},r||l(s),!0}refreshItems(){var e=this +e.lastQuery=null,e.isSetup&&e.addItems(e.items),e.updateOriginalInput(),e.refreshState()}refreshState(){const e=this +e.refreshValidityState() +const t=e.isFull(),i=e.isLocked +e.wrapper.classList.toggle("rtl",e.rtl) +const s=e.wrapper.classList +var n +s.toggle("focus",e.isFocused),s.toggle("disabled",e.isDisabled),s.toggle("required",e.isRequired),s.toggle("invalid",!e.isValid),s.toggle("locked",i),s.toggle("full",t),s.toggle("input-active",e.isFocused&&!e.isInputHidden),s.toggle("dropdown-active",e.isOpen),s.toggle("has-options",(n=e.options,0===Object.keys(n).length)),s.toggle("has-items",e.items.length>0)}refreshValidityState(){var e=this +e.input.checkValidity&&(e.isValid=e.input.checkValidity(),e.isInvalid=!e.isValid)}isFull(){return null!==this.settings.maxItems&&this.items.length>=this.settings.maxItems}updateOriginalInput(e={}){const t=this +var i,s +const n=t.input.querySelector('option[value=""]') +if(t.is_select_tag){const e=[] +function o(i,s,o){return i||(i=w('")),i!=n&&t.input.append(i),e.push(i),i.selected=!0,i}t.input.querySelectorAll("option:checked").forEach((e=>{e.selected=!1})),0==t.items.length&&"single"==t.settings.mode?o(n,"",""):t.items.forEach((n=>{if(i=t.options[n],s=i[t.settings.labelField]||"",e.includes(i.$option)){o(t.input.querySelector(`option[value="${Q(n)}"]:not(:checked)`),n,s)}else i.$option=o(i.$option,n,s)}))}else t.input.value=t.getValue() +t.isSetup&&(e.silent||t.trigger("change",t.getValue()))}open(){var e=this +e.isLocked||e.isOpen||"multi"===e.settings.mode&&e.isFull()||(e.isOpen=!0,P(e.focus_node,{"aria-expanded":"true"}),e.refreshState(),I(e.dropdown,{visibility:"hidden",display:"block"}),e.positionDropdown(),I(e.dropdown,{visibility:"visible",display:"block"}),e.focus(),e.trigger("dropdown_open",e.dropdown))}close(e=!0){var t=this,i=t.isOpen 
+e&&(t.setTextboxValue(),"single"===t.settings.mode&&t.items.length&&t.hideInput()),t.isOpen=!1,P(t.focus_node,{"aria-expanded":"false"}),I(t.dropdown,{display:"none"}),t.settings.hideSelected&&t.clearActiveOption(),t.refreshState(),i&&t.trigger("dropdown_close",t.dropdown)}positionDropdown(){if("body"===this.settings.dropdownParent){var e=this.control,t=e.getBoundingClientRect(),i=e.offsetHeight+t.top+window.scrollY,s=t.left+window.scrollX +I(this.dropdown,{width:t.width+"px",top:i+"px",left:s+"px"})}}clear(e){var t=this +if(t.items.length){var i=t.controlChildren() +y(i,(e=>{t.removeItem(e,!0)})),t.showInput(),e||t.updateOriginalInput(),t.trigger("clear")}}insertAtCaret(e){const t=this,i=t.caretPos,s=t.control +s.insertBefore(e,s.children[i]),t.setCaret(i+1)}deleteSelection(e){var t,i,s,n,o,r=this +t=e&&8===e.keyCode?-1:1,i={start:(o=r.control_input).selectionStart||0,length:(o.selectionEnd||0)-(o.selectionStart||0)} +const l=[] +if(r.activeItems.length)n=F(r.activeItems,t),s=L(n),t>0&&s++,y(r.activeItems,(e=>l.push(e))) +else if((r.isFocused||"single"===r.settings.mode)&&r.items.length){const e=r.controlChildren() +t<0&&0===i.start&&0===i.length?l.push(e[r.caretPos-1]):t>0&&i.start===r.inputValue().length&&l.push(e[r.caretPos])}const a=l.map((e=>e.dataset.value)) +if(!a.length||"function"==typeof r.settings.onDelete&&!1===r.settings.onDelete.call(r,a,e))return!1 +for(H(e,!0),void 0!==s&&r.setCaret(s);l.length;)r.removeItem(l.pop()) +return r.showInput(),r.positionDropdown(),r.refreshOptions(!1),!0}advanceSelection(e,t){var i,s,n=this +n.rtl&&(e*=-1),n.inputValue().length||(K(V,t)||K("shiftKey",t)?(s=(i=n.getLastActive(e))?i.classList.contains("active")?n.getAdjacent(i,e,"item"):i:e>0?n.control_input.nextElementSibling:n.control_input.previousElementSibling)&&(s.classList.contains("active")&&n.removeActiveItem(i),n.setActiveItemClass(s)):n.moveCaret(e))}moveCaret(e){}getLastActive(e){let t=this.control.querySelector(".last-active") +if(t)return t +var 
i=this.control.querySelectorAll(".active") +return i?F(i,e):void 0}setCaret(e){this.caretPos=this.items.length}controlChildren(){return Array.from(this.control.querySelectorAll("[data-ts-item]"))}lock(){this.close(),this.isLocked=!0,this.refreshState()}unlock(){this.isLocked=!1,this.refreshState()}disable(){var e=this +e.input.disabled=!0,e.control_input.disabled=!0,e.focus_node.tabIndex=-1,e.isDisabled=!0,e.lock()}enable(){var e=this +e.input.disabled=!1,e.control_input.disabled=!1,e.focus_node.tabIndex=e.tabIndex,e.isDisabled=!1,e.unlock()}destroy(){var e=this,t=e.revertSettings +e.trigger("destroy"),e.off(),e.wrapper.remove(),e.dropdown.remove(),e.input.innerHTML=t.innerHTML,e.input.tabIndex=t.tabIndex,S(e.input,"tomselected","ts-hidden-accessible"),e._destroy(),delete e.input.tomselect}render(e,t){return"function"!=typeof this.settings.render[e]?null:this._render(e,t)}_render(e,t){var i,s,n="" +const o=this +return"option"!==e&&"item"!=e||(n=D(t[o.settings.valueField])),null==(s=o.settings.render[e].call(this,t,N))||(s=w(s),"option"===e||"option_create"===e?t[o.settings.disabledField]?P(s,{"aria-disabled":"true"}):P(s,{"data-selectable":""}):"optgroup"===e&&(i=t.group[o.settings.optgroupValueField],P(s,{"data-group":i}),t.group[o.settings.disabledField]&&P(s,{"data-disabled":""})),"option"!==e&&"item"!==e||(P(s,{"data-value":n}),"item"===e?(C(s,o.settings.itemClass),P(s,{"data-ts-item":""})):(C(s,o.settings.optionClass),P(s,{role:"option",id:t.$id}),o.options[n].$div=s))),s}clearCache(){y(this.options,((e,t)=>{e.$div&&(e.$div.remove(),delete e.$div)}))}uncacheValue(e){const t=this.getOption(e) +t&&t.remove()}canCreate(e){return this.settings.create&&e.length>0&&this.settings.createFilter.call(this,e)}hook(e,t,i){var s=this,n=s[t] +s[t]=function(){var t,o +return"after"===e&&(t=n.apply(s,arguments)),o=i.apply(s,arguments),"instead"===e?o:("before"===e&&(t=n.apply(s,arguments)),t)}}}return 
J.define("change_listener",(function(){B(this.input,"change",(()=>{this.sync()}))})),J.define("checkbox_options",(function(){var e=this,t=e.onOptionSelect +e.settings.hideSelected=!1 +var i=function(e){setTimeout((()=>{var t=e.querySelector("input") +e.classList.contains("selected")?t.checked=!0:t.checked=!1}),1)} +e.hook("after","setupTemplates",(()=>{var t=e.settings.render.option +e.settings.render.option=(i,s)=>{var n=w(t.call(e,i,s)),o=document.createElement("input") +o.addEventListener("click",(function(e){H(e)})),o.type="checkbox" +const r=q(i[e.settings.valueField]) +return r&&e.items.indexOf(r)>-1&&(o.checked=!0),n.prepend(o),n}})),e.on("item_remove",(t=>{var s=e.getOption(t) +s&&(s.classList.remove("selected"),i(s))})),e.hook("instead","onOptionSelect",((s,n)=>{if(n.classList.contains("selected"))return n.classList.remove("selected"),e.removeItem(n.dataset.value),e.refreshOptions(),void H(s,!0) +t.call(e,s,n),i(n)}))})),J.define("clear_button",(function(e){const t=this,i=Object.assign({className:"clear-button",title:"Clear All",html:e=>`
×
`},e) +t.on("initialize",(()=>{var e=w(i.html(i)) +e.addEventListener("click",(e=>{t.clear(),"single"===t.settings.mode&&t.settings.allowEmptyOption&&t.addItem(""),e.preventDefault(),e.stopPropagation()})),t.control.appendChild(e)}))})),J.define("drag_drop",(function(){var e=this +if(!$.fn.sortable)throw new Error('The "drag_drop" plugin requires jQuery UI "sortable".') +if("multi"===e.settings.mode){var t=e.lock,i=e.unlock +e.hook("instead","lock",(()=>{var i=$(e.control).data("sortable") +return i&&i.disable(),t.call(e)})),e.hook("instead","unlock",(()=>{var t=$(e.control).data("sortable") +return t&&t.enable(),i.call(e)})),e.on("initialize",(()=>{var t=$(e.control).sortable({items:"[data-value]",forcePlaceholderSize:!0,disabled:e.isLocked,start:(e,i)=>{i.placeholder.css("width",i.helper.css("width")),t.css({overflow:"visible"})},stop:()=>{t.css({overflow:"hidden"}) +var i=[] +t.children("[data-value]").each((function(){this.dataset.value&&i.push(this.dataset.value)})),e.setValue(i)}})}))}})),J.define("dropdown_header",(function(e){const t=this,i=Object.assign({title:"Untitled",headerClass:"dropdown-header",titleRowClass:"dropdown-header-title",labelClass:"dropdown-header-label",closeClass:"dropdown-header-close",html:e=>'
'+e.title+'×
'},e) +t.on("initialize",(()=>{var e=w(i.html(i)),s=e.querySelector("."+i.closeClass) +s&&s.addEventListener("click",(e=>{H(e,!0),t.close()})),t.dropdown.insertBefore(e,t.dropdown.firstChild)}))})),J.define("caret_position",(function(){var e=this +e.hook("instead","setCaret",(t=>{"single"!==e.settings.mode&&e.control.contains(e.control_input)?(t=Math.max(0,Math.min(e.items.length,t)))==e.caretPos||e.isPending||e.controlChildren().forEach(((i,s)=>{s{if(!e.isFocused)return +const i=e.getLastActive(t) +if(i){const s=L(i) +e.setCaret(t>0?s+1:s),e.setActiveItem()}else e.setCaret(e.caretPos+t)}))})),J.define("dropdown_input",(function(){var e=this +e.settings.shouldOpen=!0,e.hook("before","setup",(()=>{e.focus_node=e.control,C(e.control_input,"dropdown-input") +const t=w('