# Two-stage image build: a Node.js stage builds the web UI, then the CoreX
# runtime stage installs MinerU and bundles the built front-end assets.

# ==========================================
# Stage 1: front-end build (Node.js builder)
# ==========================================
FROM node:18-alpine AS frontend-builder

# Every front-end build step runs from /build (WORKDIR creates it if missing).
WORKDIR /build

# Layer-cache optimisation: copy only the npm manifests first, so the
# dependency layer below is reused until package.json / package-lock.json change.
COPY web_ui/package*.json ./

# Install front-end dependencies through the npmmirror registry.
# When a lockfile was copied, `npm ci` installs exactly what it pins and fails
# on drift instead of silently re-resolving versions. Without a lockfile the
# build falls back to `npm install` so it keeps working.
RUN npm config set registry https://registry.npmmirror.com && \
    if [ -f package-lock.json ]; then \
        npm ci; \
    else \
        npm install; \
    fi

# Copy the remaining front-end sources and run the project's build script.
# The runtime stage later copies the result from /build/dist.
COPY web_ui/ ./
RUN npm run build

# ==========================================
# Stage 2: final runtime image (CoreX runtime)
# ==========================================
FROM crpi-vofi3w62lkohhxsp.cn-shanghai.personal.cr.aliyuncs.com/opendatalab-mineru/corex:4.4.0_torch2.7.1_vllm0.11.2_py3.10

# Build-time only: stops apt/debconf from prompting during the RUN steps of
# this stage. Declared as ARG rather than ENV so the setting is visible to RUN
# instructions but is not baked into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# Persisted into the runtime environment.
# NOTE(review): presumably makes MinerU load models from local disk instead of
# fetching them at runtime — confirm against the MinerU documentation.
ENV MINERU_MODEL_SOURCE=local

# 1. System-level dependencies: only the fonts and tools needed at runtime.
#    --no-install-recommends keeps optional "recommended" packages out of the
#    image. The font cache is rebuilt and the apt lists are removed in this same
#    layer so they never add to the image size.
#    NOTE(review): libgl1-mesa-glx is a transitional package that newer
#    Debian/Ubuntu releases no longer ship — confirm it exists in the base
#    image's distribution before bumping the base tag.
RUN apt-get update && apt-get install -y --no-install-recommends \
        fontconfig \
        fonts-noto-cjk \
        fonts-noto-core \
        libgl1-mesa-glx \
    && fc-cache -fv \
    && apt-get clean && rm -rf /var/lib/apt/lists/*

# 2. Python dependency strategy:
#    - mineru is installed with --no-deps so pip does not overwrite the
#      torch 2.7.1 / vllm 0.11.2 builds that ship with the CoreX base image.
#    - The last install adds the lightweight dependencies MinerU needs at
#      runtime that the base image may be missing.
#    - --no-cache-dir keeps pip's download cache out of the layer. This replaces
#      a trailing `pip cache purge`, which fails the build when the cache is
#      disabled.
#    NOTE(review): the packages in the last install are resolved WITH their
#    dependencies, so "magic-pdf" and "pydantic<2.0" may still pull in or
#    downgrade packages provided by the base image — confirm they are compatible
#    with the preinstalled torch/vllm.
RUN python3 -m pip install --no-cache-dir -U pip -i https://mirrors.aliyun.com/pypi/simple && \
    python3 -m pip install --no-cache-dir 'mineru[core]>=2.7.4' --no-deps -i https://mirrors.aliyun.com/pypi/simple && \
    python3 -m pip install --no-cache-dir \
        "numpy==1.26.4" \
        "opencv-python==4.11.0.86" \
        "pydantic<2.0" \
        "modelscope" \
        "magic-pdf" \
        -i https://mirrors.aliyun.com/pypi/simple

WORKDIR /app

# 3. Configuration file: must be written before the model download below.
#    The JSON is emitted on a single line because the trailing backslashes are
#    Dockerfile line continuations.
RUN mkdir -p /root/.cache/modelscope/hub/models && \
    echo '{ \
        "models-dir": "/root/.cache/modelscope/hub/models", \
        "device-mode": "gpu", \
        "vlm-config": { \
            "kind": "vllm", \
            "precision": "fp16" \
        } \
    }' > /root/magic-pdf.json

# 4. Pre-download all models from ModelScope at build time.
#    This step runs BEFORE the project sources are copied so that this layer
#    stays cached when only application code changes. Previously it followed
#    `COPY . .`, so every source edit invalidated the layer and repeated the
#    download.
#    NOTE(review): assumes mineru-models-download needs only the pip-installed
#    package and the config written above, and reads nothing from /app — confirm.
RUN /bin/bash -c "export MINERU_MODEL_SOURCE=local && mineru-models-download -s modelscope -m all"

# 5. Copy the project sources into /app.
#    NOTE(review): this copies the whole build context. web_ui/ is only left
#    out if .dockerignore excludes it, and stage 1 also reads web_ui/ from the
#    same context, so it cannot simply be ignored there — verify the intended
#    exclusion.
COPY . .

# 6. Copy the front-end build output from stage 1.
#    COPY creates the destination directory (and any missing parents), so no
#    separate `RUN mkdir -p` layer is needed.
COPY --from=frontend-builder /build/dist/ ./mineru/cli/static/web/

# 7. Entry point.
#    The bash wrapper exec's whatever argument list the container receives (the
#    trailing "--" only fills $0). CMD below, or any command passed to
#    `docker run`, therefore replaces the shell instead of running as its child.
ENTRYPOINT ["/bin/bash", "-c", "exec \"$@\"", "--"]

# Default command: start the MinerU CLI module.
CMD ["python3", "-m", "mineru.cli.main"]