# Production Dockerfile for MotoVaultPro OCR Service
# Uses mirrored base images from Gitea Package Registry
#
# Primary engine: PaddleOCR PP-OCRv4 (models baked into image)
# Cloud fallback: Google Vision (optional, requires API key at runtime)

# Registry prefix for the base image. Defaults to the Gitea mirrors; there is
# no automatic fallback, so to pull from another registry override it, e.g.
#   docker build --build-arg REGISTRY_MIRRORS=docker.io/library .
ARG REGISTRY_MIRRORS=git.motovaultpro.com/egullickson/mirrors

FROM ${REGISTRY_MIRRORS}/python:3.12-slim

# System dependencies (kept in alphabetical order for clean diffs)
# - curl: Health check endpoint
# - libglib2.0-0: GLib shared library (OpenCV dependency)
# - libgomp1: OpenMP runtime required by PaddlePaddle
# - libheif1/libheif-dev: HEIF image support (iPhone photos)
# - libmagic1: File type detection
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        libglib2.0-0 \
        libgomp1 \
        libheif-dev \
        libheif1 \
        libmagic1 \
    && rm -rf /var/lib/apt/lists/*

# Unprivileged runtime account with a fixed UID/GID so orchestrators can verify
# the container does not run as root. /app is created here and handed to that
# account so the service can write inside its working directory.
# NOTE(review): if the deployment bind-mounts host paths into the container,
# make sure they are writable by UID 10001.
RUN groupadd --system --gid 10001 app \
    && useradd --system --uid 10001 --gid app \
        --create-home --home-dir /home/app --shell /usr/sbin/nologin app \
    && mkdir -p /app \
    && chown app:app /app

# Python dependencies
WORKDIR /app

# Copy only the manifest first so the dependency layer stays cached until
# requirements.txt itself changes.
COPY requirements.txt .

# Install dependencies. PaddleX (transitive via paddleocr) pulls in the full
# opencv-python which requires libGL.so.1. Force-reinstall the headless
# variant afterwards so the container stays GUI-free.
# Runs as root so packages land in the system site-packages.
RUN pip install --no-cache-dir -r requirements.txt \
    && pip install --no-cache-dir --force-reinstall opencv-python-headless

# Pre-download PaddleOCR PP-OCRv4 models during build (not at runtime).
# Models are baked into the image so container starts are fast and
# no network access is needed at runtime for model download.
ENV PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK=True

# Drop privileges before fetching the models so the files are owned by, and
# stored relative to, the same account that runs the service.
# NOTE(review): the model cache is expected to land under the invoking user's
# home directory (PaddleX default ~/.paddlex) - confirm that the runtime
# process resolves the same location.
USER app

RUN python -c "from paddleocr import PaddleOCR; PaddleOCR(ocr_version='PP-OCRv4', use_textline_orientation=True, lang='en', device='cpu', enable_mkldnn=False)" \
    && echo "PaddleOCR PP-OCRv4 models downloaded and verified"

# Application source goes last: it changes most often, so the layers above
# (system packages, Python dependencies, models) remain cached.
COPY --chown=app:app . .

# Documentation only; the port still has to be published at run time.
EXPOSE 8000

# Exec form keeps uvicorn as PID 1 so it receives stop signals directly.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]