Files
motovaultpro/ocr/Dockerfile
Eric Gullickson 9209739e75 feat: add Auth0 WIF token script and update Dockerfile (refs #127)
- Create fetch-auth0-token.sh for Auth0 M2M -> GCP WIF token exchange
- Add jq to Dockerfile system dependencies
- Ensure script is executable in container image

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 20:51:30 -06:00

52 lines
2.0 KiB
Docker

# Production Dockerfile for MotoVaultPro OCR Service
# Uses mirrored base images from Gitea Package Registry
#
# Primary engine: Google Vision via Auth0 WIF (monthly-capped)
# Fallback engine: PaddleOCR PP-OCRv4 (models baked into image)
# Registry prefix for the base image (build argument). Defaults to the Gitea
# mirror. Nothing in this file falls back automatically: to pull from another
# registry (e.g. Docker Hub), override it at build time with
# --build-arg REGISTRY_MIRRORS=<prefix>.
ARG REGISTRY_MIRRORS=git.motovaultpro.com/egullickson/mirrors
# Base image: python:3.13-slim resolved under the registry prefix above.
FROM ${REGISTRY_MIRRORS}/python:3.13-slim
# OS-level packages, listed alphabetically so additions diff cleanly:
#   curl                    Health check endpoint + Auth0 token fetch
#   jq                      JSON parsing for Auth0 token script
#   libglib2.0-0            GLib shared library (OpenCV dependency)
#   libgomp1                OpenMP runtime required by PaddlePaddle
#   libheif-dev / libheif1  HEIF image support (iPhone photos)
#   libmagic1               File type detection
# The apt package lists are deleted in the same layer that created them so
# they never end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
        jq \
        libglib2.0-0 \
        libgomp1 \
        libheif-dev \
        libheif1 \
        libmagic1 \
    && rm -rf /var/lib/apt/lists/*
# Python dependencies
WORKDIR /app
# Copy only the dependency manifest first so the install layer below stays
# cached until requirements.txt itself changes.
COPY requirements.txt .
# Install dependencies. PaddleX (transitive via paddleocr) pulls in the full
# opencv-python which requires libGL.so.1. Force-reinstall the headless
# variant afterwards so the container stays GUI-free.
# --no-deps restricts the force-reinstall to opencv-python-headless itself;
# without it pip would also reinstall that package's dependencies (numpy),
# possibly at a different version than the one resolved from requirements.txt.
RUN pip install --no-cache-dir -r requirements.txt \
    && pip install --no-cache-dir --force-reinstall --no-deps opencv-python-headless
# Pre-download PaddleOCR PP-OCRv4 models during build (not at runtime).
# Models are baked into the image so container starts are fast and
# no network access is needed at runtime for model download.
# This is an ENV (not an ARG), so the variable is also present in the running
# container's environment, not only during the build.
ENV PADDLE_PDX_DISABLE_MODEL_SOURCE_CHECK=True
# Instantiate PaddleOCR once at build time (PP-OCRv4, English, CPU, text-line
# orientation enabled, MKL-DNN disabled) so its models land in this layer.
# NOTE(review): presumably the application must construct PaddleOCR with the
# same options for the baked-in models to be reused -- confirm against app code.
# The echo only runs when the Python command exits successfully (&&).
RUN python -c "from paddleocr import PaddleOCR; PaddleOCR(ocr_version='PP-OCRv4', use_textline_orientation=True, lang='en', device='cpu', enable_mkldnn=False)" \
&& echo "PaddleOCR PP-OCRv4 models downloaded and verified"
# Copy the application source last so that source-only changes do not
# invalidate the dependency and model layers built earlier in this file.
COPY . .
# Ensure Auth0 WIF token script is executable
RUN chmod +x /app/scripts/fetch-auth0-token.sh
# NOTE(review): no USER instruction is visible in this file, so the container
# runs as the base image's default user (presumably root) -- confirm whether a
# non-root user is wanted. The PaddleOCR models were downloaded by the build
# user, so a different runtime user may need access to that model cache.
# EXPOSE is documentation only; it does not publish the port.
EXPOSE 8000
# Exec-form CMD: uvicorn serves app.main:app on all interfaces, port 8000.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]