chore: update Docker and compose files for PaddleOCR engine (refs #119)

- Replace libtesseract-dev with libgomp1 (OpenMP for PaddlePaddle)
- Pre-download PP-OCRv4 models during Docker build
- Add OCR engine env vars to all compose files (base, staging, prod)
- Add optional Google Vision secret mount (commented, enable on demand)
- Create google-vision-key.json.example placeholder

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Eric Gullickson
2026-02-07 11:17:44 -06:00
parent 4ef942cb9d
commit 9b6417379b
5 changed files with 57 additions and 2 deletions

View File

@@ -38,13 +38,17 @@ services:
STRIPE_ENTERPRISE_MONTHLY_PRICE_ID: prod_Toj8xGEui9jl6j
STRIPE_ENTERPRISE_YEARLY_PRICE_ID: prod_Toj9A7A773xrdn
# OCR - Production log level
# OCR - Production log level + engine config
mvp-ocr:
environment:
LOG_LEVEL: error
REDIS_HOST: mvp-redis
REDIS_PORT: 6379
REDIS_DB: 1
OCR_PRIMARY_ENGINE: paddleocr
OCR_FALLBACK_ENGINE: ${OCR_FALLBACK_ENGINE:-none}
OCR_FALLBACK_THRESHOLD: ${OCR_FALLBACK_THRESHOLD:-0.6}
GOOGLE_VISION_KEY_PATH: /run/secrets/google-vision-key.json
# PostgreSQL - Remove dev ports, production log level
mvp-postgres:

View File

@@ -63,6 +63,15 @@ services:
mvp-ocr:
image: ${OCR_IMAGE:-git.motovaultpro.com/egullickson/ocr:latest}
container_name: mvp-ocr-staging
environment:
LOG_LEVEL: debug
REDIS_HOST: mvp-redis
REDIS_PORT: 6379
REDIS_DB: 1
OCR_PRIMARY_ENGINE: paddleocr
OCR_FALLBACK_ENGINE: ${OCR_FALLBACK_ENGINE:-none}
OCR_FALLBACK_THRESHOLD: ${OCR_FALLBACK_THRESHOLD:-0.6}
GOOGLE_VISION_KEY_PATH: /run/secrets/google-vision-key.json
# ========================================
# PostgreSQL (Staging - Separate Database)

View File

@@ -193,8 +193,16 @@ services:
REDIS_HOST: mvp-redis
REDIS_PORT: 6379
REDIS_DB: 1
# OCR engine configuration (PaddleOCR primary, cloud fallback optional)
OCR_PRIMARY_ENGINE: paddleocr
OCR_FALLBACK_ENGINE: ${OCR_FALLBACK_ENGINE:-none}
OCR_FALLBACK_THRESHOLD: ${OCR_FALLBACK_THRESHOLD:-0.6}
GOOGLE_VISION_KEY_PATH: /run/secrets/google-vision-key.json
volumes:
- /tmp/vin-debug:/tmp/vin-debug
# Optional: Uncomment to enable Google Vision cloud fallback.
# Requires: secrets/app/google-vision-key.json and OCR_FALLBACK_ENGINE=google_vision
# - ./secrets/app/google-vision-key.json:/run/secrets/google-vision-key.json:ro
networks:
- backend
- database

View File

@@ -1,5 +1,9 @@
# Production Dockerfile for MotoVaultPro OCR Service
# Uses mirrored base images from Gitea Package Registry
#
# Primary engine: PaddleOCR PP-OCRv4 (models baked into image)
# Backward compat: Tesseract 5.x (optional, via TesseractEngine)
# Cloud fallback: Google Vision (optional, requires API key at runtime)
# Build argument for registry (defaults to Gitea mirrors, falls back to Docker Hub)
ARG REGISTRY_MIRRORS=git.motovaultpro.com/egullickson/mirrors
@@ -7,10 +11,16 @@ ARG REGISTRY_MIRRORS=git.motovaultpro.com/egullickson/mirrors
FROM ${REGISTRY_MIRRORS}/python:3.13-slim
# System dependencies
# - tesseract-ocr/tesseract-ocr-eng: Tesseract engine and English language data, kept for backward compatibility (used by TesseractEngine)
# - libgomp1: OpenMP runtime required by PaddlePaddle
# - libheif1/libheif-dev: HEIF image support (iPhone photos)
# - libglib2.0-0: GLib shared library (OpenCV dependency)
# - libmagic1: File type detection
# - curl: HTTP client used to probe the health check endpoint
RUN apt-get update && apt-get install -y --no-install-recommends \
tesseract-ocr \
tesseract-ocr-eng \
libtesseract-dev \
libgomp1 \
libheif1 \
libheif-dev \
libglib2.0-0 \
@@ -23,6 +33,12 @@ WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Pre-download PaddleOCR PP-OCRv4 models during build (not at runtime).
# Models are baked into the image so container starts are fast and
# no network access is needed at runtime for model download.
# The download is expected to happen as a side effect of constructing
# PaddleOCR; the echo is chained with && so it only prints when the Python
# process exits successfully.
# NOTE(review): use_gpu and show_log look like PaddleOCR 2.x constructor
# arguments - confirm requirements.txt pins a release that still accepts them.
# NOTE(review): presumably the models are cached under the build user's home
# directory - verify the runtime user can read that location, otherwise the
# download would repeat at container start.
RUN python -c "from paddleocr import PaddleOCR; PaddleOCR(use_angle_cls=True, lang='en', use_gpu=False, show_log=False)" \
&& echo "PaddleOCR PP-OCRv4 models downloaded and verified"
COPY . .
EXPOSE 8000

View File

@@ -0,0 +1,18 @@
{
"_comment": "Google Vision API service account key for OCR cloud fallback",
"_instructions": [
"1. Create a Google Cloud service account with Vision API access",
"2. Download the JSON key file",
"3. Save it as secrets/app/google-vision-key.json (gitignored)",
"4. Uncomment the volume mount in docker-compose.yml",
"5. Set OCR_FALLBACK_ENGINE=google_vision"
],
"type": "service_account",
"project_id": "your-project-id",
"private_key_id": "",
"private_key": "",
"client_email": "your-sa@your-project-id.iam.gserviceaccount.com",
"client_id": "",
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
"token_uri": "https://oauth2.googleapis.com/token"
}