chore: update Docker and compose files for PaddleOCR engine (refs #119)
- Replace libtesseract-dev with libgomp1 (OpenMP for PaddlePaddle)
- Pre-download PP-OCRv4 models during Docker build
- Add OCR engine env vars to all compose files (base, staging, prod)
- Add optional Google Vision secret mount (commented, enable on demand)
- Create google-vision-key.json.example placeholder

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -38,13 +38,17 @@ services:
|
|||||||
STRIPE_ENTERPRISE_MONTHLY_PRICE_ID: prod_Toj8xGEui9jl6j
|
STRIPE_ENTERPRISE_MONTHLY_PRICE_ID: prod_Toj8xGEui9jl6j
|
||||||
STRIPE_ENTERPRISE_YEARLY_PRICE_ID: prod_Toj9A7A773xrdn
|
STRIPE_ENTERPRISE_YEARLY_PRICE_ID: prod_Toj9A7A773xrdn
|
||||||
|
|
||||||
# OCR - Production log level
|
# OCR - Production log level + engine config
|
||||||
mvp-ocr:
|
mvp-ocr:
|
||||||
environment:
|
environment:
|
||||||
LOG_LEVEL: error
|
LOG_LEVEL: error
|
||||||
REDIS_HOST: mvp-redis
|
REDIS_HOST: mvp-redis
|
||||||
REDIS_PORT: 6379
|
REDIS_PORT: 6379
|
||||||
REDIS_DB: 1
|
REDIS_DB: 1
|
||||||
|
OCR_PRIMARY_ENGINE: paddleocr
|
||||||
|
OCR_FALLBACK_ENGINE: ${OCR_FALLBACK_ENGINE:-none}
|
||||||
|
OCR_FALLBACK_THRESHOLD: ${OCR_FALLBACK_THRESHOLD:-0.6}
|
||||||
|
GOOGLE_VISION_KEY_PATH: /run/secrets/google-vision-key.json
|
||||||
|
|
||||||
# PostgreSQL - Remove dev ports, production log level
|
# PostgreSQL - Remove dev ports, production log level
|
||||||
mvp-postgres:
|
mvp-postgres:
|
||||||
|
|||||||
@@ -63,6 +63,15 @@ services:
|
|||||||
mvp-ocr:
|
mvp-ocr:
|
||||||
image: ${OCR_IMAGE:-git.motovaultpro.com/egullickson/ocr:latest}
|
image: ${OCR_IMAGE:-git.motovaultpro.com/egullickson/ocr:latest}
|
||||||
container_name: mvp-ocr-staging
|
container_name: mvp-ocr-staging
|
||||||
|
environment:
|
||||||
|
LOG_LEVEL: debug
|
||||||
|
REDIS_HOST: mvp-redis
|
||||||
|
REDIS_PORT: 6379
|
||||||
|
REDIS_DB: 1
|
||||||
|
OCR_PRIMARY_ENGINE: paddleocr
|
||||||
|
OCR_FALLBACK_ENGINE: ${OCR_FALLBACK_ENGINE:-none}
|
||||||
|
OCR_FALLBACK_THRESHOLD: ${OCR_FALLBACK_THRESHOLD:-0.6}
|
||||||
|
GOOGLE_VISION_KEY_PATH: /run/secrets/google-vision-key.json
|
||||||
|
|
||||||
# ========================================
|
# ========================================
|
||||||
# PostgreSQL (Staging - Separate Database)
|
# PostgreSQL (Staging - Separate Database)
|
||||||
|
|||||||
@@ -193,8 +193,16 @@ services:
|
|||||||
REDIS_HOST: mvp-redis
|
REDIS_HOST: mvp-redis
|
||||||
REDIS_PORT: 6379
|
REDIS_PORT: 6379
|
||||||
REDIS_DB: 1
|
REDIS_DB: 1
|
||||||
|
# OCR engine configuration (PaddleOCR primary, cloud fallback optional)
|
||||||
|
OCR_PRIMARY_ENGINE: paddleocr
|
||||||
|
OCR_FALLBACK_ENGINE: ${OCR_FALLBACK_ENGINE:-none}
|
||||||
|
OCR_FALLBACK_THRESHOLD: ${OCR_FALLBACK_THRESHOLD:-0.6}
|
||||||
|
GOOGLE_VISION_KEY_PATH: /run/secrets/google-vision-key.json
|
||||||
volumes:
|
volumes:
|
||||||
- /tmp/vin-debug:/tmp/vin-debug
|
- /tmp/vin-debug:/tmp/vin-debug
|
||||||
|
# Optional: Uncomment to enable Google Vision cloud fallback.
|
||||||
|
# Requires: secrets/app/google-vision-key.json and OCR_FALLBACK_ENGINE=google_vision
|
||||||
|
# - ./secrets/app/google-vision-key.json:/run/secrets/google-vision-key.json:ro
|
||||||
networks:
|
networks:
|
||||||
- backend
|
- backend
|
||||||
- database
|
- database
|
||||||
|
|||||||
@@ -1,5 +1,9 @@
|
|||||||
# Production Dockerfile for MotoVaultPro OCR Service
|
# Production Dockerfile for MotoVaultPro OCR Service
|
||||||
# Uses mirrored base images from Gitea Package Registry
|
# Uses mirrored base images from Gitea Package Registry
|
||||||
|
#
|
||||||
|
# Primary engine: PaddleOCR PP-OCRv4 (models baked into image)
|
||||||
|
# Backward compat: Tesseract 5.x (optional, via TesseractEngine)
|
||||||
|
# Cloud fallback: Google Vision (optional, requires API key at runtime)
|
||||||
|
|
||||||
# Build argument for registry (defaults to Gitea mirrors, falls back to Docker Hub)
|
# Build argument for registry (defaults to Gitea mirrors, falls back to Docker Hub)
|
||||||
ARG REGISTRY_MIRRORS=git.motovaultpro.com/egullickson/mirrors
|
ARG REGISTRY_MIRRORS=git.motovaultpro.com/egullickson/mirrors
|
||||||
@@ -7,10 +11,16 @@ ARG REGISTRY_MIRRORS=git.motovaultpro.com/egullickson/mirrors
|
|||||||
FROM ${REGISTRY_MIRRORS}/python:3.13-slim
|
FROM ${REGISTRY_MIRRORS}/python:3.13-slim
|
||||||
|
|
||||||
# System dependencies
|
# System dependencies
|
||||||
|
# - tesseract-ocr/eng: Backward-compatible OCR engine (used by TesseractEngine)
|
||||||
|
# - libgomp1: OpenMP runtime required by PaddlePaddle
|
||||||
|
# - libheif1/libheif-dev: HEIF image support (iPhone photos)
|
||||||
|
# - libglib2.0-0: GLib shared library (OpenCV dependency)
|
||||||
|
# - libmagic1: File type detection
|
||||||
|
# - curl: Health check endpoint
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
tesseract-ocr \
|
tesseract-ocr \
|
||||||
tesseract-ocr-eng \
|
tesseract-ocr-eng \
|
||||||
libtesseract-dev \
|
libgomp1 \
|
||||||
libheif1 \
|
libheif1 \
|
||||||
libheif-dev \
|
libheif-dev \
|
||||||
libglib2.0-0 \
|
libglib2.0-0 \
|
||||||
@@ -23,6 +33,12 @@ WORKDIR /app
|
|||||||
COPY requirements.txt .
|
COPY requirements.txt .
|
||||||
RUN pip install --no-cache-dir -r requirements.txt
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
|
# Pre-download PaddleOCR PP-OCRv4 models during build (not at runtime).
|
||||||
|
# Models are baked into the image so container starts are fast and
|
||||||
|
# no network access is needed at runtime for model download.
|
||||||
|
RUN python -c "from paddleocr import PaddleOCR; PaddleOCR(use_angle_cls=True, lang='en', use_gpu=False, show_log=False)" \
|
||||||
|
&& echo "PaddleOCR PP-OCRv4 models downloaded and verified"
|
||||||
|
|
||||||
COPY . .
|
COPY . .
|
||||||
|
|
||||||
EXPOSE 8000
|
EXPOSE 8000
|
||||||
|
|||||||
18
secrets/app/google-vision-key.json.example
Normal file
18
secrets/app/google-vision-key.json.example
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
{
|
||||||
|
"_comment": "Google Vision API service account key for OCR cloud fallback",
|
||||||
|
"_instructions": [
|
||||||
|
"1. Create a Google Cloud service account with Vision API access",
|
||||||
|
"2. Download the JSON key file",
|
||||||
|
"3. Save it as secrets/app/google-vision-key.json (gitignored)",
|
||||||
|
"4. Uncomment the volume mount in docker-compose.yml",
|
||||||
|
"5. Set OCR_FALLBACK_ENGINE=google_vision"
|
||||||
|
],
|
||||||
|
"type": "service_account",
|
||||||
|
"project_id": "your-project-id",
|
||||||
|
"private_key_id": "",
|
||||||
|
"private_key": "",
|
||||||
|
"client_email": "your-sa@your-project-id.iam.gserviceaccount.com",
|
||||||
|
"client_id": "",
|
||||||
|
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
||||||
|
"token_uri": "https://oauth2.googleapis.com/token"
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user