From 1ba491144b1c645622589688ed41eedd9b54b013 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Sun, 1 Feb 2026 13:06:16 -0600 Subject: [PATCH 1/5] feat: add OCR service container (refs #64) Add Python-based OCR service container (mvp-ocr) as the 6th service: - Python 3.11-slim with FastAPI/uvicorn - Tesseract OCR with English language pack - pillow-heif for HEIC image support - opencv-python-headless for image preprocessing - Health endpoint at /health - Unit tests for health, HEIC support, and Tesseract availability Co-Authored-By: Claude Opus 4.5 --- docker-compose.yml | 18 ++++++++++++++ ocr/Dockerfile | 23 ++++++++++++++++++ ocr/app/__init__.py | 1 + ocr/app/config.py | 15 ++++++++++++ ocr/app/main.py | 26 ++++++++++++++++++++ ocr/requirements.txt | 20 ++++++++++++++++ ocr/tests/__init__.py | 1 + ocr/tests/test_health.py | 52 ++++++++++++++++++++++++++++++++++++++++ 8 files changed, 156 insertions(+) create mode 100644 ocr/Dockerfile create mode 100644 ocr/app/__init__.py create mode 100644 ocr/app/config.py create mode 100644 ocr/app/main.py create mode 100644 ocr/requirements.txt create mode 100644 ocr/tests/__init__.py create mode 100644 ocr/tests/test_health.py diff --git a/docker-compose.yml b/docker-compose.yml index c2cca9f..095dd93 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -164,6 +164,24 @@ services: - "traefik.http.services.mvp-backend.loadbalancer.healthcheck.timeout=10s" - "traefik.http.services.mvp-backend.loadbalancer.passhostheader=true" + # Application Services - OCR Processing + mvp-ocr: + build: + context: ./ocr + dockerfile: Dockerfile + container_name: mvp-ocr + restart: unless-stopped + environment: + LOG_LEVEL: info + networks: + - backend + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 30s + # Database Services - Application PostgreSQL mvp-postgres: image: 
${REGISTRY_MIRRORS:-git.motovaultpro.com/egullickson/mirrors}/postgres:18-alpine diff --git a/ocr/Dockerfile b/ocr/Dockerfile new file mode 100644 index 0000000..364ba97 --- /dev/null +++ b/ocr/Dockerfile @@ -0,0 +1,23 @@ +FROM python:3.11-slim + +# System dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + tesseract-ocr \ + tesseract-ocr-eng \ + libtesseract-dev \ + libheif1 \ + libheif-dev \ + libglib2.0-0 \ + libmagic1 \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# Python dependencies +WORKDIR /app +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY . . + +EXPOSE 8000 +CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/ocr/app/__init__.py b/ocr/app/__init__.py new file mode 100644 index 0000000..f6ef689 --- /dev/null +++ b/ocr/app/__init__.py @@ -0,0 +1 @@ +# OCR Service Application diff --git a/ocr/app/config.py b/ocr/app/config.py new file mode 100644 index 0000000..a0f4ada --- /dev/null +++ b/ocr/app/config.py @@ -0,0 +1,15 @@ +"""OCR Service Configuration.""" +import os + + +class Settings: + """Application settings loaded from environment variables.""" + + def __init__(self) -> None: + self.log_level: str = os.getenv("LOG_LEVEL", "info") + self.host: str = os.getenv("HOST", "0.0.0.0") + self.port: int = int(os.getenv("PORT", "8000")) + self.tesseract_cmd: str = os.getenv("TESSERACT_CMD", "/usr/bin/tesseract") + + +settings = Settings() diff --git a/ocr/app/main.py b/ocr/app/main.py new file mode 100644 index 0000000..4553aa9 --- /dev/null +++ b/ocr/app/main.py @@ -0,0 +1,26 @@ +"""OCR Service FastAPI Application.""" +from fastapi import FastAPI + +from app.config import settings + +app = FastAPI( + title="MotoVaultPro OCR Service", + description="OCR processing service for vehicle documents", + version="1.0.0", +) + + +@app.get("/health") +async def health_check() -> dict: + """Health check endpoint for container orchestration.""" + return {"status": 
"healthy"} + + +@app.get("/") +async def root() -> dict: + """Root endpoint with service information.""" + return { + "service": "mvp-ocr", + "version": "1.0.0", + "log_level": settings.log_level, + } diff --git a/ocr/requirements.txt b/ocr/requirements.txt new file mode 100644 index 0000000..d14a652 --- /dev/null +++ b/ocr/requirements.txt @@ -0,0 +1,20 @@ +# API Framework +fastapi>=0.100.0 +uvicorn[standard]>=0.23.0 +python-multipart>=0.0.6 + +# File Detection & Handling +python-magic>=0.4.27 +pillow>=10.0.0 +pillow-heif>=0.13.0 + +# Image Preprocessing +opencv-python-headless>=4.8.0 +numpy>=1.24.0 + +# OCR Engines +pytesseract>=0.3.10 + +# Testing +pytest>=7.4.0 +httpx>=0.24.0 diff --git a/ocr/tests/__init__.py b/ocr/tests/__init__.py new file mode 100644 index 0000000..a0355b8 --- /dev/null +++ b/ocr/tests/__init__.py @@ -0,0 +1 @@ +# OCR Service Tests diff --git a/ocr/tests/test_health.py b/ocr/tests/test_health.py new file mode 100644 index 0000000..cd1e914 --- /dev/null +++ b/ocr/tests/test_health.py @@ -0,0 +1,52 @@ +"""Tests for OCR service health and core functionality.""" +import io + +import pytest +from fastapi.testclient import TestClient +from PIL import Image + +from app.main import app + + +@pytest.fixture +def client(): + """Create test client for FastAPI app.""" + return TestClient(app) + + +def test_health_endpoint(client): + """Health endpoint returns healthy status.""" + response = client.get("/health") + assert response.status_code == 200 + assert response.json() == {"status": "healthy"} + + +def test_root_endpoint(client): + """Root endpoint returns service information.""" + response = client.get("/") + assert response.status_code == 200 + data = response.json() + assert data["service"] == "mvp-ocr" + assert "version" in data + + +def test_pillow_heif_can_register(): + """pillow-heif can register with Pillow for HEIC support.""" + import pillow_heif + + pillow_heif.register_heif_opener() + # Verify HEIC format is registered + assert "HEIF" 
in Image.registered_extensions().values() + + +def test_tesseract_available(): + """Tesseract OCR is available and can process images.""" + import pytesseract + + # Create a simple test image with text + img = Image.new("RGB", (200, 50), color="white") + + # Verify pytesseract can call tesseract (will return empty string for blank image) + result = pytesseract.image_to_string(img) + # Just verify it doesn't raise an exception - blank image returns empty/whitespace + assert isinstance(result, str) From 99ee00b2250f25a1111746e57656467aa3b1909c Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Sun, 1 Feb 2026 13:19:30 -0600 Subject: [PATCH 2/5] fix: add OCR image to CI/CD workflows (refs #64) - Add OCR image build/push to staging workflow - Add OCR service with image override to staging compose - Add OCR service with image override to blue-green compose - Add OCR image pull/deploy to production workflow - Include mvp-ocr-staging in health checks The OCR container is a shared service (like postgres/redis), not part of blue-green deployment. 
Co-Authored-By: Claude Opus 4.5 --- .gitea/workflows/production.yaml | 8 +++++++- .gitea/workflows/staging.yaml | 21 +++++++++++++++++++-- docker-compose.blue-green.yml | 6 ++++++ docker-compose.staging.yml | 7 +++++++ 4 files changed, 39 insertions(+), 3 deletions(-) diff --git a/.gitea/workflows/production.yaml b/.gitea/workflows/production.yaml index 76dc966..c87866e 100644 --- a/.gitea/workflows/production.yaml +++ b/.gitea/workflows/production.yaml @@ -34,6 +34,7 @@ jobs: target_stack: ${{ steps.determine-stack.outputs.target_stack }} backend_image: ${{ steps.set-images.outputs.backend_image }} frontend_image: ${{ steps.set-images.outputs.frontend_image }} + ocr_image: ${{ steps.set-images.outputs.ocr_image }} steps: - name: Check Docker availability run: | @@ -53,6 +54,7 @@ jobs: TAG="${{ inputs.image_tag }}" echo "backend_image=$REGISTRY/egullickson/backend:$TAG" >> $GITHUB_OUTPUT echo "frontend_image=$REGISTRY/egullickson/frontend:$TAG" >> $GITHUB_OUTPUT + echo "ocr_image=$REGISTRY/egullickson/ocr:$TAG" >> $GITHUB_OUTPUT - name: Determine target stack id: determine-stack @@ -83,6 +85,7 @@ jobs: TARGET_STACK: ${{ needs.validate.outputs.target_stack }} BACKEND_IMAGE: ${{ needs.validate.outputs.backend_image }} FRONTEND_IMAGE: ${{ needs.validate.outputs.frontend_image }} + OCR_IMAGE: ${{ needs.validate.outputs.ocr_image }} steps: - name: Checkout scripts, config, and compose files uses: actions/checkout@v4 @@ -138,6 +141,7 @@ jobs: run: | docker pull $BACKEND_IMAGE docker pull $FRONTEND_IMAGE + docker pull $OCR_IMAGE - name: Record expected image IDs id: expected-images @@ -155,10 +159,12 @@ jobs: cd "$DEPLOY_PATH" export BACKEND_IMAGE=$BACKEND_IMAGE export FRONTEND_IMAGE=$FRONTEND_IMAGE + export OCR_IMAGE=$OCR_IMAGE # --force-recreate ensures containers are recreated even if image tag is same # This prevents stale container content when image digest changes + # Start shared OCR service and target stack docker compose -f $COMPOSE_FILE -f $COMPOSE_BLUE_GREEN up 
-d --force-recreate \ - mvp-frontend-$TARGET_STACK mvp-backend-$TARGET_STACK + mvp-ocr mvp-frontend-$TARGET_STACK mvp-backend-$TARGET_STACK - name: Wait for stack initialization run: sleep 10 diff --git a/.gitea/workflows/staging.yaml b/.gitea/workflows/staging.yaml index 0605af5..78d0e9d 100644 --- a/.gitea/workflows/staging.yaml +++ b/.gitea/workflows/staging.yaml @@ -29,6 +29,7 @@ jobs: outputs: backend_image: ${{ steps.tags.outputs.backend_image }} frontend_image: ${{ steps.tags.outputs.frontend_image }} + ocr_image: ${{ steps.tags.outputs.ocr_image }} short_sha: ${{ steps.tags.outputs.short_sha }} steps: - name: Checkout code @@ -45,6 +46,7 @@ jobs: SHORT_SHA="${SHORT_SHA:0:7}" echo "backend_image=$REGISTRY/egullickson/backend:$SHORT_SHA" >> $GITHUB_OUTPUT echo "frontend_image=$REGISTRY/egullickson/frontend:$SHORT_SHA" >> $GITHUB_OUTPUT + echo "ocr_image=$REGISTRY/egullickson/ocr:$SHORT_SHA" >> $GITHUB_OUTPUT echo "short_sha=$SHORT_SHA" >> $GITHUB_OUTPUT - name: Build backend image @@ -74,12 +76,24 @@ jobs: -f frontend/Dockerfile \ frontend + - name: Build OCR image + run: | + docker build \ + --build-arg BUILDKIT_INLINE_CACHE=1 \ + --cache-from $REGISTRY/egullickson/ocr:latest \ + -t ${{ steps.tags.outputs.ocr_image }} \ + -t $REGISTRY/egullickson/ocr:latest \ + -f ocr/Dockerfile \ + ocr + - name: Push images run: | docker push ${{ steps.tags.outputs.backend_image }} docker push ${{ steps.tags.outputs.frontend_image }} + docker push ${{ steps.tags.outputs.ocr_image }} docker push $REGISTRY/egullickson/backend:latest docker push $REGISTRY/egullickson/frontend:latest + docker push $REGISTRY/egullickson/ocr:latest # ============================================ # DEPLOY STAGING - Deploy to staging server @@ -91,6 +105,7 @@ jobs: env: BACKEND_IMAGE: ${{ needs.build.outputs.backend_image }} FRONTEND_IMAGE: ${{ needs.build.outputs.frontend_image }} + OCR_IMAGE: ${{ needs.build.outputs.ocr_image }} steps: - name: Checkout code uses: actions/checkout@v4 @@ -139,12 
+154,14 @@ jobs: run: | docker pull $BACKEND_IMAGE docker pull $FRONTEND_IMAGE + docker pull $OCR_IMAGE - name: Deploy staging stack run: | cd "$DEPLOY_PATH" export BACKEND_IMAGE=$BACKEND_IMAGE export FRONTEND_IMAGE=$FRONTEND_IMAGE + export OCR_IMAGE=$OCR_IMAGE docker compose -f $COMPOSE_FILE -f $COMPOSE_STAGING down --timeout 30 || true docker compose -f $COMPOSE_FILE -f $COMPOSE_STAGING up -d @@ -164,7 +181,7 @@ jobs: - name: Check container status and health run: | - for service in mvp-frontend-staging mvp-backend-staging mvp-postgres-staging mvp-redis-staging; do + for service in mvp-frontend-staging mvp-backend-staging mvp-ocr-staging mvp-postgres-staging mvp-redis-staging; do status=$(docker inspect --format='{{.State.Status}}' $service 2>/dev/null || echo "not found") if [ "$status" != "running" ]; then echo "ERROR: $service is not running (status: $status)" @@ -177,7 +194,7 @@ jobs: # Wait for Docker healthchecks to complete (services with healthcheck defined) echo "" echo "Waiting for Docker healthchecks..." 
- for service in mvp-frontend-staging mvp-backend-staging mvp-postgres-staging mvp-redis-staging; do + for service in mvp-frontend-staging mvp-backend-staging mvp-ocr-staging mvp-postgres-staging mvp-redis-staging; do # Check if service has a healthcheck defined has_healthcheck=$(docker inspect --format='{{if .Config.Healthcheck}}true{{else}}false{{end}}' $service 2>/dev/null || echo "false") if [ "$has_healthcheck" = "true" ]; then diff --git a/docker-compose.blue-green.yml b/docker-compose.blue-green.yml index 9c8ff2d..fb5d61a 100644 --- a/docker-compose.blue-green.yml +++ b/docker-compose.blue-green.yml @@ -194,6 +194,12 @@ services: - "com.motovaultpro.stack=green" - "com.motovaultpro.service=backend" + # ======================================== + # Shared Service - OCR Processing + # ======================================== + mvp-ocr: + image: ${OCR_IMAGE:-git.motovaultpro.com/egullickson/ocr:latest} + # ======================================== # Override Traefik to add dynamic config # ======================================== diff --git a/docker-compose.staging.yml b/docker-compose.staging.yml index 79b537e..df667b3 100644 --- a/docker-compose.staging.yml +++ b/docker-compose.staging.yml @@ -55,6 +55,13 @@ services: - "traefik.http.routers.mvp-backend-health.priority=30" - "traefik.http.services.mvp-backend.loadbalancer.server.port=3001" + # ======================================== + # OCR Service (Staging) + # ======================================== + mvp-ocr: + image: ${OCR_IMAGE:-git.motovaultpro.com/egullickson/ocr:latest} + container_name: mvp-ocr-staging + # ======================================== # PostgreSQL (Staging - Separate Database) # ======================================== From 3781b05d720307ce54d0dafac11f6d0e183a287b Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Sun, 1 Feb 2026 13:28:07 -0600 Subject: [PATCH 3/5] fix: move user-profile before documents in migration order (refs #64) 
The documents migration 003_reset_scan_for_maintenance_free_users.sql depends on the user_profiles table, which is created by the user-profile feature. Move user-profile earlier in MIGRATION_ORDER to fix the staging deployment. Co-Authored-By: Claude Opus 4.5 --- backend/src/_system/migrations/run-all.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/src/_system/migrations/run-all.ts b/backend/src/_system/migrations/run-all.ts index 136a8f3..a5d6e49 100644 --- a/backend/src/_system/migrations/run-all.ts +++ b/backend/src/_system/migrations/run-all.ts @@ -17,7 +17,8 @@ const pool = new Pool({ const MIGRATION_ORDER = [ 'features/vehicles', // Primary entity, defines update_updated_at_column() 'features/platform', // Normalized make/model/trim schema for dropdowns - 'features/documents', // Depends on vehicles; provides documents table + 'features/user-profile', // User profile management; needed by documents migration + 'features/documents', // Depends on vehicles, user-profile; provides documents table 'core/user-preferences', // Depends on update_updated_at_column() 'features/fuel-logs', // Depends on vehicles 'features/maintenance', // Depends on vehicles @@ -25,7 +26,6 @@ const MIGRATION_ORDER = [ 'features/admin', // Admin role management and oversight; depends on update_updated_at_column() 'features/backup', // Admin backup feature; depends on update_updated_at_column() 'features/notifications', // Depends on maintenance and documents - 'features/user-profile', // User profile management; independent 'features/terms-agreement', // Terms & Conditions acceptance audit trail 'features/audit-log', // Centralized audit logging; independent 'features/ownership-costs', // Depends on vehicles and documents; TCO recurring costs From 99fbf2bbb7d133c9f9763cd0a7e3264f77a15f9e Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Sun, 1 Feb 2026 13:54:59 -0600 Subject: [PATCH 4/5] fix: increase staging 
health check timeout to 4 minutes (refs #64) Backend with fresh migrations can take ~3 minutes to start. Increased from 10x5s (50s) to 24x10s (240s) to accommodate. Co-Authored-By: Claude Opus 4.5 --- .gitea/workflows/staging.yaml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.gitea/workflows/staging.yaml b/.gitea/workflows/staging.yaml index 78d0e9d..60b687e 100644 --- a/.gitea/workflows/staging.yaml +++ b/.gitea/workflows/staging.yaml @@ -198,7 +198,8 @@ jobs: # Check if service has a healthcheck defined has_healthcheck=$(docker inspect --format='{{if .Config.Healthcheck}}true{{else}}false{{end}}' $service 2>/dev/null || echo "false") if [ "$has_healthcheck" = "true" ]; then - for i in 1 2 3 4 5 6 7 8 9 10; do + # 24 attempts x 10 seconds = 4 minutes max wait (backend with fresh migrations can take ~3 min) + for i in $(seq 1 24); do health=$(docker inspect --format='{{.State.Health.Status}}' $service 2>/dev/null || echo "unknown") if [ "$health" = "healthy" ]; then echo "OK: $service is healthy" @@ -208,13 +209,13 @@ jobs: docker logs $service --tail 50 2>/dev/null || true exit 1 fi - if [ $i -eq 10 ]; then + if [ $i -eq 24 ]; then echo "ERROR: $service health check timed out (status: $health)" docker logs $service --tail 50 2>/dev/null || true exit 1 fi - echo "Waiting for $service healthcheck... (attempt $i/10, status: $health)" - sleep 5 + echo "Waiting for $service healthcheck... 
(attempt $i/24, status: $health)" + sleep 10 done else echo "SKIP: $service has no healthcheck defined" From a31028401ba2ce2c1a7a644364c606bde5028532 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Sun, 1 Feb 2026 14:43:24 -0600 Subject: [PATCH 5/5] fix: increase backend Docker healthcheck start_period to 3 minutes (refs #64) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The CI was failing because Docker marked the backend unhealthy before the CI wait loop completed. The backend needs time to run migrations and seed vehicle data on startup. Changes: - start_period: 40s -> 180s (3 minutes) - retries: 3 -> 5 (more tolerance) Total time before unhealthy: 180s + (5 × 30s) = 5.5 minutes Co-Authored-By: Claude Opus 4.5 --- docker-compose.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 095dd93..17abbf4 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -140,8 +140,8 @@ services: - node -e "require('http').get('http://localhost:3001/health', r => process.exit(r.statusCode===200?0:1)).on('error', () => process.exit(1))" interval: 30s timeout: 10s - retries: 3 - start_period: 40s + retries: 5 + start_period: 180s labels: - "traefik.enable=true" - "traefik.docker.network=motovaultpro_backend"