diff --git a/.gitea/workflows/production.yaml b/.gitea/workflows/production.yaml index 76dc966..c87866e 100644 --- a/.gitea/workflows/production.yaml +++ b/.gitea/workflows/production.yaml @@ -34,6 +34,7 @@ jobs: target_stack: ${{ steps.determine-stack.outputs.target_stack }} backend_image: ${{ steps.set-images.outputs.backend_image }} frontend_image: ${{ steps.set-images.outputs.frontend_image }} + ocr_image: ${{ steps.set-images.outputs.ocr_image }} steps: - name: Check Docker availability run: | @@ -53,6 +54,7 @@ jobs: TAG="${{ inputs.image_tag }}" echo "backend_image=$REGISTRY/egullickson/backend:$TAG" >> $GITHUB_OUTPUT echo "frontend_image=$REGISTRY/egullickson/frontend:$TAG" >> $GITHUB_OUTPUT + echo "ocr_image=$REGISTRY/egullickson/ocr:$TAG" >> $GITHUB_OUTPUT - name: Determine target stack id: determine-stack @@ -83,6 +85,7 @@ jobs: TARGET_STACK: ${{ needs.validate.outputs.target_stack }} BACKEND_IMAGE: ${{ needs.validate.outputs.backend_image }} FRONTEND_IMAGE: ${{ needs.validate.outputs.frontend_image }} + OCR_IMAGE: ${{ needs.validate.outputs.ocr_image }} steps: - name: Checkout scripts, config, and compose files uses: actions/checkout@v4 @@ -138,6 +141,7 @@ jobs: run: | docker pull $BACKEND_IMAGE docker pull $FRONTEND_IMAGE + docker pull $OCR_IMAGE - name: Record expected image IDs id: expected-images @@ -155,10 +159,12 @@ jobs: cd "$DEPLOY_PATH" export BACKEND_IMAGE=$BACKEND_IMAGE export FRONTEND_IMAGE=$FRONTEND_IMAGE + export OCR_IMAGE=$OCR_IMAGE # --force-recreate ensures containers are recreated even if image tag is same # This prevents stale container content when image digest changes + # Start shared OCR service and target stack docker compose -f $COMPOSE_FILE -f $COMPOSE_BLUE_GREEN up -d --force-recreate \ - mvp-frontend-$TARGET_STACK mvp-backend-$TARGET_STACK + mvp-ocr mvp-frontend-$TARGET_STACK mvp-backend-$TARGET_STACK - name: Wait for stack initialization run: sleep 10 diff --git a/.gitea/workflows/staging.yaml b/.gitea/workflows/staging.yaml 
index 0605af5..60b687e 100644 --- a/.gitea/workflows/staging.yaml +++ b/.gitea/workflows/staging.yaml @@ -29,6 +29,7 @@ jobs: outputs: backend_image: ${{ steps.tags.outputs.backend_image }} frontend_image: ${{ steps.tags.outputs.frontend_image }} + ocr_image: ${{ steps.tags.outputs.ocr_image }} short_sha: ${{ steps.tags.outputs.short_sha }} steps: - name: Checkout code @@ -45,6 +46,7 @@ jobs: SHORT_SHA="${SHORT_SHA:0:7}" echo "backend_image=$REGISTRY/egullickson/backend:$SHORT_SHA" >> $GITHUB_OUTPUT echo "frontend_image=$REGISTRY/egullickson/frontend:$SHORT_SHA" >> $GITHUB_OUTPUT + echo "ocr_image=$REGISTRY/egullickson/ocr:$SHORT_SHA" >> $GITHUB_OUTPUT echo "short_sha=$SHORT_SHA" >> $GITHUB_OUTPUT - name: Build backend image @@ -74,12 +76,24 @@ jobs: -f frontend/Dockerfile \ frontend + - name: Build OCR image + run: | + docker build \ + --build-arg BUILDKIT_INLINE_CACHE=1 \ + --cache-from $REGISTRY/egullickson/ocr:latest \ + -t ${{ steps.tags.outputs.ocr_image }} \ + -t $REGISTRY/egullickson/ocr:latest \ + -f ocr/Dockerfile \ + ocr + - name: Push images run: | docker push ${{ steps.tags.outputs.backend_image }} docker push ${{ steps.tags.outputs.frontend_image }} + docker push ${{ steps.tags.outputs.ocr_image }} docker push $REGISTRY/egullickson/backend:latest docker push $REGISTRY/egullickson/frontend:latest + docker push $REGISTRY/egullickson/ocr:latest # ============================================ # DEPLOY STAGING - Deploy to staging server @@ -91,6 +105,7 @@ jobs: env: BACKEND_IMAGE: ${{ needs.build.outputs.backend_image }} FRONTEND_IMAGE: ${{ needs.build.outputs.frontend_image }} + OCR_IMAGE: ${{ needs.build.outputs.ocr_image }} steps: - name: Checkout code uses: actions/checkout@v4 @@ -139,12 +154,14 @@ jobs: run: | docker pull $BACKEND_IMAGE docker pull $FRONTEND_IMAGE + docker pull $OCR_IMAGE - name: Deploy staging stack run: | cd "$DEPLOY_PATH" export BACKEND_IMAGE=$BACKEND_IMAGE export FRONTEND_IMAGE=$FRONTEND_IMAGE + export OCR_IMAGE=$OCR_IMAGE docker 
compose -f $COMPOSE_FILE -f $COMPOSE_STAGING down --timeout 30 || true docker compose -f $COMPOSE_FILE -f $COMPOSE_STAGING up -d @@ -164,7 +181,7 @@ jobs: - name: Check container status and health run: | - for service in mvp-frontend-staging mvp-backend-staging mvp-postgres-staging mvp-redis-staging; do + for service in mvp-frontend-staging mvp-backend-staging mvp-ocr-staging mvp-postgres-staging mvp-redis-staging; do status=$(docker inspect --format='{{.State.Status}}' $service 2>/dev/null || echo "not found") if [ "$status" != "running" ]; then echo "ERROR: $service is not running (status: $status)" @@ -177,11 +194,12 @@ jobs: # Wait for Docker healthchecks to complete (services with healthcheck defined) echo "" echo "Waiting for Docker healthchecks..." - for service in mvp-frontend-staging mvp-backend-staging mvp-postgres-staging mvp-redis-staging; do + for service in mvp-frontend-staging mvp-backend-staging mvp-ocr-staging mvp-postgres-staging mvp-redis-staging; do # Check if service has a healthcheck defined has_healthcheck=$(docker inspect --format='{{if .Config.Healthcheck}}true{{else}}false{{end}}' $service 2>/dev/null || echo "false") if [ "$has_healthcheck" = "true" ]; then - for i in 1 2 3 4 5 6 7 8 9 10; do + # 24 attempts x 10 seconds = 4 minutes max wait (backend with fresh migrations can take ~3 min) + for i in $(seq 1 24); do health=$(docker inspect --format='{{.State.Health.Status}}' $service 2>/dev/null || echo "unknown") if [ "$health" = "healthy" ]; then echo "OK: $service is healthy" @@ -191,13 +209,13 @@ jobs: docker logs $service --tail 50 2>/dev/null || true exit 1 fi - if [ $i -eq 10 ]; then + if [ $i -eq 24 ]; then echo "ERROR: $service health check timed out (status: $health)" docker logs $service --tail 50 2>/dev/null || true exit 1 fi - echo "Waiting for $service healthcheck... (attempt $i/10, status: $health)" - sleep 5 + echo "Waiting for $service healthcheck... 
(attempt $i/24, status: $health)" + sleep 10 done else echo "SKIP: $service has no healthcheck defined" diff --git a/backend/src/_system/migrations/run-all.ts b/backend/src/_system/migrations/run-all.ts index 136a8f3..a5d6e49 100644 --- a/backend/src/_system/migrations/run-all.ts +++ b/backend/src/_system/migrations/run-all.ts @@ -17,7 +17,8 @@ const pool = new Pool({ const MIGRATION_ORDER = [ 'features/vehicles', // Primary entity, defines update_updated_at_column() 'features/platform', // Normalized make/model/trim schema for dropdowns - 'features/documents', // Depends on vehicles; provides documents table + 'features/user-profile', // User profile management; needed by documents migration + 'features/documents', // Depends on vehicles, user-profile; provides documents table 'core/user-preferences', // Depends on update_updated_at_column() 'features/fuel-logs', // Depends on vehicles 'features/maintenance', // Depends on vehicles @@ -25,7 +26,6 @@ const MIGRATION_ORDER = [ 'features/admin', // Admin role management and oversight; depends on update_updated_at_column() 'features/backup', // Admin backup feature; depends on update_updated_at_column() 'features/notifications', // Depends on maintenance and documents - 'features/user-profile', // User profile management; independent 'features/terms-agreement', // Terms & Conditions acceptance audit trail 'features/audit-log', // Centralized audit logging; independent 'features/ownership-costs', // Depends on vehicles and documents; TCO recurring costs diff --git a/docker-compose.blue-green.yml b/docker-compose.blue-green.yml index 9c8ff2d..fb5d61a 100644 --- a/docker-compose.blue-green.yml +++ b/docker-compose.blue-green.yml @@ -194,6 +194,12 @@ services: - "com.motovaultpro.stack=green" - "com.motovaultpro.service=backend" + # ======================================== + # Shared Service - OCR Processing + # ======================================== + mvp-ocr: + image: 
${OCR_IMAGE:-git.motovaultpro.com/egullickson/ocr:latest} + # ======================================== # Override Traefik to add dynamic config # ======================================== diff --git a/docker-compose.staging.yml b/docker-compose.staging.yml index 79b537e..df667b3 100644 --- a/docker-compose.staging.yml +++ b/docker-compose.staging.yml @@ -55,6 +55,13 @@ services: - "traefik.http.routers.mvp-backend-health.priority=30" - "traefik.http.services.mvp-backend.loadbalancer.server.port=3001" + # ======================================== + # OCR Service (Staging) + # ======================================== + mvp-ocr: + image: ${OCR_IMAGE:-git.motovaultpro.com/egullickson/ocr:latest} + container_name: mvp-ocr-staging + # ======================================== # PostgreSQL (Staging - Separate Database) # ======================================== diff --git a/docker-compose.yml b/docker-compose.yml index c2cca9f..17abbf4 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -140,8 +140,8 @@ services: - node -e "require('http').get('http://localhost:3001/health', r => process.exit(r.statusCode===200?0:1)).on('error', () => process.exit(1))" interval: 30s timeout: 10s - retries: 3 - start_period: 40s + retries: 5 + start_period: 180s labels: - "traefik.enable=true" - "traefik.docker.network=motovaultpro_backend" @@ -164,6 +164,24 @@ services: - "traefik.http.services.mvp-backend.loadbalancer.healthcheck.timeout=10s" - "traefik.http.services.mvp-backend.loadbalancer.passhostheader=true" + # Application Services - OCR Processing + mvp-ocr: + build: + context: ./ocr + dockerfile: Dockerfile + container_name: mvp-ocr + restart: unless-stopped + environment: + LOG_LEVEL: info + networks: + - backend + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 30s + # Database Services - Application PostgreSQL mvp-postgres: image: 
${REGISTRY_MIRRORS:-git.motovaultpro.com/egullickson/mirrors}/postgres:18-alpine
diff --git a/ocr/Dockerfile b/ocr/Dockerfile
new file mode 100644
index 0000000..364ba97
--- /dev/null
+++ b/ocr/Dockerfile
@@ -0,0 +1,23 @@
+FROM python:3.11-slim
+
+# System dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    tesseract-ocr \
+    tesseract-ocr-eng \
+    libtesseract-dev \
+    libheif1 \
+    libheif-dev \
+    libglib2.0-0 \
+    libmagic1 \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+
+# Python dependencies
+WORKDIR /app
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+
+EXPOSE 8000
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/ocr/app/__init__.py b/ocr/app/__init__.py
new file mode 100644
index 0000000..f6ef689
--- /dev/null
+++ b/ocr/app/__init__.py
@@ -0,0 +1 @@
+# OCR Service Application
diff --git a/ocr/app/config.py b/ocr/app/config.py
new file mode 100644
index 0000000..a0f4ada
--- /dev/null
+++ b/ocr/app/config.py
@@ -0,0 +1,41 @@
+"""OCR Service Configuration."""
+import os
+
+
+def _env(name: str, default: str) -> str:
+    """Return environment variable ``name``, or ``default`` if unset or empty.
+
+    An empty value is treated like a missing one so that a variable forwarded
+    as an empty string does not override the default.
+    """
+    return os.getenv(name) or default
+
+
+class Settings:
+    """Application settings loaded from environment variables.
+
+    Attributes:
+        log_level: LOG_LEVEL, default "info".
+        host: HOST, default "0.0.0.0".
+        port: PORT parsed as an integer, default 8000.
+        tesseract_cmd: TESSERACT_CMD, default "/usr/bin/tesseract".
+
+    Raises:
+        ValueError: If PORT is set but is not a valid integer.
+    """
+
+    def __init__(self) -> None:
+        self.log_level: str = _env("LOG_LEVEL", "info")
+        self.host: str = _env("HOST", "0.0.0.0")
+        self.tesseract_cmd: str = _env("TESSERACT_CMD", "/usr/bin/tesseract")
+
+        raw_port = _env("PORT", "8000")
+        try:
+            self.port: int = int(raw_port)
+        except ValueError as exc:
+            # int()'s own message does not say which variable was malformed.
+            raise ValueError(f"PORT must be an integer, got {raw_port!r}") from exc
+
+
+# Shared instance, built once at import time.
+settings = Settings()
diff --git a/ocr/app/main.py b/ocr/app/main.py
new file mode 100644
index 0000000..4553aa9
--- /dev/null
+++ b/ocr/app/main.py
@@ -0,0 +1,30 @@
+"""OCR Service FastAPI Application."""
+from fastapi import FastAPI
+
+from app.config import settings
+
+app = FastAPI(
+    title="MotoVaultPro OCR Service",
+    description="OCR processing service for vehicle documents",
+    version="1.0.0",
+)
+
+
+@app.get("/health")
+async def health_check() -> dict:
+    """Health check endpoint for container orchestration.
+
+    Returns a fixed payload; it does not probe Tesseract or any dependency.
+    """
+    return {"status": "healthy"}
+
+
+@app.get("/")
+async def root() -> dict:
+    """Root endpoint with service information."""
+    return {
+        "service": "mvp-ocr",
+        # Reuse the version declared on the app so the two cannot drift apart.
+        "version": app.version,
+        "log_level": settings.log_level,
+    }
diff --git a/ocr/requirements.txt b/ocr/requirements.txt
new file mode 100644
index 0000000..d14a652
--- /dev/null
+++ b/ocr/requirements.txt
@@ -0,0 +1,20 @@
+# API Framework
+fastapi>=0.100.0
+uvicorn[standard]>=0.23.0
+python-multipart>=0.0.6
+
+# File Detection & Handling
+python-magic>=0.4.27
+pillow>=10.0.0
+pillow-heif>=0.13.0
+
+# Image Preprocessing
+opencv-python-headless>=4.8.0
+numpy>=1.24.0
+
+# OCR Engines
+pytesseract>=0.3.10
+
+# Testing
+pytest>=7.4.0
+httpx>=0.24.0
diff --git a/ocr/tests/__init__.py b/ocr/tests/__init__.py
new file mode 100644
index 0000000..a0355b8
--- /dev/null
+++ b/ocr/tests/__init__.py
@@ -0,0 +1 @@
+# OCR Service Tests
diff --git a/ocr/tests/test_health.py b/ocr/tests/test_health.py
new file mode 100644
index 0000000..cd1e914
--- /dev/null
+++ b/ocr/tests/test_health.py
@@ -0,0 +1,52 @@
+"""Tests for OCR service health and core functionality."""
+import io
+
+import pytest
+from fastapi.testclient import TestClient
+from PIL import Image
+
+from app.main import app
+
+
+@pytest.fixture
+def client():
+    """Create test client for FastAPI app."""
+    return TestClient(app)
+
+
+def test_health_endpoint(client):
+    """Health endpoint returns healthy status."""
+    response = client.get("/health")
+    assert response.status_code == 200
+    assert response.json() == {"status": "healthy"}
+
+
+def test_root_endpoint(client):
+    """Root endpoint returns service information."""
+    response = client.get("/")
+    assert response.status_code == 200
+    data = response.json()
+    assert data["service"] == "mvp-ocr"
+    assert data["version"] == app.version
+
+
+def test_pillow_heif_can_register():
+    """pillow-heif can register with Pillow for HEIC support."""
+    import pillow_heif
+
+    pillow_heif.register_heif_opener()
+    # Verify at least one file extension is now mapped to the "HEIF" format
+    assert "HEIF" in Image.registered_extensions().values()
+
+
+def test_tesseract_available():
+    """Tesseract OCR is available and can process images."""
+    import pytesseract
+
+    # Create a blank white image (no text is drawn on it)
+    img = Image.new("RGB", (200, 50), color="white")
+
+    # Verify pytesseract can call tesseract (will return empty string for blank image)
+    result = pytesseract.image_to_string(img)
+    # Just verify it doesn't raise an exception - blank image returns empty/whitespace
+    assert isinstance(result, str)