feat: add OCR service container (refs #64) #72

Merged
egullickson merged 5 commits from issue-64-ocr-container-setup into main 2026-02-01 20:49:52 +00:00
13 changed files with 204 additions and 11 deletions

View File

@@ -34,6 +34,7 @@ jobs:
target_stack: ${{ steps.determine-stack.outputs.target_stack }} target_stack: ${{ steps.determine-stack.outputs.target_stack }}
backend_image: ${{ steps.set-images.outputs.backend_image }} backend_image: ${{ steps.set-images.outputs.backend_image }}
frontend_image: ${{ steps.set-images.outputs.frontend_image }} frontend_image: ${{ steps.set-images.outputs.frontend_image }}
ocr_image: ${{ steps.set-images.outputs.ocr_image }}
steps: steps:
- name: Check Docker availability - name: Check Docker availability
run: | run: |
@@ -53,6 +54,7 @@ jobs:
TAG="${{ inputs.image_tag }}" TAG="${{ inputs.image_tag }}"
echo "backend_image=$REGISTRY/egullickson/backend:$TAG" >> $GITHUB_OUTPUT echo "backend_image=$REGISTRY/egullickson/backend:$TAG" >> $GITHUB_OUTPUT
echo "frontend_image=$REGISTRY/egullickson/frontend:$TAG" >> $GITHUB_OUTPUT echo "frontend_image=$REGISTRY/egullickson/frontend:$TAG" >> $GITHUB_OUTPUT
echo "ocr_image=$REGISTRY/egullickson/ocr:$TAG" >> $GITHUB_OUTPUT
- name: Determine target stack - name: Determine target stack
id: determine-stack id: determine-stack
@@ -83,6 +85,7 @@ jobs:
TARGET_STACK: ${{ needs.validate.outputs.target_stack }} TARGET_STACK: ${{ needs.validate.outputs.target_stack }}
BACKEND_IMAGE: ${{ needs.validate.outputs.backend_image }} BACKEND_IMAGE: ${{ needs.validate.outputs.backend_image }}
FRONTEND_IMAGE: ${{ needs.validate.outputs.frontend_image }} FRONTEND_IMAGE: ${{ needs.validate.outputs.frontend_image }}
OCR_IMAGE: ${{ needs.validate.outputs.ocr_image }}
steps: steps:
- name: Checkout scripts, config, and compose files - name: Checkout scripts, config, and compose files
uses: actions/checkout@v4 uses: actions/checkout@v4
@@ -138,6 +141,7 @@ jobs:
run: | run: |
docker pull $BACKEND_IMAGE docker pull $BACKEND_IMAGE
docker pull $FRONTEND_IMAGE docker pull $FRONTEND_IMAGE
docker pull $OCR_IMAGE
- name: Record expected image IDs - name: Record expected image IDs
id: expected-images id: expected-images
@@ -155,10 +159,12 @@ jobs:
cd "$DEPLOY_PATH" cd "$DEPLOY_PATH"
export BACKEND_IMAGE=$BACKEND_IMAGE export BACKEND_IMAGE=$BACKEND_IMAGE
export FRONTEND_IMAGE=$FRONTEND_IMAGE export FRONTEND_IMAGE=$FRONTEND_IMAGE
export OCR_IMAGE=$OCR_IMAGE
# --force-recreate ensures containers are recreated even if image tag is same # --force-recreate ensures containers are recreated even if image tag is same
# This prevents stale container content when image digest changes # This prevents stale container content when image digest changes
# Start shared OCR service and target stack
docker compose -f $COMPOSE_FILE -f $COMPOSE_BLUE_GREEN up -d --force-recreate \ docker compose -f $COMPOSE_FILE -f $COMPOSE_BLUE_GREEN up -d --force-recreate \
mvp-frontend-$TARGET_STACK mvp-backend-$TARGET_STACK mvp-ocr mvp-frontend-$TARGET_STACK mvp-backend-$TARGET_STACK
- name: Wait for stack initialization - name: Wait for stack initialization
run: sleep 10 run: sleep 10

View File

@@ -29,6 +29,7 @@ jobs:
outputs: outputs:
backend_image: ${{ steps.tags.outputs.backend_image }} backend_image: ${{ steps.tags.outputs.backend_image }}
frontend_image: ${{ steps.tags.outputs.frontend_image }} frontend_image: ${{ steps.tags.outputs.frontend_image }}
ocr_image: ${{ steps.tags.outputs.ocr_image }}
short_sha: ${{ steps.tags.outputs.short_sha }} short_sha: ${{ steps.tags.outputs.short_sha }}
steps: steps:
- name: Checkout code - name: Checkout code
@@ -45,6 +46,7 @@ jobs:
SHORT_SHA="${SHORT_SHA:0:7}" SHORT_SHA="${SHORT_SHA:0:7}"
echo "backend_image=$REGISTRY/egullickson/backend:$SHORT_SHA" >> $GITHUB_OUTPUT echo "backend_image=$REGISTRY/egullickson/backend:$SHORT_SHA" >> $GITHUB_OUTPUT
echo "frontend_image=$REGISTRY/egullickson/frontend:$SHORT_SHA" >> $GITHUB_OUTPUT echo "frontend_image=$REGISTRY/egullickson/frontend:$SHORT_SHA" >> $GITHUB_OUTPUT
echo "ocr_image=$REGISTRY/egullickson/ocr:$SHORT_SHA" >> $GITHUB_OUTPUT
echo "short_sha=$SHORT_SHA" >> $GITHUB_OUTPUT echo "short_sha=$SHORT_SHA" >> $GITHUB_OUTPUT
- name: Build backend image - name: Build backend image
@@ -74,12 +76,24 @@ jobs:
-f frontend/Dockerfile \ -f frontend/Dockerfile \
frontend frontend
- name: Build OCR image
run: |
docker build \
--build-arg BUILDKIT_INLINE_CACHE=1 \
--cache-from $REGISTRY/egullickson/ocr:latest \
-t ${{ steps.tags.outputs.ocr_image }} \
-t $REGISTRY/egullickson/ocr:latest \
-f ocr/Dockerfile \
ocr
- name: Push images - name: Push images
run: | run: |
docker push ${{ steps.tags.outputs.backend_image }} docker push ${{ steps.tags.outputs.backend_image }}
docker push ${{ steps.tags.outputs.frontend_image }} docker push ${{ steps.tags.outputs.frontend_image }}
docker push ${{ steps.tags.outputs.ocr_image }}
docker push $REGISTRY/egullickson/backend:latest docker push $REGISTRY/egullickson/backend:latest
docker push $REGISTRY/egullickson/frontend:latest docker push $REGISTRY/egullickson/frontend:latest
docker push $REGISTRY/egullickson/ocr:latest
# ============================================ # ============================================
# DEPLOY STAGING - Deploy to staging server # DEPLOY STAGING - Deploy to staging server
@@ -91,6 +105,7 @@ jobs:
env: env:
BACKEND_IMAGE: ${{ needs.build.outputs.backend_image }} BACKEND_IMAGE: ${{ needs.build.outputs.backend_image }}
FRONTEND_IMAGE: ${{ needs.build.outputs.frontend_image }} FRONTEND_IMAGE: ${{ needs.build.outputs.frontend_image }}
OCR_IMAGE: ${{ needs.build.outputs.ocr_image }}
steps: steps:
- name: Checkout code - name: Checkout code
uses: actions/checkout@v4 uses: actions/checkout@v4
@@ -139,12 +154,14 @@ jobs:
run: | run: |
docker pull $BACKEND_IMAGE docker pull $BACKEND_IMAGE
docker pull $FRONTEND_IMAGE docker pull $FRONTEND_IMAGE
docker pull $OCR_IMAGE
- name: Deploy staging stack - name: Deploy staging stack
run: | run: |
cd "$DEPLOY_PATH" cd "$DEPLOY_PATH"
export BACKEND_IMAGE=$BACKEND_IMAGE export BACKEND_IMAGE=$BACKEND_IMAGE
export FRONTEND_IMAGE=$FRONTEND_IMAGE export FRONTEND_IMAGE=$FRONTEND_IMAGE
export OCR_IMAGE=$OCR_IMAGE
docker compose -f $COMPOSE_FILE -f $COMPOSE_STAGING down --timeout 30 || true docker compose -f $COMPOSE_FILE -f $COMPOSE_STAGING down --timeout 30 || true
docker compose -f $COMPOSE_FILE -f $COMPOSE_STAGING up -d docker compose -f $COMPOSE_FILE -f $COMPOSE_STAGING up -d
@@ -164,7 +181,7 @@ jobs:
- name: Check container status and health - name: Check container status and health
run: | run: |
for service in mvp-frontend-staging mvp-backend-staging mvp-postgres-staging mvp-redis-staging; do for service in mvp-frontend-staging mvp-backend-staging mvp-ocr-staging mvp-postgres-staging mvp-redis-staging; do
status=$(docker inspect --format='{{.State.Status}}' $service 2>/dev/null || echo "not found") status=$(docker inspect --format='{{.State.Status}}' $service 2>/dev/null || echo "not found")
if [ "$status" != "running" ]; then if [ "$status" != "running" ]; then
echo "ERROR: $service is not running (status: $status)" echo "ERROR: $service is not running (status: $status)"
@@ -177,11 +194,12 @@ jobs:
# Wait for Docker healthchecks to complete (services with healthcheck defined) # Wait for Docker healthchecks to complete (services with healthcheck defined)
echo "" echo ""
echo "Waiting for Docker healthchecks..." echo "Waiting for Docker healthchecks..."
for service in mvp-frontend-staging mvp-backend-staging mvp-postgres-staging mvp-redis-staging; do for service in mvp-frontend-staging mvp-backend-staging mvp-ocr-staging mvp-postgres-staging mvp-redis-staging; do
# Check if service has a healthcheck defined # Check if service has a healthcheck defined
has_healthcheck=$(docker inspect --format='{{if .Config.Healthcheck}}true{{else}}false{{end}}' $service 2>/dev/null || echo "false") has_healthcheck=$(docker inspect --format='{{if .Config.Healthcheck}}true{{else}}false{{end}}' $service 2>/dev/null || echo "false")
if [ "$has_healthcheck" = "true" ]; then if [ "$has_healthcheck" = "true" ]; then
for i in 1 2 3 4 5 6 7 8 9 10; do # 24 attempts x 10 seconds = 4 minutes max wait (backend with fresh migrations can take ~3 min)
for i in $(seq 1 24); do
health=$(docker inspect --format='{{.State.Health.Status}}' $service 2>/dev/null || echo "unknown") health=$(docker inspect --format='{{.State.Health.Status}}' $service 2>/dev/null || echo "unknown")
if [ "$health" = "healthy" ]; then if [ "$health" = "healthy" ]; then
echo "OK: $service is healthy" echo "OK: $service is healthy"
@@ -191,13 +209,13 @@ jobs:
docker logs $service --tail 50 2>/dev/null || true docker logs $service --tail 50 2>/dev/null || true
exit 1 exit 1
fi fi
if [ $i -eq 10 ]; then if [ $i -eq 24 ]; then
echo "ERROR: $service health check timed out (status: $health)" echo "ERROR: $service health check timed out (status: $health)"
docker logs $service --tail 50 2>/dev/null || true docker logs $service --tail 50 2>/dev/null || true
exit 1 exit 1
fi fi
echo "Waiting for $service healthcheck... (attempt $i/10, status: $health)" echo "Waiting for $service healthcheck... (attempt $i/24, status: $health)"
sleep 5 sleep 10
done done
else else
echo "SKIP: $service has no healthcheck defined" echo "SKIP: $service has no healthcheck defined"

View File

@@ -17,7 +17,8 @@ const pool = new Pool({
const MIGRATION_ORDER = [ const MIGRATION_ORDER = [
'features/vehicles', // Primary entity, defines update_updated_at_column() 'features/vehicles', // Primary entity, defines update_updated_at_column()
'features/platform', // Normalized make/model/trim schema for dropdowns 'features/platform', // Normalized make/model/trim schema for dropdowns
'features/documents', // Depends on vehicles; provides documents table 'features/user-profile', // User profile management; needed by documents migration
'features/documents', // Depends on vehicles, user-profile; provides documents table
'core/user-preferences', // Depends on update_updated_at_column() 'core/user-preferences', // Depends on update_updated_at_column()
'features/fuel-logs', // Depends on vehicles 'features/fuel-logs', // Depends on vehicles
'features/maintenance', // Depends on vehicles 'features/maintenance', // Depends on vehicles
@@ -25,7 +26,6 @@ const MIGRATION_ORDER = [
'features/admin', // Admin role management and oversight; depends on update_updated_at_column() 'features/admin', // Admin role management and oversight; depends on update_updated_at_column()
'features/backup', // Admin backup feature; depends on update_updated_at_column() 'features/backup', // Admin backup feature; depends on update_updated_at_column()
'features/notifications', // Depends on maintenance and documents 'features/notifications', // Depends on maintenance and documents
'features/user-profile', // User profile management; independent
'features/terms-agreement', // Terms & Conditions acceptance audit trail 'features/terms-agreement', // Terms & Conditions acceptance audit trail
'features/audit-log', // Centralized audit logging; independent 'features/audit-log', // Centralized audit logging; independent
'features/ownership-costs', // Depends on vehicles and documents; TCO recurring costs 'features/ownership-costs', // Depends on vehicles and documents; TCO recurring costs

View File

@@ -194,6 +194,12 @@ services:
- "com.motovaultpro.stack=green" - "com.motovaultpro.stack=green"
- "com.motovaultpro.service=backend" - "com.motovaultpro.service=backend"
# ========================================
# Shared Service - OCR Processing
# Only the image is overridden in this file: OCR_IMAGE when it is set,
# otherwise the registry's ocr:latest tag.
# ========================================
mvp-ocr:
  image: '${OCR_IMAGE:-git.motovaultpro.com/egullickson/ocr:latest}'
# ======================================== # ========================================
# Override Traefik to add dynamic config # Override Traefik to add dynamic config
# ======================================== # ========================================

View File

@@ -55,6 +55,13 @@ services:
- "traefik.http.routers.mvp-backend-health.priority=30" - "traefik.http.routers.mvp-backend-health.priority=30"
- "traefik.http.services.mvp-backend.loadbalancer.server.port=3001" - "traefik.http.services.mvp-backend.loadbalancer.server.port=3001"
# ========================================
# OCR Service (Staging)
# Gives the container a staging-specific name and takes its image from
# OCR_IMAGE, falling back to the registry's ocr:latest tag.
# ========================================
mvp-ocr:
  container_name: mvp-ocr-staging
  image: '${OCR_IMAGE:-git.motovaultpro.com/egullickson/ocr:latest}'
# ======================================== # ========================================
# PostgreSQL (Staging - Separate Database) # PostgreSQL (Staging - Separate Database)
# ======================================== # ========================================

View File

@@ -140,8 +140,8 @@ services:
- node -e "require('http').get('http://localhost:3001/health', r => process.exit(r.statusCode===200?0:1)).on('error', () => process.exit(1))" - node -e "require('http').get('http://localhost:3001/health', r => process.exit(r.statusCode===200?0:1)).on('error', () => process.exit(1))"
interval: 30s interval: 30s
timeout: 10s timeout: 10s
retries: 3 retries: 5
start_period: 40s start_period: 180s
labels: labels:
- "traefik.enable=true" - "traefik.enable=true"
- "traefik.docker.network=motovaultpro_backend" - "traefik.docker.network=motovaultpro_backend"
@@ -164,6 +164,24 @@ services:
- "traefik.http.services.mvp-backend.loadbalancer.healthcheck.timeout=10s" - "traefik.http.services.mvp-backend.loadbalancer.healthcheck.timeout=10s"
- "traefik.http.services.mvp-backend.loadbalancer.passhostheader=true" - "traefik.http.services.mvp-backend.loadbalancer.passhostheader=true"
# Application Services - OCR Processing
# Built from ./ocr/Dockerfile and attached to the backend network only.
mvp-ocr:
  container_name: mvp-ocr
  build:
    context: ./ocr
    dockerfile: Dockerfile
  restart: unless-stopped
  networks:
    - backend
  environment:
    LOG_LEVEL: info
  # Probes the service's /health endpoint from inside the container.
  healthcheck:
    test:
      - CMD
      - curl
      - '-f'
      - 'http://localhost:8000/health'
    start_period: 30s
    interval: 30s
    timeout: 10s
    retries: 3
# Database Services - Application PostgreSQL # Database Services - Application PostgreSQL
mvp-postgres: mvp-postgres:
image: ${REGISTRY_MIRRORS:-git.motovaultpro.com/egullickson/mirrors}/postgres:18-alpine image: ${REGISTRY_MIRRORS:-git.motovaultpro.com/egullickson/mirrors}/postgres:18-alpine

23
ocr/Dockerfile Normal file
View File

@@ -0,0 +1,23 @@
# OCR service image: Python 3.11 (slim) plus Tesseract and HEIF system
# libraries, serving the FastAPI application "app.main:app" with uvicorn on
# port 8000.
FROM python:3.11-slim

# System dependencies
# - tesseract-ocr, tesseract-ocr-eng: OCR engine and English language data
# - libtesseract-dev, libheif1, libheif-dev: Tesseract / HEIF libraries
#   NOTE(review): the -dev packages are presumably only needed when Python
#   wheels must be compiled from source - confirm before removing.
# - libglib2.0-0, libmagic1: shared libraries, presumably required by
#   opencv-python-headless and python-magic respectively - confirm.
# - curl: invoked by the compose healthcheck against /health
RUN apt-get update && apt-get install -y --no-install-recommends \
    tesseract-ocr \
    tesseract-ocr-eng \
    libtesseract-dev \
    libheif1 \
    libheif-dev \
    libglib2.0-0 \
    libmagic1 \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Python dependencies
# requirements.txt is copied on its own first so the pip layer is reused by
# the build cache whenever only application code changes.
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

# Drop root privileges: the service only reads /app and listens on an
# unprivileged port (8000), so it does not need to run as root.
RUN useradd --system --no-create-home --shell /usr/sbin/nologin --uid 10001 ocr
USER ocr

EXPOSE 8000

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]

1
ocr/app/__init__.py Normal file
View File

@@ -0,0 +1 @@
# OCR Service Application

15
ocr/app/config.py Normal file
View File

@@ -0,0 +1,15 @@
"""OCR Service Configuration."""
import os
class Settings:
    """Runtime configuration for the OCR service, sourced from the environment.

    Every attribute is resolved once, when the instance is constructed.
    Variables that are not set fall back to the defaults shown below.

    Raises:
        ValueError: if PORT is set to something ``int()`` cannot parse.
    """

    def __init__(self) -> None:
        env = os.environ
        self.log_level: str = env.get("LOG_LEVEL", "info")
        self.host: str = env.get("HOST", "0.0.0.0")
        # PORT arrives as text; convert it so callers get an int.
        self.port: int = int(env.get("PORT", "8000"))
        self.tesseract_cmd: str = env.get("TESSERACT_CMD", "/usr/bin/tesseract")


# Module-level instance, created at import time.
settings = Settings()

26
ocr/app/main.py Normal file
View File

@@ -0,0 +1,26 @@
"""OCR Service FastAPI Application."""
from fastapi import FastAPI
from app.config import settings
# ASGI application object; the container starts it as "app.main:app".
# The metadata below is passed straight to the FastAPI constructor.
app = FastAPI(
    version="1.0.0",
    title="MotoVaultPro OCR Service",
    description="OCR processing service for vehicle documents",
)
@app.get("/health")
async def health_check() -> dict:
    """Report that the service is up.

    Always returns the same fixed payload; nothing else is probed here.
    This is the endpoint the container healthcheck requests.
    """
    payload = {"status": "healthy"}
    return payload
@app.get("/")
async def root() -> dict:
    """Describe the running service: its name, version, and configured log level."""
    info = dict(service="mvp-ocr", version="1.0.0")
    # The log level is read from the shared settings object on each request.
    info["log_level"] = settings.log_level
    return info

20
ocr/requirements.txt Normal file
View File

@@ -0,0 +1,20 @@
# Python dependencies for the OCR service (installed by ocr/Dockerfile via pip).
# NOTE(review): every entry is a lower bound (>=) only, so the versions that get
# installed can differ between image builds - consider pinning exact versions.

# API Framework
fastapi>=0.100.0
uvicorn[standard]>=0.23.0
python-multipart>=0.0.6

# File Detection & Handling
python-magic>=0.4.27
pillow>=10.0.0
pillow-heif>=0.13.0

# Image Preprocessing
opencv-python-headless>=4.8.0
numpy>=1.24.0

# OCR Engines
pytesseract>=0.3.10

# Testing
# NOTE(review): these are installed into the runtime image too, because the
# Dockerfile installs this single requirements file.
pytest>=7.4.0
httpx>=0.24.0

1
ocr/tests/__init__.py Normal file
View File

@@ -0,0 +1 @@
# OCR Service Tests

52
ocr/tests/test_health.py Normal file
View File

@@ -0,0 +1,52 @@
"""Tests for OCR service health and core functionality."""
import io
import pytest
from fastapi.testclient import TestClient
from PIL import Image
from app.main import app
@pytest.fixture
def client():
    """Provide a TestClient bound to the OCR service's FastAPI app."""
    test_client = TestClient(app)
    return test_client
def test_health_endpoint(client):
    """GET /health answers 200 with the fixed healthy payload."""
    resp = client.get("/health")
    expected_body = {"status": "healthy"}
    assert resp.status_code == 200
    assert resp.json() == expected_body
def test_root_endpoint(client):
    """GET / answers 200 and identifies the service by name and version."""
    resp = client.get("/")
    assert resp.status_code == 200
    body = resp.json()
    # Only the presence of a version is checked, not its value.
    assert body["service"] == "mvp-ocr"
    assert "version" in body
def test_pillow_heif_can_register():
    """Registering pillow-heif makes the HEIF format known to Pillow."""
    from pillow_heif import register_heif_opener

    register_heif_opener()

    # After registration, at least one file extension must map to "HEIF".
    registered_formats = Image.registered_extensions().values()
    assert "HEIF" in registered_formats
def test_tesseract_available():
    """pytesseract can invoke the Tesseract binary without raising."""
    import pytesseract

    # A blank white canvas: no text is expected back, only a successful call.
    blank = Image.new("RGB", (200, 50), color="white")

    text = pytesseract.image_to_string(blank)

    # The content is irrelevant (empty or whitespace); the type proves the call worked.
    assert isinstance(text, str)