feat: add OCR service container (refs #64) #72
@@ -34,6 +34,7 @@ jobs:
|
||||
target_stack: ${{ steps.determine-stack.outputs.target_stack }}
|
||||
backend_image: ${{ steps.set-images.outputs.backend_image }}
|
||||
frontend_image: ${{ steps.set-images.outputs.frontend_image }}
|
||||
ocr_image: ${{ steps.set-images.outputs.ocr_image }}
|
||||
steps:
|
||||
- name: Check Docker availability
|
||||
run: |
|
||||
@@ -53,6 +54,7 @@ jobs:
|
||||
TAG="${{ inputs.image_tag }}"
|
||||
echo "backend_image=$REGISTRY/egullickson/backend:$TAG" >> $GITHUB_OUTPUT
|
||||
echo "frontend_image=$REGISTRY/egullickson/frontend:$TAG" >> $GITHUB_OUTPUT
|
||||
echo "ocr_image=$REGISTRY/egullickson/ocr:$TAG" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Determine target stack
|
||||
id: determine-stack
|
||||
@@ -83,6 +85,7 @@ jobs:
|
||||
TARGET_STACK: ${{ needs.validate.outputs.target_stack }}
|
||||
BACKEND_IMAGE: ${{ needs.validate.outputs.backend_image }}
|
||||
FRONTEND_IMAGE: ${{ needs.validate.outputs.frontend_image }}
|
||||
OCR_IMAGE: ${{ needs.validate.outputs.ocr_image }}
|
||||
steps:
|
||||
- name: Checkout scripts, config, and compose files
|
||||
uses: actions/checkout@v4
|
||||
@@ -138,6 +141,7 @@ jobs:
|
||||
run: |
|
||||
docker pull $BACKEND_IMAGE
|
||||
docker pull $FRONTEND_IMAGE
|
||||
docker pull $OCR_IMAGE
|
||||
|
||||
- name: Record expected image IDs
|
||||
id: expected-images
|
||||
@@ -155,10 +159,12 @@ jobs:
|
||||
cd "$DEPLOY_PATH"
|
||||
export BACKEND_IMAGE=$BACKEND_IMAGE
|
||||
export FRONTEND_IMAGE=$FRONTEND_IMAGE
|
||||
export OCR_IMAGE=$OCR_IMAGE
|
||||
# --force-recreate ensures containers are recreated even if image tag is same
|
||||
# This prevents stale container content when image digest changes
|
||||
# Start shared OCR service and target stack
|
||||
docker compose -f $COMPOSE_FILE -f $COMPOSE_BLUE_GREEN up -d --force-recreate \
|
||||
mvp-frontend-$TARGET_STACK mvp-backend-$TARGET_STACK
|
||||
mvp-ocr mvp-frontend-$TARGET_STACK mvp-backend-$TARGET_STACK
|
||||
|
||||
- name: Wait for stack initialization
|
||||
run: sleep 10
|
||||
|
||||
@@ -29,6 +29,7 @@ jobs:
|
||||
outputs:
|
||||
backend_image: ${{ steps.tags.outputs.backend_image }}
|
||||
frontend_image: ${{ steps.tags.outputs.frontend_image }}
|
||||
ocr_image: ${{ steps.tags.outputs.ocr_image }}
|
||||
short_sha: ${{ steps.tags.outputs.short_sha }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
@@ -45,6 +46,7 @@ jobs:
|
||||
SHORT_SHA="${SHORT_SHA:0:7}"
|
||||
echo "backend_image=$REGISTRY/egullickson/backend:$SHORT_SHA" >> $GITHUB_OUTPUT
|
||||
echo "frontend_image=$REGISTRY/egullickson/frontend:$SHORT_SHA" >> $GITHUB_OUTPUT
|
||||
echo "ocr_image=$REGISTRY/egullickson/ocr:$SHORT_SHA" >> $GITHUB_OUTPUT
|
||||
echo "short_sha=$SHORT_SHA" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Build backend image
|
||||
@@ -74,12 +76,24 @@ jobs:
|
||||
-f frontend/Dockerfile \
|
||||
frontend
|
||||
|
||||
- name: Build OCR image
|
||||
run: |
|
||||
docker build \
|
||||
--build-arg BUILDKIT_INLINE_CACHE=1 \
|
||||
--cache-from $REGISTRY/egullickson/ocr:latest \
|
||||
-t ${{ steps.tags.outputs.ocr_image }} \
|
||||
-t $REGISTRY/egullickson/ocr:latest \
|
||||
-f ocr/Dockerfile \
|
||||
ocr
|
||||
|
||||
- name: Push images
|
||||
run: |
|
||||
docker push ${{ steps.tags.outputs.backend_image }}
|
||||
docker push ${{ steps.tags.outputs.frontend_image }}
|
||||
docker push ${{ steps.tags.outputs.ocr_image }}
|
||||
docker push $REGISTRY/egullickson/backend:latest
|
||||
docker push $REGISTRY/egullickson/frontend:latest
|
||||
docker push $REGISTRY/egullickson/ocr:latest
|
||||
|
||||
# ============================================
|
||||
# DEPLOY STAGING - Deploy to staging server
|
||||
@@ -91,6 +105,7 @@ jobs:
|
||||
env:
|
||||
BACKEND_IMAGE: ${{ needs.build.outputs.backend_image }}
|
||||
FRONTEND_IMAGE: ${{ needs.build.outputs.frontend_image }}
|
||||
OCR_IMAGE: ${{ needs.build.outputs.ocr_image }}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
@@ -139,12 +154,14 @@ jobs:
|
||||
run: |
|
||||
docker pull $BACKEND_IMAGE
|
||||
docker pull $FRONTEND_IMAGE
|
||||
docker pull $OCR_IMAGE
|
||||
|
||||
- name: Deploy staging stack
|
||||
run: |
|
||||
cd "$DEPLOY_PATH"
|
||||
export BACKEND_IMAGE=$BACKEND_IMAGE
|
||||
export FRONTEND_IMAGE=$FRONTEND_IMAGE
|
||||
export OCR_IMAGE=$OCR_IMAGE
|
||||
docker compose -f $COMPOSE_FILE -f $COMPOSE_STAGING down --timeout 30 || true
|
||||
docker compose -f $COMPOSE_FILE -f $COMPOSE_STAGING up -d
|
||||
|
||||
@@ -164,7 +181,7 @@ jobs:
|
||||
|
||||
- name: Check container status and health
|
||||
run: |
|
||||
for service in mvp-frontend-staging mvp-backend-staging mvp-postgres-staging mvp-redis-staging; do
|
||||
for service in mvp-frontend-staging mvp-backend-staging mvp-ocr-staging mvp-postgres-staging mvp-redis-staging; do
|
||||
status=$(docker inspect --format='{{.State.Status}}' $service 2>/dev/null || echo "not found")
|
||||
if [ "$status" != "running" ]; then
|
||||
echo "ERROR: $service is not running (status: $status)"
|
||||
@@ -177,11 +194,12 @@ jobs:
|
||||
# Wait for Docker healthchecks to complete (services with healthcheck defined)
|
||||
echo ""
|
||||
echo "Waiting for Docker healthchecks..."
|
||||
for service in mvp-frontend-staging mvp-backend-staging mvp-postgres-staging mvp-redis-staging; do
|
||||
for service in mvp-frontend-staging mvp-backend-staging mvp-ocr-staging mvp-postgres-staging mvp-redis-staging; do
|
||||
# Check if service has a healthcheck defined
|
||||
has_healthcheck=$(docker inspect --format='{{if .Config.Healthcheck}}true{{else}}false{{end}}' $service 2>/dev/null || echo "false")
|
||||
if [ "$has_healthcheck" = "true" ]; then
|
||||
for i in 1 2 3 4 5 6 7 8 9 10; do
|
||||
# 24 attempts x 10 seconds = 4 minutes max wait (backend with fresh migrations can take ~3 min)
|
||||
for i in $(seq 1 24); do
|
||||
health=$(docker inspect --format='{{.State.Health.Status}}' $service 2>/dev/null || echo "unknown")
|
||||
if [ "$health" = "healthy" ]; then
|
||||
echo "OK: $service is healthy"
|
||||
@@ -191,13 +209,13 @@ jobs:
|
||||
docker logs $service --tail 50 2>/dev/null || true
|
||||
exit 1
|
||||
fi
|
||||
if [ $i -eq 10 ]; then
|
||||
if [ $i -eq 24 ]; then
|
||||
echo "ERROR: $service health check timed out (status: $health)"
|
||||
docker logs $service --tail 50 2>/dev/null || true
|
||||
exit 1
|
||||
fi
|
||||
echo "Waiting for $service healthcheck... (attempt $i/10, status: $health)"
|
||||
sleep 5
|
||||
echo "Waiting for $service healthcheck... (attempt $i/24, status: $health)"
|
||||
sleep 10
|
||||
done
|
||||
else
|
||||
echo "SKIP: $service has no healthcheck defined"
|
||||
|
||||
@@ -17,7 +17,8 @@ const pool = new Pool({
|
||||
const MIGRATION_ORDER = [
|
||||
'features/vehicles', // Primary entity, defines update_updated_at_column()
|
||||
'features/platform', // Normalized make/model/trim schema for dropdowns
|
||||
'features/documents', // Depends on vehicles; provides documents table
|
||||
'features/user-profile', // User profile management; needed by documents migration
|
||||
'features/documents', // Depends on vehicles, user-profile; provides documents table
|
||||
'core/user-preferences', // Depends on update_updated_at_column()
|
||||
'features/fuel-logs', // Depends on vehicles
|
||||
'features/maintenance', // Depends on vehicles
|
||||
@@ -25,7 +26,6 @@ const MIGRATION_ORDER = [
|
||||
'features/admin', // Admin role management and oversight; depends on update_updated_at_column()
|
||||
'features/backup', // Admin backup feature; depends on update_updated_at_column()
|
||||
'features/notifications', // Depends on maintenance and documents
|
||||
'features/user-profile', // User profile management; independent
|
||||
'features/terms-agreement', // Terms & Conditions acceptance audit trail
|
||||
'features/audit-log', // Centralized audit logging; independent
|
||||
'features/ownership-costs', // Depends on vehicles and documents; TCO recurring costs
|
||||
|
||||
@@ -194,6 +194,12 @@ services:
|
||||
- "com.motovaultpro.stack=green"
|
||||
- "com.motovaultpro.service=backend"
|
||||
|
||||
# ========================================
|
||||
# Shared Service - OCR Processing
|
||||
# ========================================
|
||||
mvp-ocr:
|
||||
image: ${OCR_IMAGE:-git.motovaultpro.com/egullickson/ocr:latest}
|
||||
|
||||
# ========================================
|
||||
# Override Traefik to add dynamic config
|
||||
# ========================================
|
||||
|
||||
@@ -55,6 +55,13 @@ services:
|
||||
- "traefik.http.routers.mvp-backend-health.priority=30"
|
||||
- "traefik.http.services.mvp-backend.loadbalancer.server.port=3001"
|
||||
|
||||
# ========================================
|
||||
# OCR Service (Staging)
|
||||
# ========================================
|
||||
mvp-ocr:
|
||||
image: ${OCR_IMAGE:-git.motovaultpro.com/egullickson/ocr:latest}
|
||||
container_name: mvp-ocr-staging
|
||||
|
||||
# ========================================
|
||||
# PostgreSQL (Staging - Separate Database)
|
||||
# ========================================
|
||||
|
||||
@@ -140,8 +140,8 @@ services:
|
||||
- node -e "require('http').get('http://localhost:3001/health', r => process.exit(r.statusCode===200?0:1)).on('error', () => process.exit(1))"
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 40s
|
||||
retries: 5
|
||||
start_period: 180s
|
||||
labels:
|
||||
- "traefik.enable=true"
|
||||
- "traefik.docker.network=motovaultpro_backend"
|
||||
@@ -164,6 +164,24 @@ services:
|
||||
- "traefik.http.services.mvp-backend.loadbalancer.healthcheck.timeout=10s"
|
||||
- "traefik.http.services.mvp-backend.loadbalancer.passhostheader=true"
|
||||
|
||||
# Application Services - OCR Processing
|
||||
mvp-ocr:
|
||||
build:
|
||||
context: ./ocr
|
||||
dockerfile: Dockerfile
|
||||
container_name: mvp-ocr
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
LOG_LEVEL: info
|
||||
networks:
|
||||
- backend
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 30s
|
||||
|
||||
# Database Services - Application PostgreSQL
|
||||
mvp-postgres:
|
||||
image: ${REGISTRY_MIRRORS:-git.motovaultpro.com/egullickson/mirrors}/postgres:18-alpine
|
||||
|
||||
23
ocr/Dockerfile
Normal file
23
ocr/Dockerfile
Normal file
@@ -0,0 +1,23 @@
|
||||
# OCR service image: Python 3.11 with Tesseract and HEIF libraries, served by uvicorn.
FROM python:3.11-slim

# System dependencies:
#   tesseract-ocr*, libtesseract-dev  - OCR engine invoked through pytesseract
#   libheif1, libheif-dev             - HEIF/HEIC decoding for pillow-heif
#   libglib2.0-0                      - shared library needed by OpenCV
#   libmagic1                         - file type detection for python-magic
#   curl                              - used by the container healthcheck
RUN apt-get update && apt-get install -y --no-install-recommends \
        tesseract-ocr \
        tesseract-ocr-eng \
        libtesseract-dev \
        libheif1 \
        libheif-dev \
        libglib2.0-0 \
        libmagic1 \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Python dependencies are installed before the source is copied so this layer
# stays cached until requirements.txt itself changes.
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Run the service as an unprivileged account instead of root; the service
# listens on port 8000, which does not require elevated privileges.
RUN useradd --system --create-home --shell /usr/sbin/nologin ocr \
    && chown ocr:ocr /app

COPY --chown=ocr:ocr . .

USER ocr

EXPOSE 8000
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
1
ocr/app/__init__.py
Normal file
1
ocr/app/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
# OCR Service Application
|
||||
15
ocr/app/config.py
Normal file
15
ocr/app/config.py
Normal file
@@ -0,0 +1,15 @@
|
||||
"""OCR Service Configuration."""
|
||||
import os
|
||||
|
||||
|
||||
class Settings:
    """Application settings loaded from environment variables.

    Attributes:
        log_level: Value of ``LOG_LEVEL`` (default ``"info"``).
        host: Value of ``HOST`` (default ``"0.0.0.0"``).
        port: Value of ``PORT`` parsed as an integer (default ``8000``).
        tesseract_cmd: Value of ``TESSERACT_CMD``
            (default ``"/usr/bin/tesseract"``).

    Raises:
        ValueError: If ``PORT`` is set to a value that is not an integer.
    """

    def __init__(self) -> None:
        self.log_level: str = self._env("LOG_LEVEL", "info")
        self.host: str = self._env("HOST", "0.0.0.0")
        self.port: int = self._env_int("PORT", 8000)
        self.tesseract_cmd: str = self._env("TESSERACT_CMD", "/usr/bin/tesseract")

    @staticmethod
    def _env(name: str, default: str) -> str:
        """Return the environment variable ``name``, or ``default`` if unset or blank."""
        # A variable that is exported but empty (for example `PORT=` in an env
        # file) is treated the same as an unset one, so it cannot silently
        # override the default with an unusable empty value.
        value = os.getenv(name, "").strip()
        return value or default

    @staticmethod
    def _env_int(name: str, default: int) -> int:
        """Return the environment variable ``name`` parsed as an integer."""
        raw = Settings._env(name, str(default))
        try:
            return int(raw)
        except ValueError:
            # Name the offending variable; the bare int() message does not.
            raise ValueError(f"{name} must be an integer, got {raw!r}") from None
|
||||
|
||||
|
||||
settings = Settings()
|
||||
26
ocr/app/main.py
Normal file
26
ocr/app/main.py
Normal file
@@ -0,0 +1,26 @@
|
||||
"""OCR Service FastAPI Application."""
|
||||
from fastapi import FastAPI
|
||||
|
||||
from app.config import settings
|
||||
|
||||
app = FastAPI(
|
||||
title="MotoVaultPro OCR Service",
|
||||
description="OCR processing service for vehicle documents",
|
||||
version="1.0.0",
|
||||
)
|
||||
|
||||
|
||||
@app.get("/health")
async def health_check() -> dict:
    """Health probe for container orchestration; always reports healthy."""
    status_payload = {"status": "healthy"}
    return status_payload
|
||||
|
||||
|
||||
@app.get("/")
async def root() -> dict:
    """Root endpoint with service information.

    Returns:
        A dict with the service name, the application version and the
        configured log level.
    """
    return {
        "service": "mvp-ocr",
        # Report the version the FastAPI app was created with, so the value
        # shown here cannot drift from the one in the generated API docs.
        "version": app.version,
        "log_level": settings.log_level,
    }
|
||||
20
ocr/requirements.txt
Normal file
20
ocr/requirements.txt
Normal file
@@ -0,0 +1,20 @@
|
||||
# API Framework
|
||||
fastapi>=0.100.0
|
||||
uvicorn[standard]>=0.23.0
|
||||
python-multipart>=0.0.6
|
||||
|
||||
# File Detection & Handling
|
||||
python-magic>=0.4.27
|
||||
pillow>=10.0.0
|
||||
pillow-heif>=0.13.0
|
||||
|
||||
# Image Preprocessing
|
||||
opencv-python-headless>=4.8.0
|
||||
numpy>=1.24.0
|
||||
|
||||
# OCR Engines
|
||||
pytesseract>=0.3.10
|
||||
|
||||
# Testing
|
||||
pytest>=7.4.0
|
||||
httpx>=0.24.0
|
||||
1
ocr/tests/__init__.py
Normal file
1
ocr/tests/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
# OCR Service Tests
|
||||
52
ocr/tests/test_health.py
Normal file
52
ocr/tests/test_health.py
Normal file
@@ -0,0 +1,52 @@
|
||||
"""Tests for OCR service health and core functionality."""
|
||||
import io
|
||||
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
from PIL import Image
|
||||
|
||||
from app.main import app
|
||||
|
||||
|
||||
@pytest.fixture
def client():
    """Provide a FastAPI TestClient bound to the OCR application."""
    test_client = TestClient(app)
    return test_client
|
||||
|
||||
|
||||
def test_health_endpoint(client):
    """GET /health answers 200 with the healthy payload."""
    resp = client.get("/health")
    expected_body = {"status": "healthy"}
    assert resp.status_code == 200
    assert resp.json() == expected_body
|
||||
|
||||
|
||||
def test_root_endpoint(client):
    """GET / answers 200 and identifies the service."""
    resp = client.get("/")
    assert resp.status_code == 200

    body = resp.json()
    assert body["service"] == "mvp-ocr"
    assert "version" in body
|
||||
|
||||
|
||||
def test_pillow_heif_can_register():
    """pillow-heif can register itself with Pillow."""
    import pillow_heif

    pillow_heif.register_heif_opener()

    # After registration at least one file extension must map to the HEIF format.
    registered_formats = Image.registered_extensions().values()
    assert "HEIF" in registered_formats
|
||||
|
||||
|
||||
def test_tesseract_available():
    """pytesseract can invoke Tesseract on an image without raising."""
    import pytesseract

    # A blank white canvas: there is no text to recognise, so this only checks
    # that the call completes and returns a string (empty or whitespace).
    blank_canvas = Image.new("RGB", (200, 50), color="white")
    ocr_text = pytesseract.image_to_string(blank_canvas)

    assert isinstance(ocr_text, str)
|
||||
Reference in New Issue
Block a user