Files
motovaultpro/.gitea/workflows/production.yaml
Eric Gullickson efbbe34080
All checks were successful
Deploy to Staging / Build Images (push) Successful in 33s
Deploy to Staging / Deploy to Staging (push) Successful in 31s
Deploy to Staging / Verify Staging (push) Successful in 2m19s
Deploy to Staging / Notify Staging Ready (push) Successful in 7s
Deploy to Staging / Notify Staging Failure (push) Has been skipped
fix: add backend health check step to production workflow
Add "Wait for backend health" step using docker exec to verify backend
is responding before attempting external health check. Matches staging
workflow pattern.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-03 20:42:59 -06:00

468 lines
19 KiB
YAML

# MotoVaultPro Production Deployment Workflow
#
# Trigger:  manual only (workflow_dispatch) - run after verifying staging.
# Strategy: blue-green deployment with auto-rollback.
#
# Optimization: every job uses a sparse, shallow checkout (fetch-depth: 1).
# deploy-prod fetches scripts/, config/ and the two compose files; the
# rollback and notify jobs fetch scripts/ only.
name: Deploy to Production
run-name: Production Deploy - ${{ inputs.image_tag || 'latest' }}

on:
  workflow_dispatch:
    inputs:
      image_tag:
        description: "Image tag to deploy (defaults to latest)"
        required: false
        default: "latest"

# Workflow-wide environment, visible to every step of every job.
env:
  REGISTRY: git.motovaultpro.com
  DEPLOY_PATH: /opt/motovaultpro
  COMPOSE_FILE: docker-compose.yml
  COMPOSE_BLUE_GREEN: docker-compose.blue-green.yml
  # Quoted so the value stays a string; passed to scripts/ci/health-check.sh.
  HEALTH_CHECK_TIMEOUT: '60'
  LOG_LEVEL: INFO
jobs:
# ============================================
# VALIDATE - Determine target stack
# ============================================
  # Pre-flight job: checks that Docker and the deploy path are usable, logs in
  # to the registry, derives the full image references from the requested tag
  # and picks the stack (blue/green) that is NOT currently active.
  #
  # Outputs:
  #   target_stack   - "blue" or "green", the stack to deploy to
  #   backend_image  - full backend image reference including tag
  #   frontend_image - full frontend image reference including tag
  #   ocr_image      - full OCR image reference including tag
  validate:
    name: Validate Prerequisites
    runs-on: prod
    outputs:
      target_stack: ${{ steps.determine-stack.outputs.target_stack }}
      backend_image: ${{ steps.set-images.outputs.backend_image }}
      frontend_image: ${{ steps.set-images.outputs.frontend_image }}
      ocr_image: ${{ steps.set-images.outputs.ocr_image }}
    steps:
      - name: Check Docker availability
        run: |
          docker info > /dev/null 2>&1 || (echo "ERROR - Docker not accessible" && exit 1)
          docker compose version > /dev/null 2>&1 || (echo "ERROR - Docker Compose not available" && exit 1)

      - name: Check deployment path
        run: |
          test -d "$DEPLOY_PATH" || (echo "ERROR - DEPLOY_PATH not found" && exit 1)

      - name: Login to Gitea Container Registry
        # Credentials go through the environment instead of being templated
        # into the script text, so quotes or `$` in a secret cannot break or
        # alter the command.
        env:
          REGISTRY_USER: ${{ secrets.REGISTRY_USER }}
          REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }}
        run: |
          printf '%s' "$REGISTRY_PASSWORD" | docker login -u "$REGISTRY_USER" --password-stdin "$REGISTRY"

      - name: Set image tags
        id: set-images
        # The user-supplied tag is passed via env (never interpolated into the
        # shell source) and falls back to "latest" when empty, matching the
        # fallback used by run-name.
        env:
          IMAGE_TAG: ${{ inputs.image_tag || 'latest' }}
        run: |
          {
            echo "backend_image=$REGISTRY/egullickson/backend:$IMAGE_TAG"
            echo "frontend_image=$REGISTRY/egullickson/frontend:$IMAGE_TAG"
            echo "ocr_image=$REGISTRY/egullickson/ocr:$IMAGE_TAG"
          } >> "$GITHUB_OUTPUT"

      - name: Determine target stack
        id: determine-stack
        run: |
          STATE_FILE="$DEPLOY_PATH/config/deployment/state.json"
          if [ -f "$STATE_FILE" ] && command -v jq &> /dev/null; then
            # A state file without an active_stack key is treated as "blue".
            ACTIVE_STACK=$(jq -r '.active_stack // "blue"' "$STATE_FILE")
            if [ "$ACTIVE_STACK" = "blue" ]; then
              echo "target_stack=green" >> "$GITHUB_OUTPUT"
              echo "Deploying to GREEN stack (BLUE is currently active)"
            else
              echo "target_stack=blue" >> "$GITHUB_OUTPUT"
              echo "Deploying to BLUE stack (GREEN is currently active)"
            fi
          else
            # NOTE(review): this fallback assumes GREEN is not the live stack.
            # If the state file exists but jq is missing, that assumption is
            # unverified - confirm jq is installed on the prod runner.
            echo "target_stack=green" >> "$GITHUB_OUTPUT"
            if [ -f "$STATE_FILE" ]; then
              echo "WARNING: jq not available, cannot read $STATE_FILE - defaulting to GREEN stack"
            else
              echo "No state file found, defaulting to GREEN stack"
            fi
          fi
# ============================================
# DEPLOY PROD - Blue-green deployment
# ============================================
  # Syncs config/scripts/compose files to $DEPLOY_PATH, pulls the requested
  # images, starts the target stack, verifies the containers run the pulled
  # images, health-checks the stack, then switches traffic and records the
  # deployment in state.json.
  deploy-prod:
    name: Deploy to Production
    runs-on: prod
    needs: validate
    env:
      TARGET_STACK: ${{ needs.validate.outputs.target_stack }}
      BACKEND_IMAGE: ${{ needs.validate.outputs.backend_image }}
      FRONTEND_IMAGE: ${{ needs.validate.outputs.frontend_image }}
      OCR_IMAGE: ${{ needs.validate.outputs.ocr_image }}
    steps:
      - name: Checkout scripts, config, and compose files
        uses: actions/checkout@v4
        with:
          sparse-checkout: |
            scripts/
            config/
            docker-compose.yml
            docker-compose.blue-green.yml
          sparse-checkout-cone-mode: false
          fetch-depth: 1

      - name: Sync config, scripts, and compose files to deploy path
        # NOTE(review): --delete makes $DEPLOY_PATH/config mirror the checkout.
        # config/deployment/state.json and config/traefik/dynamic/blue-green.yml
        # live under that directory - confirm the sync cannot drop or reset
        # runtime-managed files there.
        run: |
          rsync -av --delete "$GITHUB_WORKSPACE/config/" "$DEPLOY_PATH/config/"
          rsync -av --delete "$GITHUB_WORKSPACE/scripts/" "$DEPLOY_PATH/scripts/"
          cp "$GITHUB_WORKSPACE/docker-compose.yml" "$DEPLOY_PATH/"
          cp "$GITHUB_WORKSPACE/docker-compose.blue-green.yml" "$DEPLOY_PATH/"

      - name: Generate logging configuration
        run: |
          cd "$DEPLOY_PATH"
          chmod +x scripts/ci/generate-log-config.sh
          ./scripts/ci/generate-log-config.sh "$LOG_LEVEL"

      - name: Login to registry
        # Credentials are passed via env rather than templated into the script
        # so special characters in a secret cannot break the command.
        env:
          REGISTRY_USER: ${{ secrets.REGISTRY_USER }}
          REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }}
        run: |
          printf '%s' "$REGISTRY_PASSWORD" | docker login -u "$REGISTRY_USER" --password-stdin "$REGISTRY"

      - name: Inject secrets
        run: |
          chmod +x "$GITHUB_WORKSPACE/scripts/inject-secrets.sh"
          "$GITHUB_WORKSPACE/scripts/inject-secrets.sh"
        env:
          POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }}
          AUTH0_CLIENT_SECRET: ${{ secrets.AUTH0_CLIENT_SECRET }}
          AUTH0_MANAGEMENT_CLIENT_ID: ${{ secrets.AUTH0_MANAGEMENT_CLIENT_ID }}
          AUTH0_MANAGEMENT_CLIENT_SECRET: ${{ secrets.AUTH0_MANAGEMENT_CLIENT_SECRET }}
          GOOGLE_MAPS_API_KEY: ${{ secrets.GOOGLE_MAPS_API_KEY }}
          GOOGLE_MAPS_MAP_ID: ${{ secrets.GOOGLE_MAPS_MAP_ID }}
          CF_DNS_API_TOKEN: ${{ secrets.CF_DNS_API_TOKEN }}
          RESEND_API_KEY: ${{ secrets.RESEND_API_KEY }}
          STRIPE_SECRET_KEY: ${{ secrets.STRIPE_SECRET_KEY }}
          STRIPE_WEBHOOK_SECRET: ${{ secrets.STRIPE_WEBHOOK_SECRET }}

      - name: Initialize data directories
        run: |
          cd "$DEPLOY_PATH"
          sudo mkdir -p data/backups data/documents data/traefik
          sudo chown -R 1001:1001 data/backups data/documents
          sudo chmod 755 data/backups data/documents
          # Traefik acme.json requires 600 permissions
          if [ ! -f data/traefik/acme.json ]; then
            sudo touch data/traefik/acme.json
          fi
          sudo chmod 600 data/traefik/acme.json

      - name: Pull new images
        run: |
          docker pull "$BACKEND_IMAGE"
          docker pull "$FRONTEND_IMAGE"
          docker pull "$OCR_IMAGE"

      - name: Record expected image IDs
        id: expected-images
        run: |
          # Get the image IDs we just pulled - these are what containers should use
          FRONTEND_ID=$(docker images --format '{{.ID}}' "$FRONTEND_IMAGE" | head -1)
          BACKEND_ID=$(docker images --format '{{.ID}}' "$BACKEND_IMAGE" | head -1)
          echo "Expected frontend image ID: $FRONTEND_ID"
          echo "Expected backend image ID: $BACKEND_ID"
          {
            echo "frontend_id=$FRONTEND_ID"
            echo "backend_id=$BACKEND_ID"
          } >> "$GITHUB_OUTPUT"

      - name: Start target stack
        run: |
          cd "$DEPLOY_PATH"
          # Make the image references available to docker compose.
          export BACKEND_IMAGE FRONTEND_IMAGE OCR_IMAGE
          # --force-recreate ensures containers are recreated even if image tag is same
          # This prevents stale container content when image digest changes
          # Start shared OCR service and target stack
          docker compose -f "$COMPOSE_FILE" -f "$COMPOSE_BLUE_GREEN" up -d --force-recreate \
            mvp-ocr "mvp-frontend-$TARGET_STACK" "mvp-backend-$TARGET_STACK"

      - name: Wait for stack initialization
        run: sleep 10

      - name: Verify container images
        run: |
          # Verify containers are running the expected images.
          # The expected IDs come from `docker images` (short form), so the
          # running image digest is stripped of "sha256:" and cut to 12 chars.
          EXPECTED_FRONTEND="${{ steps.expected-images.outputs.frontend_id }}"
          EXPECTED_BACKEND="${{ steps.expected-images.outputs.backend_id }}"
          RUNNING_FRONTEND=$(docker inspect --format='{{.Image}}' "mvp-frontend-$TARGET_STACK" | sed 's/sha256://' | cut -c1-12)
          RUNNING_BACKEND=$(docker inspect --format='{{.Image}}' "mvp-backend-$TARGET_STACK" | sed 's/sha256://' | cut -c1-12)
          echo "Frontend - Expected: $EXPECTED_FRONTEND, Running: $RUNNING_FRONTEND"
          echo "Backend - Expected: $EXPECTED_BACKEND, Running: $RUNNING_BACKEND"
          if [[ "$RUNNING_FRONTEND" != "$EXPECTED_FRONTEND" ]]; then
            echo "ERROR: Frontend container not using expected image!"
            echo "Container may be stale. Force recreate should have prevented this."
            exit 1
          fi
          if [[ "$RUNNING_BACKEND" != "$EXPECTED_BACKEND" ]]; then
            echo "ERROR: Backend container not using expected image!"
            exit 1
          fi
          echo "OK: All containers using correct images"

      - name: Run health check
        run: |
          chmod +x "$GITHUB_WORKSPACE/scripts/ci/health-check.sh"
          DEPLOY_PATH="$DEPLOY_PATH" "$GITHUB_WORKSPACE/scripts/ci/health-check.sh" "$TARGET_STACK" "$HEALTH_CHECK_TIMEOUT"

      - name: Start Traefik
        run: |
          cd "$DEPLOY_PATH"
          docker compose -f "$COMPOSE_FILE" -f "$COMPOSE_BLUE_GREEN" up -d mvp-traefik

      - name: Wait for Traefik
        run: |
          echo "Waiting for Traefik to be healthy..."
          # Poll every 2s for up to 30s; on timeout dump status and recent logs.
          timeout 30 bash -c "until docker inspect --format='{{.State.Health.Status}}' mvp-traefik 2>/dev/null | grep -q healthy; do sleep 2; done" || {
            echo "Traefik health check timed out, checking status..."
            docker inspect --format='{{.State.Status}}' mvp-traefik
            docker logs mvp-traefik --tail 20
            exit 1
          }
          echo "Traefik is healthy"

      - name: Switch traffic
        run: |
          chmod +x "$GITHUB_WORKSPACE/scripts/ci/switch-traffic.sh"
          # DEPLOY_PATH ensures script modifies config at /opt/motovaultpro, not checkout dir
          DEPLOY_PATH="$DEPLOY_PATH" "$GITHUB_WORKSPACE/scripts/ci/switch-traffic.sh" "$TARGET_STACK" instant

      - name: Update deployment state
        # The tag is passed via env so user input is never interpolated into
        # the shell source; empty input falls back to "latest" like run-name.
        env:
          IMAGE_TAG: ${{ inputs.image_tag || 'latest' }}
        run: |
          cd "$DEPLOY_PATH"
          STATE_FILE="config/deployment/state.json"
          # NOTE(review): active_stack is not written here - presumably
          # switch-traffic.sh maintains it; confirm.
          if [ -f "$STATE_FILE" ] && command -v jq &> /dev/null; then
            TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
            # Write to a temp file first so a jq failure cannot truncate state.json.
            jq --arg commit "$IMAGE_TAG" \
               --arg ts "$TIMESTAMP" \
               '.last_deployment = $ts | .last_deployment_commit = $commit | .last_deployment_status = "success" | .rollback_available = true' \
               "$STATE_FILE" > "${STATE_FILE}.tmp" && mv "${STATE_FILE}.tmp" "$STATE_FILE"
          fi
# ============================================
# VERIFY PROD - External health check
# ============================================
  # Post-switch verification: containers running and healthy, backend answers
  # on its internal health endpoint, the public /api/health endpoint reports
  # "healthy" with all required features, and the Traefik weight file routes
  # traffic to the target stack. Any failing step fails the job, which in turn
  # triggers the rollback job.
  verify-prod:
    name: Verify Production
    runs-on: prod
    needs: [validate, deploy-prod]
    env:
      TARGET_STACK: ${{ needs.validate.outputs.target_stack }}
    steps:
      - name: Wait for routing propagation
        run: sleep 5

      - name: Check container status and health
        run: |
          for service in "mvp-frontend-$TARGET_STACK" "mvp-backend-$TARGET_STACK" mvp-ocr; do
            status=$(docker inspect --format='{{.State.Status}}' "$service" 2>/dev/null || echo "not found")
            if [ "$status" != "running" ]; then
              echo "ERROR: $service is not running (status: $status)"
              docker logs "$service" --tail 50 2>/dev/null || true
              exit 1
            fi
            echo "OK: $service is running"
          done
          # Wait for Docker healthchecks to complete (services with healthcheck defined)
          echo ""
          echo "Waiting for Docker healthchecks..."
          for service in "mvp-frontend-$TARGET_STACK" "mvp-backend-$TARGET_STACK" mvp-ocr; do
            # Check if service has a healthcheck defined
            has_healthcheck=$(docker inspect --format='{{if .Config.Healthcheck}}true{{else}}false{{end}}' "$service" 2>/dev/null || echo "false")
            if [ "$has_healthcheck" = "true" ]; then
              # 24 attempts x 10 seconds = 4 minutes max wait (backend with fresh migrations can take ~3 min)
              for i in $(seq 1 24); do
                health=$(docker inspect --format='{{.State.Health.Status}}' "$service" 2>/dev/null || echo "unknown")
                if [ "$health" = "healthy" ]; then
                  echo "OK: $service is healthy"
                  break
                elif [ "$health" = "unhealthy" ]; then
                  echo "ERROR: $service is unhealthy"
                  docker logs "$service" --tail 50 2>/dev/null || true
                  exit 1
                fi
                if [ "$i" -eq 24 ]; then
                  echo "ERROR: $service health check timed out (status: $health)"
                  docker logs "$service" --tail 50 2>/dev/null || true
                  exit 1
                fi
                echo "Waiting for $service healthcheck... (attempt $i/24, status: $health)"
                sleep 10
              done
            else
              echo "SKIP: $service has no healthcheck defined"
            fi
          done

      - name: Wait for backend health
        run: |
          # Probe the backend from inside its own container: 6 attempts, 10s apart.
          for i in 1 2 3 4 5 6; do
            if docker exec "mvp-backend-$TARGET_STACK" curl -sf http://localhost:3001/health > /dev/null 2>&1; then
              echo "OK: Backend health check passed"
              exit 0
            fi
            if [ "$i" -eq 6 ]; then
              echo "ERROR: Backend health check failed after 6 attempts"
              docker logs "mvp-backend-$TARGET_STACK" --tail 100
              exit 1
            fi
            echo "Attempt $i/6: Backend not ready, waiting 10s..."
            sleep 10
          done

      - name: External health check
        run: |
          REQUIRED_FEATURES='["admin","auth","onboarding","vehicles","documents","fuel-logs","stations","maintenance","platform","notifications","user-profile","user-preferences","user-export"]'
          for i in 1 2 3 4 5 6; do
            RESPONSE=$(curl -sf https://motovaultpro.com/api/health 2>/dev/null) || {
              echo "Attempt $i/6: Connection failed, waiting 10s..."
              sleep 10
              continue
            }
            # Check status is "healthy".
            # A body that is not valid JSON must count as a failed attempt and
            # be retried, not abort the whole step through the shell's
            # errexit, so the jq failure is caught here.
            STATUS=$(echo "$RESPONSE" | jq -r '.status' 2>/dev/null) || STATUS="unparseable"
            if [ "$STATUS" != "healthy" ]; then
              echo "Attempt $i/6: Status is '$STATUS', not 'healthy'. Waiting 10s..."
              sleep 10
              continue
            fi
            # Check all required features are present.
            # `.features // []` treats a missing/null list as "nothing
            # present", so every required feature is reported as missing
            # instead of jq erroring on `array - null`.
            MISSING=$(echo "$RESPONSE" | jq -r --argjson required "$REQUIRED_FEATURES" '
              $required - (.features // []) | if length > 0 then . else empty end | @json
            ' 2>/dev/null) || MISSING="unparseable features field"
            if [ -n "$MISSING" ]; then
              echo "Attempt $i/6: Missing features: $MISSING. Waiting 10s..."
              sleep 10
              continue
            fi
            FEATURE_COUNT=$(echo "$RESPONSE" | jq '.features | length')
            echo "OK: Production health check passed - status: healthy, features: $FEATURE_COUNT"
            exit 0
          done
          echo "ERROR: Production health check failed after 6 attempts"
          echo "Last response: $RESPONSE"
          exit 1

      - name: Verify container status
        run: |
          # NOTE(review): unlike the earlier step, this requires a "healthy"
          # status and so fails for a container without a healthcheck -
          # confirm both frontend and backend define one.
          for service in "mvp-frontend-$TARGET_STACK" "mvp-backend-$TARGET_STACK"; do
            status=$(docker inspect --format='{{.State.Status}}' "$service" 2>/dev/null || echo "not found")
            health=$(docker inspect --format='{{.State.Health.Status}}' "$service" 2>/dev/null || echo "unknown")
            if [ "$status" != "running" ] || [ "$health" != "healthy" ]; then
              echo "ERROR: $service is not healthy (status: $status, health: $health)"
              docker logs "$service" --tail 50 2>/dev/null || true
              exit 1
            fi
            echo "OK: $service is running and healthy"
          done

      - name: Validate Traefik routing weights
        run: |
          # Verify traffic has actually switched to the new stack
          BLUE_GREEN_CONFIG="$DEPLOY_PATH/config/traefik/dynamic/blue-green.yml"
          # The target stack must carry all traffic regardless of its colour.
          EXPECTED_TARGET_WEIGHT=100
          if [[ "$TARGET_STACK" == "green" ]]; then
            TARGET_SVC="mvp-frontend-green-svc"
          else
            TARGET_SVC="mvp-frontend-blue-svc"
          fi
          # Reads the first number on the "weight" line that follows the service
          # name. `|| true` keeps a grep miss from aborting the step before the
          # diagnostic below is printed; an empty value fails the comparison.
          ACTUAL_WEIGHT=$(grep -A1 "$TARGET_SVC" "$BLUE_GREEN_CONFIG" | grep weight | grep -oE '[0-9]+' | head -1) || true
          if [[ "$ACTUAL_WEIGHT" != "$EXPECTED_TARGET_WEIGHT" ]]; then
            echo "ERROR: Traffic not routed to $TARGET_STACK stack!"
            echo "Expected weight for $TARGET_SVC: $EXPECTED_TARGET_WEIGHT, Actual: $ACTUAL_WEIGHT"
            grep -A2 weight "$BLUE_GREEN_CONFIG" || true
            exit 1
          fi
          echo "OK: Traffic correctly routed to $TARGET_STACK (weight: $ACTUAL_WEIGHT)"
# ============================================
# ROLLBACK - Auto-rollback on failure
# ============================================
  # Runs scripts/ci/auto-rollback.sh and marks the deployment as rolled back
  # when deploy-prod or verify-prod fails.
  rollback:
    name: Auto Rollback
    runs-on: prod
    needs: [validate, deploy-prod, verify-prod]
    # failure() alone is also true when validate itself failed. In that case
    # deploy-prod never ran (it needs validate), so there is nothing to roll
    # back; only roll back once validate has succeeded.
    if: failure() && needs.validate.result == 'success'
    steps:
      - name: Checkout scripts
        uses: actions/checkout@v4
        with:
          sparse-checkout: scripts/
          sparse-checkout-cone-mode: true
          fetch-depth: 1

      - name: Execute rollback
        run: |
          chmod +x "$GITHUB_WORKSPACE/scripts/ci/auto-rollback.sh"
          DEPLOY_PATH="$DEPLOY_PATH" "$GITHUB_WORKSPACE/scripts/ci/auto-rollback.sh" "Production verification failed - automatic rollback"

      - name: Update state
        run: |
          cd "$DEPLOY_PATH"
          STATE_FILE="config/deployment/state.json"
          if [ -f "$STATE_FILE" ] && command -v jq &> /dev/null; then
            # Write to a temp file first so a jq failure cannot truncate state.json.
            jq '.last_deployment_status = "rolled_back"' "$STATE_FILE" > "${STATE_FILE}.tmp" && mv "${STATE_FILE}.tmp" "$STATE_FILE"
          fi
# ============================================
# NOTIFY SUCCESS
# ============================================
  # Sends the success notification via scripts/ci/notify.sh once verify-prod
  # has passed.
  notify-success:
    name: Notify Success
    runs-on: prod
    needs: [validate, verify-prod]
    if: success()
    steps:
      - name: Checkout scripts only
        uses: actions/checkout@v4
        with:
          sparse-checkout: scripts/
          sparse-checkout-cone-mode: true
          fetch-depth: 1

      - name: Send success notification
        # The tag is passed via env and quoted: user input is never spliced
        # into the shell source, and an empty or space-containing tag cannot
        # drop or split the third argument. Empty input falls back to "latest"
        # like run-name.
        run: |
          chmod +x "$GITHUB_WORKSPACE/scripts/ci/notify.sh"
          "$GITHUB_WORKSPACE/scripts/ci/notify.sh" success "Production deployment successful - $IMAGE_TAG is now live" "$IMAGE_TAG"
        env:
          IMAGE_TAG: ${{ inputs.image_tag || 'latest' }}
          DEPLOY_NOTIFY_EMAIL: ${{ vars.DEPLOY_NOTIFY_EMAIL }}
          RESEND_API_KEY: ${{ secrets.RESEND_API_KEY }}
# ============================================
# NOTIFY FAILURE
# ============================================
  # Sends the failure notification via scripts/ci/notify.sh when any of the
  # jobs it depends on has failed. It waits on rollback so the message goes
  # out after the rollback attempt has finished.
  notify-failure:
    name: Notify Failure
    runs-on: prod
    needs: [validate, deploy-prod, verify-prod, rollback]
    if: failure()
    steps:
      - name: Checkout scripts only
        uses: actions/checkout@v4
        with:
          sparse-checkout: scripts/
          sparse-checkout-cone-mode: true
          fetch-depth: 1

      - name: Send failure notification
        # The tag is passed via env and quoted: user input is never spliced
        # into the shell source, and an empty or space-containing tag cannot
        # drop or split the third argument. Empty input falls back to "latest"
        # like run-name.
        run: |
          chmod +x "$GITHUB_WORKSPACE/scripts/ci/notify.sh"
          "$GITHUB_WORKSPACE/scripts/ci/notify.sh" failure "Production deployment failed for $IMAGE_TAG" "$IMAGE_TAG"
        env:
          IMAGE_TAG: ${{ inputs.image_tag || 'latest' }}
          DEPLOY_NOTIFY_EMAIL: ${{ vars.DEPLOY_NOTIFY_EMAIL }}
          RESEND_API_KEY: ${{ secrets.RESEND_API_KEY }}