motovaultpro/.gitea/workflows/production.yaml at efbbe3408095bf5382e14d76994d7ce13cd1be13

egullickson/motovaultpro

Fork 0

Files

Eric Gullickson efbbe34080

Deploy to Staging / Build Images (push) Successful in 33s

Details

Deploy to Staging / Deploy to Staging (push) Successful in 31s

Details

Deploy to Staging / Verify Staging (push) Successful in 2m19s

Details

Deploy to Staging / Notify Staging Ready (push) Successful in 7s

Details

Deploy to Staging / Notify Staging Failure (push) Has been skipped

Details

fix: add backend health check step to production workflow

Add "Wait for backend health" step using docker exec to verify backend
is responding before attempting external health check. Matches staging
workflow pattern.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

2026-02-03 20:42:59 -06:00

468 lines

19 KiB

YAML

Raw Blame History

 # MotoVaultPro - production deployment workflow
 # Started manually (workflow_dispatch) once staging has been verified.
 # Performs a blue-green deployment with automatic rollback on failure.
 #
 # Optimization: every checkout is shallow (fetch-depth: 1) and sparse, so
 # each job only fetches the paths it actually needs.
 name: Deploy to Production
 run-name: Production Deploy - ${{ inputs.image_tag || 'latest' }}
 on:
   workflow_dispatch:
     inputs:
       image_tag:
         description: "Image tag to deploy (defaults to latest)"
         default: "latest"
         required: false
 # Workflow-wide settings, exposed to every step as environment variables.
 env:
   # Container registry the images are pulled from
   REGISTRY: git.motovaultpro.com
   # Deployment root on the production host
   DEPLOY_PATH: /opt/motovaultpro
   # Compose files, resolved relative to DEPLOY_PATH
   COMPOSE_FILE: docker-compose.yml
   COMPOSE_BLUE_GREEN: docker-compose.blue-green.yml
   # Kept as strings: they are consumed as shell arguments
   HEALTH_CHECK_TIMEOUT: '60'
   LOG_LEVEL: INFO
 jobs:
 jobs:
   # ============================================
   # VALIDATE - Determine target stack
   # ============================================
   # Pre-flight job: checks the runner's prerequisites, resolves the image
   # references to deploy, and picks the inactive blue/green stack as target.
   validate:
     name: Validate Prerequisites
     runs-on: prod
     outputs:
       target_stack: ${{ steps.determine-stack.outputs.target_stack }}
       backend_image: ${{ steps.set-images.outputs.backend_image }}
       frontend_image: ${{ steps.set-images.outputs.frontend_image }}
       ocr_image: ${{ steps.set-images.outputs.ocr_image }}
     steps:
       - name: Check Docker availability
         run: |
           docker info > /dev/null 2>&1 || { echo "ERROR - Docker not accessible"; exit 1; }
           docker compose version > /dev/null 2>&1 || { echo "ERROR - Docker Compose not available"; exit 1; }
       - name: Check deployment path
         run: |
           test -d "$DEPLOY_PATH" || { echo "ERROR - DEPLOY_PATH not found"; exit 1; }
       - name: Login to Gitea Container Registry
         # Credentials are passed through the environment instead of being
         # interpolated into the script text, so quotes or `$` in a secret
         # cannot break (or inject into) the shell command.
         env:
           REGISTRY_USER: ${{ secrets.REGISTRY_USER }}
           REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }}
         run: |
           printf '%s' "$REGISTRY_PASSWORD" | docker login -u "$REGISTRY_USER" --password-stdin "$REGISTRY"
       - name: Set image tags
         id: set-images
         env:
           # User-supplied input: never interpolate it directly into the script.
           # Falls back to 'latest' when empty, matching run-name above.
           IMAGE_TAG: ${{ inputs.image_tag || 'latest' }}
         run: |
           # Reject anything that is not a syntactically valid Docker tag before
           # it reaches $GITHUB_OUTPUT or a docker command line.
           if [[ ! "$IMAGE_TAG" =~ ^[A-Za-z0-9_][A-Za-z0-9_.-]{0,127}$ ]]; then
             echo "ERROR - Invalid image tag"
             exit 1
           fi
           for component in backend frontend ocr; do
             echo "${component}_image=$REGISTRY/egullickson/$component:$IMAGE_TAG" >> "$GITHUB_OUTPUT"
           done
       - name: Determine target stack
         id: determine-stack
         run: |
           # The target is always the stack that is NOT currently active.
           # Without a readable state file the job falls back to GREEN.
           STATE_FILE="$DEPLOY_PATH/config/deployment/state.json"
           if [ ! -f "$STATE_FILE" ]; then
             echo "target_stack=green" >> "$GITHUB_OUTPUT"
             echo "No state file found, defaulting to GREEN stack"
           elif ! command -v jq > /dev/null 2>&1; then
             echo "target_stack=green" >> "$GITHUB_OUTPUT"
             echo "WARNING: jq is not available, state file cannot be read; defaulting to GREEN stack"
           else
             # A state file without .active_stack is treated as BLUE being active
             ACTIVE_STACK=$(jq -r '.active_stack // "blue"' "$STATE_FILE")
             if [ "$ACTIVE_STACK" = "blue" ]; then
               echo "target_stack=green" >> "$GITHUB_OUTPUT"
               echo "Deploying to GREEN stack (BLUE is currently active)"
             else
               echo "target_stack=blue" >> "$GITHUB_OUTPUT"
               echo "Deploying to BLUE stack (GREEN is currently active)"
             fi
           fi
   # ============================================
   # DEPLOY PROD - Blue-green deployment
   # ============================================
   # Deploys the images resolved by `validate` to the inactive stack, gates on
   # image identity and health checks, then switches traffic to the new stack.
   deploy-prod:
     name: Deploy to Production
     runs-on: prod
     needs: validate
     env:
       TARGET_STACK: ${{ needs.validate.outputs.target_stack }}
       BACKEND_IMAGE: ${{ needs.validate.outputs.backend_image }}
       FRONTEND_IMAGE: ${{ needs.validate.outputs.frontend_image }}
       OCR_IMAGE: ${{ needs.validate.outputs.ocr_image }}
     steps:
       - name: Checkout scripts, config, and compose files
         uses: actions/checkout@v4
         with:
           sparse-checkout: |
             scripts/
             config/
             docker-compose.yml
             docker-compose.blue-green.yml
           sparse-checkout-cone-mode: false
           fetch-depth: 1
       - name: Sync config, scripts, and compose files to deploy path
         run: |
           # NOTE(review): --delete removes anything under $DEPLOY_PATH/config/
           # that is absent from the checkout, and tracked files are overwritten.
           # Presumably config/deployment/state.json survives or is rewritten by
           # switch-traffic.sh - confirm.
           rsync -av --delete "$GITHUB_WORKSPACE/config/" "$DEPLOY_PATH/config/"
           rsync -av --delete "$GITHUB_WORKSPACE/scripts/" "$DEPLOY_PATH/scripts/"
           cp "$GITHUB_WORKSPACE/docker-compose.yml" "$DEPLOY_PATH/"
           cp "$GITHUB_WORKSPACE/docker-compose.blue-green.yml" "$DEPLOY_PATH/"
       - name: Generate logging configuration
         run: |
           cd "$DEPLOY_PATH"
           chmod +x scripts/ci/generate-log-config.sh
           ./scripts/ci/generate-log-config.sh "$LOG_LEVEL"
       - name: Login to registry
         # Credentials go through the environment rather than into the script
         # text, so special characters in a secret cannot break the command.
         env:
           REGISTRY_USER: ${{ secrets.REGISTRY_USER }}
           REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }}
         run: |
           printf '%s' "$REGISTRY_PASSWORD" | docker login -u "$REGISTRY_USER" --password-stdin "$REGISTRY"
       - name: Inject secrets
         run: |
           chmod +x "$GITHUB_WORKSPACE/scripts/inject-secrets.sh"
           "$GITHUB_WORKSPACE/scripts/inject-secrets.sh"
         env:
           POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }}
           AUTH0_CLIENT_SECRET: ${{ secrets.AUTH0_CLIENT_SECRET }}
           AUTH0_MANAGEMENT_CLIENT_ID: ${{ secrets.AUTH0_MANAGEMENT_CLIENT_ID }}
           AUTH0_MANAGEMENT_CLIENT_SECRET: ${{ secrets.AUTH0_MANAGEMENT_CLIENT_SECRET }}
           GOOGLE_MAPS_API_KEY: ${{ secrets.GOOGLE_MAPS_API_KEY }}
           GOOGLE_MAPS_MAP_ID: ${{ secrets.GOOGLE_MAPS_MAP_ID }}
           CF_DNS_API_TOKEN: ${{ secrets.CF_DNS_API_TOKEN }}
           RESEND_API_KEY: ${{ secrets.RESEND_API_KEY }}
           STRIPE_SECRET_KEY: ${{ secrets.STRIPE_SECRET_KEY }}
           STRIPE_WEBHOOK_SECRET: ${{ secrets.STRIPE_WEBHOOK_SECRET }}
       - name: Initialize data directories
         run: |
           cd "$DEPLOY_PATH"
           sudo mkdir -p data/backups data/documents data/traefik
           sudo chown -R 1001:1001 data/backups data/documents
           sudo chmod 755 data/backups data/documents
           # Traefik acme.json requires 600 permissions
           if [ ! -f data/traefik/acme.json ]; then
             sudo touch data/traefik/acme.json
           fi
           sudo chmod 600 data/traefik/acme.json
       - name: Pull new images
         run: |
           docker pull "$BACKEND_IMAGE"
           docker pull "$FRONTEND_IMAGE"
           docker pull "$OCR_IMAGE"
       - name: Record expected image IDs
         id: expected-images
         run: |
           # Get the image IDs we just pulled - these are what containers should use
           FRONTEND_ID=$(docker images --format '{{.ID}}' "$FRONTEND_IMAGE" | head -1)
           BACKEND_ID=$(docker images --format '{{.ID}}' "$BACKEND_IMAGE" | head -1)
           echo "Expected frontend image ID: $FRONTEND_ID"
           echo "Expected backend image ID: $BACKEND_ID"
           echo "frontend_id=$FRONTEND_ID" >> "$GITHUB_OUTPUT"
           echo "backend_id=$BACKEND_ID" >> "$GITHUB_OUTPUT"
       - name: Start target stack
         run: |
           cd "$DEPLOY_PATH"
           # Make sure the image references from the job env are exported to
           # docker compose
           export BACKEND_IMAGE FRONTEND_IMAGE OCR_IMAGE
           # --force-recreate ensures containers are recreated even if image tag is same
           # This prevents stale container content when image digest changes
           # Start shared OCR service and target stack
           docker compose -f "$COMPOSE_FILE" -f "$COMPOSE_BLUE_GREEN" up -d --force-recreate \
             mvp-ocr "mvp-frontend-$TARGET_STACK" "mvp-backend-$TARGET_STACK"
       - name: Wait for stack initialization
         run: sleep 10
       - name: Verify container images
         env:
           EXPECTED_FRONTEND: ${{ steps.expected-images.outputs.frontend_id }}
           EXPECTED_BACKEND: ${{ steps.expected-images.outputs.backend_id }}
         run: |
           # Verify containers are running the expected images.
           # The container's image digest is trimmed to its first 12 characters
           # so it can be compared with the ID recorded from `docker images`.
           RUNNING_FRONTEND=$(docker inspect --format='{{.Image}}' "mvp-frontend-$TARGET_STACK" | sed 's/sha256://' | cut -c1-12)
           RUNNING_BACKEND=$(docker inspect --format='{{.Image}}' "mvp-backend-$TARGET_STACK" | sed 's/sha256://' | cut -c1-12)
           echo "Frontend - Expected: $EXPECTED_FRONTEND, Running: $RUNNING_FRONTEND"
           echo "Backend - Expected: $EXPECTED_BACKEND, Running: $RUNNING_BACKEND"
           if [[ "$RUNNING_FRONTEND" != "$EXPECTED_FRONTEND" ]]; then
             echo "ERROR: Frontend container not using expected image!"
             echo "Container may be stale. Force recreate should have prevented this."
             exit 1
           fi
           if [[ "$RUNNING_BACKEND" != "$EXPECTED_BACKEND" ]]; then
             echo "ERROR: Backend container not using expected image!"
             exit 1
           fi
           echo "OK: All containers using correct images"
       - name: Run health check
         run: |
           chmod +x "$GITHUB_WORKSPACE/scripts/ci/health-check.sh"
           DEPLOY_PATH="$DEPLOY_PATH" "$GITHUB_WORKSPACE/scripts/ci/health-check.sh" "$TARGET_STACK" "$HEALTH_CHECK_TIMEOUT"
       - name: Start Traefik
         run: |
           cd "$DEPLOY_PATH"
           docker compose -f "$COMPOSE_FILE" -f "$COMPOSE_BLUE_GREEN" up -d mvp-traefik
       - name: Wait for Traefik
         run: |
           echo "Waiting for Traefik to be healthy..."
           # grep -x matches the whole line: a plain `grep healthy` would also
           # accept the status "unhealthy" and let a broken Traefik through.
           timeout 30 bash -c "until docker inspect --format='{{.State.Health.Status}}' mvp-traefik 2>/dev/null | grep -qx healthy; do sleep 2; done" || {
             echo "Traefik health check timed out, checking status..."
             docker inspect --format='{{.State.Status}}' mvp-traefik
             docker logs mvp-traefik --tail 20
             exit 1
           }
           echo "Traefik is healthy"
       - name: Switch traffic
         run: |
           chmod +x "$GITHUB_WORKSPACE/scripts/ci/switch-traffic.sh"
           # DEPLOY_PATH ensures script modifies config at /opt/motovaultpro, not checkout dir
           DEPLOY_PATH="$DEPLOY_PATH" "$GITHUB_WORKSPACE/scripts/ci/switch-traffic.sh" "$TARGET_STACK" instant
       - name: Update deployment state
         env:
           # User-supplied input: handed to jq as data via --arg instead of
           # being interpolated into the script text.
           IMAGE_TAG: ${{ inputs.image_tag || 'latest' }}
         run: |
           cd "$DEPLOY_PATH"
           STATE_FILE="config/deployment/state.json"
           # Best effort: skipped when the state file or jq is unavailable
           if [ -f "$STATE_FILE" ] && command -v jq > /dev/null 2>&1; then
             TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
             # Write to a temp file first so a jq failure cannot truncate the state file
             jq --arg commit "$IMAGE_TAG" \
                --arg ts "$TIMESTAMP" \
                '.last_deployment = $ts | .last_deployment_commit = $commit | .last_deployment_status = "success" | .rollback_available = true' \
                "$STATE_FILE" > "${STATE_FILE}.tmp" && mv "${STATE_FILE}.tmp" "$STATE_FILE"
           fi
   # ============================================
   # VERIFY PROD - External health check
   # ============================================
   # Post-deployment verification: container state, Docker healthchecks, an
   # in-container backend probe, the public health endpoint, and the Traefik
   # routing weights. Any failing step fails the job.
   verify-prod:
     name: Verify Production
     runs-on: prod
     needs: [validate, deploy-prod]
     env:
       TARGET_STACK: ${{ needs.validate.outputs.target_stack }}
     steps:
       - name: Wait for routing propagation
         run: sleep 5
       - name: Check container status and health
         run: |
           for service in "mvp-frontend-$TARGET_STACK" "mvp-backend-$TARGET_STACK" mvp-ocr; do
             status=$(docker inspect --format='{{.State.Status}}' "$service" 2>/dev/null || echo "not found")
             if [ "$status" != "running" ]; then
               echo "ERROR: $service is not running (status: $status)"
               docker logs "$service" --tail 50 2>/dev/null || true
               exit 1
             fi
             echo "OK: $service is running"
           done
           # Wait for Docker healthchecks to complete (services with healthcheck defined)
           echo ""
           echo "Waiting for Docker healthchecks..."
           for service in "mvp-frontend-$TARGET_STACK" "mvp-backend-$TARGET_STACK" mvp-ocr; do
             # Check if service has a healthcheck defined
             has_healthcheck=$(docker inspect --format='{{if .Config.Healthcheck}}true{{else}}false{{end}}' "$service" 2>/dev/null || echo "false")
             if [ "$has_healthcheck" = "true" ]; then
               # 24 attempts x 10 seconds = 4 minutes max wait (backend with fresh migrations can take ~3 min)
               for i in $(seq 1 24); do
                 health=$(docker inspect --format='{{.State.Health.Status}}' "$service" 2>/dev/null || echo "unknown")
                 if [ "$health" = "healthy" ]; then
                   echo "OK: $service is healthy"
                   break
                 elif [ "$health" = "unhealthy" ]; then
                   echo "ERROR: $service is unhealthy"
                   docker logs "$service" --tail 50 2>/dev/null || true
                   exit 1
                 fi
                 if [ "$i" -eq 24 ]; then
                   echo "ERROR: $service health check timed out (status: $health)"
                   docker logs "$service" --tail 50 2>/dev/null || true
                   exit 1
                 fi
                 echo "Waiting for $service healthcheck... (attempt $i/24, status: $health)"
                 sleep 10
               done
             else
               echo "SKIP: $service has no healthcheck defined"
             fi
           done
       - name: Wait for backend health
         run: |
           # Probe the backend from inside its own container (up to 6 tries, 10s apart)
           for i in 1 2 3 4 5 6; do
             if docker exec "mvp-backend-$TARGET_STACK" curl -sf http://localhost:3001/health > /dev/null 2>&1; then
               echo "OK: Backend health check passed"
               exit 0
             fi
             if [ "$i" -eq 6 ]; then
               echo "ERROR: Backend health check failed after 6 attempts"
               docker logs "mvp-backend-$TARGET_STACK" --tail 100
               exit 1
             fi
             echo "Attempt $i/6: Backend not ready, waiting 10s..."
             sleep 10
           done
       - name: External health check
         run: |
           REQUIRED_FEATURES='["admin","auth","onboarding","vehicles","documents","fuel-logs","stations","maintenance","platform","notifications","user-profile","user-preferences","user-export"]'
           for i in 1 2 3 4 5 6; do
             RESPONSE=$(curl -sf https://motovaultpro.com/api/health 2>/dev/null) || {
               echo "Attempt $i/6: Connection failed, waiting 10s..."
               sleep 10
               continue
             }
             # Check status is "healthy". A body that is not valid JSON is
             # treated as a failed attempt (and retried) instead of letting a
             # jq error abort the whole step.
             STATUS=$(echo "$RESPONSE" | jq -r '.status' 2>/dev/null) || STATUS="unparseable"
             if [ "$STATUS" != "healthy" ]; then
               echo "Attempt $i/6: Status is '$STATUS', not 'healthy'. Waiting 10s..."
               sleep 10
               continue
             fi
             # Check all required features are present. `// []` makes a missing
             # or null .features count as "every feature missing" rather than a
             # jq type error; any other jq failure is also retried.
             MISSING=$(echo "$RESPONSE" | jq -r --argjson required "$REQUIRED_FEATURES" '
               ($required - (.features // [])) | if length > 0 then . else empty end | @json
             ' 2>/dev/null) || MISSING="unable to evaluate features"
             if [ -n "$MISSING" ]; then
               echo "Attempt $i/6: Missing features: $MISSING. Waiting 10s..."
               sleep 10
               continue
             fi
             FEATURE_COUNT=$(echo "$RESPONSE" | jq '.features | length')
             echo "OK: Production health check passed - status: healthy, features: $FEATURE_COUNT"
             exit 0
           done
           echo "ERROR: Production health check failed after 6 attempts"
           echo "Last response: $RESPONSE"
           exit 1
       - name: Verify container status
         run: |
           # NOTE(review): unlike the earlier step, this one requires a
           # "healthy" status, so a service without a Docker healthcheck would
           # presumably fail here - confirm both services define one.
           for service in "mvp-frontend-$TARGET_STACK" "mvp-backend-$TARGET_STACK"; do
             status=$(docker inspect --format='{{.State.Status}}' "$service" 2>/dev/null || echo "not found")
             health=$(docker inspect --format='{{.State.Health.Status}}' "$service" 2>/dev/null || echo "unknown")
             if [ "$status" != "running" ] || [ "$health" != "healthy" ]; then
               echo "ERROR: $service is not healthy (status: $status, health: $health)"
               docker logs "$service" --tail 50 2>/dev/null || true
               exit 1
             fi
             echo "OK: $service is running and healthy"
           done
       - name: Validate Traefik routing weights
         run: |
           # Verify traffic has actually switched to the new stack
           BLUE_GREEN_CONFIG="$DEPLOY_PATH/config/traefik/dynamic/blue-green.yml"
           # The freshly deployed stack must carry all of the traffic
           EXPECTED_TARGET_WEIGHT=100
           if [[ "$TARGET_STACK" == "green" ]]; then
             TARGET_SVC="mvp-frontend-green-svc"
           else
             TARGET_SVC="mvp-frontend-blue-svc"
           fi
           # Reads the weight from the line following the service name.
           # `|| true` keeps a no-match from aborting the step before the
           # diagnostic below is printed.
           ACTUAL_WEIGHT=$(grep -A1 "$TARGET_SVC" "$BLUE_GREEN_CONFIG" | grep weight | grep -oE '[0-9]+' | head -1 || true)
           if [[ "$ACTUAL_WEIGHT" != "$EXPECTED_TARGET_WEIGHT" ]]; then
             echo "ERROR: Traffic not routed to $TARGET_STACK stack!"
             echo "Expected weight for $TARGET_SVC: $EXPECTED_TARGET_WEIGHT, Actual: $ACTUAL_WEIGHT"
             grep -A2 weight "$BLUE_GREEN_CONFIG" || true
             exit 1
           fi
           echo "OK: Traffic correctly routed to $TARGET_STACK (weight: $ACTUAL_WEIGHT)"
   # ============================================
   # ROLLBACK - Auto-rollback on failure
   # ============================================
   # Runs only when an upstream job has failed: invokes the rollback script and
   # marks the deployment as rolled back in the state file.
   rollback:
     name: Auto Rollback
     runs-on: prod
     needs:
       - validate
       - deploy-prod
       - verify-prod
     if: failure()
     steps:
       - name: Checkout scripts
         uses: actions/checkout@v4
         with:
           fetch-depth: 1
           sparse-checkout: scripts/
           sparse-checkout-cone-mode: true
       - name: Execute rollback
         run: |
           rollback_script="$GITHUB_WORKSPACE/scripts/ci/auto-rollback.sh"
           chmod +x "$rollback_script"
           # The single argument is the rollback reason handed to the script
           DEPLOY_PATH="$DEPLOY_PATH" "$rollback_script" "Production verification failed - automatic rollback"
       - name: Update state
         run: |
           cd "$DEPLOY_PATH"
           state_path="config/deployment/state.json"
           # Nothing to record when the state file or jq is unavailable
           [ -f "$state_path" ] || exit 0
           command -v jq > /dev/null 2>&1 || exit 0
           # Write to a temp file, then move it over the original on success
           tmp_path="${state_path}.tmp"
           jq '.last_deployment_status = "rolled_back"' "$state_path" > "$tmp_path" && mv "$tmp_path" "$state_path"
   # ============================================
   # NOTIFY SUCCESS
   # ============================================
   # Sends the success notification once validation and verification passed.
   notify-success:
     name: Notify Success
     runs-on: prod
     needs: [validate, verify-prod]
     if: success()
     steps:
       - name: Checkout scripts only
         uses: actions/checkout@v4
         with:
           sparse-checkout: scripts/
           sparse-checkout-cone-mode: true
           fetch-depth: 1
       - name: Send success notification
         run: |
           chmod +x "$GITHUB_WORKSPACE/scripts/ci/notify.sh"
           "$GITHUB_WORKSPACE/scripts/ci/notify.sh" success "Production deployment successful - $IMAGE_TAG is now live" "$IMAGE_TAG"
         env:
           # User-supplied input: passed through the environment and quoted at
           # use, so it cannot inject shell syntax or split into several
           # arguments. Falls back to 'latest' when empty, matching run-name.
           IMAGE_TAG: ${{ inputs.image_tag || 'latest' }}
           DEPLOY_NOTIFY_EMAIL: ${{ vars.DEPLOY_NOTIFY_EMAIL }}
           RESEND_API_KEY: ${{ secrets.RESEND_API_KEY }}
   # ============================================
   # NOTIFY FAILURE
   # ============================================
   # Sends the failure notification when any upstream job failed. It waits for
   # the rollback job (listed in `needs`) before running.
   notify-failure:
     name: Notify Failure
     runs-on: prod
     needs: [validate, deploy-prod, verify-prod, rollback]
     if: failure()
     steps:
       - name: Checkout scripts only
         uses: actions/checkout@v4
         with:
           sparse-checkout: scripts/
           sparse-checkout-cone-mode: true
           fetch-depth: 1
       - name: Send failure notification
         run: |
           chmod +x "$GITHUB_WORKSPACE/scripts/ci/notify.sh"
           "$GITHUB_WORKSPACE/scripts/ci/notify.sh" failure "Production deployment failed for $IMAGE_TAG" "$IMAGE_TAG"
         env:
           # User-supplied input: passed through the environment and quoted at
           # use, so it cannot inject shell syntax or split into several
           # arguments. Falls back to 'latest' when empty, matching run-name.
           IMAGE_TAG: ${{ inputs.image_tag || 'latest' }}
           DEPLOY_NOTIFY_EMAIL: ${{ vars.DEPLOY_NOTIFY_EMAIL }}
           RESEND_API_KEY: ${{ secrets.RESEND_API_KEY }}

468 lines 19 KiB YAML Raw Blame History

468 lines

19 KiB

YAML

Raw Blame History