---
# MotoVaultPro Production Deployment Workflow
# Manual trigger only - run after verifying staging
# Blue-green deployment with auto-rollback
#
# Optimization: Uses sparse checkout (scripts/ only) + shallow clone
# since all scripts run from $DEPLOY_PATH on the production server

name: Deploy to Production
run-name: Production Deploy - ${{ inputs.image_tag || 'latest' }}

on:
  workflow_dispatch:
    inputs:
      image_tag:
        description: 'Image tag to deploy (defaults to latest)'
        required: false
        default: 'latest'

env:
  REGISTRY: git.motovaultpro.com
  DEPLOY_PATH: /opt/motovaultpro
  BASE_COMPOSE_FILE: docker-compose.yml
  COMPOSE_BLUE_GREEN: docker-compose.blue-green.yml
  COMPOSE_PROD: docker-compose.prod.yml
  # Seconds allotted to the post-deploy stack health check before failing
  HEALTH_CHECK_TIMEOUT: "240"
  LOG_LEVEL: INFO

jobs:
  # ============================================
  # VALIDATE - Determine target stack
  # ============================================
  validate:
    name: Validate Prerequisites
    runs-on: prod
    outputs:
      target_stack: ${{ steps.determine-stack.outputs.target_stack }}
      backend_image: ${{ steps.set-images.outputs.backend_image }}
      frontend_image: ${{ steps.set-images.outputs.frontend_image }}
      ocr_image: ${{ steps.set-images.outputs.ocr_image }}
    steps:
      - name: Check Docker availability
        run: |
          docker info > /dev/null 2>&1 || (echo "ERROR - Docker not accessible" && exit 1)
          docker compose version > /dev/null 2>&1 || (echo "ERROR - Docker Compose not available" && exit 1)

      - name: Check deployment path
        run: test -d "$DEPLOY_PATH" || (echo "ERROR - DEPLOY_PATH not found" && exit 1)

      - name: Login to Gitea Container Registry
        run: |
          echo "${{ secrets.REGISTRY_PASSWORD }}" | docker login -u "${{ secrets.REGISTRY_USER }}" --password-stdin "$REGISTRY"

      - name: Set image tags
        id: set-images
        run: |
          TAG="${{ inputs.image_tag }}"
          echo "backend_image=$REGISTRY/egullickson/backend:$TAG" >> "$GITHUB_OUTPUT"
          echo "frontend_image=$REGISTRY/egullickson/frontend:$TAG" >> "$GITHUB_OUTPUT"
          echo "ocr_image=$REGISTRY/egullickson/ocr:$TAG" >> "$GITHUB_OUTPUT"

      # Blue-green: always deploy to whichever stack is NOT currently active
      - name: Determine target stack
        id: determine-stack
        run: |
          STATE_FILE="$DEPLOY_PATH/config/deployment/state.json"
          if [ -f "$STATE_FILE" ] && command -v jq &> /dev/null; then
            ACTIVE_STACK=$(jq -r '.active_stack // "blue"' "$STATE_FILE")
            if [ "$ACTIVE_STACK" = "blue" ]; then
              echo "target_stack=green" >> "$GITHUB_OUTPUT"
              echo "Deploying to GREEN stack (BLUE is currently active)"
            else
              echo "target_stack=blue" >> "$GITHUB_OUTPUT"
              echo "Deploying to BLUE stack (GREEN is currently active)"
            fi
          else
            echo "target_stack=green" >> "$GITHUB_OUTPUT"
            echo "No state file found, defaulting to GREEN stack"
          fi

  # ============================================
  # DEPLOY PROD - Blue-green deployment
  # ============================================
  deploy-prod:
    name: Deploy to Production
    runs-on: prod
    needs: validate
    env:
      TARGET_STACK: ${{ needs.validate.outputs.target_stack }}
      BACKEND_IMAGE: ${{ needs.validate.outputs.backend_image }}
      FRONTEND_IMAGE: ${{ needs.validate.outputs.frontend_image }}
      OCR_IMAGE: ${{ needs.validate.outputs.ocr_image }}
    steps:
      - name: Checkout scripts, config, and compose files
        uses: actions/checkout@v4
        with:
          sparse-checkout: |
            scripts/
            config/
            secrets/app/google-wif-config.json
            docker-compose.yml
            docker-compose.blue-green.yml
            docker-compose.prod.yml
          sparse-checkout-cone-mode: false
          fetch-depth: 1

      - name: Sync config, scripts, and compose files to deploy path
        run: |
          rsync -av --delete "$GITHUB_WORKSPACE/config/" "$DEPLOY_PATH/config/"
          rsync -av --delete "$GITHUB_WORKSPACE/scripts/" "$DEPLOY_PATH/scripts/"
          cp "$GITHUB_WORKSPACE/docker-compose.yml" "$DEPLOY_PATH/"
          cp "$GITHUB_WORKSPACE/docker-compose.blue-green.yml" "$DEPLOY_PATH/"
          cp "$GITHUB_WORKSPACE/docker-compose.prod.yml" "$DEPLOY_PATH/"
          # WIF credential config (not a secret -- references Auth0 token script path)
          # Remove any Docker-created directory artifact from failed bind mounts
          rm -rf "$DEPLOY_PATH/secrets/app/google-wif-config.json"
          mkdir -p "$DEPLOY_PATH/secrets/app"
          cp "$GITHUB_WORKSPACE/secrets/app/google-wif-config.json" "$DEPLOY_PATH/secrets/app/"

      - name: Generate logging configuration
        run: |
          cd "$DEPLOY_PATH"
          chmod +x scripts/ci/generate-log-config.sh
          ./scripts/ci/generate-log-config.sh "$LOG_LEVEL"

      - name: Login to registry
        run: |
          echo "${{ secrets.REGISTRY_PASSWORD }}" | docker login -u "${{ secrets.REGISTRY_USER }}" --password-stdin "$REGISTRY"

      - name: Inject secrets
        run: |
          cd "$DEPLOY_PATH"
          chmod +x scripts/inject-secrets.sh
          # Env-prefix form so SECRETS_DIR is visible to the child script
          SECRETS_DIR="$DEPLOY_PATH/secrets/app" ./scripts/inject-secrets.sh
        env:
          POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }}
          AUTH0_CLIENT_SECRET: ${{ secrets.AUTH0_CLIENT_SECRET }}
          AUTH0_MANAGEMENT_CLIENT_ID: ${{ secrets.AUTH0_MANAGEMENT_CLIENT_ID }}
          AUTH0_MANAGEMENT_CLIENT_SECRET: ${{ secrets.AUTH0_MANAGEMENT_CLIENT_SECRET }}
          AUTH0_OCR_CLIENT_ID: ${{ secrets.AUTH0_OCR_CLIENT_ID }}
          AUTH0_OCR_CLIENT_SECRET: ${{ secrets.AUTH0_OCR_CLIENT_SECRET }}
          GOOGLE_MAPS_API_KEY: ${{ secrets.GOOGLE_MAPS_API_KEY }}
          GOOGLE_MAPS_MAP_ID: ${{ secrets.GOOGLE_MAPS_MAP_ID }}
          CF_DNS_API_TOKEN: ${{ secrets.CF_DNS_API_TOKEN }}
          RESEND_API_KEY: ${{ secrets.RESEND_API_KEY }}
          STRIPE_SECRET_KEY: ${{ secrets.STRIPE_SECRET_KEY }}
          STRIPE_WEBHOOK_SECRET: ${{ secrets.STRIPE_WEBHOOK_SECRET }}

      - name: Initialize data directories
        run: |
          cd "$DEPLOY_PATH"
          sudo mkdir -p data/backups data/documents data/traefik
          sudo chown -R 1001:1001 data/backups data/documents
          sudo chmod 755 data/backups data/documents
          # Traefik acme.json requires 600 permissions
          if [ ! -f data/traefik/acme.json ]; then
            sudo touch data/traefik/acme.json
          fi
          sudo chmod 600 data/traefik/acme.json

      - name: Pull new images
        run: |
          docker pull "$BACKEND_IMAGE"
          docker pull "$FRONTEND_IMAGE"
          docker pull "$OCR_IMAGE"

      - name: Record expected image IDs
        id: expected-images
        run: |
          # Get the image IDs we just pulled - these are what containers should use
          FRONTEND_ID=$(docker images --format '{{.ID}}' "$FRONTEND_IMAGE" | head -1)
          BACKEND_ID=$(docker images --format '{{.ID}}' "$BACKEND_IMAGE" | head -1)
          echo "Expected frontend image ID: $FRONTEND_ID"
          echo "Expected backend image ID: $BACKEND_ID"
          echo "frontend_id=$FRONTEND_ID" >> "$GITHUB_OUTPUT"
          echo "backend_id=$BACKEND_ID" >> "$GITHUB_OUTPUT"

      - name: Start shared services
        run: |
          cd "$DEPLOY_PATH"
          # Start shared infrastructure services (database, cache, logging)
          # --no-recreate prevents restarting postgres/redis when config files change
          # These must persist across blue-green deployments to avoid data service disruption
          docker compose -f $BASE_COMPOSE_FILE -f $COMPOSE_BLUE_GREEN -f $COMPOSE_PROD up -d --no-recreate \
            mvp-postgres mvp-redis mvp-loki mvp-alloy mvp-grafana

      - name: Wait for shared services health
        run: |
          echo "Waiting for PostgreSQL and Redis to be healthy..."
          # 24 attempts x 5s = 2 minutes per service
          for service in mvp-postgres mvp-redis; do
            for i in $(seq 1 24); do
              health=$(docker inspect --format='{{.State.Health.Status}}' $service 2>/dev/null || echo "unknown")
              if [ "$health" = "healthy" ]; then
                echo "OK: $service is healthy"
                break
              fi
              if [ $i -eq 24 ]; then
                echo "ERROR: $service health check timed out (status: $health)"
                docker logs $service --tail 50 2>/dev/null || true
                exit 1
              fi
              echo "Waiting for $service... (attempt $i/24, status: $health)"
              sleep 5
            done
          done
          echo "All shared services healthy"

      - name: Start target stack
        run: |
          cd "$DEPLOY_PATH"
          export BACKEND_IMAGE=$BACKEND_IMAGE
          export FRONTEND_IMAGE=$FRONTEND_IMAGE
          export OCR_IMAGE=$OCR_IMAGE
          # --force-recreate ensures containers are recreated even if image tag is same
          # This prevents stale container content when image digest changes
          # Start shared OCR service and target stack
          docker compose -f $BASE_COMPOSE_FILE -f $COMPOSE_BLUE_GREEN -f $COMPOSE_PROD up -d --force-recreate \
            mvp-ocr mvp-frontend-$TARGET_STACK mvp-backend-$TARGET_STACK

      - name: Wait for stack initialization
        run: sleep 5

      - name: Verify container images
        run: |
          # Verify containers are running the expected images
          EXPECTED_FRONTEND="${{ steps.expected-images.outputs.frontend_id }}"
          EXPECTED_BACKEND="${{ steps.expected-images.outputs.backend_id }}"
          # Normalize running image ref to a 12-char short ID for comparison
          RUNNING_FRONTEND=$(docker inspect --format='{{.Image}}' mvp-frontend-$TARGET_STACK | sed 's/sha256://' | cut -c1-12)
          RUNNING_BACKEND=$(docker inspect --format='{{.Image}}' mvp-backend-$TARGET_STACK | sed 's/sha256://' | cut -c1-12)
          echo "Frontend - Expected: $EXPECTED_FRONTEND, Running: $RUNNING_FRONTEND"
          echo "Backend - Expected: $EXPECTED_BACKEND, Running: $RUNNING_BACKEND"
          if [[ "$RUNNING_FRONTEND" != "$EXPECTED_FRONTEND" ]]; then
            echo "ERROR: Frontend container not using expected image!"
            echo "Container may be stale. Force recreate should have prevented this."
            exit 1
          fi
          if [[ "$RUNNING_BACKEND" != "$EXPECTED_BACKEND" ]]; then
            echo "ERROR: Backend container not using expected image!"
            exit 1
          fi
          echo "OK: All containers using correct images"

      - name: Run health check
        run: |
          chmod +x "$GITHUB_WORKSPACE/scripts/ci/health-check.sh"
          DEPLOY_PATH="$DEPLOY_PATH" "$GITHUB_WORKSPACE/scripts/ci/health-check.sh" "$TARGET_STACK" "$HEALTH_CHECK_TIMEOUT"

      - name: Start Traefik
        run: |
          cd "$DEPLOY_PATH"
          docker compose -f $BASE_COMPOSE_FILE -f $COMPOSE_BLUE_GREEN -f $COMPOSE_PROD up -d mvp-traefik

      - name: Wait for Traefik
        run: |
          echo "Waiting for Traefik to be healthy..."
          timeout 30 bash -c "until docker inspect --format='{{.State.Health.Status}}' mvp-traefik 2>/dev/null | grep -q healthy; do sleep 2; done" || {
            echo "Traefik health check timed out, checking status..."
            docker inspect --format='{{.State.Status}}' mvp-traefik
            docker logs mvp-traefik --tail 20
            exit 1
          }
          echo "Traefik is healthy"

      - name: Switch traffic
        run: |
          chmod +x "$GITHUB_WORKSPACE/scripts/ci/switch-traffic.sh"
          # DEPLOY_PATH ensures script modifies config at /opt/motovaultpro, not checkout dir
          DEPLOY_PATH="$DEPLOY_PATH" "$GITHUB_WORKSPACE/scripts/ci/switch-traffic.sh" "$TARGET_STACK" instant

      - name: Update deployment state
        run: |
          cd "$DEPLOY_PATH"
          STATE_FILE="config/deployment/state.json"
          if [ -f "$STATE_FILE" ] && command -v jq &> /dev/null; then
            TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
            jq --arg commit "${{ inputs.image_tag }}" \
              --arg ts "$TIMESTAMP" \
              '.last_deployment = $ts | .last_deployment_commit = $commit | .last_deployment_status = "success" | .rollback_available = true' \
              "$STATE_FILE" > "${STATE_FILE}.tmp" && mv "${STATE_FILE}.tmp" "$STATE_FILE"
          fi

  # ============================================
  # VERIFY PROD - External health check
  # ============================================
  verify-prod:
    name: Verify Production
    runs-on: prod
    needs: [validate, deploy-prod]
    env:
      TARGET_STACK: ${{ needs.validate.outputs.target_stack }}
    steps:
      - name: Wait for routing propagation
        run: sleep 5

      - name: Check container status and health
        run: |
          for service in mvp-frontend-$TARGET_STACK mvp-backend-$TARGET_STACK mvp-ocr; do
            status=$(docker inspect --format='{{.State.Status}}' $service 2>/dev/null || echo "not found")
            if [ "$status" != "running" ]; then
              echo "ERROR: $service is not running (status: $status)"
              docker logs $service --tail 50 2>/dev/null || true
              exit 1
            fi
            echo "OK: $service is running"
          done

          # Wait for Docker healthchecks to complete (services with healthcheck defined)
          echo ""
          echo "Waiting for Docker healthchecks..."
          for service in mvp-frontend-$TARGET_STACK mvp-backend-$TARGET_STACK mvp-ocr; do
            # Check if service has a healthcheck defined
            has_healthcheck=$(docker inspect --format='{{if .Config.Healthcheck}}true{{else}}false{{end}}' $service 2>/dev/null || echo "false")
            if [ "$has_healthcheck" = "true" ]; then
              # 48 attempts x 5 seconds = 4 minutes max wait (backend with fresh migrations can take ~3 min)
              for i in $(seq 1 48); do
                health=$(docker inspect --format='{{.State.Health.Status}}' $service 2>/dev/null || echo "unknown")
                if [ "$health" = "healthy" ]; then
                  echo "OK: $service is healthy"
                  break
                fi
                # Don't fail immediately on unhealthy - container may still be starting up
                # and can recover. Let the timeout handle truly broken containers.
                if [ $i -eq 48 ]; then
                  echo "ERROR: $service health check timed out (status: $health)"
                  docker logs $service --tail 100 2>/dev/null || true
                  exit 1
                fi
                echo "Waiting for $service healthcheck... (attempt $i/48, status: $health)"
                sleep 5
              done
            else
              echo "SKIP: $service has no healthcheck defined"
            fi
          done

      - name: Wait for backend health
        run: |
          for i in $(seq 1 12); do
            if docker exec mvp-backend-$TARGET_STACK curl -sf http://localhost:3001/health > /dev/null 2>&1; then
              echo "OK: Backend health check passed"
              exit 0
            fi
            if [ $i -eq 12 ]; then
              echo "ERROR: Backend health check failed after 12 attempts"
              docker logs mvp-backend-$TARGET_STACK --tail 100
              exit 1
            fi
            echo "Attempt $i/12: Backend not ready, waiting 5s..."
            sleep 5
          done

      - name: External health check
        run: |
          REQUIRED_FEATURES='["admin","auth","onboarding","vehicles","documents","fuel-logs","stations","maintenance","platform","notifications","user-profile","user-preferences","user-export"]'
          for i in $(seq 1 12); do
            RESPONSE=$(curl -sf https://motovaultpro.com/api/health 2>/dev/null) || {
              echo "Attempt $i/12: Connection failed, waiting 5s..."
              sleep 5
              continue
            }
            # Check status is "healthy"
            STATUS=$(echo "$RESPONSE" | jq -r '.status')
            if [ "$STATUS" != "healthy" ]; then
              echo "Attempt $i/12: Status is '$STATUS', not 'healthy'. Waiting 5s..."
              sleep 5
              continue
            fi
            # Check all required features are present
            MISSING=$(echo "$RESPONSE" | jq -r --argjson required "$REQUIRED_FEATURES" '
              $required - .features
              | if length > 0 then . else empty end
              | @json
            ')
            if [ -n "$MISSING" ]; then
              echo "Attempt $i/12: Missing features: $MISSING. Waiting 5s..."
              sleep 5
              continue
            fi
            FEATURE_COUNT=$(echo "$RESPONSE" | jq '.features | length')
            echo "OK: Production health check passed - status: healthy, features: $FEATURE_COUNT"
            exit 0
          done
          echo "ERROR: Production health check failed after 12 attempts"
          echo "Last response: $RESPONSE"
          exit 1

      - name: Verify container status
        run: |
          for service in mvp-frontend-$TARGET_STACK mvp-backend-$TARGET_STACK; do
            status=$(docker inspect --format='{{.State.Status}}' $service 2>/dev/null || echo "not found")
            health=$(docker inspect --format='{{.State.Health.Status}}' $service 2>/dev/null || echo "unknown")
            if [ "$status" != "running" ] || [ "$health" != "healthy" ]; then
              echo "ERROR: $service is not healthy (status: $status, health: $health)"
              docker logs $service --tail 50 2>/dev/null || true
              exit 1
            fi
            echo "OK: $service is running and healthy"
          done

      - name: Validate Traefik routing weights
        run: |
          # Verify traffic has actually switched to the new stack
          BLUE_GREEN_CONFIG="$DEPLOY_PATH/config/traefik/dynamic/blue-green.yml"
          if [[ "$TARGET_STACK" == "green" ]]; then
            EXPECTED_TARGET_WEIGHT=100
            EXPECTED_OTHER_WEIGHT=0
            TARGET_SVC="mvp-frontend-green-svc"
          else
            EXPECTED_TARGET_WEIGHT=100
            EXPECTED_OTHER_WEIGHT=0
            TARGET_SVC="mvp-frontend-blue-svc"
          fi
          ACTUAL_WEIGHT=$(grep -A1 "$TARGET_SVC" "$BLUE_GREEN_CONFIG" | grep weight | grep -oE '[0-9]+' | head -1)
          if [[ "$ACTUAL_WEIGHT" != "$EXPECTED_TARGET_WEIGHT" ]]; then
            echo "ERROR: Traffic not routed to $TARGET_STACK stack!"
            echo "Expected weight for $TARGET_SVC: $EXPECTED_TARGET_WEIGHT, Actual: $ACTUAL_WEIGHT"
            cat "$BLUE_GREEN_CONFIG" | grep -A2 weight
            exit 1
          fi
          echo "OK: Traffic correctly routed to $TARGET_STACK (weight: $ACTUAL_WEIGHT)"

  # ============================================
  # ROLLBACK - Auto-rollback on failure
  # ============================================
  rollback:
    name: Auto Rollback
    runs-on: prod
    needs: [validate, deploy-prod, verify-prod]
    if: failure()
    steps:
      - name: Checkout scripts
        uses: actions/checkout@v4
        with:
          sparse-checkout: scripts/
          sparse-checkout-cone-mode: true
          fetch-depth: 1

      - name: Execute rollback
        run: |
          chmod +x "$GITHUB_WORKSPACE/scripts/ci/auto-rollback.sh"
          DEPLOY_PATH="$DEPLOY_PATH" "$GITHUB_WORKSPACE/scripts/ci/auto-rollback.sh" "Production verification failed - automatic rollback"

      - name: Update state
        run: |
          cd "$DEPLOY_PATH"
          STATE_FILE="config/deployment/state.json"
          if [ -f "$STATE_FILE" ] && command -v jq &> /dev/null; then
            jq '.last_deployment_status = "rolled_back"' "$STATE_FILE" > "${STATE_FILE}.tmp" && mv "${STATE_FILE}.tmp" "$STATE_FILE"
          fi

  # ============================================
  # NOTIFY SUCCESS
  # ============================================
  notify-success:
    name: Notify Success
    runs-on: prod
    needs: [validate, verify-prod]
    if: success()
    steps:
      - name: Checkout scripts only
        uses: actions/checkout@v4
        with:
          sparse-checkout: scripts/
          sparse-checkout-cone-mode: true
          fetch-depth: 1

      - name: Send success notification
        run: |
          chmod +x "$GITHUB_WORKSPACE/scripts/ci/notify.sh"
          "$GITHUB_WORKSPACE/scripts/ci/notify.sh" success "Production deployment successful - ${{ inputs.image_tag }} is now live" "${{ inputs.image_tag }}"
        env:
          DEPLOY_NOTIFY_EMAIL: ${{ vars.DEPLOY_NOTIFY_EMAIL }}
          RESEND_API_KEY: ${{ secrets.RESEND_API_KEY }}

  # ============================================
  # NOTIFY FAILURE
  # ============================================
  notify-failure:
    name: Notify Failure
    runs-on: prod
    needs: [validate, deploy-prod, verify-prod, rollback]
    if: failure()
    steps:
      - name: Checkout scripts only
        uses: actions/checkout@v4
        with:
          sparse-checkout: scripts/
          sparse-checkout-cone-mode: true
          fetch-depth: 1

      - name: Send failure notification
        run: |
          chmod +x "$GITHUB_WORKSPACE/scripts/ci/notify.sh"
          "$GITHUB_WORKSPACE/scripts/ci/notify.sh" failure "Production deployment failed for ${{ inputs.image_tag }}" "${{ inputs.image_tag }}"
        env:
          DEPLOY_NOTIFY_EMAIL: ${{ vars.DEPLOY_NOTIFY_EMAIL }}
          RESEND_API_KEY: ${{ secrets.RESEND_API_KEY }}