Files
motovaultpro/scripts/ci/auto-rollback.sh
Eric Gullickson 13abbc16d7 fix: CI/CD blue-green deployment path bug causing stale production content
Root cause: switch-traffic.sh was modifying Traefik config in the CI checkout
directory ($GITHUB_WORKSPACE) instead of the deployment directory ($DEPLOY_PATH).
Traefik never saw the weight changes, so traffic stayed on old containers.

Changes:
- Add DEPLOY_PATH environment variable support to all CI scripts
- Add --force-recreate flag to ensure containers are recreated with new images
- Add image verification step to confirm containers use expected images
- Add weight verification to confirm Traefik routing was updated
- Add routing validation step to verify traffic switch succeeded

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-31 10:37:18 -06:00

156 lines
4.6 KiB
Bash
Executable File

#!/bin/bash
# auto-rollback.sh - rollback helper for the blue-green deployment
#
# Sends traffic back to the stack that the deployment state file records
# as inactive.
#
# Usage: ./auto-rollback.sh [reason]
#   reason  Optional free-text description of why the rollback is happening
#           (defaults to "Automatic rollback triggered")
#
# Environment:
#   DEPLOY_PATH  Optional. When set and non-empty it is used as the project
#                root; otherwise the root is two directories above this
#                script.
#
# Exit codes:
#   0 - Rollback successful
#   1 - Rollback failed

set -euo pipefail

# Absolute directory of this script; the helper scripts are looked up here.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# An explicit DEPLOY_PATH (CI environment) wins over the computed location.
if [[ -n "${DEPLOY_PATH:-}" ]]; then
    PROJECT_ROOT="$DEPLOY_PATH"
else
    PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
fi
printf '%s\n' "Using PROJECT_ROOT: $PROJECT_ROOT"

# Helper scripts that live next to this one.
NOTIFY_SCRIPT="$SCRIPT_DIR/notify.sh"
HEALTH_SCRIPT="$SCRIPT_DIR/health-check.sh"
SWITCH_SCRIPT="$SCRIPT_DIR/switch-traffic.sh"

# Deployment state lives under the project root, not next to the scripts.
STATE_FILE="$PROJECT_ROOT/config/deployment/state.json"
REASON="${1:-Automatic rollback triggered}"

# Opening banner with a UTC timestamp.
banner_rule="========================================"
printf '%s\n' \
    "$banner_rule" \
    "AUTO-ROLLBACK INITIATED" \
    "Reason: $REASON" \
    "Time: $(date -u +"%Y-%m-%dT%H:%M:%SZ")" \
    "$banner_rule"
# Determine current and rollback stacks from the deployment state file.
# The rollback target is whatever the state file records as inactive.
#
# Each precondition is checked separately so the log names the actual cause,
# and jq failures are caught explicitly: otherwise `set -e` would abort with
# jq's own exit status and no explanation.
state_error=""
if [[ ! -f "$STATE_FILE" ]]; then
    state_error="state file not found"
elif ! command -v jq &> /dev/null; then
    state_error="jq is not installed or not on PATH"
elif ! CURRENT_STACK=$(jq -r '.active_stack // "unknown"' "$STATE_FILE") ||
     ! ROLLBACK_STACK=$(jq -r '.inactive_stack // "unknown"' "$STATE_FILE"); then
    state_error="state file could not be parsed by jq"
fi

if [[ -n "$state_error" ]]; then
    echo "ERROR: Cannot determine current stack state"
    echo "State file: $STATE_FILE"
    echo "Cause: $state_error"
    exit 1
fi

# A missing/null field comes back as "unknown" (jq fallback above); an empty
# string is equally unusable as a stack name, so both are rejected.
if [[ -z "$CURRENT_STACK" || "$CURRENT_STACK" == "unknown" ]] ||
   [[ -z "$ROLLBACK_STACK" || "$ROLLBACK_STACK" == "unknown" ]]; then
    echo "ERROR: Invalid stack state"
    echo " Current: $CURRENT_STACK"
    echo " Rollback target: $ROLLBACK_STACK"
    exit 1
fi

echo ""
echo "Stack Status:"
echo " Currently active: $CURRENT_STACK"
echo " Rollback target: $ROLLBACK_STACK"
echo ""
# Step 1/3: confirm the rollback target is healthy before sending traffic to it.
printf '%s\n' \
    "Step 1/3: Verifying rollback stack health..." \
    "----------------------------------------"

if [[ ! -x "$HEALTH_SCRIPT" ]]; then
    # No executable health check available: continue without verification.
    echo " WARNING: Health check script not found, proceeding anyway"
elif "$HEALTH_SCRIPT" "$ROLLBACK_STACK" 30; then
    # NOTE(review): the meaning of the literal 30 is defined by
    # health-check.sh - presumably a timeout; confirm against that script.
    echo " OK: Rollback stack is healthy"
else
    # The target is unhealthy: switching would move traffic onto a broken
    # stack, so stop here and hand over to a human.
    printf '%s\n' \
        "" \
        "CRITICAL: Rollback stack ($ROLLBACK_STACK) is NOT healthy!" \
        "Manual intervention required." \
        "" \
        "Troubleshooting steps:" \
        " 1. Check container logs: docker logs mvp-backend-$ROLLBACK_STACK" \
        " 2. Check container status: docker ps -a" \
        " 3. Consider restarting rollback stack" \
        ""

    # Critical notification is best-effort; its failure must not change
    # the exit path.
    if [[ -x "$NOTIFY_SCRIPT" ]]; then
        failure_notice="Rollback to $ROLLBACK_STACK failed - stack unhealthy. Manual intervention required. Reason: $REASON"
        "$NOTIFY_SCRIPT" "rollback_failed" "$failure_notice" || true
    fi
    exit 1
fi
# Step 2/3: switch traffic to the rollback stack.
#
# Both failure modes (helper script missing / helper script failed) end in
# exit 1, and both send the "rollback_failed" notification so that a missing
# switch script does not fail silently.
echo ""
echo "Step 2/3: Switching traffic to $ROLLBACK_STACK..."
echo "----------------------------------------"

# Empty means the switch succeeded; otherwise holds the notification prefix.
switch_failure=""
if [[ ! -x "$SWITCH_SCRIPT" ]]; then
    echo "ERROR: Traffic switch script not found: $SWITCH_SCRIPT"
    switch_failure="Rollback traffic switch script not found: $SWITCH_SCRIPT."
elif ! "$SWITCH_SCRIPT" "$ROLLBACK_STACK" instant; then
    echo "ERROR: Traffic switch failed"
    switch_failure="Rollback traffic switch failed."
fi

if [[ -n "$switch_failure" ]]; then
    # Best-effort alert: a notifier failure must not mask the real error.
    if [[ -x "$NOTIFY_SCRIPT" ]]; then
        "$NOTIFY_SCRIPT" "rollback_failed" \
            "$switch_failure Manual intervention required. Reason: $REASON" \
            || true
    fi
    exit 1
fi
#######################################
# Record the rollback in the deployment state file.
#
# Swaps active/inactive stacks, stamps the rollback time and reason, and
# clears rollback_available. The new content is written to a sibling .tmp
# file and moved into place only if jq succeeded, so a failed run never
# truncates the existing state file.
#
# Globals:  STATE_FILE, ROLLBACK_STACK, CURRENT_STACK, REASON (all read)
# Outputs:  progress / error lines on stdout
# Returns:  0 if the state was updated, or if the update was skipped because
#           the state file or jq is unavailable; 1 if jq or mv failed
#######################################
update_deployment_state() {
    local tmp_file="${STATE_FILE}.tmp"
    local timestamp

    if [[ ! -f "$STATE_FILE" ]] || ! command -v jq &> /dev/null; then
        echo " WARNING: State file or jq unavailable, deployment state NOT updated"
        return 0
    fi

    timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

    # jq and mv are tested together: inside a plain `a && b` list a failing
    # jq would not trip `set -e`, and the script would carry on as if the
    # state had been written.
    if jq --arg stack "$ROLLBACK_STACK" \
          --arg reason "$REASON" \
          --arg ts "$timestamp" \
          --arg failed "$CURRENT_STACK" \
          '.active_stack = $stack |
           .inactive_stack = $failed |
           .last_rollback = $ts |
           .last_rollback_reason = $reason |
           .rollback_available = false' \
          "$STATE_FILE" > "$tmp_file" &&
       mv "$tmp_file" "$STATE_FILE"; then
        echo " State updated"
        return 0
    fi

    # Do not leave a partial temp file behind.
    rm -f "$tmp_file"
    echo " ERROR: Failed to update state file: $STATE_FILE"
    return 1
}

# Step 3/3: update state file with rollback info
echo ""
echo "Step 3/3: Updating deployment state..."
echo "----------------------------------------"
if ! update_deployment_state; then
    # Traffic has already moved; a stale state file would mislead the next
    # deployment about which stack is live, so fail loudly.
    echo "ERROR: Traffic now points at $ROLLBACK_STACK but the state file is stale"
    echo "Manual intervention required: correct $STATE_FILE before the next deployment"
    if [[ -x "$NOTIFY_SCRIPT" ]]; then
        "$NOTIFY_SCRIPT" "rollback_failed" \
            "Traffic switched to $ROLLBACK_STACK but deployment state update failed. Manual intervention required. Reason: $REASON" \
            || true
    fi
    exit 1
fi
# Announce the completed rollback. Delivery is best-effort: a notifier
# failure is logged as a warning and does not affect the exit status.
if [[ -x "$NOTIFY_SCRIPT" ]]; then
    printf '%s\n' "" "Sending rollback notification..."
    rollback_notice="Rollback executed. Traffic switched from $CURRENT_STACK to $ROLLBACK_STACK. Reason: $REASON"
    if ! "$NOTIFY_SCRIPT" "rollback" "$rollback_notice"; then
        echo " WARNING: Notification failed"
    fi
fi

# Closing report: what changed and what the operator should look at next.
summary_lines=(
    ""
    "========================================"
    "ROLLBACK COMPLETE"
    "========================================"
    ""
    "Summary:"
    " Previous stack: $CURRENT_STACK (now inactive)"
    " Current stack: $ROLLBACK_STACK (now active)"
    " Reason: $REASON"
    ""
    "Next steps:"
    " 1. Investigate why $CURRENT_STACK failed"
    " 2. Check logs: docker logs mvp-backend-$CURRENT_STACK"
    " 3. Fix issues before next deployment"
    ""
)
printf '%s\n' "${summary_lines[@]}"
exit 0