From 13abbc16d726d67eb5dbd33f1b1efb2c6c2b3556 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Wed, 31 Dec 2025 10:37:18 -0600 Subject: [PATCH] fix: CI/CD blue-green deployment path bug causing stale production content MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause: switch-traffic.sh was modifying Traefik config in the CI checkout directory ($GITHUB_WORKSPACE) instead of the deployment directory ($DEPLOY_PATH). Traefik never saw the weight changes, so traffic stayed on old containers. Changes: - Add DEPLOY_PATH environment variable support to all CI scripts - Add --force-recreate flag to ensure containers are recreated with new images - Add image verification step to confirm containers use expected images - Add weight verification to confirm Traefik routing was updated - Add routing validation step to verify traffic switch succeeded 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .gitea/workflows/production.yaml | 73 ++++++++++++++++++++++++++++++-- scripts/ci/auto-rollback.sh | 5 ++- scripts/ci/health-check.sh | 6 ++- scripts/ci/switch-traffic.sh | 31 +++++++++++++- 4 files changed, 106 insertions(+), 9 deletions(-) diff --git a/.gitea/workflows/production.yaml b/.gitea/workflows/production.yaml index 9714ec0..cc514fa 100644 --- a/.gitea/workflows/production.yaml +++ b/.gitea/workflows/production.yaml @@ -136,21 +136,59 @@ jobs: docker pull $BACKEND_IMAGE docker pull $FRONTEND_IMAGE + - name: Record expected image IDs + id: expected-images + run: | + # Get the image IDs we just pulled - these are what containers should use + FRONTEND_ID=$(docker images --format '{{.ID}}' $FRONTEND_IMAGE | head -1) + BACKEND_ID=$(docker images --format '{{.ID}}' $BACKEND_IMAGE | head -1) + echo "Expected frontend image ID: $FRONTEND_ID" + echo "Expected backend image ID: $BACKEND_ID" + echo "frontend_id=$FRONTEND_ID" >> $GITHUB_OUTPUT + echo "backend_id=$BACKEND_ID" >> $GITHUB_OUTPUT + - name: Start target stack run: | cd "$DEPLOY_PATH" export BACKEND_IMAGE=$BACKEND_IMAGE export FRONTEND_IMAGE=$FRONTEND_IMAGE - docker compose -f $COMPOSE_FILE -f $COMPOSE_BLUE_GREEN up -d \ + # --force-recreate ensures containers are recreated even if image tag is same + # This prevents stale container content when image digest changes + docker compose -f $COMPOSE_FILE -f $COMPOSE_BLUE_GREEN up -d --force-recreate \ mvp-frontend-$TARGET_STACK mvp-backend-$TARGET_STACK - name: Wait for stack initialization run: sleep 10 + - name: Verify container images + run: | + # Verify containers are running the expected images + EXPECTED_FRONTEND="${{ steps.expected-images.outputs.frontend_id }}" + EXPECTED_BACKEND="${{ steps.expected-images.outputs.backend_id }}" + + RUNNING_FRONTEND=$(docker inspect --format='{{.Image}}' mvp-frontend-$TARGET_STACK | sed 's/sha256://' | cut -c1-12) + RUNNING_BACKEND=$(docker inspect --format='{{.Image}}' mvp-backend-$TARGET_STACK | sed 's/sha256://' | cut -c1-12) + + echo "Frontend - Expected: $EXPECTED_FRONTEND, Running: $RUNNING_FRONTEND" + echo "Backend - Expected: $EXPECTED_BACKEND, Running: $RUNNING_BACKEND" + + if [[ "$RUNNING_FRONTEND" != "$EXPECTED_FRONTEND" ]]; then + echo "ERROR: Frontend container not using expected image!" + echo "Container may be stale. Force recreate should have prevented this." + exit 1 + fi + + if [[ "$RUNNING_BACKEND" != "$EXPECTED_BACKEND" ]]; then + echo "ERROR: Backend container not using expected image!" + exit 1 + fi + + echo "OK: All containers using correct images" + - name: Run health check run: | chmod +x "$GITHUB_WORKSPACE/scripts/ci/health-check.sh" - "$GITHUB_WORKSPACE/scripts/ci/health-check.sh" $TARGET_STACK $HEALTH_CHECK_TIMEOUT + DEPLOY_PATH="$DEPLOY_PATH" "$GITHUB_WORKSPACE/scripts/ci/health-check.sh" $TARGET_STACK $HEALTH_CHECK_TIMEOUT - name: Start Traefik run: | @@ -171,7 +209,8 @@ jobs: - name: Switch traffic run: | chmod +x "$GITHUB_WORKSPACE/scripts/ci/switch-traffic.sh" - "$GITHUB_WORKSPACE/scripts/ci/switch-traffic.sh" $TARGET_STACK instant + # DEPLOY_PATH ensures script modifies config at /opt/motovaultpro, not checkout dir + DEPLOY_PATH="$DEPLOY_PATH" "$GITHUB_WORKSPACE/scripts/ci/switch-traffic.sh" $TARGET_STACK instant - name: Update deployment state run: | @@ -250,6 +289,32 @@ jobs: echo "OK: $service is running and healthy" done + - name: Validate Traefik routing weights + run: | + # Verify traffic has actually switched to the new stack + BLUE_GREEN_CONFIG="$DEPLOY_PATH/config/traefik/dynamic/blue-green.yml" + + if [[ "$TARGET_STACK" == "green" ]]; then + EXPECTED_TARGET_WEIGHT=100 + EXPECTED_OTHER_WEIGHT=0 + TARGET_SVC="mvp-frontend-green-svc" + else + EXPECTED_TARGET_WEIGHT=100 + EXPECTED_OTHER_WEIGHT=0 + TARGET_SVC="mvp-frontend-blue-svc" + fi + + ACTUAL_WEIGHT=$(grep -A1 "$TARGET_SVC" "$BLUE_GREEN_CONFIG" | grep weight | grep -oE '[0-9]+' | head -1) + + if [[ "$ACTUAL_WEIGHT" != "$EXPECTED_TARGET_WEIGHT" ]]; then + echo "ERROR: Traffic not routed to $TARGET_STACK stack!" + echo "Expected weight for $TARGET_SVC: $EXPECTED_TARGET_WEIGHT, Actual: $ACTUAL_WEIGHT" + cat "$BLUE_GREEN_CONFIG" | grep -A2 weight + exit 1 + fi + + echo "OK: Traffic correctly routed to $TARGET_STACK (weight: $ACTUAL_WEIGHT)" + # ============================================ # ROLLBACK - Auto-rollback on failure # ============================================ @@ -269,7 +334,7 @@ jobs: - name: Execute rollback run: | chmod +x "$GITHUB_WORKSPACE/scripts/ci/auto-rollback.sh" - "$GITHUB_WORKSPACE/scripts/ci/auto-rollback.sh" "Production verification failed - automatic rollback" + DEPLOY_PATH="$DEPLOY_PATH" "$GITHUB_WORKSPACE/scripts/ci/auto-rollback.sh" "Production verification failed - automatic rollback" - name: Update state run: | diff --git a/scripts/ci/auto-rollback.sh b/scripts/ci/auto-rollback.sh index 44e72ca..23f0ae2 100755 --- a/scripts/ci/auto-rollback.sh +++ b/scripts/ci/auto-rollback.sh @@ -11,8 +11,11 @@ set -euo pipefail +# Use DEPLOY_PATH if set (CI environment), otherwise calculate from script location SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" +PROJECT_ROOT="${DEPLOY_PATH:-$(cd "$SCRIPT_DIR/../.." && pwd)}" + +echo "Using PROJECT_ROOT: $PROJECT_ROOT" REASON="${1:-Automatic rollback triggered}" diff --git a/scripts/ci/health-check.sh b/scripts/ci/health-check.sh index 45d2ee1..fa7b388 100755 --- a/scripts/ci/health-check.sh +++ b/scripts/ci/health-check.sh @@ -12,8 +12,10 @@ set -euo pipefail -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" +# Use DEPLOY_PATH if set (CI environment), otherwise calculate from script location +PROJECT_ROOT="${DEPLOY_PATH:-$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)}" + +echo "Using PROJECT_ROOT: $PROJECT_ROOT" STACK="${1:-}" TIMEOUT="${2:-60}" diff --git a/scripts/ci/switch-traffic.sh b/scripts/ci/switch-traffic.sh index 2c9546f..aa940a7 100755 --- a/scripts/ci/switch-traffic.sh +++ b/scripts/ci/switch-traffic.sh @@ -14,8 +14,12 @@ set -euo pipefail -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" +# Use DEPLOY_PATH if set (CI environment), otherwise calculate from script location +# This is critical: CI workflows must pass DEPLOY_PATH to ensure we modify +# the actual deployment config, not the checkout directory +PROJECT_ROOT="${DEPLOY_PATH:-$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)}" + +echo "Using PROJECT_ROOT: $PROJECT_ROOT" TARGET_STACK="${1:-}" MODE="${2:-instant}" @@ -71,6 +75,27 @@ update_weights() { sleep 1 } +# Verify weights were actually written to config file +verify_weights_applied() { + local expected_blue="$1" + local expected_green="$2" + + # Extract actual weights from the config file + local actual_blue=$(grep -A1 "mvp-frontend-blue-svc" "$TRAEFIK_CONFIG" | grep weight | grep -oE '[0-9]+' | head -1) + local actual_green=$(grep -A1 "mvp-frontend-green-svc" "$TRAEFIK_CONFIG" | grep weight | grep -oE '[0-9]+' | head -1) + + if [[ "$actual_blue" != "$expected_blue" ]] || [[ "$actual_green" != "$expected_green" ]]; then + echo " ERROR: Weight verification failed!" + echo " Expected: blue=$expected_blue, green=$expected_green" + echo " Actual: blue=$actual_blue, green=$actual_green" + echo " Config file: $TRAEFIK_CONFIG" + return 1 + fi + + echo " OK: Weights verified (blue=$actual_blue, green=$actual_green)" + return 0 +} + # Verify Traefik has picked up the changes verify_traefik_reload() { # Give Traefik time to reload config @@ -123,8 +148,10 @@ else if [[ "$TARGET_STACK" == "blue" ]]; then update_weights 100 0 + verify_weights_applied 100 0 || exit 1 else update_weights 0 100 + verify_weights_applied 0 100 || exit 1 fi verify_traefik_reload