chore: unify health check timers across compose and workflows

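Docker Compose health checks (all services):
- interval: 5s (was 10-30s)
- timeout: 5s (unified; was 5-10s)
- backend start_period: 60s (was 30-180s)
- blue-green backend retries: 5 (was 3)

Gitea workflow health check loops:
- Docker healthcheck: 48 attempts x 5s = 4 min (was 24 x 10s)
- Backend health: 12 attempts x 5s = 60s (was 6 x 10s in staging; 24 x 5s in production)
- External health: 12 attempts x 5s = 60s (was 6 x 10s in staging; 24 attempts at 5-10s in production)
- Initial waits: 5s (was 10-15s)

Same total wait times for the Docker healthcheck loops and the staging backend/external loops, with faster detection of success/failure. The production backend and external loops now time out sooner than before.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>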
2026-02-03 21:10:47 -06:00
parent 88db25019f
commit 26196d34ea
4 changed files with 60 additions and 59 deletions
--- a/.gitea/workflows/production.yaml
+++ b/.gitea/workflows/production.yaml
@@ -174,7 +174,7 @@ jobs:
            mvp-ocr mvp-frontend-$TARGET_STACK mvp-backend-$TARGET_STACK
      - name: Wait for stack initialization
-        run: sleep 10
+        run: sleep 5
      - name: Verify container images
        run: |
@@ -272,8 +272,8 @@ jobs:
            # Check if service has a healthcheck defined
            has_healthcheck=$(docker inspect --format='{{if .Config.Healthcheck}}true{{else}}false{{end}}' $service 2>/dev/null || echo "false")
            if [ "$has_healthcheck" = "true" ]; then
-              # 24 attempts x 10 seconds = 4 minutes max wait (backend with fresh migrations can take ~3 min)
+              # 48 attempts x 5 seconds = 4 minutes max wait (backend with fresh migrations can take ~3 min)
-              for i in $(seq 1 24); do
+              for i in $(seq 1 48); do
                health=$(docker inspect --format='{{.State.Health.Status}}' $service 2>/dev/null || echo "unknown")
                if [ "$health" = "healthy" ]; then
                  echo "OK: $service is healthy"
@@ -283,13 +283,13 @@ jobs:
                  docker logs $service --tail 50 2>/dev/null || true
                  exit 1
                fi
-                if [ $i -eq 24 ]; then
+                if [ $i -eq 48 ]; then
                  echo "ERROR: $service health check timed out (status: $health)"
                  docker logs $service --tail 50 2>/dev/null || true
                  exit 1
                fi
-                echo "Waiting for $service healthcheck... (attempt $i/24, status: $health)"
+                echo "Waiting for $service healthcheck... (attempt $i/48, status: $health)"
-                sleep 10
+                sleep 5
              done
            else
              echo "SKIP: $service has no healthcheck defined"
@@ -298,17 +298,17 @@ jobs:
      - name: Wait for backend health
        run: |
-          for i in $(seq 1 24); do
+          for i in $(seq 1 12); do
            if docker exec mvp-backend-$TARGET_STACK curl -sf http://localhost:3001/health > /dev/null 2>&1; then
              echo "OK: Backend health check passed"
              exit 0
            fi
-            if [ $i -eq 24 ]; then
+            if [ $i -eq 12 ]; then
-              echo "ERROR: Backend health check failed after 6 attempts"
+              echo "ERROR: Backend health check failed after 12 attempts"
              docker logs mvp-backend-$TARGET_STACK --tail 100
              exit 1
            fi
-            echo "Attempt $i/24: Backend not ready, waiting 5s..."
+            echo "Attempt $i/12: Backend not ready, waiting 5s..."
            sleep 5
          done
@@ -316,9 +316,9 @@ jobs:
        run: |
          REQUIRED_FEATURES='["admin","auth","onboarding","vehicles","documents","fuel-logs","stations","maintenance","platform","notifications","user-profile","user-preferences","user-export"]'
-          for i in $(seq 1 24); do
+          for i in $(seq 1 12); do
            RESPONSE=$(curl -sf https://motovaultpro.com/api/health 2>/dev/null) || {
-              echo "Attempt $i/6: Connection failed, waiting 5s..."
+              echo "Attempt $i/12: Connection failed, waiting 5s..."
              sleep 5
              continue
            }
@@ -326,8 +326,8 @@ jobs:
            # Check status is "healthy"
            STATUS=$(echo "$RESPONSE" | jq -r '.status')
            if [ "$STATUS" != "healthy" ]; then
-              echo "Attempt $i/6: Status is '$STATUS', not 'healthy'. Waiting 10s..."
+              echo "Attempt $i/12: Status is '$STATUS', not 'healthy'. Waiting 5s..."
-              sleep 10
+              sleep 5
              continue
            fi
@@ -337,8 +337,8 @@ jobs:
            ')
            if [ -n "$MISSING" ]; then
-              echo "Attempt $i/6: Missing features: $MISSING. Waiting 10s..."
+              echo "Attempt $i/12: Missing features: $MISSING. Waiting 5s..."
-              sleep 10
+              sleep 5
              continue
            fi
@@ -347,7 +347,7 @@ jobs:
            exit 0
          done
-          echo "ERROR: Production health check failed after 6 attempts"
+          echo "ERROR: Production health check failed after 12 attempts"
          echo "Last response: $RESPONSE"
          exit 1
--- a/.gitea/workflows/staging.yaml
+++ b/.gitea/workflows/staging.yaml
@@ -173,7 +173,7 @@ jobs:
          docker compose -f $COMPOSE_FILE -f $COMPOSE_STAGING up -d
      - name: Wait for services
-        run: sleep 15
+        run: sleep 5
  # ============================================
  # VERIFY STAGING - Health checks
@@ -205,8 +205,8 @@ jobs:
            # Check if service has a healthcheck defined
            has_healthcheck=$(docker inspect --format='{{if .Config.Healthcheck}}true{{else}}false{{end}}' $service 2>/dev/null || echo "false")
            if [ "$has_healthcheck" = "true" ]; then
-              # 24 attempts x 10 seconds = 4 minutes max wait (backend with fresh migrations can take ~3 min)
+              # 48 attempts x 5 seconds = 4 minutes max wait (backend with fresh migrations can take ~3 min)
-              for i in $(seq 1 24); do
+              for i in $(seq 1 48); do
                health=$(docker inspect --format='{{.State.Health.Status}}' $service 2>/dev/null || echo "unknown")
                if [ "$health" = "healthy" ]; then
                  echo "OK: $service is healthy"
@@ -216,13 +216,13 @@ jobs:
                  docker logs $service --tail 50 2>/dev/null || true
                  exit 1
                fi
-                if [ $i -eq 24 ]; then
+                if [ $i -eq 48 ]; then
                  echo "ERROR: $service health check timed out (status: $health)"
                  docker logs $service --tail 50 2>/dev/null || true
                  exit 1
                fi
-                echo "Waiting for $service healthcheck... (attempt $i/24, status: $health)"
+                echo "Waiting for $service healthcheck... (attempt $i/48, status: $health)"
-                sleep 10
+                sleep 5
              done
            else
              echo "SKIP: $service has no healthcheck defined"
@@ -231,36 +231,36 @@ jobs:
      - name: Wait for backend health
        run: |
-          for i in 1 2 3 4 5 6; do
+          for i in $(seq 1 12); do
            if docker exec mvp-backend-staging curl -sf http://localhost:3001/health > /dev/null 2>&1; then
              echo "OK: Backend health check passed"
              exit 0
            fi
-            if [ $i -eq 6 ]; then
+            if [ $i -eq 12 ]; then
-              echo "ERROR: Backend health check failed after 6 attempts"
+              echo "ERROR: Backend health check failed after 12 attempts"
              docker logs mvp-backend-staging --tail 100
              exit 1
            fi
-            echo "Attempt $i/6: Backend not ready, waiting 10s..."
+            echo "Attempt $i/12: Backend not ready, waiting 5s..."
-            sleep 10
+            sleep 5
          done
      - name: Check external endpoint
        run: |
          REQUIRED_FEATURES='["admin","auth","onboarding","vehicles","documents","fuel-logs","stations","maintenance","platform","notifications","user-profile","user-preferences","user-export"]'
-          for i in 1 2 3 4 5 6; do
+          for i in $(seq 1 12); do
            RESPONSE=$(curl -sf https://staging.motovaultpro.com/api/health 2>/dev/null) || {
-              echo "Attempt $i/6: Connection failed, waiting 10s..."
+              echo "Attempt $i/12: Connection failed, waiting 5s..."
-              sleep 10
+              sleep 5
              continue
            }
            # Check status is "healthy"
            STATUS=$(echo "$RESPONSE" | jq -r '.status')
            if [ "$STATUS" != "healthy" ]; then
-              echo "Attempt $i/6: Status is '$STATUS', not 'healthy'. Waiting 10s..."
+              echo "Attempt $i/12: Status is '$STATUS', not 'healthy'. Waiting 5s..."
-              sleep 10
+              sleep 5
              continue
            fi
@@ -270,8 +270,8 @@ jobs:
            ')
            if [ -n "$MISSING" ]; then
-              echo "Attempt $i/6: Missing features: $MISSING. Waiting 10s..."
+              echo "Attempt $i/12: Missing features: $MISSING. Waiting 5s..."
-              sleep 10
+              sleep 5
              continue
            fi
@@ -280,7 +280,7 @@ jobs:
            exit 0
          done
-          echo "ERROR: Staging health check failed after 6 attempts"
+          echo "ERROR: Staging health check failed after 12 attempts"
          echo "Last response: $RESPONSE"
          exit 1
--- a/docker-compose.blue-green.yml
+++ b/docker-compose.blue-green.yml
@@ -34,7 +34,7 @@ services:
      - mvp-backend-blue
    healthcheck:
      test: ["CMD-SHELL", "curl -sf http://localhost:3000 || exit 1"]
-      interval: 10s
+      interval: 5s
      timeout: 5s
      retries: 3
      start_period: 10s
@@ -89,10 +89,10 @@ services:
      test:
        - CMD-SHELL
        - node -e "require('http').get('http://localhost:3001/health', r => process.exit(r.statusCode===200?0:1)).on('error', () => process.exit(1))"
-      interval: 10s
+      interval: 5s
      timeout: 5s
-      retries: 3
+      retries: 5
-      start_period: 30s
+      start_period: 60s
    deploy:
      resources:
        limits:
@@ -125,7 +125,7 @@ services:
      - mvp-backend-green
    healthcheck:
      test: ["CMD-SHELL", "curl -sf http://localhost:3000 || exit 1"]
-      interval: 10s
+      interval: 5s
      timeout: 5s
      retries: 3
      start_period: 10s
@@ -180,10 +180,10 @@ services:
      test:
        - CMD-SHELL
        - node -e "require('http').get('http://localhost:3001/health', r => process.exit(r.statusCode===200?0:1)).on('error', () => process.exit(1))"
-      interval: 10s
+      interval: 5s
      timeout: 5s
-      retries: 3
+      retries: 5
-      start_period: 30s
+      start_period: 60s
    deploy:
      resources:
        limits:
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -29,10 +29,10 @@ services:
      backend:
    healthcheck:
      test: ["CMD", "traefik", "healthcheck"]
-      interval: 30s
+      interval: 5s
-      timeout: 10s
+      timeout: 5s
      retries: 3
-      start_period: 20s
+      start_period: 10s
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.traefik-dashboard.rule=Host(`traefik.motovaultpro.local`)"
@@ -76,10 +76,10 @@ services:
      - mvp-backend
    healthcheck:
      test: ["CMD-SHELL", "curl -sf http://localhost:3000 || exit 1"]
-      interval: 30s
+      interval: 5s
-      timeout: 10s
+      timeout: 5s
      retries: 3
-      start_period: 20s
+      start_period: 10s
    labels:
      - "traefik.enable=true"
      - traefik.docker.network=motovaultpro_frontend
@@ -148,10 +148,10 @@ services:
      test:
        - CMD-SHELL
        - node -e "require('http').get('http://localhost:3001/health', r => process.exit(r.statusCode===200?0:1)).on('error', () => process.exit(1))"
-      interval: 30s
+      interval: 5s
-      timeout: 10s
+      timeout: 5s
      retries: 5
-      start_period: 180s
+      start_period: 60s
    labels:
      - "traefik.enable=true"
      - "traefik.docker.network=motovaultpro_backend"
@@ -198,10 +198,10 @@ services:
      - mvp-redis
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
-      interval: 30s
+      interval: 5s
-      timeout: 10s
+      timeout: 5s
      retries: 3
-      start_period: 30s
+      start_period: 15s
    logging:
      driver: json-file
      options:
@@ -230,10 +230,10 @@ services:
      - "5432:5432"  # Development access only
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U postgres"]
-      interval: 10s
+      interval: 5s
      timeout: 5s
      retries: 5
-      start_period: 30s
+      start_period: 15s
    logging:
      driver: json-file
      options:
@@ -254,9 +254,10 @@ services:
      - "6379:6379"  # Development access only
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
-      interval: 10s
+      interval: 5s
      timeout: 5s
      retries: 5
      start_period: 5s
    logging:
      driver: json-file
      options: