chore: unify health check timers across compose and workflows
Some checks failed
Deploy to Staging / Build Images (push) Successful in 32s
Deploy to Staging / Deploy to Staging (push) Successful in 21s
Deploy to Staging / Verify Staging (push) Failing after 1m18s
Deploy to Staging / Notify Staging Ready (push) Has been skipped
Deploy to Staging / Notify Staging Failure (push) Successful in 7s
Some checks failed
Deploy to Staging / Build Images (push) Successful in 32s
Deploy to Staging / Deploy to Staging (push) Successful in 21s
Deploy to Staging / Verify Staging (push) Failing after 1m18s
Deploy to Staging / Notify Staging Ready (push) Has been skipped
Deploy to Staging / Notify Staging Failure (push) Successful in 7s
Docker Compose health checks (all services): - interval: 5s (was 10-30s) - timeout: 5s (unified) - backend start_period: 60s (was 30-180s) Gitea workflow health check loops: - Docker healthcheck: 48 attempts x 5s = 4 min (was 24 x 10s) - Backend health: 12 attempts x 5s = 60s (was 6 x 10s) - External health: 12 attempts x 5s = 60s (was 6 x 10s) - Initial waits: 5s (was 10-15s) Same total wait times, faster detection of success/failure. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -174,7 +174,7 @@ jobs:
|
|||||||
mvp-ocr mvp-frontend-$TARGET_STACK mvp-backend-$TARGET_STACK
|
mvp-ocr mvp-frontend-$TARGET_STACK mvp-backend-$TARGET_STACK
|
||||||
|
|
||||||
- name: Wait for stack initialization
|
- name: Wait for stack initialization
|
||||||
run: sleep 10
|
run: sleep 5
|
||||||
|
|
||||||
- name: Verify container images
|
- name: Verify container images
|
||||||
run: |
|
run: |
|
||||||
@@ -272,8 +272,8 @@ jobs:
|
|||||||
# Check if service has a healthcheck defined
|
# Check if service has a healthcheck defined
|
||||||
has_healthcheck=$(docker inspect --format='{{if .Config.Healthcheck}}true{{else}}false{{end}}' $service 2>/dev/null || echo "false")
|
has_healthcheck=$(docker inspect --format='{{if .Config.Healthcheck}}true{{else}}false{{end}}' $service 2>/dev/null || echo "false")
|
||||||
if [ "$has_healthcheck" = "true" ]; then
|
if [ "$has_healthcheck" = "true" ]; then
|
||||||
# 24 attempts x 10 seconds = 4 minutes max wait (backend with fresh migrations can take ~3 min)
|
# 48 attempts x 5 seconds = 4 minutes max wait (backend with fresh migrations can take ~3 min)
|
||||||
for i in $(seq 1 24); do
|
for i in $(seq 1 48); do
|
||||||
health=$(docker inspect --format='{{.State.Health.Status}}' $service 2>/dev/null || echo "unknown")
|
health=$(docker inspect --format='{{.State.Health.Status}}' $service 2>/dev/null || echo "unknown")
|
||||||
if [ "$health" = "healthy" ]; then
|
if [ "$health" = "healthy" ]; then
|
||||||
echo "OK: $service is healthy"
|
echo "OK: $service is healthy"
|
||||||
@@ -283,13 +283,13 @@ jobs:
|
|||||||
docker logs $service --tail 50 2>/dev/null || true
|
docker logs $service --tail 50 2>/dev/null || true
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
if [ $i -eq 24 ]; then
|
if [ $i -eq 48 ]; then
|
||||||
echo "ERROR: $service health check timed out (status: $health)"
|
echo "ERROR: $service health check timed out (status: $health)"
|
||||||
docker logs $service --tail 50 2>/dev/null || true
|
docker logs $service --tail 50 2>/dev/null || true
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
echo "Waiting for $service healthcheck... (attempt $i/24, status: $health)"
|
echo "Waiting for $service healthcheck... (attempt $i/48, status: $health)"
|
||||||
sleep 10
|
sleep 5
|
||||||
done
|
done
|
||||||
else
|
else
|
||||||
echo "SKIP: $service has no healthcheck defined"
|
echo "SKIP: $service has no healthcheck defined"
|
||||||
@@ -298,17 +298,17 @@ jobs:
|
|||||||
|
|
||||||
- name: Wait for backend health
|
- name: Wait for backend health
|
||||||
run: |
|
run: |
|
||||||
for i in $(seq 1 24); do
|
for i in $(seq 1 12); do
|
||||||
if docker exec mvp-backend-$TARGET_STACK curl -sf http://localhost:3001/health > /dev/null 2>&1; then
|
if docker exec mvp-backend-$TARGET_STACK curl -sf http://localhost:3001/health > /dev/null 2>&1; then
|
||||||
echo "OK: Backend health check passed"
|
echo "OK: Backend health check passed"
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
if [ $i -eq 24 ]; then
|
if [ $i -eq 12 ]; then
|
||||||
echo "ERROR: Backend health check failed after 6 attempts"
|
echo "ERROR: Backend health check failed after 12 attempts"
|
||||||
docker logs mvp-backend-$TARGET_STACK --tail 100
|
docker logs mvp-backend-$TARGET_STACK --tail 100
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
echo "Attempt $i/24: Backend not ready, waiting 5s..."
|
echo "Attempt $i/12: Backend not ready, waiting 5s..."
|
||||||
sleep 5
|
sleep 5
|
||||||
done
|
done
|
||||||
|
|
||||||
@@ -316,9 +316,9 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
REQUIRED_FEATURES='["admin","auth","onboarding","vehicles","documents","fuel-logs","stations","maintenance","platform","notifications","user-profile","user-preferences","user-export"]'
|
REQUIRED_FEATURES='["admin","auth","onboarding","vehicles","documents","fuel-logs","stations","maintenance","platform","notifications","user-profile","user-preferences","user-export"]'
|
||||||
|
|
||||||
for i in $(seq 1 24); do
|
for i in $(seq 1 12); do
|
||||||
RESPONSE=$(curl -sf https://motovaultpro.com/api/health 2>/dev/null) || {
|
RESPONSE=$(curl -sf https://motovaultpro.com/api/health 2>/dev/null) || {
|
||||||
echo "Attempt $i/6: Connection failed, waiting 5s..."
|
echo "Attempt $i/12: Connection failed, waiting 5s..."
|
||||||
sleep 5
|
sleep 5
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@@ -326,8 +326,8 @@ jobs:
|
|||||||
# Check status is "healthy"
|
# Check status is "healthy"
|
||||||
STATUS=$(echo "$RESPONSE" | jq -r '.status')
|
STATUS=$(echo "$RESPONSE" | jq -r '.status')
|
||||||
if [ "$STATUS" != "healthy" ]; then
|
if [ "$STATUS" != "healthy" ]; then
|
||||||
echo "Attempt $i/6: Status is '$STATUS', not 'healthy'. Waiting 10s..."
|
echo "Attempt $i/12: Status is '$STATUS', not 'healthy'. Waiting 5s..."
|
||||||
sleep 10
|
sleep 5
|
||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@@ -337,8 +337,8 @@ jobs:
|
|||||||
')
|
')
|
||||||
|
|
||||||
if [ -n "$MISSING" ]; then
|
if [ -n "$MISSING" ]; then
|
||||||
echo "Attempt $i/6: Missing features: $MISSING. Waiting 10s..."
|
echo "Attempt $i/12: Missing features: $MISSING. Waiting 5s..."
|
||||||
sleep 10
|
sleep 5
|
||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@@ -347,7 +347,7 @@ jobs:
|
|||||||
exit 0
|
exit 0
|
||||||
done
|
done
|
||||||
|
|
||||||
echo "ERROR: Production health check failed after 6 attempts"
|
echo "ERROR: Production health check failed after 12 attempts"
|
||||||
echo "Last response: $RESPONSE"
|
echo "Last response: $RESPONSE"
|
||||||
exit 1
|
exit 1
|
||||||
|
|
||||||
|
|||||||
@@ -173,7 +173,7 @@ jobs:
|
|||||||
docker compose -f $COMPOSE_FILE -f $COMPOSE_STAGING up -d
|
docker compose -f $COMPOSE_FILE -f $COMPOSE_STAGING up -d
|
||||||
|
|
||||||
- name: Wait for services
|
- name: Wait for services
|
||||||
run: sleep 15
|
run: sleep 5
|
||||||
|
|
||||||
# ============================================
|
# ============================================
|
||||||
# VERIFY STAGING - Health checks
|
# VERIFY STAGING - Health checks
|
||||||
@@ -205,8 +205,8 @@ jobs:
|
|||||||
# Check if service has a healthcheck defined
|
# Check if service has a healthcheck defined
|
||||||
has_healthcheck=$(docker inspect --format='{{if .Config.Healthcheck}}true{{else}}false{{end}}' $service 2>/dev/null || echo "false")
|
has_healthcheck=$(docker inspect --format='{{if .Config.Healthcheck}}true{{else}}false{{end}}' $service 2>/dev/null || echo "false")
|
||||||
if [ "$has_healthcheck" = "true" ]; then
|
if [ "$has_healthcheck" = "true" ]; then
|
||||||
# 24 attempts x 10 seconds = 4 minutes max wait (backend with fresh migrations can take ~3 min)
|
# 48 attempts x 5 seconds = 4 minutes max wait (backend with fresh migrations can take ~3 min)
|
||||||
for i in $(seq 1 24); do
|
for i in $(seq 1 48); do
|
||||||
health=$(docker inspect --format='{{.State.Health.Status}}' $service 2>/dev/null || echo "unknown")
|
health=$(docker inspect --format='{{.State.Health.Status}}' $service 2>/dev/null || echo "unknown")
|
||||||
if [ "$health" = "healthy" ]; then
|
if [ "$health" = "healthy" ]; then
|
||||||
echo "OK: $service is healthy"
|
echo "OK: $service is healthy"
|
||||||
@@ -216,13 +216,13 @@ jobs:
|
|||||||
docker logs $service --tail 50 2>/dev/null || true
|
docker logs $service --tail 50 2>/dev/null || true
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
if [ $i -eq 24 ]; then
|
if [ $i -eq 48 ]; then
|
||||||
echo "ERROR: $service health check timed out (status: $health)"
|
echo "ERROR: $service health check timed out (status: $health)"
|
||||||
docker logs $service --tail 50 2>/dev/null || true
|
docker logs $service --tail 50 2>/dev/null || true
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
echo "Waiting for $service healthcheck... (attempt $i/24, status: $health)"
|
echo "Waiting for $service healthcheck... (attempt $i/48, status: $health)"
|
||||||
sleep 10
|
sleep 5
|
||||||
done
|
done
|
||||||
else
|
else
|
||||||
echo "SKIP: $service has no healthcheck defined"
|
echo "SKIP: $service has no healthcheck defined"
|
||||||
@@ -231,36 +231,36 @@ jobs:
|
|||||||
|
|
||||||
- name: Wait for backend health
|
- name: Wait for backend health
|
||||||
run: |
|
run: |
|
||||||
for i in 1 2 3 4 5 6; do
|
for i in $(seq 1 12); do
|
||||||
if docker exec mvp-backend-staging curl -sf http://localhost:3001/health > /dev/null 2>&1; then
|
if docker exec mvp-backend-staging curl -sf http://localhost:3001/health > /dev/null 2>&1; then
|
||||||
echo "OK: Backend health check passed"
|
echo "OK: Backend health check passed"
|
||||||
exit 0
|
exit 0
|
||||||
fi
|
fi
|
||||||
if [ $i -eq 6 ]; then
|
if [ $i -eq 12 ]; then
|
||||||
echo "ERROR: Backend health check failed after 6 attempts"
|
echo "ERROR: Backend health check failed after 12 attempts"
|
||||||
docker logs mvp-backend-staging --tail 100
|
docker logs mvp-backend-staging --tail 100
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
echo "Attempt $i/6: Backend not ready, waiting 10s..."
|
echo "Attempt $i/12: Backend not ready, waiting 5s..."
|
||||||
sleep 10
|
sleep 5
|
||||||
done
|
done
|
||||||
|
|
||||||
- name: Check external endpoint
|
- name: Check external endpoint
|
||||||
run: |
|
run: |
|
||||||
REQUIRED_FEATURES='["admin","auth","onboarding","vehicles","documents","fuel-logs","stations","maintenance","platform","notifications","user-profile","user-preferences","user-export"]'
|
REQUIRED_FEATURES='["admin","auth","onboarding","vehicles","documents","fuel-logs","stations","maintenance","platform","notifications","user-profile","user-preferences","user-export"]'
|
||||||
|
|
||||||
for i in 1 2 3 4 5 6; do
|
for i in $(seq 1 12); do
|
||||||
RESPONSE=$(curl -sf https://staging.motovaultpro.com/api/health 2>/dev/null) || {
|
RESPONSE=$(curl -sf https://staging.motovaultpro.com/api/health 2>/dev/null) || {
|
||||||
echo "Attempt $i/6: Connection failed, waiting 10s..."
|
echo "Attempt $i/12: Connection failed, waiting 5s..."
|
||||||
sleep 10
|
sleep 5
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
# Check status is "healthy"
|
# Check status is "healthy"
|
||||||
STATUS=$(echo "$RESPONSE" | jq -r '.status')
|
STATUS=$(echo "$RESPONSE" | jq -r '.status')
|
||||||
if [ "$STATUS" != "healthy" ]; then
|
if [ "$STATUS" != "healthy" ]; then
|
||||||
echo "Attempt $i/6: Status is '$STATUS', not 'healthy'. Waiting 10s..."
|
echo "Attempt $i/12: Status is '$STATUS', not 'healthy'. Waiting 5s..."
|
||||||
sleep 10
|
sleep 5
|
||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@@ -270,8 +270,8 @@ jobs:
|
|||||||
')
|
')
|
||||||
|
|
||||||
if [ -n "$MISSING" ]; then
|
if [ -n "$MISSING" ]; then
|
||||||
echo "Attempt $i/6: Missing features: $MISSING. Waiting 10s..."
|
echo "Attempt $i/12: Missing features: $MISSING. Waiting 5s..."
|
||||||
sleep 10
|
sleep 5
|
||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@@ -280,7 +280,7 @@ jobs:
|
|||||||
exit 0
|
exit 0
|
||||||
done
|
done
|
||||||
|
|
||||||
echo "ERROR: Staging health check failed after 6 attempts"
|
echo "ERROR: Staging health check failed after 12 attempts"
|
||||||
echo "Last response: $RESPONSE"
|
echo "Last response: $RESPONSE"
|
||||||
exit 1
|
exit 1
|
||||||
|
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ services:
|
|||||||
- mvp-backend-blue
|
- mvp-backend-blue
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD-SHELL", "curl -sf http://localhost:3000 || exit 1"]
|
test: ["CMD-SHELL", "curl -sf http://localhost:3000 || exit 1"]
|
||||||
interval: 10s
|
interval: 5s
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
retries: 3
|
retries: 3
|
||||||
start_period: 10s
|
start_period: 10s
|
||||||
@@ -89,10 +89,10 @@ services:
|
|||||||
test:
|
test:
|
||||||
- CMD-SHELL
|
- CMD-SHELL
|
||||||
- node -e "require('http').get('http://localhost:3001/health', r => process.exit(r.statusCode===200?0:1)).on('error', () => process.exit(1))"
|
- node -e "require('http').get('http://localhost:3001/health', r => process.exit(r.statusCode===200?0:1)).on('error', () => process.exit(1))"
|
||||||
interval: 10s
|
interval: 5s
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
retries: 3
|
retries: 5
|
||||||
start_period: 30s
|
start_period: 60s
|
||||||
deploy:
|
deploy:
|
||||||
resources:
|
resources:
|
||||||
limits:
|
limits:
|
||||||
@@ -125,7 +125,7 @@ services:
|
|||||||
- mvp-backend-green
|
- mvp-backend-green
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD-SHELL", "curl -sf http://localhost:3000 || exit 1"]
|
test: ["CMD-SHELL", "curl -sf http://localhost:3000 || exit 1"]
|
||||||
interval: 10s
|
interval: 5s
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
retries: 3
|
retries: 3
|
||||||
start_period: 10s
|
start_period: 10s
|
||||||
@@ -180,10 +180,10 @@ services:
|
|||||||
test:
|
test:
|
||||||
- CMD-SHELL
|
- CMD-SHELL
|
||||||
- node -e "require('http').get('http://localhost:3001/health', r => process.exit(r.statusCode===200?0:1)).on('error', () => process.exit(1))"
|
- node -e "require('http').get('http://localhost:3001/health', r => process.exit(r.statusCode===200?0:1)).on('error', () => process.exit(1))"
|
||||||
interval: 10s
|
interval: 5s
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
retries: 3
|
retries: 5
|
||||||
start_period: 30s
|
start_period: 60s
|
||||||
deploy:
|
deploy:
|
||||||
resources:
|
resources:
|
||||||
limits:
|
limits:
|
||||||
|
|||||||
@@ -29,10 +29,10 @@ services:
|
|||||||
backend:
|
backend:
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD", "traefik", "healthcheck"]
|
test: ["CMD", "traefik", "healthcheck"]
|
||||||
interval: 30s
|
interval: 5s
|
||||||
timeout: 10s
|
timeout: 5s
|
||||||
retries: 3
|
retries: 3
|
||||||
start_period: 20s
|
start_period: 10s
|
||||||
labels:
|
labels:
|
||||||
- "traefik.enable=true"
|
- "traefik.enable=true"
|
||||||
- "traefik.http.routers.traefik-dashboard.rule=Host(`traefik.motovaultpro.local`)"
|
- "traefik.http.routers.traefik-dashboard.rule=Host(`traefik.motovaultpro.local`)"
|
||||||
@@ -76,10 +76,10 @@ services:
|
|||||||
- mvp-backend
|
- mvp-backend
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD-SHELL", "curl -sf http://localhost:3000 || exit 1"]
|
test: ["CMD-SHELL", "curl -sf http://localhost:3000 || exit 1"]
|
||||||
interval: 30s
|
interval: 5s
|
||||||
timeout: 10s
|
timeout: 5s
|
||||||
retries: 3
|
retries: 3
|
||||||
start_period: 20s
|
start_period: 10s
|
||||||
labels:
|
labels:
|
||||||
- "traefik.enable=true"
|
- "traefik.enable=true"
|
||||||
- traefik.docker.network=motovaultpro_frontend
|
- traefik.docker.network=motovaultpro_frontend
|
||||||
@@ -148,10 +148,10 @@ services:
|
|||||||
test:
|
test:
|
||||||
- CMD-SHELL
|
- CMD-SHELL
|
||||||
- node -e "require('http').get('http://localhost:3001/health', r => process.exit(r.statusCode===200?0:1)).on('error', () => process.exit(1))"
|
- node -e "require('http').get('http://localhost:3001/health', r => process.exit(r.statusCode===200?0:1)).on('error', () => process.exit(1))"
|
||||||
interval: 30s
|
interval: 5s
|
||||||
timeout: 10s
|
timeout: 5s
|
||||||
retries: 5
|
retries: 5
|
||||||
start_period: 180s
|
start_period: 60s
|
||||||
labels:
|
labels:
|
||||||
- "traefik.enable=true"
|
- "traefik.enable=true"
|
||||||
- "traefik.docker.network=motovaultpro_backend"
|
- "traefik.docker.network=motovaultpro_backend"
|
||||||
@@ -198,10 +198,10 @@ services:
|
|||||||
- mvp-redis
|
- mvp-redis
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
|
test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
|
||||||
interval: 30s
|
interval: 5s
|
||||||
timeout: 10s
|
timeout: 5s
|
||||||
retries: 3
|
retries: 3
|
||||||
start_period: 30s
|
start_period: 15s
|
||||||
logging:
|
logging:
|
||||||
driver: json-file
|
driver: json-file
|
||||||
options:
|
options:
|
||||||
@@ -230,10 +230,10 @@ services:
|
|||||||
- "5432:5432" # Development access only
|
- "5432:5432" # Development access only
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD-SHELL", "pg_isready -U postgres"]
|
test: ["CMD-SHELL", "pg_isready -U postgres"]
|
||||||
interval: 10s
|
interval: 5s
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
retries: 5
|
retries: 5
|
||||||
start_period: 30s
|
start_period: 15s
|
||||||
logging:
|
logging:
|
||||||
driver: json-file
|
driver: json-file
|
||||||
options:
|
options:
|
||||||
@@ -254,9 +254,10 @@ services:
|
|||||||
- "6379:6379" # Development access only
|
- "6379:6379" # Development access only
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD", "redis-cli", "ping"]
|
test: ["CMD", "redis-cli", "ping"]
|
||||||
interval: 10s
|
interval: 5s
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
retries: 5
|
retries: 5
|
||||||
|
start_period: 5s
|
||||||
logging:
|
logging:
|
||||||
driver: json-file
|
driver: json-file
|
||||||
options:
|
options:
|
||||||
|
|||||||
Reference in New Issue
Block a user