chore: unify health check timers across compose and workflows
Some checks failed
Deploy to Staging / Build Images (push) Successful in 32s
Deploy to Staging / Deploy to Staging (push) Successful in 21s
Deploy to Staging / Verify Staging (push) Failing after 1m18s
Deploy to Staging / Notify Staging Ready (push) Has been skipped
Deploy to Staging / Notify Staging Failure (push) Successful in 7s

Docker Compose health checks (all services):
- interval: 5s (was 10-30s)
- timeout: 5s (unified)
- backend start_period: 60s (was 30-180s)

Gitea workflow health check loops:
- Docker healthcheck: 48 attempts x 5s = 4 min (was 24 x 10s)
- Backend health: 12 attempts x 5s = 60s (was 6 x 10s in staging, 24 x 5s in production)
- External health: 12 attempts x 5s = 60s (was 6 x 10s in staging, 24 attempts with 5-10s waits in production)
- Initial waits: 5s (was 10-15s)

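Same total wait times for the Docker healthcheck loop and the staging health loops, with faster detection of success/failure. Note: the production backend/external health loops and the initial waits now have a shorter total wait than before.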

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Eric Gullickson
2026-02-03 21:10:47 -06:00
parent 88db25019f
commit 26196d34ea
4 changed files with 60 additions and 59 deletions

View File

@@ -174,7 +174,7 @@ jobs:
mvp-ocr mvp-frontend-$TARGET_STACK mvp-backend-$TARGET_STACK
- name: Wait for stack initialization
run: sleep 10
run: sleep 5
- name: Verify container images
run: |
@@ -272,8 +272,8 @@ jobs:
# Check if service has a healthcheck defined
has_healthcheck=$(docker inspect --format='{{if .Config.Healthcheck}}true{{else}}false{{end}}' $service 2>/dev/null || echo "false")
if [ "$has_healthcheck" = "true" ]; then
# 24 attempts x 10 seconds = 4 minutes max wait (backend with fresh migrations can take ~3 min)
for i in $(seq 1 24); do
# 48 attempts x 5 seconds = 4 minutes max wait (backend with fresh migrations can take ~3 min)
for i in $(seq 1 48); do
health=$(docker inspect --format='{{.State.Health.Status}}' $service 2>/dev/null || echo "unknown")
if [ "$health" = "healthy" ]; then
echo "OK: $service is healthy"
@@ -283,13 +283,13 @@ jobs:
docker logs $service --tail 50 2>/dev/null || true
exit 1
fi
if [ $i -eq 24 ]; then
if [ $i -eq 48 ]; then
echo "ERROR: $service health check timed out (status: $health)"
docker logs $service --tail 50 2>/dev/null || true
exit 1
fi
echo "Waiting for $service healthcheck... (attempt $i/24, status: $health)"
sleep 10
echo "Waiting for $service healthcheck... (attempt $i/48, status: $health)"
sleep 5
done
else
echo "SKIP: $service has no healthcheck defined"
@@ -298,17 +298,17 @@ jobs:
- name: Wait for backend health
run: |
for i in $(seq 1 24); do
for i in $(seq 1 12); do
if docker exec mvp-backend-$TARGET_STACK curl -sf http://localhost:3001/health > /dev/null 2>&1; then
echo "OK: Backend health check passed"
exit 0
fi
if [ $i -eq 24 ]; then
echo "ERROR: Backend health check failed after 6 attempts"
if [ $i -eq 12 ]; then
echo "ERROR: Backend health check failed after 12 attempts"
docker logs mvp-backend-$TARGET_STACK --tail 100
exit 1
fi
echo "Attempt $i/24: Backend not ready, waiting 5s..."
echo "Attempt $i/12: Backend not ready, waiting 5s..."
sleep 5
done
@@ -316,9 +316,9 @@ jobs:
run: |
REQUIRED_FEATURES='["admin","auth","onboarding","vehicles","documents","fuel-logs","stations","maintenance","platform","notifications","user-profile","user-preferences","user-export"]'
for i in $(seq 1 24); do
for i in $(seq 1 12); do
RESPONSE=$(curl -sf https://motovaultpro.com/api/health 2>/dev/null) || {
echo "Attempt $i/6: Connection failed, waiting 5s..."
echo "Attempt $i/12: Connection failed, waiting 5s..."
sleep 5
continue
}
@@ -326,8 +326,8 @@ jobs:
# Check status is "healthy"
STATUS=$(echo "$RESPONSE" | jq -r '.status')
if [ "$STATUS" != "healthy" ]; then
echo "Attempt $i/6: Status is '$STATUS', not 'healthy'. Waiting 10s..."
sleep 10
echo "Attempt $i/12: Status is '$STATUS', not 'healthy'. Waiting 5s..."
sleep 5
continue
fi
@@ -337,8 +337,8 @@ jobs:
')
if [ -n "$MISSING" ]; then
echo "Attempt $i/6: Missing features: $MISSING. Waiting 10s..."
sleep 10
echo "Attempt $i/12: Missing features: $MISSING. Waiting 5s..."
sleep 5
continue
fi
@@ -347,7 +347,7 @@ jobs:
exit 0
done
echo "ERROR: Production health check failed after 6 attempts"
echo "ERROR: Production health check failed after 12 attempts"
echo "Last response: $RESPONSE"
exit 1

View File

@@ -173,7 +173,7 @@ jobs:
docker compose -f $COMPOSE_FILE -f $COMPOSE_STAGING up -d
- name: Wait for services
run: sleep 15
run: sleep 5
# ============================================
# VERIFY STAGING - Health checks
@@ -205,8 +205,8 @@ jobs:
# Check if service has a healthcheck defined
has_healthcheck=$(docker inspect --format='{{if .Config.Healthcheck}}true{{else}}false{{end}}' $service 2>/dev/null || echo "false")
if [ "$has_healthcheck" = "true" ]; then
# 24 attempts x 10 seconds = 4 minutes max wait (backend with fresh migrations can take ~3 min)
for i in $(seq 1 24); do
# 48 attempts x 5 seconds = 4 minutes max wait (backend with fresh migrations can take ~3 min)
for i in $(seq 1 48); do
health=$(docker inspect --format='{{.State.Health.Status}}' $service 2>/dev/null || echo "unknown")
if [ "$health" = "healthy" ]; then
echo "OK: $service is healthy"
@@ -216,13 +216,13 @@ jobs:
docker logs $service --tail 50 2>/dev/null || true
exit 1
fi
if [ $i -eq 24 ]; then
if [ $i -eq 48 ]; then
echo "ERROR: $service health check timed out (status: $health)"
docker logs $service --tail 50 2>/dev/null || true
exit 1
fi
echo "Waiting for $service healthcheck... (attempt $i/24, status: $health)"
sleep 10
echo "Waiting for $service healthcheck... (attempt $i/48, status: $health)"
sleep 5
done
else
echo "SKIP: $service has no healthcheck defined"
@@ -231,36 +231,36 @@ jobs:
- name: Wait for backend health
run: |
for i in 1 2 3 4 5 6; do
for i in $(seq 1 12); do
if docker exec mvp-backend-staging curl -sf http://localhost:3001/health > /dev/null 2>&1; then
echo "OK: Backend health check passed"
exit 0
fi
if [ $i -eq 6 ]; then
echo "ERROR: Backend health check failed after 6 attempts"
if [ $i -eq 12 ]; then
echo "ERROR: Backend health check failed after 12 attempts"
docker logs mvp-backend-staging --tail 100
exit 1
fi
echo "Attempt $i/6: Backend not ready, waiting 10s..."
sleep 10
echo "Attempt $i/12: Backend not ready, waiting 5s..."
sleep 5
done
- name: Check external endpoint
run: |
REQUIRED_FEATURES='["admin","auth","onboarding","vehicles","documents","fuel-logs","stations","maintenance","platform","notifications","user-profile","user-preferences","user-export"]'
for i in 1 2 3 4 5 6; do
for i in $(seq 1 12); do
RESPONSE=$(curl -sf https://staging.motovaultpro.com/api/health 2>/dev/null) || {
echo "Attempt $i/6: Connection failed, waiting 10s..."
sleep 10
echo "Attempt $i/12: Connection failed, waiting 5s..."
sleep 5
continue
}
# Check status is "healthy"
STATUS=$(echo "$RESPONSE" | jq -r '.status')
if [ "$STATUS" != "healthy" ]; then
echo "Attempt $i/6: Status is '$STATUS', not 'healthy'. Waiting 10s..."
sleep 10
echo "Attempt $i/12: Status is '$STATUS', not 'healthy'. Waiting 5s..."
sleep 5
continue
fi
@@ -270,8 +270,8 @@ jobs:
')
if [ -n "$MISSING" ]; then
echo "Attempt $i/6: Missing features: $MISSING. Waiting 10s..."
sleep 10
echo "Attempt $i/12: Missing features: $MISSING. Waiting 5s..."
sleep 5
continue
fi
@@ -280,7 +280,7 @@ jobs:
exit 0
done
echo "ERROR: Staging health check failed after 6 attempts"
echo "ERROR: Staging health check failed after 12 attempts"
echo "Last response: $RESPONSE"
exit 1