From c6b99ab29a80e31c96cfd23a2922cd42d9ff7e02 Mon Sep 17 00:00:00 2001 From: Eric Gullickson <16152721+ericgullickson@users.noreply.github.com> Date: Sun, 8 Feb 2026 20:57:49 -0600 Subject: [PATCH] fix: Postgres Fixes for Prod --- .gitea/workflows/production.yaml | 28 +++++++++++++++++++++++++--- docker-compose.prod.yml | 2 +- docker-compose.yml | 2 +- scripts/ci/health-check.sh | 11 +++++------ 4 files changed, 32 insertions(+), 11 deletions(-) diff --git a/.gitea/workflows/production.yaml b/.gitea/workflows/production.yaml index f8ad6b4..4686fb4 100644 --- a/.gitea/workflows/production.yaml +++ b/.gitea/workflows/production.yaml @@ -22,7 +22,7 @@ env: BASE_COMPOSE_FILE: docker-compose.yml COMPOSE_BLUE_GREEN: docker-compose.blue-green.yml COMPOSE_PROD: docker-compose.prod.yml - HEALTH_CHECK_TIMEOUT: "60" + HEALTH_CHECK_TIMEOUT: "240" LOG_LEVEL: INFO jobs: @@ -169,10 +169,32 @@ jobs: run: | cd "$DEPLOY_PATH" # Start shared infrastructure services (database, cache, logging) - # These persist across blue-green deployments - docker compose -f $BASE_COMPOSE_FILE -f $COMPOSE_BLUE_GREEN -f $COMPOSE_PROD up -d \ + # --no-recreate prevents restarting postgres/redis when config files change + # These must persist across blue-green deployments to avoid data service disruption + docker compose -f $BASE_COMPOSE_FILE -f $COMPOSE_BLUE_GREEN -f $COMPOSE_PROD up -d --no-recreate \ mvp-postgres mvp-redis mvp-loki mvp-alloy mvp-grafana + - name: Wait for shared services health + run: | + echo "Waiting for PostgreSQL and Redis to be healthy..." + for service in mvp-postgres mvp-redis; do + for i in $(seq 1 24); do + health=$(docker inspect --format='{{.State.Health.Status}}' $service 2>/dev/null || echo "unknown") + if [ "$health" = "healthy" ]; then + echo "OK: $service is healthy" + break + fi + if [ $i -eq 24 ]; then + echo "ERROR: $service health check timed out (status: $health)" + docker logs $service --tail 50 2>/dev/null || true + exit 1 + fi + echo "Waiting for $service... (attempt $i/24, status: $health)" + sleep 5 + done + done + echo "All shared services healthy" + - name: Start target stack run: | cd "$DEPLOY_PATH" diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index fb64c09..40c6ca5 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -61,7 +61,7 @@ services: LOG_LEVEL: error POSTGRES_LOG_STATEMENT: none POSTGRES_LOG_MIN_DURATION_STATEMENT: -1 - PGDATA: /var/lib/postgresql/data + PGDATA: /var/lib/postgresql/data/pgdata # Redis - Remove dev ports, production log level mvp-redis: diff --git a/docker-compose.yml b/docker-compose.yml index ae4b8c6..5e9a247 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -233,7 +233,7 @@ services: LOG_LEVEL: debug POSTGRES_LOG_STATEMENT: all POSTGRES_LOG_MIN_DURATION_STATEMENT: 0 - PGDATA: /var/lib/postgresql/data + PGDATA: /var/lib/postgresql/data/pgdata volumes: - mvp_postgres_data:/var/lib/postgresql/data # Secrets (K8s Secrets equivalent) diff --git a/scripts/ci/health-check.sh b/scripts/ci/health-check.sh index fa7b388..d3243fb 100755 --- a/scripts/ci/health-check.sh +++ b/scripts/ci/health-check.sh @@ -92,19 +92,18 @@ wait_for_health() { if [[ $status -eq 0 ]]; then return 0 - elif [[ $status -eq 1 ]]; then - echo " ERROR: Container $container is unhealthy" - docker logs "$container" --tail 20 2>/dev/null || true - return 1 fi - # Still starting, wait + # Both "starting" and "unhealthy" are treated as transient during the wait period. + # Docker can report "unhealthy" briefly during start_period before the next check + # cycle transitions it back. Only the overall timeout should cause failure. sleep 2 elapsed=$((elapsed + 2)) echo " Waiting for $container... (${elapsed}s/${TIMEOUT}s)" done - echo " ERROR: Timeout waiting for $container" + echo " ERROR: Container $container did not become healthy within ${TIMEOUT}s" + docker logs "$container" --tail 20 2>/dev/null || true return 1 }