Files
motovaultpro/.gitea/workflows/production.yaml
Eric Gullickson c6b99ab29a
All checks were successful
Deploy to Staging / Build Images (push) Successful in 1m34s
Deploy to Staging / Deploy to Staging (push) Successful in 23s
Deploy to Staging / Verify Staging (push) Successful in 2m36s
Deploy to Staging / Notify Staging Ready (push) Successful in 8s
Deploy to Staging / Notify Staging Failure (push) Has been skipped
fix: Postgres Fixes for Prod
2026-02-08 20:57:49 -06:00

500 lines
20 KiB
YAML

# MotoVaultPro Production Deployment Workflow
# Manual trigger only - run after verifying staging
# Blue-green deployment with auto-rollback
#
# Optimization: every job uses a sparse, shallow checkout (fetch-depth: 1).
# deploy-prod checks out scripts/, config/, and the compose files and syncs
# them to $DEPLOY_PATH; the rollback and notify jobs check out scripts/ only.
name: Deploy to Production
run-name: Production Deploy - ${{ inputs.image_tag || 'latest' }}

# Manual trigger only; the operator may optionally pin an image tag.
on:
  workflow_dispatch:
    inputs:
      image_tag:
        description: "Image tag to deploy (defaults to latest)"
        required: false
        default: "latest"

# Workflow-level environment, visible to every job and step.
env:
  # Container registry used for docker login and image references.
  REGISTRY: git.motovaultpro.com
  # Directory on the production host that holds compose files, config, and scripts.
  DEPLOY_PATH: /opt/motovaultpro
  # Compose files, always passed together to `docker compose -f`.
  BASE_COMPOSE_FILE: docker-compose.yml
  COMPOSE_BLUE_GREEN: docker-compose.blue-green.yml
  COMPOSE_PROD: docker-compose.prod.yml
  # Passed as the second argument to scripts/ci/health-check.sh.
  HEALTH_CHECK_TIMEOUT: "240"
  # Passed to scripts/ci/generate-log-config.sh.
  LOG_LEVEL: INFO

jobs:
# ============================================
# VALIDATE - Determine target stack
# ============================================
validate:
  # Checks host prerequisites, resolves the image references to deploy, and
  # picks the inactive blue/green stack as the deployment target.
  name: Validate Prerequisites
  runs-on: prod
  outputs:
    target_stack: ${{ steps.determine-stack.outputs.target_stack }}
    backend_image: ${{ steps.set-images.outputs.backend_image }}
    frontend_image: ${{ steps.set-images.outputs.frontend_image }}
    ocr_image: ${{ steps.set-images.outputs.ocr_image }}
  steps:
    - name: Check Docker availability
      run: |
        docker info > /dev/null 2>&1 || { echo "ERROR - Docker not accessible"; exit 1; }
        docker compose version > /dev/null 2>&1 || { echo "ERROR - Docker Compose not available"; exit 1; }

    - name: Check deployment path
      run: |
        test -d "$DEPLOY_PATH" || { echo "ERROR - DEPLOY_PATH not found"; exit 1; }

    - name: Login to Gitea Container Registry
      # Credentials go through the environment instead of being spliced into
      # the script text, so quotes, `$`, or backticks in a secret cannot break
      # or alter the command.
      env:
        REGISTRY_USER: ${{ secrets.REGISTRY_USER }}
        REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }}
      run: |
        printf '%s\n' "$REGISTRY_PASSWORD" | docker login -u "$REGISTRY_USER" --password-stdin "$REGISTRY"

    - name: Set image tags
      id: set-images
      # Same fallback as run-name: an empty input means "latest". The tag is
      # passed via env so user-supplied text is never evaluated as shell.
      env:
        INPUT_IMAGE_TAG: ${{ inputs.image_tag || 'latest' }}
      run: |
        {
          echo "backend_image=$REGISTRY/egullickson/backend:$INPUT_IMAGE_TAG"
          echo "frontend_image=$REGISTRY/egullickson/frontend:$INPUT_IMAGE_TAG"
          echo "ocr_image=$REGISTRY/egullickson/ocr:$INPUT_IMAGE_TAG"
        } >> "$GITHUB_OUTPUT"

    - name: Determine target stack
      id: determine-stack
      run: |
        STATE_FILE="$DEPLOY_PATH/config/deployment/state.json"

        # First deployment: no recorded state, so start with GREEN.
        if [ ! -f "$STATE_FILE" ]; then
          echo "target_stack=green" >> "$GITHUB_OUTPUT"
          echo "No state file found, defaulting to GREEN stack"
          exit 0
        fi

        # A state file exists, so the active stack must be read from it.
        # Guessing GREEN here could force-recreate the stack serving traffic.
        if ! command -v jq > /dev/null 2>&1; then
          echo "ERROR - jq is required to read $STATE_FILE"
          exit 1
        fi

        ACTIVE_STACK=$(jq -r '.active_stack // "blue"' "$STATE_FILE")
        if [ "$ACTIVE_STACK" = "blue" ]; then
          echo "target_stack=green" >> "$GITHUB_OUTPUT"
          echo "Deploying to GREEN stack (BLUE is currently active)"
        else
          echo "target_stack=blue" >> "$GITHUB_OUTPUT"
          echo "Deploying to BLUE stack (GREEN is currently active)"
        fi
# ============================================
# DEPLOY PROD - Blue-green deployment
# ============================================
deploy-prod:
  # Syncs repo files to the host, starts shared services, brings up the target
  # (inactive) stack with the new images, health-checks it, then switches
  # Traefik traffic to it and records the deployment in state.json.
  name: Deploy to Production
  runs-on: prod
  needs: validate
  env:
    TARGET_STACK: ${{ needs.validate.outputs.target_stack }}
    BACKEND_IMAGE: ${{ needs.validate.outputs.backend_image }}
    FRONTEND_IMAGE: ${{ needs.validate.outputs.frontend_image }}
    OCR_IMAGE: ${{ needs.validate.outputs.ocr_image }}
  steps:
    - name: Checkout scripts, config, and compose files
      uses: actions/checkout@v4
      with:
        sparse-checkout: |
          scripts/
          config/
          docker-compose.yml
          docker-compose.blue-green.yml
          docker-compose.prod.yml
        sparse-checkout-cone-mode: false
        fetch-depth: 1

    - name: Sync config, scripts, and compose files to deploy path
      # NOTE(review): --delete removes anything under $DEPLOY_PATH/config that
      # is not in the repository. Confirm config/deployment/state.json and the
      # Traefik dynamic config are meant to be replaced/removed by this sync.
      run: |
        rsync -av --delete "$GITHUB_WORKSPACE/config/" "$DEPLOY_PATH/config/"
        rsync -av --delete "$GITHUB_WORKSPACE/scripts/" "$DEPLOY_PATH/scripts/"
        cp "$GITHUB_WORKSPACE/docker-compose.yml" "$DEPLOY_PATH/"
        cp "$GITHUB_WORKSPACE/docker-compose.blue-green.yml" "$DEPLOY_PATH/"
        cp "$GITHUB_WORKSPACE/docker-compose.prod.yml" "$DEPLOY_PATH/"

    - name: Generate logging configuration
      run: |
        cd "$DEPLOY_PATH"
        chmod +x scripts/ci/generate-log-config.sh
        ./scripts/ci/generate-log-config.sh "$LOG_LEVEL"

    - name: Login to registry
      # Credentials are passed via env rather than interpolated into the
      # script, so special characters in a secret cannot break the command.
      env:
        REGISTRY_USER: ${{ secrets.REGISTRY_USER }}
        REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }}
      run: |
        printf '%s\n' "$REGISTRY_PASSWORD" | docker login -u "$REGISTRY_USER" --password-stdin "$REGISTRY"

    - name: Inject secrets
      run: |
        cd "$DEPLOY_PATH"
        chmod +x scripts/inject-secrets.sh
        SECRETS_DIR="$DEPLOY_PATH/secrets/app" ./scripts/inject-secrets.sh
      env:
        POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }}
        AUTH0_CLIENT_SECRET: ${{ secrets.AUTH0_CLIENT_SECRET }}
        AUTH0_MANAGEMENT_CLIENT_ID: ${{ secrets.AUTH0_MANAGEMENT_CLIENT_ID }}
        AUTH0_MANAGEMENT_CLIENT_SECRET: ${{ secrets.AUTH0_MANAGEMENT_CLIENT_SECRET }}
        GOOGLE_MAPS_API_KEY: ${{ secrets.GOOGLE_MAPS_API_KEY }}
        GOOGLE_MAPS_MAP_ID: ${{ secrets.GOOGLE_MAPS_MAP_ID }}
        CF_DNS_API_TOKEN: ${{ secrets.CF_DNS_API_TOKEN }}
        RESEND_API_KEY: ${{ secrets.RESEND_API_KEY }}
        STRIPE_SECRET_KEY: ${{ secrets.STRIPE_SECRET_KEY }}
        STRIPE_WEBHOOK_SECRET: ${{ secrets.STRIPE_WEBHOOK_SECRET }}

    - name: Initialize data directories
      run: |
        cd "$DEPLOY_PATH"
        sudo mkdir -p data/backups data/documents data/traefik
        sudo chown -R 1001:1001 data/backups data/documents
        sudo chmod 755 data/backups data/documents
        # Traefik acme.json requires 600 permissions
        if [ ! -f data/traefik/acme.json ]; then
          sudo touch data/traefik/acme.json
        fi
        sudo chmod 600 data/traefik/acme.json

    - name: Pull new images
      run: |
        docker pull "$BACKEND_IMAGE"
        docker pull "$FRONTEND_IMAGE"
        docker pull "$OCR_IMAGE"

    - name: Record expected image IDs
      id: expected-images
      run: |
        # Get the image IDs we just pulled - these are what containers should use
        FRONTEND_ID=$(docker images --format '{{.ID}}' "$FRONTEND_IMAGE" | head -1)
        BACKEND_ID=$(docker images --format '{{.ID}}' "$BACKEND_IMAGE" | head -1)
        echo "Expected frontend image ID: $FRONTEND_ID"
        echo "Expected backend image ID: $BACKEND_ID"
        echo "frontend_id=$FRONTEND_ID" >> "$GITHUB_OUTPUT"
        echo "backend_id=$BACKEND_ID" >> "$GITHUB_OUTPUT"

    - name: Start shared services
      run: |
        cd "$DEPLOY_PATH"
        # Start shared infrastructure services (database, cache, logging)
        # --no-recreate prevents restarting postgres/redis when config files change
        # These must persist across blue-green deployments to avoid data service disruption
        docker compose -f "$BASE_COMPOSE_FILE" -f "$COMPOSE_BLUE_GREEN" -f "$COMPOSE_PROD" up -d --no-recreate \
          mvp-postgres mvp-redis mvp-loki mvp-alloy mvp-grafana

    - name: Wait for shared services health
      run: |
        echo "Waiting for PostgreSQL and Redis to be healthy..."
        for service in mvp-postgres mvp-redis; do
          # 24 attempts x 5 seconds = 2 minutes max per service
          for i in $(seq 1 24); do
            health=$(docker inspect --format='{{.State.Health.Status}}' "$service" 2>/dev/null || echo "unknown")
            if [ "$health" = "healthy" ]; then
              echo "OK: $service is healthy"
              break
            fi
            if [ "$i" -eq 24 ]; then
              echo "ERROR: $service health check timed out (status: $health)"
              docker logs "$service" --tail 50 2>/dev/null || true
              exit 1
            fi
            echo "Waiting for $service... (attempt $i/24, status: $health)"
            sleep 5
          done
        done
        echo "All shared services healthy"

    - name: Start target stack
      run: |
        cd "$DEPLOY_PATH"
        export BACKEND_IMAGE="$BACKEND_IMAGE"
        export FRONTEND_IMAGE="$FRONTEND_IMAGE"
        export OCR_IMAGE="$OCR_IMAGE"
        # --force-recreate ensures containers are recreated even if image tag is same
        # This prevents stale container content when image digest changes
        # Start shared OCR service and target stack
        docker compose -f "$BASE_COMPOSE_FILE" -f "$COMPOSE_BLUE_GREEN" -f "$COMPOSE_PROD" up -d --force-recreate \
          mvp-ocr "mvp-frontend-$TARGET_STACK" "mvp-backend-$TARGET_STACK"

    - name: Wait for stack initialization
      run: sleep 5

    - name: Verify container images
      # Expected IDs come from the "Record expected image IDs" step.
      env:
        EXPECTED_FRONTEND: ${{ steps.expected-images.outputs.frontend_id }}
        EXPECTED_BACKEND: ${{ steps.expected-images.outputs.backend_id }}
      run: |
        # Verify containers are running the expected images.
        # The running ID is cut to 12 characters to match `docker images` output.
        RUNNING_FRONTEND=$(docker inspect --format='{{.Image}}' "mvp-frontend-$TARGET_STACK" | sed 's/sha256://' | cut -c1-12)
        RUNNING_BACKEND=$(docker inspect --format='{{.Image}}' "mvp-backend-$TARGET_STACK" | sed 's/sha256://' | cut -c1-12)
        echo "Frontend - Expected: $EXPECTED_FRONTEND, Running: $RUNNING_FRONTEND"
        echo "Backend - Expected: $EXPECTED_BACKEND, Running: $RUNNING_BACKEND"
        if [[ "$RUNNING_FRONTEND" != "$EXPECTED_FRONTEND" ]]; then
          echo "ERROR: Frontend container not using expected image!"
          echo "Container may be stale. Force recreate should have prevented this."
          exit 1
        fi
        if [[ "$RUNNING_BACKEND" != "$EXPECTED_BACKEND" ]]; then
          echo "ERROR: Backend container not using expected image!"
          exit 1
        fi
        echo "OK: All containers using correct images"

    - name: Run health check
      run: |
        chmod +x "$GITHUB_WORKSPACE/scripts/ci/health-check.sh"
        DEPLOY_PATH="$DEPLOY_PATH" "$GITHUB_WORKSPACE/scripts/ci/health-check.sh" "$TARGET_STACK" "$HEALTH_CHECK_TIMEOUT"

    - name: Start Traefik
      run: |
        cd "$DEPLOY_PATH"
        docker compose -f "$BASE_COMPOSE_FILE" -f "$COMPOSE_BLUE_GREEN" -f "$COMPOSE_PROD" up -d mvp-traefik

    - name: Wait for Traefik
      run: |
        echo "Waiting for Traefik to be healthy..."
        # grep -x matches the whole line: a plain `grep healthy` would also
        # accept the status "unhealthy".
        timeout 30 bash -c "until docker inspect --format='{{.State.Health.Status}}' mvp-traefik 2>/dev/null | grep -qx healthy; do sleep 2; done" || {
          echo "Traefik health check timed out, checking status..."
          docker inspect --format='{{.State.Status}}' mvp-traefik
          docker logs mvp-traefik --tail 20
          exit 1
        }
        echo "Traefik is healthy"

    - name: Switch traffic
      run: |
        chmod +x "$GITHUB_WORKSPACE/scripts/ci/switch-traffic.sh"
        # DEPLOY_PATH ensures script modifies config at /opt/motovaultpro, not checkout dir
        DEPLOY_PATH="$DEPLOY_PATH" "$GITHUB_WORKSPACE/scripts/ci/switch-traffic.sh" "$TARGET_STACK" instant

    - name: Update deployment state
      # Same fallback as run-name: an empty input means "latest". Passed via
      # env so the user-supplied tag is never evaluated as shell.
      env:
        INPUT_IMAGE_TAG: ${{ inputs.image_tag || 'latest' }}
      run: |
        cd "$DEPLOY_PATH"
        STATE_FILE="config/deployment/state.json"
        if [ -f "$STATE_FILE" ] && command -v jq &> /dev/null; then
          TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
          # Write to a temp file first so a jq failure cannot truncate the state file.
          jq --arg commit "$INPUT_IMAGE_TAG" \
            --arg ts "$TIMESTAMP" \
            '.last_deployment = $ts | .last_deployment_commit = $commit | .last_deployment_status = "success" | .rollback_available = true' \
            "$STATE_FILE" > "${STATE_FILE}.tmp" && mv "${STATE_FILE}.tmp" "$STATE_FILE"
        fi
# ============================================
# VERIFY PROD - External health check
# ============================================
verify-prod:
  # Post-switch verification: container state, Docker healthchecks, the
  # backend's internal /health endpoint, the public /api/health endpoint, and
  # the Traefik weight recorded for the target stack.
  name: Verify Production
  runs-on: prod
  needs: [validate, deploy-prod]
  env:
    TARGET_STACK: ${{ needs.validate.outputs.target_stack }}
  steps:
    - name: Wait for routing propagation
      run: sleep 5

    - name: Check container status and health
      run: |
        for service in "mvp-frontend-$TARGET_STACK" "mvp-backend-$TARGET_STACK" mvp-ocr; do
          status=$(docker inspect --format='{{.State.Status}}' "$service" 2>/dev/null || echo "not found")
          if [ "$status" != "running" ]; then
            echo "ERROR: $service is not running (status: $status)"
            docker logs "$service" --tail 50 2>/dev/null || true
            exit 1
          fi
          echo "OK: $service is running"
        done
        # Wait for Docker healthchecks to complete (services with healthcheck defined)
        echo ""
        echo "Waiting for Docker healthchecks..."
        for service in "mvp-frontend-$TARGET_STACK" "mvp-backend-$TARGET_STACK" mvp-ocr; do
          # Check if service has a healthcheck defined
          has_healthcheck=$(docker inspect --format='{{if .Config.Healthcheck}}true{{else}}false{{end}}' "$service" 2>/dev/null || echo "false")
          if [ "$has_healthcheck" = "true" ]; then
            # 48 attempts x 5 seconds = 4 minutes max wait (backend with fresh migrations can take ~3 min)
            for i in $(seq 1 48); do
              health=$(docker inspect --format='{{.State.Health.Status}}' "$service" 2>/dev/null || echo "unknown")
              if [ "$health" = "healthy" ]; then
                echo "OK: $service is healthy"
                break
              fi
              # Don't fail immediately on unhealthy - container may still be starting up
              # and can recover. Let the timeout handle truly broken containers.
              if [ "$i" -eq 48 ]; then
                echo "ERROR: $service health check timed out (status: $health)"
                docker logs "$service" --tail 100 2>/dev/null || true
                exit 1
              fi
              echo "Waiting for $service healthcheck... (attempt $i/48, status: $health)"
              sleep 5
            done
          else
            echo "SKIP: $service has no healthcheck defined"
          fi
        done

    - name: Wait for backend health
      run: |
        # 12 attempts x 5 seconds = 1 minute max; curl runs inside the backend container.
        for i in $(seq 1 12); do
          if docker exec "mvp-backend-$TARGET_STACK" curl -sf http://localhost:3001/health > /dev/null 2>&1; then
            echo "OK: Backend health check passed"
            exit 0
          fi
          if [ "$i" -eq 12 ]; then
            echo "ERROR: Backend health check failed after 12 attempts"
            docker logs "mvp-backend-$TARGET_STACK" --tail 100
            exit 1
          fi
          echo "Attempt $i/12: Backend not ready, waiting 5s..."
          sleep 5
        done

    - name: External health check
      run: |
        REQUIRED_FEATURES='["admin","auth","onboarding","vehicles","documents","fuel-logs","stations","maintenance","platform","notifications","user-profile","user-preferences","user-export"]'
        for i in $(seq 1 12); do
          RESPONSE=$(curl -sf https://motovaultpro.com/api/health 2>/dev/null) || {
            echo "Attempt $i/12: Connection failed, waiting 5s..."
            sleep 5
            continue
          }
          # Check status is "healthy". A body that jq cannot parse counts as
          # not-yet-healthy and is retried instead of aborting the step.
          STATUS=$(echo "$RESPONSE" | jq -r '.status') || STATUS="invalid-json"
          if [ "$STATUS" != "healthy" ]; then
            echo "Attempt $i/12: Status is '$STATUS', not 'healthy'. Waiting 5s..."
            sleep 5
            continue
          fi
          # Check all required features are present. A missing .features key is
          # treated as an empty list so every required feature is reported.
          MISSING=$(echo "$RESPONSE" | jq -r --argjson required "$REQUIRED_FEATURES" '
            $required - (.features // []) | if length > 0 then . else empty end | @json
          ') || MISSING="(could not parse features)"
          if [ -n "$MISSING" ]; then
            echo "Attempt $i/12: Missing features: $MISSING. Waiting 5s..."
            sleep 5
            continue
          fi
          FEATURE_COUNT=$(echo "$RESPONSE" | jq '.features | length')
          echo "OK: Production health check passed - status: healthy, features: $FEATURE_COUNT"
          exit 0
        done
        echo "ERROR: Production health check failed after 12 attempts"
        echo "Last response: $RESPONSE"
        exit 1

    - name: Verify container status
      run: |
        # NOTE(review): unlike the earlier healthcheck wait, this step requires
        # health == "healthy", so a container without a healthcheck would fail
        # here - confirm frontend and backend both define one.
        for service in "mvp-frontend-$TARGET_STACK" "mvp-backend-$TARGET_STACK"; do
          status=$(docker inspect --format='{{.State.Status}}' "$service" 2>/dev/null || echo "not found")
          health=$(docker inspect --format='{{.State.Health.Status}}' "$service" 2>/dev/null || echo "unknown")
          if [ "$status" != "running" ] || [ "$health" != "healthy" ]; then
            echo "ERROR: $service is not healthy (status: $status, health: $health)"
            docker logs "$service" --tail 50 2>/dev/null || true
            exit 1
          fi
          echo "OK: $service is running and healthy"
        done

    - name: Validate Traefik routing weights
      run: |
        # Verify traffic has actually switched to the new stack
        BLUE_GREEN_CONFIG="$DEPLOY_PATH/config/traefik/dynamic/blue-green.yml"
        # The target stack must carry all traffic regardless of its colour.
        EXPECTED_TARGET_WEIGHT=100
        if [[ "$TARGET_STACK" == "green" ]]; then
          TARGET_SVC="mvp-frontend-green-svc"
        else
          TARGET_SVC="mvp-frontend-blue-svc"
        fi
        # `|| true` keeps a non-matching grep from aborting the step before
        # the error message below is printed.
        ACTUAL_WEIGHT=$(grep -A1 "$TARGET_SVC" "$BLUE_GREEN_CONFIG" | grep weight | grep -oE '[0-9]+' | head -1) || true
        if [[ "$ACTUAL_WEIGHT" != "$EXPECTED_TARGET_WEIGHT" ]]; then
          echo "ERROR: Traffic not routed to $TARGET_STACK stack!"
          echo "Expected weight for $TARGET_SVC: $EXPECTED_TARGET_WEIGHT, Actual: $ACTUAL_WEIGHT"
          grep -A2 weight "$BLUE_GREEN_CONFIG" || true
          exit 1
        fi
        echo "OK: Traffic correctly routed to $TARGET_STACK (weight: $ACTUAL_WEIGHT)"
# ============================================
# ROLLBACK - Auto-rollback on failure
# ============================================
rollback:
  # Runs scripts/ci/auto-rollback.sh and marks state.json as rolled back when
  # the deployment or its verification fails.
  name: Auto Rollback
  runs-on: prod
  needs: [validate, deploy-prod, verify-prod]
  # failure() is also true when only `validate` failed, in which case
  # deploy-prod never ran and there is nothing to roll back. Require a
  # successful validate so a rollback only follows an attempted deployment.
  if: failure() && needs.validate.result == 'success'
  steps:
    - name: Checkout scripts
      uses: actions/checkout@v4
      with:
        sparse-checkout: scripts/
        sparse-checkout-cone-mode: true
        fetch-depth: 1

    - name: Execute rollback
      run: |
        chmod +x "$GITHUB_WORKSPACE/scripts/ci/auto-rollback.sh"
        DEPLOY_PATH="$DEPLOY_PATH" "$GITHUB_WORKSPACE/scripts/ci/auto-rollback.sh" "Production verification failed - automatic rollback"

    - name: Update state
      run: |
        cd "$DEPLOY_PATH"
        STATE_FILE="config/deployment/state.json"
        if [ -f "$STATE_FILE" ] && command -v jq &> /dev/null; then
          # Write to a temp file first so a jq failure cannot truncate the state file.
          jq '.last_deployment_status = "rolled_back"' "$STATE_FILE" > "${STATE_FILE}.tmp" && mv "${STATE_FILE}.tmp" "$STATE_FILE"
        fi
# ============================================
# NOTIFY SUCCESS
# ============================================
notify-success:
  # Sends the success notification via scripts/ci/notify.sh once validation
  # and production verification have both succeeded.
  name: Notify Success
  runs-on: prod
  needs: [validate, verify-prod]
  if: success()
  steps:
    - name: Checkout scripts only
      uses: actions/checkout@v4
      with:
        sparse-checkout: scripts/
        sparse-checkout-cone-mode: true
        fetch-depth: 1

    - name: Send success notification
      env:
        DEPLOY_NOTIFY_EMAIL: ${{ vars.DEPLOY_NOTIFY_EMAIL }}
        RESEND_API_KEY: ${{ secrets.RESEND_API_KEY }}
        # Same fallback as run-name. Passed via env and quoted below so the
        # user-supplied tag is one argument and is never evaluated as shell.
        INPUT_IMAGE_TAG: ${{ inputs.image_tag || 'latest' }}
      run: |
        chmod +x "$GITHUB_WORKSPACE/scripts/ci/notify.sh"
        "$GITHUB_WORKSPACE/scripts/ci/notify.sh" success "Production deployment successful - $INPUT_IMAGE_TAG is now live" "$INPUT_IMAGE_TAG"
# ============================================
# NOTIFY FAILURE
# ============================================
notify-failure:
  # Sends the failure notification via scripts/ci/notify.sh when any of the
  # jobs listed in `needs` has failed.
  name: Notify Failure
  runs-on: prod
  needs: [validate, deploy-prod, verify-prod, rollback]
  if: failure()
  steps:
    - name: Checkout scripts only
      uses: actions/checkout@v4
      with:
        sparse-checkout: scripts/
        sparse-checkout-cone-mode: true
        fetch-depth: 1

    - name: Send failure notification
      env:
        DEPLOY_NOTIFY_EMAIL: ${{ vars.DEPLOY_NOTIFY_EMAIL }}
        RESEND_API_KEY: ${{ secrets.RESEND_API_KEY }}
        # Same fallback as run-name. Passed via env and quoted below so the
        # user-supplied tag is one argument and is never evaluated as shell.
        INPUT_IMAGE_TAG: ${{ inputs.image_tag || 'latest' }}
      run: |
        chmod +x "$GITHUB_WORKSPACE/scripts/ci/notify.sh"
        "$GITHUB_WORKSPACE/scripts/ci/notify.sh" failure "Production deployment failed for $INPUT_IMAGE_TAG" "$INPUT_IMAGE_TAG"