fix: resolve staging deployment issues with Traefik, Loki, and Alloy (refs #105)
All checks were successful
Deploy to Staging / Build Images (pull_request) Successful in 1m21s
Deploy to Staging / Deploy to Staging (pull_request) Successful in 48s
Deploy to Staging / Verify Staging (pull_request) Successful in 2m37s
Deploy to Staging / Notify Staging Ready (pull_request) Successful in 8s
Deploy to Staging / Notify Staging Failure (pull_request) Has been skipped

- Exclude blue-green.yml from staging Traefik by mounting dynamic-staging/
  directory (only grafana.yml + middleware.yml) instead of dynamic/ which
  contains production-only blue-green routing config
- Disable Loki healthcheck: distroless image has no /bin/sh so CMD-SHELL
  healthchecks cannot execute; Alloy and Grafana verify Loki connectivity
- Fix Alloy healthcheck: replace wget (not in image) with bash /dev/tcp
- Add Grafana staging domain override (logs.staging.motovaultpro.com)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Eric Gullickson
2026-02-06 10:51:00 -06:00
parent 842b0eb945
commit 462d306783
4 changed files with 201 additions and 5 deletions

View File

@@ -0,0 +1,8 @@
# Traefik dynamic configuration (file provider): IP allow-list middleware.
# Router labels refer to it as `grafana-ipwhitelist@file`.
http:
  middlewares:
    # Limits clients to the three RFC 1918 private IPv4 ranges listed below.
    grafana-ipwhitelist:
      ipAllowList:
        sourceRange:
          - "10.0.0.0/8"
          - "172.16.0.0/12"
          - "192.168.0.0/16"

View File

@@ -0,0 +1,173 @@
# Traefik dynamic configuration: reusable HTTP middlewares and the chains
# that compose them. Every entry lives under http.middlewares and is
# addressed by its key name.
http:
  middlewares:
    # Security headers middleware
    # Baseline browser-hardening headers (one-year HSTS with subdomains and
    # preload, frame denial, MIME-sniffing protection) plus a CORS policy.
    # NOTE(review): only the production origins are listed here; confirm
    # whether staging front-end origins must be added for staging use.
    secure-headers:
      headers:
        accessControlAllowMethods:
          - GET
          - OPTIONS
          - PUT
          - POST
          - DELETE
        accessControlAllowOriginList:
          - "https://admin.motovaultpro.com"
          - "https://motovaultpro.com"
        accessControlMaxAge: 100
        addVaryHeader: true
        browserXssFilter: true
        contentTypeNosniff: true
        forceSTSHeader: true
        frameDeny: true
        stsIncludeSubdomains: true
        stsPreload: true
        stsSeconds: 31536000  # 365 days
        # Added to the request forwarded to the backend.
        customRequestHeaders:
          X-Forwarded-Proto: https

    # CORS middleware for API endpoints
    # Credentialed CORS for the two listed origins, allowing the auth,
    # tenant and request-id headers on GET/POST/PUT/DELETE/OPTIONS.
    cors:
      headers:
        accessControlAllowCredentials: true
        accessControlAllowHeaders:
          - "Authorization"
          - "Content-Type"
          - "X-Requested-With"
          - "X-Tenant-ID"
          - "X-Request-Id"
        accessControlAllowMethods:
          - "GET"
          - "POST"
          - "PUT"
          - "DELETE"
          - "OPTIONS"
        accessControlAllowOriginList:
          - "https://admin.motovaultpro.com"
          - "https://motovaultpro.com"
        accessControlMaxAge: 100

    # API authentication middleware
    # Delegates authentication to admin-backend's /auth/verify endpoint.
    api-auth:
      forwardAuth:
        address: "http://admin-backend:3001/auth/verify"
        # Headers copied from the auth response onto the forwarded request.
        authResponseHeaders:
          - "X-Auth-User"
          - "X-Auth-Roles"
          - "X-Tenant-ID"
        # Only these request headers are passed to the auth endpoint.
        authRequestHeaders:
          - "Authorization"
          - "X-Tenant-ID"
        # NOTE(review): trusts X-Forwarded-* headers already on the request;
        # confirm nothing untrusted can reach Traefik with them set.
        trustForwardHeader: true

    # Platform API authentication middleware
    # Same pattern as api-auth, using /auth/verify-platform and API-key or
    # Authorization credentials.
    platform-auth:
      forwardAuth:
        address: "http://admin-backend:3001/auth/verify-platform"
        authResponseHeaders:
          - "X-Service-Name"
          - "X-Auth-Scope"
        authRequestHeaders:
          - "X-API-Key"
          - "Authorization"
        trustForwardHeader: true

    # Rate limiting middleware
    # Average of 50 requests per 1m period, with bursts of up to 100.
    rate-limit:
      rateLimit:
        burst: 100
        average: 50
        period: 1m

    # Request/response size limits
    size-limit:
      buffering:
        maxRequestBodyBytes: 26214400  # 25MB
        maxResponseBodyBytes: 26214400  # 25MB

    # IP whitelist for development (optional)
    # Loopback plus the RFC 1918 private IPv4 ranges.
    local-ips:
      ipAllowList:
        sourceRange:
          - "127.0.0.1/32"
          - "10.0.0.0/8"
          - "172.16.0.0/12"
          - "192.168.0.0/16"

    # Advanced security headers for production
    # Stricter variant of secure-headers: no credentialed CORS, a narrower
    # method list, and explicit Referrer-Policy / Permissions-Policy headers.
    security-headers-strict:
      headers:
        accessControlAllowCredentials: false
        accessControlAllowMethods:
          - GET
          - POST
          - OPTIONS
        accessControlAllowOriginList:
          - "https://admin.motovaultpro.com"
          - "https://motovaultpro.com"
        browserXssFilter: true
        contentTypeNosniff: true
        customRequestHeaders:
          X-Forwarded-Proto: https
        # X-Frame-Options and X-Content-Type-Options repeat what frameDeny
        # and contentTypeNosniff request; kept as written.
        customResponseHeaders:
          X-Frame-Options: DENY
          X-Content-Type-Options: nosniff
          Referrer-Policy: strict-origin-when-cross-origin
          Permissions-Policy: "geolocation=(), microphone=(), camera=()"
        forceSTSHeader: true
        frameDeny: true
        stsIncludeSubdomains: true
        stsPreload: true
        stsSeconds: 31536000  # 365 days

    # Circuit breaker for reliability
    # Opens when more than 30% of requests hit network errors or more than
    # 30% of responses are in the 500-600 status range.
    circuit-breaker:
      circuitBreaker:
        expression: "NetworkErrorRatio() > 0.3 || ResponseCodeRatio(500, 600, 0, 600) > 0.3"
        checkPeriod: 30s
        fallbackDuration: 10s
        recoveryDuration: 30s

    # Request retry for resilience
    # Up to 3 attempts, with a 100ms initial backoff interval.
    retry-policy:
      retry:
        attempts: 3
        initialInterval: 100ms

    # Compress responses for performance
    # Empty mapping: compression enabled with Traefik's default settings.
    compression:
      compress: {}

    # Health check middleware chain
    health-check-chain:
      chain:
        middlewares:
          - compression
          - secure-headers

    # API middleware chain
    # NOTE(review): security-headers-strict and cors both define CORS
    # settings, and the earlier middleware sees the request first; confirm
    # which of the two is meant to answer CORS preflight requests.
    api-chain:
      chain:
        middlewares:
          - compression
          - security-headers-strict
          - cors
          - rate-limit
          - api-auth
          - retry-policy

    # Platform API middleware chain
    platform-chain:
      chain:
        middlewares:
          - compression
          - security-headers-strict
          - rate-limit
          - platform-auth
          - circuit-breaker
          - retry-policy

    # Public frontend middleware chain
    frontend-chain:
      chain:
        middlewares:
          - compression
          - secure-headers

View File

@@ -15,6 +15,8 @@ services:
# Staging override for the Traefik service: sets the staging container name
# and mounts the staging dynamic-configuration directory.
# NOTE(review): this is diff-hunk context with indentation stripped; the
# service definition may continue beyond what is shown here.
mvp-traefik:
image: ${REGISTRY_MIRRORS:-git.motovaultpro.com/egullickson/mirrors}/traefik:v3.6
container_name: mvp-traefik-staging
volumes:
# Read-only mount of dynamic-staging/ at /etc/traefik/dynamic. Per the commit
# message this replaces dynamic/, which also holds the production-only
# blue-green routing file.
- ./config/traefik/dynamic-staging:/etc/traefik/dynamic:ro
labels:
# Dashboard router matches the staging Traefik hostname.
- "traefik.http.routers.traefik-dashboard.rule=Host(`traefik.staging.motovaultpro.com`)"
@@ -79,6 +81,20 @@ services:
volumes:
- mvp_redis_staging_data:/data
# ========================================
# Grafana (Staging domain override)
# ========================================
# Overrides the Grafana service's Traefik labels for staging: the `grafana`
# router matches logs.staging.motovaultpro.com on the `websecure` entrypoint
# with TLS from the `letsencrypt` resolver, applies the file-provider
# middleware `grafana-ipwhitelist`, and forwards to container port 3000.
mvp-grafana:
  labels:
    - "traefik.enable=true"
    # Network Traefik uses to reach the container.
    - "traefik.docker.network=motovaultpro_frontend"
    - "traefik.http.routers.grafana.rule=Host(`logs.staging.motovaultpro.com`)"
    - "traefik.http.routers.grafana.entrypoints=websecure"
    - "traefik.http.routers.grafana.tls=true"
    - "traefik.http.routers.grafana.tls.certresolver=letsencrypt"
    # `@file` selects the middleware defined in the dynamic config files.
    - "traefik.http.routers.grafana.middlewares=grafana-ipwhitelist@file"
    - "traefik.http.services.grafana.loadbalancer.server.port=3000"
# Staging-specific volumes (separate from production)
volumes:
mvp_postgres_staging_data:

View File

@@ -276,10 +276,9 @@ services:
networks:
- backend
healthcheck:
# NOTE(review): this listing shows both sides of a diff hunk without +/-
# markers. Per the hunk header (10 old lines, 9 new), the
# test/interval/timeout/retries lines look like the removed wget probe and
# `disable: true` (with its two comment lines) the replacement. Confirm
# against the repository before copying this block.
test: ["CMD-SHELL", "wget -q --spider http://localhost:3100/ready || exit 1"]
interval: 30s
timeout: 10s
retries: 3
# Loki 3.x uses a distroless image with no shell or HTTP client.
# Disable in-container healthcheck; Alloy and Grafana verify connectivity.
disable: true
logging:
driver: json-file
options:
@@ -305,7 +304,7 @@ services:
depends_on:
- mvp-loki
healthcheck:
# NOTE(review): the two `test` lines are the old and new sides of a one-line
# diff change (wget probe replaced by the bash probe). A real compose file
# should contain only one `test` key; confirm against the repository.
test: ["CMD-SHELL", "wget -q --spider http://localhost:12345/ready || exit 1"]
# Probe via bash's /dev/tcp redirection: it succeeds when a TCP connection to
# localhost:12345 can be opened. Unlike the wget probe it does not request
# /ready, so it checks that the port accepts connections, not readiness.
test: ["CMD-SHELL", "bash -c 'echo > /dev/tcp/localhost/12345'"]
interval: 30s
timeout: 10s
retries: 3