Track the config/ tree (app, monitoring, shared, traefik) in version control:
drop the config/ ignore rules from .gitignore and add the configuration files.

diff --git a/.gitignore b/.gitignore
index 90a3d93..aa8209f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,11 +19,7 @@ data/vehicle-etl/vehapi.key
 data/vehicle-etl/snapshots/*
 !data/vehicle-etl/snapshots/.gitkeep
 
-# K8s-aligned configuration and secret mounts (real files ignored; examples committed)
-config/**
-!config/
-!config/**/
-!config/**/*.example
+# K8s-aligned secret mounts (real files ignored; examples committed)
 secrets/**
 !secrets/
 !secrets/**/
diff --git a/config/app/ci.yml b/config/app/ci.yml
new file mode 100755
index 0000000..81dc436
--- /dev/null
+++ b/config/app/ci.yml
@@ -0,0 +1,79 @@
+# CI-specific configuration for backend tests. Mirrors production schema with safe defaults.
+server:
+  name: mvp-backend-ci
+  port: 3001
+  environment: test
+  node_env: test
+
+database:
+  host: mvp-postgres
+  port: 5432
+  name: motovaultpro
+  user: postgres
+
+redis:
+  host: mvp-redis
+  port: 6379
+  db: 0
+
+platform:
+  services:
+    vehicles:
+      url: http://mvp-platform-vehicles-api:8000
+      timeout: 5s
+
+external:
+  vpic:
+    url: https://vpic.nhtsa.dot.gov/api/vehicles
+    timeout: 10s
+
+service:
+  name: mvp-backend
+
+cors:
+  origins:
+    - http://localhost
+  allow_credentials: false
+  max_age: 0
+
+frontend:
+  api_base_url: /api
+  auth0:
+    domain: motovaultpro.us.auth0.com
+    audience: https://api.motovaultpro.com
+
+health:
+  endpoints:
+    basic: /health
+    ready: /health/ready
+    live: /health/live
+    startup: /health/startup
+  probes:
+    startup:
+      initial_delay: 1s
+      period: 1s
+      timeout: 1s
+      failure_threshold: 3
+    readiness:
+      period: 1s
+      timeout: 1s
+      failure_threshold: 3
+    liveness:
+      period: 1s
+      timeout: 1s
+      failure_threshold: 3
+
+logging:
+  level: debug
+  format: json
+  destinations:
+    - console
+
+performance:
+  request_timeout: 30s
+  max_request_size: 10MB
+  compression_enabled: false
+  circuit_breaker:
+    enabled: false
+    failure_threshold: 5
+    timeout: 5s
diff --git a/config/app/production.yml b/config/app/production.yml
new file mode 100755
index 0000000..752f18f
--- /dev/null
+++ b/config/app/production.yml
@@ -0,0 +1,92 @@
+# Application Configuration (K8s ConfigMap equivalent)
+# Non-sensitive configuration for admin application services
+
+# Server Configuration
+server:
+  name: mvp-backend
+  port: 3001
+  environment: production
+  node_env: production
+
+# Database Configuration
+database:
+  host: mvp-postgres
+  port: 5432
+  name: motovaultpro
+  user: postgres
+  pool_size: 20
+  # password loaded from secrets/app/postgres-password.txt
+
+# Redis Configuration
+redis:
+  host: mvp-redis
+  port: 6379
+  db: 0
+
+# Auth0 Configuration
+auth0:
+  domain: motovaultpro.us.auth0.com
+  audience: https://api.motovaultpro.com
+  # client_id and client_secret loaded from secrets
+
+# External APIs Configuration
+# google_maps_api_key loaded from secrets/app/google-maps-api-key.txt
+
+# Service Authentication
+service:
+  name: mvp-backend
+  # auth_token loaded from secrets/app/service-auth-token.txt
+
+# CORS Configuration
+cors:
+  origins:
+    - https://motovaultpro.com
+  allow_credentials: true
+  max_age: 86400
+
+# Frontend Configuration (Vite build variables)
+frontend:
+  api_base_url: /api
+  auth0:
+    domain: motovaultpro.us.auth0.com
+    audience: https://api.motovaultpro.com
+
+# Health Check Configuration (K8s probe equivalent)
+health:
+  endpoints:
+    basic: /health  # Basic health check
+    ready: /health/ready  # Readiness probe (K8s equivalent)
+    live: /health/live  # Liveness probe (K8s equivalent)
+    startup: /health/startup  # Startup probe (K8s equivalent)
+  probes:
+    startup:
+      initial_delay: 30s
+      period: 10s
+      timeout: 5s
+      failure_threshold: 6
+    readiness:
+      period: 10s
+      timeout: 5s
+      failure_threshold: 3
+    liveness:
+      period: 30s
+      timeout: 10s
+      failure_threshold: 3
+
+# Logging Configuration
+logging:
+  level: INFO
+  format: json
+  destinations:
+    - console
+    - file:/var/log/app/application.log
+
+# Performance Configuration
+performance:
+  request_timeout: 30s
+  max_request_size: 10MB
+  compression_enabled: true
+  circuit_breaker:
+    enabled: true
+    failure_threshold: 5
+    timeout: 30s
\ No newline at end of file
diff --git a/config/monitoring/alerts/service-health.yml b/config/monitoring/alerts/service-health.yml
new file mode 100755
index 0000000..8b6342d
--- /dev/null
+++ b/config/monitoring/alerts/service-health.yml
@@ -0,0 +1,110 @@
+# Service Health Alerts for MotoVaultPro K8s-equivalent monitoring
+# These alerts mirror K8s PrometheusRule patterns for service monitoring
+
+groups:
+  - name: service-health
+    rules:
+      # Service availability alerts
+      - alert: ServiceDown
+        expr: up == 0
+        for: 1m
+        labels:
+          severity: critical
+          team: platform
+        annotations:
+          summary: "Service {{ $labels.job }} is down"
+          description: "Service {{ $labels.job }} on {{ $labels.instance }} has been down for more than 1 minute."
+
+      - alert: HighResponseTime
+        # Traefik exports request duration as a histogram, so derive p95 from the buckets
+        expr: histogram_quantile(0.95, sum by (le, service) (rate(traefik_service_request_duration_seconds_bucket[5m]))) > 2
+        for: 2m
+        labels:
+          severity: warning
+          team: platform
+        annotations:
+          summary: "High response time for service {{ $labels.service }}"
+          description: "95th percentile response time for {{ $labels.service }} is {{ $value }}s"
+
+      - alert: HighErrorRate
+        # Share of 5xx responses per service (a ratio, to match humanizePercentage)
+        expr: sum by (service) (rate(traefik_service_requests_total{code=~"5.."}[5m])) / sum by (service) (rate(traefik_service_requests_total[5m])) > 0.1
+        for: 2m
+        labels:
+          severity: warning
+          team: platform
+        annotations:
+          summary: "High error rate for service {{ $labels.service }}"
+          description: "Error rate for {{ $labels.service }} is {{ $value | humanizePercentage }}"
+
+  - name: database-health
+    rules:
+      # Database connection alerts
+      - alert: DatabaseConnectionsHigh
+        # max_connections carries no datname label; match on instance and keep datname
+        expr: pg_stat_database_numbackends / on (instance) group_left pg_settings_max_connections > 0.8
+        for: 5m
+        labels:
+          severity: warning
+          team: database
+        annotations:
+          summary: "High database connections for {{ $labels.datname }}"
+          description: "Database {{ $labels.datname }} is using {{ $value | humanizePercentage }} of max connections"
+
+      - alert: DatabaseReplicationLag
+        expr: pg_stat_replication_lag_bytes > 1073741824  # 1GB
+        for: 2m
+        labels:
+          severity: critical
+          team: database
+        annotations:
+          summary: "High replication lag for database {{ $labels.datname }}"
+          description: "Replication lag is {{ $value | humanize1024 }}B"
+
+  - name: resource-usage
+    rules:
+      # Resource usage alerts
+      - alert: HighMemoryUsage
+        # "> 0" drops containers without a memory limit (limit 0 would divide to +Inf)
+        expr: container_memory_usage_bytes / (container_spec_memory_limit_bytes > 0) > 0.9
+        for: 2m
+        labels:
+          severity: warning
+          team: platform
+        annotations:
+          summary: "High memory usage for container {{ $labels.name }}"
+          description: "Container {{ $labels.name }} is using {{ $value | humanizePercentage }} of memory limit"
+
+      - alert: HighCPUUsage
+        # quota/period = allowed CPU cores; aggregate per container so labels match
+        expr: sum by (instance, name) (rate(container_cpu_usage_seconds_total[5m])) / sum by (instance, name) (container_spec_cpu_quota / container_spec_cpu_period) * 100 > 80
+        for: 5m
+        labels:
+          severity: warning
+          team: platform
+        annotations:
+          summary: "High CPU usage for container {{ $labels.name }}"
+          description: "Container {{ $labels.name }} is using {{ $value }}% CPU"
+
+  - name: traefik-health
+    rules:
+      # Traefik specific alerts
+      - alert: TraefikServiceDiscoveryFailure
+        expr: increase(traefik_config_reloads_failure_total[5m]) > 0
+        for: 1m
+        labels:
+          severity: warning
+          team: infrastructure
+        annotations:
+          summary: "Traefik configuration reload failures"
+          description: "Traefik has {{ $value }} configuration reload failures in the last 5 minutes"
+
+      - alert: TraefikBackendDown
+        expr: traefik_service_server_up == 0
+        for: 1m
+        labels:
+          severity: critical
+          team: platform
+        annotations:
+          summary: "Traefik backend {{ $labels.service }} is down"
+          description: "Backend server for service {{ $labels.service }} is unreachable"
\ No newline at end of file
diff --git a/config/monitoring/health-checks.yml b/config/monitoring/health-checks.yml
new file mode 100755
index 0000000..05e5f29
--- /dev/null
+++ b/config/monitoring/health-checks.yml
@@ -0,0 +1,147 @@
+# Health Check Configuration for K8s-Ready Environment
+# This file defines comprehensive health check patterns that mirror
+# Kubernetes readiness, liveness, and startup probes
+
+health_checks:
+  # Application Services
+  admin-backend:
+    startup_probe:
+      path: /health/startup
+      initial_delay: 30s
+      period: 10s
+      timeout: 5s
+      failure_threshold: 6
+    readiness_probe:
+      path: /health/ready
+      period: 10s
+      timeout: 5s
+      failure_threshold: 3
+    liveness_probe:
+      path: /health/live
+      period: 30s
+      timeout: 10s
+      failure_threshold: 3
+
+  admin-frontend:
+    startup_probe:
+      path: /
+      initial_delay: 20s
+      period: 10s
+      timeout: 5s
+      failure_threshold: 3
+    readiness_probe:
+      path: /
+      period: 15s
+      timeout: 5s
+      failure_threshold: 2
+    liveness_probe:
+      path: /
+      period: 30s
+      timeout: 10s
+      failure_threshold: 3
+
+  # Platform Services
+  mvp-platform-vehicles-api:
+    startup_probe:
+      path: /health/startup
+      initial_delay: 30s
+      period: 10s
+      timeout: 5s
+      failure_threshold: 6
+    readiness_probe:
+      path: /health/ready
+      period: 10s
+      timeout: 5s
+      failure_threshold: 3
+    liveness_probe:
+      path: /health/live
+      period: 30s
+      timeout: 10s
+      failure_threshold: 3
+
+  mvp-platform-tenants:
+    startup_probe:
+      path: /health/startup
+      initial_delay: 30s
+      period: 10s
+      timeout: 5s
+      failure_threshold: 6
+    readiness_probe:
+      path: /health/ready
+      period: 10s
+      timeout: 5s
+      failure_threshold: 3
+    liveness_probe:
+      path: /health/live
+      period: 30s
+      timeout: 10s
+      failure_threshold: 3
+
+  mvp-platform-landing:
+    startup_probe:
+      path: /
+      initial_delay: 20s
+      period: 10s
+      timeout: 5s
+      failure_threshold: 3
+    readiness_probe:
+      path: /
+      period: 15s
+      timeout: 5s
+      failure_threshold: 2
+    liveness_probe:
+      path: /
+      period: 30s
+      timeout: 10s
+      failure_threshold: 3
+
+# Monitoring Endpoints
+monitoring:
+  metrics:
+    traefik:
+      endpoint: http://localhost:8080/metrics
+      format: prometheus
+    services:
+      admin-backend:
+        endpoint: /metrics
+        port: 3001
+      mvp-platform-vehicles-api:
+        endpoint: /metrics
+        port: 8000
+      mvp-platform-tenants:
+        endpoint: /metrics
+        port: 8000
+
+  logging:
+    level: INFO
+    format: json
+    destinations:
+      - console
+      - file:/var/log/app/application.log
+
+# Service Discovery Validation
+service_discovery:
+  expected_services:
+    - admin-app@docker
+    - admin-api@docker
+    - landing@docker
+    - vehicles-api@docker
+    - tenants-api@docker
+  expected_routes:
+    - "Host(admin.motovaultpro.com)"
+    - "Host(motovaultpro.com)"
+    - "PathPrefix(/api/platform/vehicles)"
+    - "PathPrefix(/api/platform/tenants)"
+    - "PathPrefix(/api)"
+
+# Performance Thresholds (K8s-ready SLOs)
+performance:
+  response_time:
+    target: 500ms
+    max: 2000ms
+  availability:
+    target: 99.9%
+    min: 99.0%
+  error_rate:
+    target: 0.1%
+    max: 1.0%
\ No newline at end of file
diff --git a/config/monitoring/prometheus.yml b/config/monitoring/prometheus.yml
new file mode 100755
index 0000000..07ffc37
--- /dev/null
+++ b/config/monitoring/prometheus.yml
@@ -0,0 +1,109 @@
+# Prometheus Configuration for MotoVaultPro K8s-equivalent monitoring
+# This configuration mirrors K8s ServiceMonitor and PodMonitor patterns
+
+global:
+  scrape_interval: 15s
+  evaluation_interval: 15s
+  external_labels:
+    cluster: 'motovaultpro-dev'
+    environment: 'development'
+
+# Rule files for alerting (K8s PrometheusRule equivalent)
+rule_files:
+  - "alerts/*.yml"
+
+# Scrape configurations (K8s ServiceMonitor equivalent)
+scrape_configs:
+  # Traefik metrics (Infrastructure monitoring)
+  - job_name: 'traefik'
+    static_configs:
+      - targets: ['traefik:8080']
+    metrics_path: '/metrics'
+    scrape_interval: 15s
+    scrape_timeout: 10s
+
+  # Application backend metrics
+  - job_name: 'admin-backend'
+    static_configs:
+      - targets: ['admin-backend:3001']
+    metrics_path: '/metrics'
+    scrape_interval: 30s
+    scrape_timeout: 10s
+
+  # Platform services metrics
+  - job_name: 'platform-vehicles-api'
+    static_configs:
+      - targets: ['mvp-platform-vehicles-api:8000']
+    metrics_path: '/metrics'
+    scrape_interval: 30s
+    scrape_timeout: 10s
+
+  - job_name: 'platform-tenants-api'
+    static_configs:
+      - targets: ['mvp-platform-tenants:8000']
+    metrics_path: '/metrics'
+    scrape_interval: 30s
+    scrape_timeout: 10s
+
+  # Database monitoring
+  # NOTE(review): 5432 is the raw PostgreSQL port and serves no HTTP /metrics;
+  # these targets should point at postgres_exporter sidecars (default :9187).
+  - job_name: 'postgres-app'
+    static_configs:
+      - targets: ['admin-postgres:5432']
+    metrics_path: '/metrics'
+    scrape_interval: 60s
+    scrape_timeout: 15s
+
+  - job_name: 'postgres-platform'
+    static_configs:
+      - targets: ['platform-postgres:5432']
+    metrics_path: '/metrics'
+    scrape_interval: 60s
+    scrape_timeout: 15s
+
+  - job_name: 'postgres-vehicles'
+    static_configs:
+      - targets: ['mvp-platform-vehicles-db:5432']
+    metrics_path: '/metrics'
+    scrape_interval: 60s
+    scrape_timeout: 15s
+
+  # Redis monitoring
+  # NOTE(review): 6379 is the raw Redis port and serves no HTTP /metrics;
+  # these targets should point at redis_exporter sidecars (default :9121).
+  - job_name: 'redis-app'
+    static_configs:
+      - targets: ['admin-redis:6379']
+    metrics_path: '/metrics'
+    scrape_interval: 60s
+    scrape_timeout: 15s
+
+  - job_name: 'redis-platform'
+    static_configs:
+      - targets: ['platform-redis:6379']
+    metrics_path: '/metrics'
+    scrape_interval: 60s
+    scrape_timeout: 15s
+
+  - job_name: 'redis-vehicles'
+    static_configs:
+      - targets: ['mvp-platform-vehicles-redis:6379']
+    metrics_path: '/metrics'
+    scrape_interval: 60s
+    scrape_timeout: 15s
+
+  # MinIO monitoring
+  - job_name: 'minio'
+    static_configs:
+      - targets: ['admin-minio:9000']
+    metrics_path: '/minio/v2/metrics/cluster'
+    scrape_interval: 60s
+    scrape_timeout: 15s
+
+# Alertmanager configuration (K8s Alertmanager equivalent)
+alerting:
+  alertmanagers:
+    - static_configs:
+        - targets:
+            - alertmanager:9093
\ No newline at end of file
diff --git a/config/shared/production.yml b/config/shared/production.yml
new file mode 100755
index 0000000..0748010
--- /dev/null
+++ b/config/shared/production.yml
@@ -0,0 +1,136 @@
+# Shared Configuration (K8s ConfigMap equivalent)
+# Common configuration shared across all services
+
+# Global Settings
+global:
+  environment: production
+  cluster_name: motovaultpro-dev
+  namespace: motovaultpro
+  timezone: UTC
+
+# Common Service Configuration
+service_defaults:
+  port_naming:
+    http: 8080
+    https: 8443
+    metrics: 9090
+
+  timeouts:
+    startup: 30s
+    readiness: 5s
+    liveness: 10s
+    shutdown: 30s
+
+  resource_limits:
+    tier_1:  # Critical services
+      memory: 2g
+      cpu: 2.0
+    tier_2:  # Supporting services
+      memory: 1g
+      cpu: 1.0
+    tier_3:  # Infrastructure services
+      memory: 512m
+      cpu: 0.5
+
+# Network Configuration
+networking:
+  networks:
+    frontend:
+      purpose: public-traffic-only
+      internal: false
+    backend:
+      purpose: api-services
+      internal: true
+    database:
+      purpose: app-data-layer
+      internal: true
+    platform:
+      purpose: platform-services
+      internal: true
+
+# Security Configuration
+security:
+  tls:
+    min_version: "1.2"
+    preferred_version: "1.3"
+    cipher_suites:
+      - TLS_CHACHA20_POLY1305_SHA256
+      - TLS_AES_256_GCM_SHA384
+      - TLS_AES_128_GCM_SHA256
+
+  headers:
+    hsts_max_age: 31536000
+    content_type_options: nosniff
+    frame_options: DENY
+    xss_protection: "1; mode=block"
+
+# Monitoring & Observability
+monitoring:
+  prometheus:
+    enabled: true
+    port: 9090
+    path: /metrics
+    scrape_interval: 30s
+
+  health_checks:
+    interval: 30s
+    timeout: 10s
+    retries: 3
+
+  logging:
+    level: INFO
+    format: json
+    retention: 30d
+
+# Performance Configuration
+performance:
+  compression:
+    enabled: true
+    types:
+      - text/html
+      - text/css
+      - text/javascript
+      - application/json
+      - application/xml
+
+  caching:
+    enabled: true
+    max_age: 3600
+    static_assets: 86400
+
+# External Services Configuration
+external_services:
+  auth0:
+    base_domain: motovaultpro.us.auth0.com
+    api_audience: https://api.motovaultpro.com
+
+  google_maps:
+    base_url: https://maps.googleapis.com/maps/api
+
+  vpic:
+    base_url: https://vpic.nhtsa.dot.gov/api/vehicles
+
+# Development Configuration
+development:
+  debug_enabled: false
+  hot_reload: false
+  ssl_verify: true
+  local_overrides: false
+
+# Container Configuration
+containers:
+  restart_policy: unless-stopped
+  log_driver: json-file
+  log_options:
+    max_size: 10m
+    max_file: 3
+
+# Service Discovery Configuration
+service_discovery:
+  provider: traefik
+  auto_discovery: true
+  health_check_path: /health
+  labels:
+    security_tier: production
+    monitoring: enabled
+    backup: required
\ No newline at end of file
diff --git a/config/traefik/middleware.yml b/config/traefik/middleware.yml
new file mode 100755
index 0000000..eae7df8
--- /dev/null
+++ b/config/traefik/middleware.yml
@@ -0,0 +1,185 @@
+http:
+  middlewares:
+    # Security headers middleware
+    secure-headers:
+      headers:
+        accessControlAllowMethods:
+          - GET
+          - OPTIONS
+          - PUT
+          - POST
+          - DELETE
+        accessControlAllowOriginList:
+          - "https://admin.motovaultpro.com"
+          - "https://motovaultpro.com"
+        accessControlMaxAge: 100
+        addVaryHeader: true
+        browserXssFilter: true
+        contentTypeNosniff: true
+        forceSTSHeader: true
+        frameDeny: true
+        stsIncludeSubdomains: true
+        stsPreload: true
+        stsSeconds: 31536000
+        customRequestHeaders:
+          X-Forwarded-Proto: https
+
+    # CORS middleware for API endpoints
+    cors:
+      headers:
+        accessControlAllowCredentials: true
+        accessControlAllowHeaders:
+          - "Authorization"
+          - "Content-Type"
+          - "X-Requested-With"
+          - "X-Tenant-ID"
+        accessControlAllowMethods:
+          - "GET"
+          - "POST"
+          - "PUT"
+          - "DELETE"
+          - "OPTIONS"
+        accessControlAllowOriginList:
+          - "https://admin.motovaultpro.com"
+          - "https://motovaultpro.com"
+        accessControlMaxAge: 100
+
+    # API authentication middleware
+    api-auth:
+      forwardAuth:
+        address: "http://admin-backend:3001/auth/verify"
+        authResponseHeaders:
+          - "X-Auth-User"
+          - "X-Auth-Roles"
+          - "X-Tenant-ID"
+        authRequestHeaders:
+          - "Authorization"
+          - "X-Tenant-ID"
+        trustForwardHeader: true
+
+    # Platform API authentication middleware
+    platform-auth:
+      forwardAuth:
+        address: "http://admin-backend:3001/auth/verify-platform"
+        authResponseHeaders:
+          - "X-Service-Name"
+          - "X-Auth-Scope"
+        authRequestHeaders:
+          - "X-API-Key"
+          - "Authorization"
+        trustForwardHeader: true
+
+    # Rate limiting middleware
+    rate-limit:
+      rateLimit:
+        burst: 100
+        average: 50
+        period: 1m
+
+    # Request/response size limits
+    size-limit:
+      buffering:
+        maxRequestBodyBytes: 10485760  # 10MB
+        maxResponseBodyBytes: 10485760  # 10MB
+
+    # IP whitelist for development (optional)
+    local-ips:
+      ipWhiteList:
+        sourceRange:
+          - "127.0.0.1/32"
+          - "10.0.0.0/8"
+          - "172.16.0.0/12"
+          - "192.168.0.0/16"
+
+    # Advanced security headers for production
+    security-headers-strict:
+      headers:
+        accessControlAllowCredentials: false
+        accessControlAllowMethods:
+          - GET
+          - POST
+          - OPTIONS
+        accessControlAllowOriginList:
+          - "https://admin.motovaultpro.com"
+          - "https://motovaultpro.com"
+        browserXssFilter: true
+        contentTypeNosniff: true
+        customRequestHeaders:
+          X-Forwarded-Proto: https
+        customResponseHeaders:
+          X-Frame-Options: DENY
+          X-Content-Type-Options: nosniff
+          Referrer-Policy: strict-origin-when-cross-origin
+          Permissions-Policy: "geolocation=(), microphone=(), camera=()"
+        forceSTSHeader: true
+        frameDeny: true
+        stsIncludeSubdomains: true
+        stsPreload: true
+        stsSeconds: 31536000
+
+    # Circuit breaker for reliability
+    circuit-breaker:
+      circuitBreaker:
+        expression: "NetworkErrorRatio() > 0.3 || ResponseCodeRatio(500, 600, 0, 600) > 0.3"
+        checkPeriod: 30s
+        fallbackDuration: 10s
+        recoveryDuration: 30s
+
+    # Request retry for resilience
+    retry-policy:
+      retry:
+        attempts: 3
+        initialInterval: 100ms
+
+    # NOTE: Traefik ships no "timeout" middleware; an unknown key here makes the
+    # file provider reject this whole file. The 30s limit is enforced through
+    # serversTransport.forwardingTimeouts in traefik.yml instead.
+
+    # Compress responses for performance
+    compression:
+      compress: {}
+
+    # Health check middleware chain
+    health-check-chain:
+      chain:
+        middlewares:
+          - compression
+          - secure-headers
+
+    # API middleware chain
+    api-chain:
+      chain:
+        middlewares:
+          - compression
+          - security-headers-strict
+          - cors
+          - rate-limit
+          - api-auth
+          - retry-policy
+
+    # Platform API middleware chain
+    platform-chain:
+      chain:
+        middlewares:
+          - compression
+          - security-headers-strict
+          - rate-limit
+          - platform-auth
+          - circuit-breaker
+          - retry-policy
+
+    # Public frontend middleware chain
+    frontend-chain:
+      chain:
+        middlewares:
+          - compression
+          - secure-headers
+
+# TLS certificates (dynamic configuration, so they belong to the file provider
+# rather than the static traefik.yml, where a `tls:` key is ignored)
+tls:
+  certificates:
+    - certFile: /certs/motovaultpro.com.crt
+      keyFile: /certs/motovaultpro.com.key
+      stores:
+        - default
\ No newline at end of file
diff --git a/config/traefik/traefik.yml b/config/traefik/traefik.yml
new file mode 100755
index 0000000..9fdab12
--- /dev/null
+++ b/config/traefik/traefik.yml
@@ -0,0 +1,87 @@
+api:
+  dashboard: true
+  debug: true
+  # NOTE(review): insecure mode serves the dashboard/API without authentication
+  # on the internal "traefik" entryPoint (:8080); keep this to development.
+  insecure: true
+
+entryPoints:
+  web:
+    address: ":80"
+    http:
+      redirections:
+        entryPoint:
+          to: websecure
+          scheme: https
+          permanent: true
+  websecure:
+    address: ":443"
+
+providers:
+  docker:
+    endpoint: "unix:///var/run/docker.sock"
+    exposedByDefault: false
+    # Network auto-discovery - Traefik will use the networks it's connected to
+  file:
+    filename: /etc/traefik/middleware.yml
+    watch: true
+
+certificatesResolvers:
+  letsencrypt:
+    acme:
+      email: admin@motovaultpro.com
+      storage: /data/acme.json
+      httpChallenge:
+        entryPoint: web
+      # Use staging for development
+      caServer: https://acme-staging-v02.api.letsencrypt.org/directory
+
+# TLS certificates are dynamic configuration: a top-level `tls:` key in this
+# static file is ignored, so they are declared in middleware.yml (file provider).
+
+# Global configuration
+global:
+  sendAnonymousUsage: false
+
+# Logging
+log:
+  level: INFO
+  format: json
+
+# Access logs
+accessLog:
+  format: json
+  fields:
+    defaultMode: keep
+    names:
+      ClientUsername: drop
+    headers:
+      defaultMode: keep
+      names:
+        User-Agent: redact
+        Authorization: drop
+        Cookie: drop
+
+# Metrics for monitoring
+metrics:
+  prometheus:
+    addEntryPointsLabels: true
+    addServicesLabels: true
+    addRoutersLabels: true
+    buckets:
+      - 0.1
+      - 0.3
+      - 1.2
+      - 5.0
+
+# Monitoring and observability
+ping:
+  entryPoint: "traefik"
+
+# Backend transport
+serversTransport:
+  # NOTE(review): skips TLS verification of upstream servers; development only.
+  insecureSkipVerify: true
+  # Replaces the former (non-existent) `timeout` middleware from middleware.yml.
+  forwardingTimeouts:
+    responseHeaderTimeout: 30s
\ No newline at end of file