fix: Update grafana dashboards
All checks were successful
Deploy to Staging / Build Images (push) Successful in 35s
Deploy to Staging / Deploy to Staging (push) Successful in 51s
Deploy to Staging / Verify Staging (push) Successful in 2m31s
Deploy to Staging / Notify Staging Ready (push) Successful in 7s
Deploy to Staging / Notify Staging Failure (push) Has been skipped

This commit is contained in:
Eric Gullickson
2026-02-06 13:50:17 -06:00
parent 66314a0493
commit c88fbcdc4e
10 changed files with 57 additions and 88 deletions

View File

@@ -19,7 +19,7 @@ on:
env: env:
REGISTRY: git.motovaultpro.com REGISTRY: git.motovaultpro.com
DEPLOY_PATH: /opt/motovaultpro DEPLOY_PATH: /opt/motovaultpro
COMPOSE_FILE: docker-compose.yml BASE_COMPOSE_FILE: docker-compose.yml
COMPOSE_BLUE_GREEN: docker-compose.blue-green.yml COMPOSE_BLUE_GREEN: docker-compose.blue-green.yml
COMPOSE_PROD: docker-compose.prod.yml COMPOSE_PROD: docker-compose.prod.yml
HEALTH_CHECK_TIMEOUT: "60" HEALTH_CHECK_TIMEOUT: "60"
@@ -170,7 +170,7 @@ jobs:
cd "$DEPLOY_PATH" cd "$DEPLOY_PATH"
# Start shared infrastructure services (database, cache, logging) # Start shared infrastructure services (database, cache, logging)
# These persist across blue-green deployments # These persist across blue-green deployments
docker compose -f $COMPOSE_FILE -f $COMPOSE_BLUE_GREEN -f $COMPOSE_PROD up -d \ docker compose -f $BASE_COMPOSE_FILE -f $COMPOSE_BLUE_GREEN -f $COMPOSE_PROD up -d \
mvp-postgres mvp-redis mvp-loki mvp-alloy mvp-grafana mvp-postgres mvp-redis mvp-loki mvp-alloy mvp-grafana
- name: Start target stack - name: Start target stack
@@ -182,7 +182,7 @@ jobs:
# --force-recreate ensures containers are recreated even if image tag is same # --force-recreate ensures containers are recreated even if image tag is same
# This prevents stale container content when image digest changes # This prevents stale container content when image digest changes
# Start shared OCR service and target stack # Start shared OCR service and target stack
docker compose -f $COMPOSE_FILE -f $COMPOSE_BLUE_GREEN -f $COMPOSE_PROD up -d --force-recreate \ docker compose -f $BASE_COMPOSE_FILE -f $COMPOSE_BLUE_GREEN -f $COMPOSE_PROD up -d --force-recreate \
mvp-ocr mvp-frontend-$TARGET_STACK mvp-backend-$TARGET_STACK mvp-ocr mvp-frontend-$TARGET_STACK mvp-backend-$TARGET_STACK
- name: Wait for stack initialization - name: Wait for stack initialization
@@ -221,7 +221,7 @@ jobs:
- name: Start Traefik - name: Start Traefik
run: | run: |
cd "$DEPLOY_PATH" cd "$DEPLOY_PATH"
docker compose -f $COMPOSE_FILE -f $COMPOSE_BLUE_GREEN -f $COMPOSE_PROD up -d mvp-traefik docker compose -f $BASE_COMPOSE_FILE -f $COMPOSE_BLUE_GREEN -f $COMPOSE_PROD up -d mvp-traefik
- name: Wait for Traefik - name: Wait for Traefik
run: | run: |

View File

@@ -15,8 +15,8 @@ on:
env: env:
REGISTRY: git.motovaultpro.com REGISTRY: git.motovaultpro.com
DEPLOY_PATH: /opt/motovaultpro DEPLOY_PATH: /opt/motovaultpro
COMPOSE_FILE: docker-compose.yml BASE_COMPOSE_FILE: docker-compose.yml
COMPOSE_STAGING: docker-compose.staging.yml STAGING_COMPOSE_FILE: docker-compose.staging.yml
HEALTH_CHECK_TIMEOUT: "60" HEALTH_CHECK_TIMEOUT: "60"
LOG_LEVEL: DEBUG LOG_LEVEL: DEBUG
@@ -170,8 +170,8 @@ jobs:
export BACKEND_IMAGE=$BACKEND_IMAGE export BACKEND_IMAGE=$BACKEND_IMAGE
export FRONTEND_IMAGE=$FRONTEND_IMAGE export FRONTEND_IMAGE=$FRONTEND_IMAGE
export OCR_IMAGE=$OCR_IMAGE export OCR_IMAGE=$OCR_IMAGE
docker compose -f $COMPOSE_FILE -f $COMPOSE_STAGING down --timeout 30 || true docker compose -f $BASE_COMPOSE_FILE -f $STAGING_COMPOSE_FILE down --timeout 30 || true
docker compose -f $COMPOSE_FILE -f $COMPOSE_STAGING up -d docker compose -f $BASE_COMPOSE_FILE -f $STAGING_COMPOSE_FILE up -d
- name: Wait for services - name: Wait for services
run: sleep 5 run: sleep 5

View File

@@ -269,24 +269,17 @@
when: gitea_registry_token is defined when: gitea_registry_token is defined
# ============================================ # ============================================
# Maintenance Scripts # Remove Legacy Docker Cleanup (was destroying volumes)
# ============================================ # ============================================
- name: Create Docker cleanup script - name: Remove legacy Docker cleanup cron job
copy:
dest: /usr/local/bin/docker-cleanup.sh
content: |
#!/bin/bash
# Remove unused Docker resources older than 7 days
docker system prune -af --filter "until=168h"
docker volume prune -f
mode: '0755'
- name: Schedule Docker cleanup cron job
cron: cron:
name: "Docker cleanup" name: "Docker cleanup"
minute: "0" state: absent
hour: "3"
job: "/usr/local/bin/docker-cleanup.sh >> /var/log/docker-cleanup.log 2>&1" - name: Remove legacy Docker cleanup script
file:
path: /usr/local/bin/docker-cleanup.sh
state: absent
# ============================================ # ============================================
# Production-Specific Security Hardening # Production-Specific Security Hardening

View File

@@ -300,24 +300,17 @@
when: gitea_registry_token is defined when: gitea_registry_token is defined
# ============================================ # ============================================
# Maintenance Scripts # Remove Legacy Docker Cleanup (was destroying volumes)
# ============================================ # ============================================
- name: Create Docker cleanup script - name: Remove legacy Docker cleanup cron job
copy:
dest: /usr/local/bin/docker-cleanup.sh
content: |
#!/bin/bash
# Remove unused Docker resources older than 7 days
docker system prune -af --filter "until=168h"
docker volume prune -f
mode: '0755'
- name: Schedule Docker cleanup cron job
cron: cron:
name: "Docker cleanup" name: "Docker cleanup"
minute: "0" state: absent
hour: "3"
job: "/usr/local/bin/docker-cleanup.sh >> /var/log/docker-cleanup.log 2>&1" - name: Remove legacy Docker cleanup script
file:
path: /usr/local/bin/docker-cleanup.sh
state: absent
handlers: handlers:
- name: Restart act_runner - name: Restart act_runner

View File

@@ -73,7 +73,7 @@ groups:
datasourceUid: loki datasourceUid: loki
model: model:
refId: A refId: A
expr: 'count_over_time({container="mvp-backend"}[5m])' expr: 'count_over_time({container=~"mvp-backend(-staging)?"}[5m])'
queryType: instant queryType: instant
- refId: B - refId: B
relativeTimeRange: relativeTimeRange:
@@ -110,7 +110,7 @@ groups:
datasourceUid: loki datasourceUid: loki
model: model:
refId: A refId: A
expr: 'count_over_time({container="mvp-postgres"}[5m])' expr: 'count_over_time({container=~"mvp-postgres(-staging)?"}[5m])'
queryType: instant queryType: instant
- refId: B - refId: B
relativeTimeRange: relativeTimeRange:
@@ -147,7 +147,7 @@ groups:
datasourceUid: loki datasourceUid: loki
model: model:
refId: A refId: A
expr: 'count_over_time({container="mvp-redis"}[5m])' expr: 'count_over_time({container=~"mvp-redis(-staging)?"}[5m])'
queryType: instant queryType: instant
- refId: B - refId: B
relativeTimeRange: relativeTimeRange:
@@ -184,7 +184,7 @@ groups:
datasourceUid: loki datasourceUid: loki
model: model:
refId: A refId: A
expr: 'sum(count_over_time({container="mvp-backend"} | json | msg=`Request processed` | status >= 500 [5m]))' expr: 'sum(count_over_time({container=~"mvp-backend(-staging)?"} | json | msg=`Request processed` | status >= 500 [5m]))'
queryType: instant queryType: instant
- refId: B - refId: B
relativeTimeRange: relativeTimeRange:

View File

@@ -121,7 +121,7 @@
"type": "loki", "type": "loki",
"uid": "${datasource}" "uid": "${datasource}"
}, },
"expr": "sum(rate({container=\"mvp-backend\"} | json | msg=\"Request processed\" [1m]))", "expr": "sum(rate({container=~\"mvp-backend(-staging)?\"} | json | msg=\"Request processed\" [1m]))",
"legendFormat": "Requests/sec", "legendFormat": "Requests/sec",
"refId": "A" "refId": "A"
} }
@@ -218,7 +218,7 @@
"type": "loki", "type": "loki",
"uid": "${datasource}" "uid": "${datasource}"
}, },
"expr": "quantile_over_time(0.50, {container=\"mvp-backend\"} | json | msg=\"Request processed\" | unwrap duration | __error__=\"\" [5m]) by ()", "expr": "quantile_over_time(0.50, {container=~\"mvp-backend(-staging)?\"} | json | msg=\"Request processed\" | unwrap duration | __error__=\"\" [5m]) by ()",
"legendFormat": "p50", "legendFormat": "p50",
"refId": "A" "refId": "A"
}, },
@@ -227,7 +227,7 @@
"type": "loki", "type": "loki",
"uid": "${datasource}" "uid": "${datasource}"
}, },
"expr": "quantile_over_time(0.95, {container=\"mvp-backend\"} | json | msg=\"Request processed\" | unwrap duration | __error__=\"\" [5m]) by ()", "expr": "quantile_over_time(0.95, {container=~\"mvp-backend(-staging)?\"} | json | msg=\"Request processed\" | unwrap duration | __error__=\"\" [5m]) by ()",
"legendFormat": "p95", "legendFormat": "p95",
"refId": "B" "refId": "B"
}, },
@@ -236,7 +236,7 @@
"type": "loki", "type": "loki",
"uid": "${datasource}" "uid": "${datasource}"
}, },
"expr": "quantile_over_time(0.99, {container=\"mvp-backend\"} | json | msg=\"Request processed\" | unwrap duration | __error__=\"\" [5m]) by ()", "expr": "quantile_over_time(0.99, {container=~\"mvp-backend(-staging)?\"} | json | msg=\"Request processed\" | unwrap duration | __error__=\"\" [5m]) by ()",
"legendFormat": "p99", "legendFormat": "p99",
"refId": "C" "refId": "C"
} }
@@ -303,7 +303,7 @@
"type": "loki", "type": "loki",
"uid": "${datasource}" "uid": "${datasource}"
}, },
"expr": "sum by (status) (count_over_time({container=\"mvp-backend\"} | json | msg=\"Request processed\" [5m]))", "expr": "sum by (status) (count_over_time({container=~\"mvp-backend(-staging)?\"} | json | msg=\"Request processed\" [5m]))",
"legendFormat": "{{status}}", "legendFormat": "{{status}}",
"refId": "A" "refId": "A"
} }
@@ -389,7 +389,7 @@
"type": "loki", "type": "loki",
"uid": "${datasource}" "uid": "${datasource}"
}, },
"expr": "sum by (path) (count_over_time({container=\"mvp-backend\"} | json | msg=\"Request processed\" [5m]))", "expr": "sum by (path) (count_over_time({container=~\"mvp-backend(-staging)?\"} | json | msg=\"Request processed\" [5m]))",
"legendFormat": "{{path}}", "legendFormat": "{{path}}",
"refId": "A" "refId": "A"
} }
@@ -481,7 +481,7 @@
"type": "loki", "type": "loki",
"uid": "${datasource}" "uid": "${datasource}"
}, },
"expr": "topk(10, avg by (path) (avg_over_time({container=\"mvp-backend\"} | json | msg=\"Request processed\" | unwrap duration | __error__=\"\" [5m])))", "expr": "topk(10, avg by (path) (avg_over_time({container=~\"mvp-backend(-staging)?\"} | json | msg=\"Request processed\" | unwrap duration | __error__=\"\" [5m])))",
"legendFormat": "{{path}}", "legendFormat": "{{path}}",
"refId": "A" "refId": "A"
} }
@@ -564,7 +564,7 @@
"type": "loki", "type": "loki",
"uid": "${datasource}" "uid": "${datasource}"
}, },
"expr": "sum by (path, status) (count_over_time({container=\"mvp-backend\"} | json | msg=\"Request processed\" [5m]))", "expr": "sum by (path, status) (count_over_time({container=~\"mvp-backend(-staging)?\"} | json | msg=\"Request processed\" [5m]))",
"legendFormat": "{{path}} - {{status}}", "legendFormat": "{{path}} - {{status}}",
"refId": "A" "refId": "A"
} }

View File

@@ -334,7 +334,7 @@
"type": "loki", "type": "loki",
"uid": "${datasource}" "uid": "${datasource}"
}, },
"expr": "count_over_time({container=\"mvp-backend\"}[5m])", "expr": "count_over_time({container=~\"mvp-backend(-staging)?\"}[5m])",
"legendFormat": "mvp-backend", "legendFormat": "mvp-backend",
"refId": "A" "refId": "A"
}, },
@@ -343,7 +343,7 @@
"type": "loki", "type": "loki",
"uid": "${datasource}" "uid": "${datasource}"
}, },
"expr": "count_over_time({container=\"mvp-frontend\"}[5m])", "expr": "count_over_time({container=~\"mvp-frontend(-staging)?\"}[5m])",
"legendFormat": "mvp-frontend", "legendFormat": "mvp-frontend",
"refId": "B" "refId": "B"
}, },
@@ -352,7 +352,7 @@
"type": "loki", "type": "loki",
"uid": "${datasource}" "uid": "${datasource}"
}, },
"expr": "count_over_time({container=\"mvp-postgres\"}[5m])", "expr": "count_over_time({container=~\"mvp-postgres(-staging)?\"}[5m])",
"legendFormat": "mvp-postgres", "legendFormat": "mvp-postgres",
"refId": "C" "refId": "C"
}, },
@@ -361,7 +361,7 @@
"type": "loki", "type": "loki",
"uid": "${datasource}" "uid": "${datasource}"
}, },
"expr": "count_over_time({container=\"mvp-redis\"}[5m])", "expr": "count_over_time({container=~\"mvp-redis(-staging)?\"}[5m])",
"legendFormat": "mvp-redis", "legendFormat": "mvp-redis",
"refId": "D" "refId": "D"
}, },
@@ -370,7 +370,7 @@
"type": "loki", "type": "loki",
"uid": "${datasource}" "uid": "${datasource}"
}, },
"expr": "count_over_time({container=\"mvp-traefik\"}[5m])", "expr": "count_over_time({container=~\"mvp-traefik(-staging)?\"}[5m])",
"legendFormat": "mvp-traefik", "legendFormat": "mvp-traefik",
"refId": "E" "refId": "E"
}, },
@@ -379,7 +379,7 @@
"type": "loki", "type": "loki",
"uid": "${datasource}" "uid": "${datasource}"
}, },
"expr": "count_over_time({container=\"mvp-ocr\"}[5m])", "expr": "count_over_time({container=~\"mvp-ocr(-staging)?\"}[5m])",
"legendFormat": "mvp-ocr", "legendFormat": "mvp-ocr",
"refId": "F" "refId": "F"
}, },
@@ -388,7 +388,7 @@
"type": "loki", "type": "loki",
"uid": "${datasource}" "uid": "${datasource}"
}, },
"expr": "count_over_time({container=\"mvp-loki\"}[5m])", "expr": "count_over_time({container=~\"mvp-loki(-staging)?\"}[5m])",
"legendFormat": "mvp-loki", "legendFormat": "mvp-loki",
"refId": "G" "refId": "G"
}, },
@@ -397,7 +397,7 @@
"type": "loki", "type": "loki",
"uid": "${datasource}" "uid": "${datasource}"
}, },
"expr": "count_over_time({container=\"mvp-alloy\"}[5m])", "expr": "count_over_time({container=~\"mvp-alloy(-staging)?\"}[5m])",
"legendFormat": "mvp-alloy", "legendFormat": "mvp-alloy",
"refId": "H" "refId": "H"
}, },
@@ -406,7 +406,7 @@
"type": "loki", "type": "loki",
"uid": "${datasource}" "uid": "${datasource}"
}, },
"expr": "count_over_time({container=\"mvp-grafana\"}[5m])", "expr": "count_over_time({container=~\"mvp-grafana(-staging)?\"}[5m])",
"legendFormat": "mvp-grafana", "legendFormat": "mvp-grafana",
"refId": "I" "refId": "I"
} }
@@ -494,7 +494,7 @@
"type": "loki", "type": "loki",
"uid": "${datasource}" "uid": "${datasource}"
}, },
"expr": "count_over_time({container=\"mvp-backend\"} | json | msg=\"Request processed\" [1m])", "expr": "count_over_time({container=~\"mvp-backend(-staging)?\"} | json | msg=\"Request processed\" [1m])",
"legendFormat": "Backend Requests", "legendFormat": "Backend Requests",
"refId": "A" "refId": "A"
} }

View File

@@ -337,7 +337,7 @@
"type": "loki", "type": "loki",
"uid": "${datasource}" "uid": "${datasource}"
}, },
"expr": "sum by (path) (count_over_time({container=\"mvp-backend\"} | json | level=\"error\" [5m]))", "expr": "sum by (path) (count_over_time({container=~\"mvp-backend(-staging)?\"} | json | level=\"error\" [5m]))",
"legendFormat": "{{path}}", "legendFormat": "{{path}}",
"refId": "A" "refId": "A"
} }
@@ -377,7 +377,7 @@
"type": "loki", "type": "loki",
"uid": "${datasource}" "uid": "${datasource}"
}, },
"expr": "{container=\"mvp-backend\"} | json | level=\"error\" | line_format \"{{.error}}\\n{{.stack}}\"", "expr": "{container=~\"mvp-backend(-staging)?\"} | json | level=\"error\" | line_format \"{{.error}}\\n{{.stack}}\"",
"refId": "A" "refId": "A"
} }
], ],
@@ -416,7 +416,7 @@
"type": "loki", "type": "loki",
"uid": "${datasource}" "uid": "${datasource}"
}, },
"expr": "{container=\"mvp-backend\"} |= \"$requestId\"", "expr": "{container=~\"mvp-backend(-staging)?\"} |= \"$requestId\"",
"refId": "A" "refId": "A"
} }
], ],
@@ -510,7 +510,7 @@
"type": "loki", "type": "loki",
"uid": "${datasource}" "uid": "${datasource}"
}, },
"expr": "{container=\"mvp-backend\"} | json | msg=\"Request processed\" | status >= 500", "expr": "{container=~\"mvp-backend(-staging)?\"} | json | msg=\"Request processed\" | status >= 500",
"refId": "A" "refId": "A"
} }
], ],

View File

@@ -157,7 +157,7 @@
"type": "loki", "type": "loki",
"uid": "${datasource}" "uid": "${datasource}"
}, },
"expr": "{container=\"mvp-postgres\"} |~ \"ERROR|WARNING|FATAL\"", "expr": "{container=~\"mvp-postgres(-staging)?\"} |~ \"ERROR|WARNING|FATAL\"",
"refId": "A" "refId": "A"
} }
], ],
@@ -196,7 +196,7 @@
"type": "loki", "type": "loki",
"uid": "${datasource}" "uid": "${datasource}"
}, },
"expr": "{container=\"mvp-redis\"}", "expr": "{container=~\"mvp-redis(-staging)?\"}",
"refId": "A" "refId": "A"
} }
], ],
@@ -235,7 +235,7 @@
"type": "loki", "type": "loki",
"uid": "${datasource}" "uid": "${datasource}"
}, },
"expr": "{container=\"mvp-traefik\"}", "expr": "{container=~\"mvp-traefik(-staging)?\"}",
"refId": "A" "refId": "A"
} }
], ],
@@ -274,7 +274,7 @@
"type": "loki", "type": "loki",
"uid": "${datasource}" "uid": "${datasource}"
}, },
"expr": "{container=\"mvp-traefik\"} |~ \"level=error|err=\"", "expr": "{container=~\"mvp-traefik(-staging)?\"} |~ \"level=error|err=\"",
"refId": "A" "refId": "A"
} }
], ],
@@ -313,7 +313,7 @@
"type": "loki", "type": "loki",
"uid": "${datasource}" "uid": "${datasource}"
}, },
"expr": "{container=\"mvp-ocr\"}", "expr": "{container=~\"mvp-ocr(-staging)?\"}",
"refId": "A" "refId": "A"
} }
], ],
@@ -352,7 +352,7 @@
"type": "loki", "type": "loki",
"uid": "${datasource}" "uid": "${datasource}"
}, },
"expr": "{container=\"mvp-ocr\"} |~ \"ERROR|error|Exception|Traceback\"", "expr": "{container=~\"mvp-ocr(-staging)?\"} |~ \"ERROR|error|Exception|Traceback\"",
"refId": "A" "refId": "A"
} }
], ],
@@ -439,7 +439,7 @@
"type": "loki", "type": "loki",
"uid": "${datasource}" "uid": "${datasource}"
}, },
"expr": "sum(rate({container=\"mvp-loki\"}[1m]))", "expr": "sum(rate({container=~\"mvp-loki(-staging)?\"}[1m]))",
"legendFormat": "Loki Lines/min", "legendFormat": "Loki Lines/min",
"refId": "A" "refId": "A"
} }

View File

@@ -240,23 +240,6 @@ sudo -u act_runner docker push git.motovaultpro.com/egullickson/test:latest
## Maintenance ## Maintenance
### Disk Cleanup
```bash
# Create cleanup script
sudo tee /usr/local/bin/docker-cleanup.sh > /dev/null <<'EOF'
#!/bin/bash
# Remove unused Docker resources older than 7 days
docker system prune -af --filter "until=168h"
docker volume prune -f
EOF
sudo chmod +x /usr/local/bin/docker-cleanup.sh
# Add to crontab (run daily at 3 AM)
echo "0 3 * * * /usr/local/bin/docker-cleanup.sh >> /var/log/docker-cleanup.log 2>&1" | sudo crontab -
```
### Update Runner ### Update Runner
```bash ```bash