fix: Update grafana dashboards
All checks were successful
Deploy to Staging / Build Images (push) Successful in 35s
Deploy to Staging / Deploy to Staging (push) Successful in 51s
Deploy to Staging / Verify Staging (push) Successful in 2m31s
Deploy to Staging / Notify Staging Ready (push) Successful in 7s
Deploy to Staging / Notify Staging Failure (push) Has been skipped

This commit is contained in:
Eric Gullickson
2026-02-06 13:50:17 -06:00
parent 66314a0493
commit c88fbcdc4e
10 changed files with 57 additions and 88 deletions

View File

@@ -19,7 +19,7 @@ on:
env:
REGISTRY: git.motovaultpro.com
DEPLOY_PATH: /opt/motovaultpro
COMPOSE_FILE: docker-compose.yml
BASE_COMPOSE_FILE: docker-compose.yml
COMPOSE_BLUE_GREEN: docker-compose.blue-green.yml
COMPOSE_PROD: docker-compose.prod.yml
HEALTH_CHECK_TIMEOUT: "60"
@@ -170,7 +170,7 @@ jobs:
cd "$DEPLOY_PATH"
# Start shared infrastructure services (database, cache, logging)
# These persist across blue-green deployments
docker compose -f $COMPOSE_FILE -f $COMPOSE_BLUE_GREEN -f $COMPOSE_PROD up -d \
docker compose -f $BASE_COMPOSE_FILE -f $COMPOSE_BLUE_GREEN -f $COMPOSE_PROD up -d \
mvp-postgres mvp-redis mvp-loki mvp-alloy mvp-grafana
- name: Start target stack
@@ -182,7 +182,7 @@ jobs:
# --force-recreate ensures containers are recreated even if image tag is same
# This prevents stale container content when image digest changes
# Start shared OCR service and target stack
docker compose -f $COMPOSE_FILE -f $COMPOSE_BLUE_GREEN -f $COMPOSE_PROD up -d --force-recreate \
docker compose -f $BASE_COMPOSE_FILE -f $COMPOSE_BLUE_GREEN -f $COMPOSE_PROD up -d --force-recreate \
mvp-ocr mvp-frontend-$TARGET_STACK mvp-backend-$TARGET_STACK
- name: Wait for stack initialization
@@ -221,7 +221,7 @@ jobs:
- name: Start Traefik
run: |
cd "$DEPLOY_PATH"
docker compose -f $COMPOSE_FILE -f $COMPOSE_BLUE_GREEN -f $COMPOSE_PROD up -d mvp-traefik
docker compose -f $BASE_COMPOSE_FILE -f $COMPOSE_BLUE_GREEN -f $COMPOSE_PROD up -d mvp-traefik
- name: Wait for Traefik
run: |

View File

@@ -15,8 +15,8 @@ on:
env:
REGISTRY: git.motovaultpro.com
DEPLOY_PATH: /opt/motovaultpro
COMPOSE_FILE: docker-compose.yml
COMPOSE_STAGING: docker-compose.staging.yml
BASE_COMPOSE_FILE: docker-compose.yml
STAGING_COMPOSE_FILE: docker-compose.staging.yml
HEALTH_CHECK_TIMEOUT: "60"
LOG_LEVEL: DEBUG
@@ -170,8 +170,8 @@ jobs:
export BACKEND_IMAGE=$BACKEND_IMAGE
export FRONTEND_IMAGE=$FRONTEND_IMAGE
export OCR_IMAGE=$OCR_IMAGE
docker compose -f $COMPOSE_FILE -f $COMPOSE_STAGING down --timeout 30 || true
docker compose -f $COMPOSE_FILE -f $COMPOSE_STAGING up -d
docker compose -f $BASE_COMPOSE_FILE -f $STAGING_COMPOSE_FILE down --timeout 30 || true
docker compose -f $BASE_COMPOSE_FILE -f $STAGING_COMPOSE_FILE up -d
- name: Wait for services
run: sleep 5

View File

@@ -269,24 +269,17 @@
when: gitea_registry_token is defined
# ============================================
# Maintenance Scripts
# Remove Legacy Docker Cleanup (was destroying volumes)
# ============================================
- name: Create Docker cleanup script
copy:
dest: /usr/local/bin/docker-cleanup.sh
content: |
#!/bin/bash
# Remove unused Docker resources older than 7 days
docker system prune -af --filter "until=168h"
docker volume prune -f
mode: '0755'
- name: Schedule Docker cleanup cron job
- name: Remove legacy Docker cleanup cron job
cron:
name: "Docker cleanup"
minute: "0"
hour: "3"
job: "/usr/local/bin/docker-cleanup.sh >> /var/log/docker-cleanup.log 2>&1"
state: absent
- name: Remove legacy Docker cleanup script
file:
path: /usr/local/bin/docker-cleanup.sh
state: absent
# ============================================
# Production-Specific Security Hardening

View File

@@ -300,24 +300,17 @@
when: gitea_registry_token is defined
# ============================================
# Maintenance Scripts
# Remove Legacy Docker Cleanup (was destroying volumes)
# ============================================
- name: Create Docker cleanup script
copy:
dest: /usr/local/bin/docker-cleanup.sh
content: |
#!/bin/bash
# Remove unused Docker resources older than 7 days
docker system prune -af --filter "until=168h"
docker volume prune -f
mode: '0755'
- name: Schedule Docker cleanup cron job
- name: Remove legacy Docker cleanup cron job
cron:
name: "Docker cleanup"
minute: "0"
hour: "3"
job: "/usr/local/bin/docker-cleanup.sh >> /var/log/docker-cleanup.log 2>&1"
state: absent
- name: Remove legacy Docker cleanup script
file:
path: /usr/local/bin/docker-cleanup.sh
state: absent
handlers:
- name: Restart act_runner

View File

@@ -73,7 +73,7 @@ groups:
datasourceUid: loki
model:
refId: A
expr: 'count_over_time({container="mvp-backend"}[5m])'
expr: 'count_over_time({container=~"mvp-backend(-staging)?"}[5m])'
queryType: instant
- refId: B
relativeTimeRange:
@@ -110,7 +110,7 @@ groups:
datasourceUid: loki
model:
refId: A
expr: 'count_over_time({container="mvp-postgres"}[5m])'
expr: 'count_over_time({container=~"mvp-postgres(-staging)?"}[5m])'
queryType: instant
- refId: B
relativeTimeRange:
@@ -147,7 +147,7 @@ groups:
datasourceUid: loki
model:
refId: A
expr: 'count_over_time({container="mvp-redis"}[5m])'
expr: 'count_over_time({container=~"mvp-redis(-staging)?"}[5m])'
queryType: instant
- refId: B
relativeTimeRange:
@@ -184,7 +184,7 @@ groups:
datasourceUid: loki
model:
refId: A
expr: 'sum(count_over_time({container="mvp-backend"} | json | msg=`Request processed` | status >= 500 [5m]))'
expr: 'sum(count_over_time({container=~"mvp-backend(-staging)?"} | json | msg=`Request processed` | status >= 500 [5m]))'
queryType: instant
- refId: B
relativeTimeRange:

View File

@@ -121,7 +121,7 @@
"type": "loki",
"uid": "${datasource}"
},
"expr": "sum(rate({container=\"mvp-backend\"} | json | msg=\"Request processed\" [1m]))",
"expr": "sum(rate({container=~\"mvp-backend(-staging)?\"} | json | msg=\"Request processed\" [1m]))",
"legendFormat": "Requests/sec",
"refId": "A"
}
@@ -218,7 +218,7 @@
"type": "loki",
"uid": "${datasource}"
},
"expr": "quantile_over_time(0.50, {container=\"mvp-backend\"} | json | msg=\"Request processed\" | unwrap duration | __error__=\"\" [5m]) by ()",
"expr": "quantile_over_time(0.50, {container=~\"mvp-backend(-staging)?\"} | json | msg=\"Request processed\" | unwrap duration | __error__=\"\" [5m]) by ()",
"legendFormat": "p50",
"refId": "A"
},
@@ -227,7 +227,7 @@
"type": "loki",
"uid": "${datasource}"
},
"expr": "quantile_over_time(0.95, {container=\"mvp-backend\"} | json | msg=\"Request processed\" | unwrap duration | __error__=\"\" [5m]) by ()",
"expr": "quantile_over_time(0.95, {container=~\"mvp-backend(-staging)?\"} | json | msg=\"Request processed\" | unwrap duration | __error__=\"\" [5m]) by ()",
"legendFormat": "p95",
"refId": "B"
},
@@ -236,7 +236,7 @@
"type": "loki",
"uid": "${datasource}"
},
"expr": "quantile_over_time(0.99, {container=\"mvp-backend\"} | json | msg=\"Request processed\" | unwrap duration | __error__=\"\" [5m]) by ()",
"expr": "quantile_over_time(0.99, {container=~\"mvp-backend(-staging)?\"} | json | msg=\"Request processed\" | unwrap duration | __error__=\"\" [5m]) by ()",
"legendFormat": "p99",
"refId": "C"
}
@@ -303,7 +303,7 @@
"type": "loki",
"uid": "${datasource}"
},
"expr": "sum by (status) (count_over_time({container=\"mvp-backend\"} | json | msg=\"Request processed\" [5m]))",
"expr": "sum by (status) (count_over_time({container=~\"mvp-backend(-staging)?\"} | json | msg=\"Request processed\" [5m]))",
"legendFormat": "{{status}}",
"refId": "A"
}
@@ -389,7 +389,7 @@
"type": "loki",
"uid": "${datasource}"
},
"expr": "sum by (path) (count_over_time({container=\"mvp-backend\"} | json | msg=\"Request processed\" [5m]))",
"expr": "sum by (path) (count_over_time({container=~\"mvp-backend(-staging)?\"} | json | msg=\"Request processed\" [5m]))",
"legendFormat": "{{path}}",
"refId": "A"
}
@@ -481,7 +481,7 @@
"type": "loki",
"uid": "${datasource}"
},
"expr": "topk(10, avg by (path) (avg_over_time({container=\"mvp-backend\"} | json | msg=\"Request processed\" | unwrap duration | __error__=\"\" [5m])))",
"expr": "topk(10, avg by (path) (avg_over_time({container=~\"mvp-backend(-staging)?\"} | json | msg=\"Request processed\" | unwrap duration | __error__=\"\" [5m])))",
"legendFormat": "{{path}}",
"refId": "A"
}
@@ -564,7 +564,7 @@
"type": "loki",
"uid": "${datasource}"
},
"expr": "sum by (path, status) (count_over_time({container=\"mvp-backend\"} | json | msg=\"Request processed\" [5m]))",
"expr": "sum by (path, status) (count_over_time({container=~\"mvp-backend(-staging)?\"} | json | msg=\"Request processed\" [5m]))",
"legendFormat": "{{path}} - {{status}}",
"refId": "A"
}

View File

@@ -334,7 +334,7 @@
"type": "loki",
"uid": "${datasource}"
},
"expr": "count_over_time({container=\"mvp-backend\"}[5m])",
"expr": "count_over_time({container=~\"mvp-backend(-staging)?\"}[5m])",
"legendFormat": "mvp-backend",
"refId": "A"
},
@@ -343,7 +343,7 @@
"type": "loki",
"uid": "${datasource}"
},
"expr": "count_over_time({container=\"mvp-frontend\"}[5m])",
"expr": "count_over_time({container=~\"mvp-frontend(-staging)?\"}[5m])",
"legendFormat": "mvp-frontend",
"refId": "B"
},
@@ -352,7 +352,7 @@
"type": "loki",
"uid": "${datasource}"
},
"expr": "count_over_time({container=\"mvp-postgres\"}[5m])",
"expr": "count_over_time({container=~\"mvp-postgres(-staging)?\"}[5m])",
"legendFormat": "mvp-postgres",
"refId": "C"
},
@@ -361,7 +361,7 @@
"type": "loki",
"uid": "${datasource}"
},
"expr": "count_over_time({container=\"mvp-redis\"}[5m])",
"expr": "count_over_time({container=~\"mvp-redis(-staging)?\"}[5m])",
"legendFormat": "mvp-redis",
"refId": "D"
},
@@ -370,7 +370,7 @@
"type": "loki",
"uid": "${datasource}"
},
"expr": "count_over_time({container=\"mvp-traefik\"}[5m])",
"expr": "count_over_time({container=~\"mvp-traefik(-staging)?\"}[5m])",
"legendFormat": "mvp-traefik",
"refId": "E"
},
@@ -379,7 +379,7 @@
"type": "loki",
"uid": "${datasource}"
},
"expr": "count_over_time({container=\"mvp-ocr\"}[5m])",
"expr": "count_over_time({container=~\"mvp-ocr(-staging)?\"}[5m])",
"legendFormat": "mvp-ocr",
"refId": "F"
},
@@ -388,7 +388,7 @@
"type": "loki",
"uid": "${datasource}"
},
"expr": "count_over_time({container=\"mvp-loki\"}[5m])",
"expr": "count_over_time({container=~\"mvp-loki(-staging)?\"}[5m])",
"legendFormat": "mvp-loki",
"refId": "G"
},
@@ -397,7 +397,7 @@
"type": "loki",
"uid": "${datasource}"
},
"expr": "count_over_time({container=\"mvp-alloy\"}[5m])",
"expr": "count_over_time({container=~\"mvp-alloy(-staging)?\"}[5m])",
"legendFormat": "mvp-alloy",
"refId": "H"
},
@@ -406,7 +406,7 @@
"type": "loki",
"uid": "${datasource}"
},
"expr": "count_over_time({container=\"mvp-grafana\"}[5m])",
"expr": "count_over_time({container=~\"mvp-grafana(-staging)?\"}[5m])",
"legendFormat": "mvp-grafana",
"refId": "I"
}
@@ -494,7 +494,7 @@
"type": "loki",
"uid": "${datasource}"
},
"expr": "count_over_time({container=\"mvp-backend\"} | json | msg=\"Request processed\" [1m])",
"expr": "count_over_time({container=~\"mvp-backend(-staging)?\"} | json | msg=\"Request processed\" [1m])",
"legendFormat": "Backend Requests",
"refId": "A"
}

View File

@@ -337,7 +337,7 @@
"type": "loki",
"uid": "${datasource}"
},
"expr": "sum by (path) (count_over_time({container=\"mvp-backend\"} | json | level=\"error\" [5m]))",
"expr": "sum by (path) (count_over_time({container=~\"mvp-backend(-staging)?\"} | json | level=\"error\" [5m]))",
"legendFormat": "{{path}}",
"refId": "A"
}
@@ -377,7 +377,7 @@
"type": "loki",
"uid": "${datasource}"
},
"expr": "{container=\"mvp-backend\"} | json | level=\"error\" | line_format \"{{.error}}\\n{{.stack}}\"",
"expr": "{container=~\"mvp-backend(-staging)?\"} | json | level=\"error\" | line_format \"{{.error}}\\n{{.stack}}\"",
"refId": "A"
}
],
@@ -416,7 +416,7 @@
"type": "loki",
"uid": "${datasource}"
},
"expr": "{container=\"mvp-backend\"} |= \"$requestId\"",
"expr": "{container=~\"mvp-backend(-staging)?\"} |= \"$requestId\"",
"refId": "A"
}
],
@@ -510,7 +510,7 @@
"type": "loki",
"uid": "${datasource}"
},
"expr": "{container=\"mvp-backend\"} | json | msg=\"Request processed\" | status >= 500",
"expr": "{container=~\"mvp-backend(-staging)?\"} | json | msg=\"Request processed\" | status >= 500",
"refId": "A"
}
],

View File

@@ -157,7 +157,7 @@
"type": "loki",
"uid": "${datasource}"
},
"expr": "{container=\"mvp-postgres\"} |~ \"ERROR|WARNING|FATAL\"",
"expr": "{container=~\"mvp-postgres(-staging)?\"} |~ \"ERROR|WARNING|FATAL\"",
"refId": "A"
}
],
@@ -196,7 +196,7 @@
"type": "loki",
"uid": "${datasource}"
},
"expr": "{container=\"mvp-redis\"}",
"expr": "{container=~\"mvp-redis(-staging)?\"}",
"refId": "A"
}
],
@@ -235,7 +235,7 @@
"type": "loki",
"uid": "${datasource}"
},
"expr": "{container=\"mvp-traefik\"}",
"expr": "{container=~\"mvp-traefik(-staging)?\"}",
"refId": "A"
}
],
@@ -274,7 +274,7 @@
"type": "loki",
"uid": "${datasource}"
},
"expr": "{container=\"mvp-traefik\"} |~ \"level=error|err=\"",
"expr": "{container=~\"mvp-traefik(-staging)?\"} |~ \"level=error|err=\"",
"refId": "A"
}
],
@@ -313,7 +313,7 @@
"type": "loki",
"uid": "${datasource}"
},
"expr": "{container=\"mvp-ocr\"}",
"expr": "{container=~\"mvp-ocr(-staging)?\"}",
"refId": "A"
}
],
@@ -352,7 +352,7 @@
"type": "loki",
"uid": "${datasource}"
},
"expr": "{container=\"mvp-ocr\"} |~ \"ERROR|error|Exception|Traceback\"",
"expr": "{container=~\"mvp-ocr(-staging)?\"} |~ \"ERROR|error|Exception|Traceback\"",
"refId": "A"
}
],
@@ -439,7 +439,7 @@
"type": "loki",
"uid": "${datasource}"
},
"expr": "sum(rate({container=\"mvp-loki\"}[1m]))",
"expr": "sum(rate({container=~\"mvp-loki(-staging)?\"}[1m]))",
"legendFormat": "Loki Lines/min",
"refId": "A"
}

View File

@@ -240,23 +240,6 @@ sudo -u act_runner docker push git.motovaultpro.com/egullickson/test:latest
## Maintenance
### Disk Cleanup
```bash
# Create cleanup script
sudo tee /usr/local/bin/docker-cleanup.sh > /dev/null <<'EOF'
#!/bin/bash
# Remove unused Docker resources older than 7 days
docker system prune -af --filter "until=168h"
docker volume prune -f
EOF
sudo chmod +x /usr/local/bin/docker-cleanup.sh
# Add to crontab (run daily at 3 AM)
echo "0 3 * * * /usr/local/bin/docker-cleanup.sh >> /var/log/docker-cleanup.log 2>&1" | sudo crontab -
```
### Update Runner
```bash