From ca8d0628261a23ff28fbd07ea5da51b8f67e4efa Mon Sep 17 00:00:00 2001 From: Codex Date: Tue, 5 May 2026 10:54:39 -0500 Subject: [PATCH] feat(monitoring): mirror Update Center Operations dashboard (Track 1D) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds fc-updatecenter-dashboard.json (uid: fc-updatecenter, version: 2) to apps/monitoring/ — mirrors the dashboard deployed to noc1 at /opt/monitoring/grafana/dashboards/fc-updatecenter-dashboard.json. 13 panels: 5 existing probe/availability panels + 1 OTEL row header + 7 new panels for the 6 OTEL counters added to FlowerCore.Updater.Web: updatecenter_manifest_requests_total updatecenter_bundle_download_bytes_total updatecenter_bundle_downloads_total updatecenter_checkins_total updatecenter_release_publishes_total updatecenter_signature_verify_failures_total Live on Grafana at https://grafana.iamworkin.lan/d/fc-updatecenter Co-Authored-By: Claude Opus 4.7 (1M context) --- .../monitoring/fc-updatecenter-dashboard.json | 762 ++++++++++++++++++ 1 file changed, 762 insertions(+) create mode 100644 apps/monitoring/fc-updatecenter-dashboard.json diff --git a/apps/monitoring/fc-updatecenter-dashboard.json b/apps/monitoring/fc-updatecenter-dashboard.json new file mode 100644 index 0000000..4ba7301 --- /dev/null +++ b/apps/monitoring/fc-updatecenter-dashboard.json @@ -0,0 +1,762 @@ +{ + "annotations": { + "list": [] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [ + { + "icon": "external link", + "includeVars": false, + "keepTime": false, + "targetBlank": true, + "title": "Open Service", + "type": "link", + "url": "https://updatecenter.iamworkin.lan/" + } + ], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "fffjikve8llhce" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "color": "#f87171", + "index": 1, + "text": "DOWN" + }, + 
"1": { + "color": "#4ade80", + "index": 0, + "text": "UP" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#f87171", + "value": null + }, + { + "color": "#4ade80", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 8, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value_and_name" + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "fffjikve8llhce" + }, + "expr": "probe_success{job=\"probe-traefik-services\",instance=\"updatecenter.iamworkin.lan\"}", + "refId": "A", + "legendFormat": "Availability" + } + ], + "title": "Service Availability", + "transparent": true, + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "fffjikve8llhce" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 2, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#f87171", + "value": null + }, + { + "color": "#fbbf24", + "value": 95 + }, + { + "color": "#FFB300", + "value": 99 + }, + { + "color": "#4ade80", + "value": 99.9 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 8, + "x": 8, + "y": 0 + }, + "id": 2, + "options": { + "colorMode": "background_solid", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value_and_name" + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "fffjikve8llhce" + }, + "expr": "avg_over_time(probe_success{job=\"probe-traefik-services\",instance=\"updatecenter.iamworkin.lan\"}[24h]) * 100", + "refId": "A", + "legendFormat": "24h Uptime" + } + ], + "title": 
"24-Hour Uptime", + "transparent": true, + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "fffjikve8llhce" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "max": 30, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#f87171", + "value": null + }, + { + "color": "#fbbf24", + "value": 2 + }, + { + "color": "#4ade80", + "value": 7 + } + ] + }, + "unit": "d" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 8, + "x": 16, + "y": 0 + }, + "id": 3, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "fffjikve8llhce" + }, + "expr": "(probe_ssl_earliest_cert_expiry{job=\"probe-traefik-services\",instance=\"updatecenter.iamworkin.lan\"} - time()) / 86400", + "refId": "A", + "legendFormat": "Days Remaining" + } + ], + "title": "Cert Expiry (Days)", + "transparent": true, + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "fffjikve8llhce" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Response Time (seconds)", + "drawStyle": "line", + "fillOpacity": 12, + "gradientMode": "scheme", + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 4, + "showPoints": "never", + "spanNulls": true, + "thresholdsStyle": { + "mode": "dashed" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#4ade80", + "value": null + }, + { + "color": "#fbbf24", + "value": 2 + }, + { + "color": "#f87171", + "value": 5 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 14, + "x": 0, + "y": 4 + }, + 
"id": 4, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "mean", + "max" + ], + "displayMode": "table", + "placement": "right" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "fffjikve8llhce" + }, + "expr": "probe_duration_seconds{job=\"probe-traefik-services\",instance=\"updatecenter.iamworkin.lan\"}", + "refId": "A", + "legendFormat": "Probe Duration" + } + ], + "timeFrom": "1h", + "title": "Response Time (1h Trend)", + "transparent": true, + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "fffjikve8llhce" + }, + "gridPos": { + "h": 8, + "w": 10, + "x": 14, + "y": 4 + }, + "id": 5, + "options": { + "alertInstanceLabelFilter": "{instance=\"updatecenter.iamworkin.lan\"}", + "alertName": "", + "dashboardAlerts": false, + "groupBy": [], + "groupMode": "default", + "maxItems": 10, + "sortOrder": 1, + "stateFilter": { + "error": true, + "firing": true, + "noData": true, + "normal": false, + "pending": true + }, + "viewMode": "list" + }, + "title": "Active Alerts", + "type": "alertlist" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 20, + "title": "OTEL Counters — Track 1D", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "fffjikve8llhce" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "lineWidth": 1, + "fillOpacity": 10 + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 13 + }, + "id": 21, + "options": { + "legend": { + "displayMode": "table", + "placement": "right", + "calcs": ["mean", "lastNotNull"] + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "fffjikve8llhce" + }, + "expr": "sum by (status) (rate(updatecenter_manifest_requests_total[5m]))", + "refId": "A", + 
"legendFormat": "status={{status}}" + } + ], + "title": "Manifest Requests rate by status (5m)", + "transparent": true, + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "fffjikve8llhce" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "lineWidth": 1, + "fillOpacity": 10 + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 13 + }, + "id": 22, + "options": { + "legend": { + "displayMode": "table", + "placement": "right", + "calcs": ["mean", "lastNotNull"] + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "fffjikve8llhce" + }, + "expr": "sum by (slug) (rate(updatecenter_bundle_download_bytes_total[5m]))", + "refId": "A", + "legendFormat": "{{slug}}" + } + ], + "title": "Bundle Download Throughput by slug (5m)", + "transparent": true, + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "fffjikve8llhce" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "lineWidth": 1, + "fillOpacity": 10 + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 21 + }, + "id": 23, + "options": { + "legend": { + "displayMode": "table", + "placement": "right", + "calcs": ["mean", "lastNotNull"] + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "fffjikve8llhce" + }, + "expr": "sum by (status) (rate(updatecenter_checkins_total[5m]))", + "refId": "A", + "legendFormat": "status={{status}}" + } + ], + "title": "Agent Check-in Rate by status (5m)", + "transparent": true, + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "fffjikve8llhce" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "thresholds": { 
+ "mode": "absolute", + "steps": [ + { "color": "#4ade80", "value": null }, + { "color": "#f87171", "value": 1 } + ] + }, + "unit": "none", + "decimals": 2 + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 21 + }, + "id": 24, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "value_and_name" + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "fffjikve8llhce" + }, + "expr": "sum(increase(updatecenter_signature_verify_failures_total[1h]))", + "refId": "A", + "legendFormat": "Sig Verify Failures (1h)" + } + ], + "title": "Signature Verify Failures (1h)", + "transparent": true, + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "fffjikve8llhce" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "lineWidth": 1, + "fillOpacity": 10 + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 18, + "y": 21 + }, + "id": 25, + "options": { + "legend": { + "displayMode": "table", + "placement": "right", + "calcs": ["mean", "lastNotNull"] + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "fffjikve8llhce" + }, + "expr": "sum by (slug, channel) (rate(updatecenter_release_publishes_total[5m]))", + "refId": "A", + "legendFormat": "{{slug}}/{{channel}}" + } + ], + "title": "Release Publishes rate by slug/channel (5m)", + "transparent": true, + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "fffjikve8llhce" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "lineWidth": 1, + "fillOpacity": 10 + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 29 +
}, + "id": 26, + "options": { + "legend": { + "displayMode": "table", + "placement": "right", + "calcs": ["mean", "lastNotNull"] + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "fffjikve8llhce" + }, + "expr": "sum by (kind, status) (rate(updatecenter_bundle_downloads_total[5m]))", + "refId": "A", + "legendFormat": "{{kind}} / {{status}}" + } + ], + "title": "Bundle Download Requests by kind/status (5m)", + "transparent": true, + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "fffjikve8llhce" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "lineWidth": 2, + "fillOpacity": 20 + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "#4ade80", "value": null }, + { "color": "#f87171", "value": 0.01 } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 29 + }, + "id": 27, + "options": { + "legend": { + "displayMode": "table", + "placement": "right", + "calcs": ["mean", "lastNotNull"] + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "fffjikve8llhce" + }, + "expr": "rate(updatecenter_signature_verify_failures_total[5m])", + "refId": "A", + "legendFormat": "Sig verify failures/s" + } + ], + "title": "Signature Verify Failure Rate (5m) — Critical if >0", + "transparent": true, + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 39, + "style": "dark", + "tags": [ + "blue-jay", + "flowercore", + "synthetic", + "updatecenter", + "otel" + ], + "templating": { + "list": [] + }, + "time": { + "from": "now-24h", + "to": "now" + }, + "timezone": "browser", + "title": "FlowerCore.UpdateCenter Dashboard", + "uid": "fc-updatecenter", + "version": 2 +}