From 2c8968f5d001671eb1b9274c16faf1f37938017f Mon Sep 17 00:00:00 2001 From: Andrew Stoltz Date: Tue, 2 Jun 2026 02:38:54 -0500 Subject: [PATCH] Wire SignalControl platform observability --- apps/fc-signalcontrol/README.md | 33 +++ apps/fc-signalcontrol/fc-signalcontrol.yaml | 44 ++- .../grafana-dashboard-signalcontrol.yaml | 260 ++++++++++++++++++ apps/monitoring/noc-monitoring.yaml | 19 ++ .../FleetManifestLintTests.cs | 84 ++++++ 5 files changed, 439 insertions(+), 1 deletion(-) create mode 100644 apps/fc-signalcontrol/README.md create mode 100644 apps/monitoring/grafana-dashboard-signalcontrol.yaml diff --git a/apps/fc-signalcontrol/README.md b/apps/fc-signalcontrol/README.md new file mode 100644 index 0000000..93c47e7 --- /dev/null +++ b/apps/fc-signalcontrol/README.md @@ -0,0 +1,33 @@ +# FlowerCore SignalControl platform notes + +This app owns the cluster web manager at `signalcontrol.iamworkin.lan` and documents the physical Pi pilot at `signal-a.iamworkin.lan` / `pirelay`. + +## mTLS enrollment pattern + +Do not install or restart anything from this repo. The intended pirelay pattern is the Pi-signage step-ca-agent shape: + +- stable node identity: `pirelay` +- local private key and CSR generated on the node +- CSR submitted through the approved DeviceManagement/step-ca enrollment path +- client certificate and chain stored node-local under `/etc/flowercore/signalcontrol/mtls/` +- daily renewal timer, renewing only when fewer than 30 days remain +- certificate used for DM-agent to DM-web traffic and future SignalControl inter-service calls + +Secrets, enrollment codes, private keys, p12 passphrases, and OIDC client secrets stay out of Git. + +## Telemetry + +Monitoring manifests add a dedicated Prometheus job: + +- `signalcontrol-pi-app` +- target `10.0.58.113:5200` +- path `/metrics/prometheus` +- labels `instance="pirelay"`, `host="signal-a.iamworkin.lan"`, `service="signalcontrol-pi"` + +Host metrics continue through the `edge-nodes` node_exporter target at `10.0.58.113:9100`. + +## Physical-control audit + +The app ships with `FlowerCore:SignalControl:PhysicalAudit:Enabled=false` and `ForwardingEnabled=false`. Enabling local audit creates a SHA-256 hash chain for physical-control mutations. Forwarding to `https://audit.iamworkin.lan/api/v1/audit/signalcontrol` requires flipping the forwarding gate separately. + +Telemetry reads and `/metrics` scrapes are not audited. diff --git a/apps/fc-signalcontrol/fc-signalcontrol.yaml b/apps/fc-signalcontrol/fc-signalcontrol.yaml index 492fa55..758b6b9 100644 --- a/apps/fc-signalcontrol/fc-signalcontrol.yaml +++ b/apps/fc-signalcontrol/fc-signalcontrol.yaml @@ -46,7 +46,7 @@ spec: spec: containers: - name: signalcontrol-web - image: localhost/fc-signalcontrol-web:latest + image: localhost/fc-signalcontrol-web:s50cx12-20260602-1d26c58 imagePullPolicy: Never ports: - containerPort: 5000 @@ -65,6 +65,48 @@ spec: secretKeyRef: name: signalcontrol-auth key: Auth__ApiKey + - name: Auth__AdminApiKey + valueFrom: + secretKeyRef: + name: signalcontrol-auth + key: Auth__AdminApiKey + optional: true + - name: Auth__Enabled + value: "false" + - name: FlowerCore__Auth__Enabled + value: "false" + - name: FlowerCore__Auth__Oidc__Enabled + value: "true" + - name: FlowerCore__Auth__Oidc__Authority + valueFrom: + secretKeyRef: + name: signalcontrol-oidc-client + key: issuer_url + optional: true + - name: FlowerCore__Auth__Oidc__ClientId + valueFrom: + secretKeyRef: + name: signalcontrol-oidc-client + key: client_id + optional: true + - name: FlowerCore__Auth__Oidc__ClientSecret + valueFrom: + secretKeyRef: + name: signalcontrol-oidc-client + key: client_secret + optional: true + - name: TrafficSignal__RelayBridge__Enabled + value: "true" + - name: TrafficSignal__RelayBridge__BaseUrl + value: https://pirelay.iamworkin.lan + - name: TrafficSignal__RelayBridge__ApiKey + valueFrom: + secretKeyRef: + name: signalcontrol-pirelay + key: ApiKey + optional: true + - name: LiveStatus__TrafficSignal__BaseAddress + value: https://signalcontrol.iamworkin.lan volumeMounts: - name: data mountPath: /data diff --git a/apps/monitoring/grafana-dashboard-signalcontrol.yaml b/apps/monitoring/grafana-dashboard-signalcontrol.yaml new file mode 100644 index 0000000..175ea7e --- /dev/null +++ b/apps/monitoring/grafana-dashboard-signalcontrol.yaml @@ -0,0 +1,260 @@ +# Grafana dashboard ConfigMap for FlowerCore.SignalControl on pirelay. +# +# The Grafana Deployment in noc-monitoring.yaml mounts this ConfigMap at +# /var/lib/grafana/dashboards/signalcontrol. The paired Prometheus jobs are: +# - signalcontrol-pi-app: 10.0.58.113:5200 /metrics/prometheus +# - edge-nodes: 10.0.58.113:9100 with instance="pirelay" +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-dashboard-signalcontrol + namespace: monitoring +data: + signalcontrol.json: | + { + "annotations": { "list": [] }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "green", "value": 1 } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { "h": 5, "w": 6, "x": 0, "y": 0 }, + "id": 1, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { "editorMode": "code", "expr": "up{job=\"signalcontrol-pi-app\",instance=\"pirelay\"}", "range": true, "refId": "A" } + ], + "title": "SignalControl App Up", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "green", "value": 1 } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { "h": 5, "w": 6, "x": 6, "y": 0 }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { "editorMode": "code", "expr": "up{job=\"edge-nodes\",instance=\"pirelay\"}", "range": true, "refId": "A" } + ], + "title": "pirelay node_exporter Up", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { "defaults": { "unit": "short" }, "overrides": [] }, + "gridPos": { "h": 5, "w": 6, "x": 12, "y": 0 }, + "id": 3, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, + "textMode": "name" + }, + "targets": [ + { "editorMode": "code", "expr": "signalcontrol_active_pattern{job=\"signalcontrol-pi-app\",instance=\"pirelay\"}", "legendFormat": "{{pattern}}", "range": true, "refId": "A" } + ], + "title": "Active Pattern", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { "defaults": { "unit": "short" }, "overrides": [] }, + "gridPos": { "h": 5, "w": 6, "x": 18, "y": 0 }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, + "textMode": "name" + }, + "targets": [ + { "editorMode": "code", "expr": "signalcontrol_phase{job=\"signalcontrol-pi-app\",instance=\"pirelay\"}", "legendFormat": "{{phase}}", "range": true, "refId": "A" } + ], + "title": "Current Phase", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { "defaults": { "unit": "ops" }, "overrides": [] }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 5 }, + "id": 5, + "options": { "legend": { "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, + "targets": [ + { + "editorMode": "code", + "expr": "sum by (channel, state) (rate(signal_relay_writes_total{job=\"signalcontrol-pi-app\",instance=\"pirelay\"}[$__rate_interval]))", + "legendFormat": "channel {{channel}} {{state}}", + "range": true, + "refId": "A" + } + ], + "title": "Relay Activations", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { "defaults": { "unit": "ops" }, "overrides": [] }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 5 }, + "id": 6, + "options": { "legend": { "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, + "targets": [ + { + "editorMode": "code", + "expr": "sum by (source, to) (rate(signal_transitions_total{job=\"signalcontrol-pi-app\",instance=\"pirelay\"}[$__rate_interval]))", + "legendFormat": "{{source}} -> {{to}}", + "range": true, + "refId": "A" + } + ], + "title": "Phase Dwell / Transitions", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { "defaults": { "unit": "short" }, "overrides": [] }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 13 }, + "id": 7, + "options": { "legend": { "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, + "targets": [ + { + "editorMode": "code", + "expr": "sum by (action, outcome) (increase(signal_schedule_fires_total{job=\"signalcontrol-pi-app\",instance=\"pirelay\"}[24h]))", + "legendFormat": "{{action}} {{outcome}}", + "range": true, + "refId": "A" + }, + { + "editorMode": "code", + "expr": "sum by (from_pattern, to_pattern) (increase(flowercore_signalcontrol_pattern_switches_total{job=\"signalcontrol-pi-app\",instance=\"pirelay\"}[24h]))", + "legendFormat": "{{from_pattern}} -> {{to_pattern}}", + "range": true, + "refId": "B" + } + ], + "title": "Schedule Fires and Pattern Switches", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { "defaults": { "unit": "percentunit" }, "overrides": [] }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 13 }, + "id": 8, + "options": { "legend": { "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, + "targets": [ + { + "editorMode": "code", + "expr": "1 - avg by (instance) (rate(node_cpu_seconds_total{job=\"edge-nodes\",instance=\"pirelay\",mode=\"idle\"}[$__rate_interval]))", + "legendFormat": "CPU", + "range": true, + "refId": "A" + }, + { + "editorMode": "code", + "expr": "1 - (node_memory_MemAvailable_bytes{job=\"edge-nodes\",instance=\"pirelay\"} / node_memory_MemTotal_bytes{job=\"edge-nodes\",instance=\"pirelay\"})", + "legendFormat": "Memory", + "range": true, + "refId": "B" + } + ], + "title": "pirelay Host Utilization", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { "defaults": { "unit": "short" }, "overrides": [] }, + "gridPos": { "h": 6, "w": 12, "x": 0, "y": 21 }, + "id": 9, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, + "textMode": "auto" + }, + "targets": [ + { "editorMode": "code", "expr": "signalcontrol_screen_saver_enabled{job=\"signalcontrol-pi-app\",instance=\"pirelay\"}", "range": true, "refId": "A" } + ], + "title": "Screen-saver Enabled", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "fieldConfig": { "defaults": { "unit": "short" }, "overrides": [] }, + "gridPos": { "h": 6, "w": 12, "x": 12, "y": 21 }, + "id": 10, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, + "textMode": "name" + }, + "targets": [ + { "editorMode": "code", "expr": "signalcontrol_animation_active{job=\"signalcontrol-pi-app\",instance=\"pirelay\"}", "legendFormat": "{{planner}}", "range": true, "refId": "A" } + ], + "title": "Screen-saver / Animation Engaged", + "type": "stat" + } + ], + "refresh": "30s", + "schemaVersion": 39, + "style": "dark", + "tags": [ "flowercore", "signalcontrol", "pirelay" ], + "templating": { "list": [] }, + "time": { "from": "now-24h", "to": "now" }, + "timezone": "browser", + "title": "FlowerCore SignalControl", + "uid": "flowercore-signalcontrol", + "version": 1 + } diff --git a/apps/monitoring/noc-monitoring.yaml b/apps/monitoring/noc-monitoring.yaml index a8f8acb..35fe00d 100644 --- a/apps/monitoring/noc-monitoring.yaml +++ b/apps/monitoring/noc-monitoring.yaml @@ -230,6 +230,19 @@ data: vlan: "home" device: "pi3-ks0212" + # SignalControl Pi-edition app metrics (pirelay / signal-a) + - job_name: "signalcontrol-pi-app" + scrape_interval: 15s + metrics_path: /metrics/prometheus + static_configs: + - targets: ["10.0.58.113:5200"] + labels: + instance: "pirelay" + host: "signal-a.iamworkin.lan" + service: "signalcontrol-pi" + vlan: "home" + device: "pi3-ks0212" + # Epson ET-3750 EcoTank Printer SNMP - job_name: "snmp-printer" scrape_interval: 5m @@ -4051,6 +4064,9 @@ spec: - name: dashboards-remotedesktop mountPath: /var/lib/grafana/dashboards/remotedesktop readOnly: true + - name: dashboards-signalcontrol + mountPath: /var/lib/grafana/dashboards/signalcontrol + readOnly: true - name: datasource-provisioning mountPath: /etc/grafana/provisioning/datasources readOnly: true @@ -4104,6 +4120,9 @@ spec: - name: dashboards-remotedesktop configMap: name: grafana-dashboard-remotedesktop + - name: dashboards-signalcontrol + configMap: + name: grafana-dashboard-signalcontrol - name: datasource-provisioning configMap: name: grafana-datasource-provisioning diff --git a/tests/bluejay-infra-lint/FleetManifestLintTests.cs b/tests/bluejay-infra-lint/FleetManifestLintTests.cs index dfdc671..890a3c1 100644 --- a/tests/bluejay-infra-lint/FleetManifestLintTests.cs +++ b/tests/bluejay-infra-lint/FleetManifestLintTests.cs @@ -227,6 +227,50 @@ public sealed class FleetManifestLintTests violations.Should().BeEmpty(); } + [Fact] + public void SignalControlDeployment_MustKeepAuthOffAndStageOidcSecret() + { + var deployment = Inventory.Documents.Single(document => + document.Kind == "Deployment" + && document.Namespace == "fc-signalcontrol" + && document.Name == "signalcontrol-web" + && document.RelativePath == "fc-signalcontrol/fc-signalcontrol.yaml"); + var container = deployment.MainContainerMappings().Single(container => + ManifestNodeExtensions.Scalar(container, "name") == "signalcontrol-web"); + + EnvValue(container, "Auth__Enabled").Should().Be("false"); + EnvValue(container, "FlowerCore__Auth__Enabled").Should().Be("false"); + EnvValue(container, "FlowerCore__Auth__Oidc__Enabled").Should().Be("true"); + EnvSecretName(container, "FlowerCore__Auth__Oidc__Authority").Should().Be("signalcontrol-oidc-client"); + EnvSecretKey(container, "FlowerCore__Auth__Oidc__Authority").Should().Be("issuer_url"); + EnvSecretName(container, "FlowerCore__Auth__Oidc__ClientId").Should().Be("signalcontrol-oidc-client"); + EnvSecretKey(container, "FlowerCore__Auth__Oidc__ClientId").Should().Be("client_id"); + EnvSecretName(container, "FlowerCore__Auth__Oidc__ClientSecret").Should().Be("signalcontrol-oidc-client"); + EnvSecretKey(container, "FlowerCore__Auth__Oidc__ClientSecret").Should().Be("client_secret"); + EnvSecretOptional(container, "FlowerCore__Auth__Oidc__Authority").Should().BeTrue(); + EnvSecretOptional(container, "FlowerCore__Auth__Oidc__ClientId").Should().BeTrue(); + EnvSecretOptional(container, "FlowerCore__Auth__Oidc__ClientSecret").Should().BeTrue(); + } + + [Fact] + public void SignalControlDeployment_MustWirePirelayRelayBridgeSecret() + { + var deployment = Inventory.Documents.Single(document => + document.Kind == "Deployment" + && document.Namespace == "fc-signalcontrol" + && document.Name == "signalcontrol-web" + && document.RelativePath == "fc-signalcontrol/fc-signalcontrol.yaml"); + var container = deployment.MainContainerMappings().Single(container => + ManifestNodeExtensions.Scalar(container, "name") == "signalcontrol-web"); + + EnvValue(container, "TrafficSignal__RelayBridge__Enabled").Should().Be("true"); + EnvValue(container, "TrafficSignal__RelayBridge__BaseUrl").Should().Be("https://pirelay.iamworkin.lan"); + EnvSecretName(container, "TrafficSignal__RelayBridge__ApiKey").Should().Be("signalcontrol-pirelay"); + EnvSecretKey(container, "TrafficSignal__RelayBridge__ApiKey").Should().Be("ApiKey"); + EnvSecretOptional(container, "TrafficSignal__RelayBridge__ApiKey").Should().BeTrue(); + EnvValue(container, "LiveStatus__TrafficSignal__BaseAddress").Should().Be("https://signalcontrol.iamworkin.lan"); + } + [Fact] public void GitHubRunnerFleet_MustRegisterRequiredReposAsRepoScopedDeployments() { @@ -424,6 +468,36 @@ public sealed class FleetManifestLintTests monitoring.Should().Contain("alert_channel: irc"); } + [Fact] + public void Monitoring_MustScrapeSignalControlPiAppAndMountDashboard() + { + var monitoring = File.ReadAllText(Path.Combine(Inventory.BluejayRoot, "apps", "monitoring", "noc-monitoring.yaml")); + + monitoring.Should().Contain("job_name: \"signalcontrol-pi-app\""); + monitoring.Should().Contain("metrics_path: /metrics/prometheus"); + monitoring.Should().Contain("10.0.58.113:5200"); + monitoring.Should().Contain("host: \"signal-a.iamworkin.lan\""); + monitoring.Should().Contain("mountPath: /var/lib/grafana/dashboards/signalcontrol"); + monitoring.Should().Contain("name: grafana-dashboard-signalcontrol"); + } + + [Fact] + public void SignalControlGrafanaDashboard_MustCoverAppNodeAndPhysicalControlMetrics() + { + var dashboard = File.ReadAllText(Path.Combine( + Inventory.BluejayRoot, + "apps", + "monitoring", + "grafana-dashboard-signalcontrol.yaml")); + + dashboard.Should().Contain("uid\": \"flowercore-signalcontrol\""); + dashboard.Should().Contain("up{job=\\\"signalcontrol-pi-app\\\",instance=\\\"pirelay\\\"}"); + dashboard.Should().Contain("up{job=\\\"edge-nodes\\\",instance=\\\"pirelay\\\"}"); + dashboard.Should().Contain("signal_relay_writes_total"); + dashboard.Should().Contain("signal_schedule_fires_total"); + dashboard.Should().Contain("signalcontrol_screen_saver_enabled"); + } + [Fact] public void StatefulSets_WithVolumeClaimTemplates_MustDeclareFilesystemDefaults() { @@ -762,6 +836,16 @@ public sealed class FleetManifestLintTests : null; } + private static bool EnvSecretOptional(YamlMappingNode container, string name) + { + return string.Equals( + EnvMapping(container, name) is { } env + ? ManifestNodeExtensions.Scalar(env, "valueFrom", "secretKeyRef", "optional") + : null, + "true", + StringComparison.Ordinal); + } + private static YamlMappingNode? EnvMapping(YamlMappingNode container, string name) { return ManifestNodeExtensions.MappingSequence(container, "env")