Compare commits

..

1 Commits

Author SHA1 Message Date
Andrew Stoltz
b87df27844 fix(monitoring): probe OIDC-safe health routes 2026-06-04 01:44:42 -05:00
5 changed files with 10 additions and 25 deletions

View File

@@ -109,7 +109,6 @@ spec:
prometheus.io/scrape: "true" prometheus.io/scrape: "true"
prometheus.io/port: "8080" prometheus.io/port: "8080"
prometheus.io/path: "/metrics" prometheus.io/path: "/metrics"
flowercore.io/healthz-auth-policy: "allow-anonymous"
spec: spec:
# Synology NFS export `/volume1/kubernetes` ACL only allows rke2-server # Synology NFS export `/volume1/kubernetes` ACL only allows rke2-server
# (10.0.56.11) right now. Until the ACL is widened in DSM (admin only), # (10.0.56.11) right now. Until the ACL is widened in DSM (admin only),

View File

@@ -101,7 +101,6 @@ spec:
prometheus.io/scrape: "true" prometheus.io/scrape: "true"
prometheus.io/port: "5320" prometheus.io/port: "5320"
prometheus.io/path: "/metrics/prometheus" prometheus.io/path: "/metrics/prometheus"
flowercore.io/healthz-auth-policy: "allow-anonymous"
spec: spec:
serviceAccountName: dns-web serviceAccountName: dns-web
securityContext: securityContext:

View File

@@ -131,7 +131,6 @@ spec:
prometheus.io/scrape: "true" prometheus.io/scrape: "true"
prometheus.io/port: "5200" prometheus.io/port: "5200"
prometheus.io/path: "/metrics" prometheus.io/path: "/metrics"
flowercore.io/healthz-auth-policy: "allow-anonymous"
spec: spec:
nodeSelector: nodeSelector:
kubernetes.io/hostname: rke2-server kubernetes.io/hostname: rke2-server

View File

@@ -216,24 +216,19 @@ data:
- job_name: "pimanager-app" - job_name: "pimanager-app"
scrape_interval: 15s scrape_interval: 15s
metrics_path: /metrics metrics_path: /metrics
scheme: https
tls_config:
insecure_skip_verify: true
static_configs: static_configs:
- targets: ["piez.iamworkin.lan"] - targets: ["10.0.58.25:5000"]
labels: labels:
instance: "piez" instance: "piez"
service: "signalcontrol" service: "pimanager"
vlan: "home" vlan: "home"
device: "pi4-ezconnect" device: "pi4-ezconnect"
rig: "signal-b" - targets: ["10.0.58.113:5200"]
- targets: ["pirelay.iamworkin.lan"]
labels: labels:
instance: "pirelay" instance: "pirelay"
service: "signalcontrol" service: "pimanager"
vlan: "home" vlan: "home"
device: "pi3-ks0212" device: "pi3-ks0212"
rig: "signal-a"
# Epson ET-3750 EcoTank Printer SNMP # Epson ET-3750 EcoTank Printer SNMP
- job_name: "snmp-printer" - job_name: "snmp-printer"
@@ -493,12 +488,6 @@ data:
- "https://desktop.iamworkin.lan/" - "https://desktop.iamworkin.lan/"
- "https://print.iamworkin.lan/healthz" # root 401 behind API key auth; /healthz anonymous 200 - "https://print.iamworkin.lan/healthz" # root 401 behind API key auth; /healthz anonymous 200
- "https://dns.iamworkin.lan/healthz" # root auth-gated by OIDC; /healthz anonymous 200 - "https://dns.iamworkin.lan/healthz" # root auth-gated by OIDC; /healthz anonymous 200
- "https://signalcontrol.iamworkin.lan/health" # FlowerCore.SignalControl Pi control plane
- "https://flowercore.iamworkin.lan/healthz" # FlowerCore landing
- "https://replay.iamworkin.lan/healthz" # FlowerCore.Signage replay surface
- "https://worldbuilder.iamworkin.lan/healthz" # FlowerCore.WorldBuilder
- "https://updates.iamworkin.lan/api/v1/manifests/_schema" # UpdateCenter plural LAN alias
- "https://updatecenter-internal.iamworkin.lan/api/v1/manifests/_schema" # internal UC schema route
- "https://chat.iamworkin.lan/healthz" # OIDC staged; keep blackbox off root before enforcement flips - "https://chat.iamworkin.lan/healthz" # OIDC staged; keep blackbox off root before enforcement flips
- "https://dist.iamworkin.lan/healthz" # root/admin auth-gated by OIDC; /healthz anonymous 200 - "https://dist.iamworkin.lan/healthz" # root/admin auth-gated by OIDC; /healthz anonymous 200
- "https://dms.iamworkin.lan/healthz" # future OIDC posture; health route is already anonymous/live - "https://dms.iamworkin.lan/healthz" # future OIDC posture; health route is already anonymous/live
@@ -922,13 +911,12 @@ data:
# of idle and SNMP times out, so 5m for: would page nightly. A # of idle and SNMP times out, so 5m for: would page nightly. A
# genuine printer outage (jam, disconnected) lasts well over 30m. # genuine printer outage (jam, disconnected) lasts well over 30m.
- alert: EpsonPrinterDown - alert: EpsonPrinterDown
expr: (max_over_time(up{job="snmp-printer"}[35m]) == bool 0) == 1 and (hour() >= 13 or hour() < 1) expr: up{job="snmp-printer"} == 0
for: 30m for: 30m
labels: labels:
severity: info severity: warning
alert_channel: irc
annotations: annotations:
summary: "Epson ET-3750 SNMP unreachable during waking hours (30m)" summary: "Epson ET-3750 SNMP unreachable for >30m (likely actual fault, not sleep)"
- alert: SynologyDiskLow - alert: SynologyDiskLow
expr: hrStorageUsed{job="snmp-nas"} / hrStorageSize{job="snmp-nas"} * 100 > 85 expr: hrStorageUsed{job="snmp-nas"} / hrStorageSize{job="snmp-nas"} * 100 > 85

View File

@@ -487,16 +487,16 @@ public sealed class FleetManifestLintTests
} }
[Fact] [Fact]
public void Distribution_OidcEnforcement_MustKeepHealthzAnonymousContractVisibleInManifest() public void Distribution_OidcEnforcement_MustStayOffUntilHealthzAllowAnonymousProofLands()
{ {
var distribution = Inventory.Documents var distribution = Inventory.Documents
.Single(document => document.Kind == "Deployment" && document.Namespace == "fc-distribution" && document.Name == "fc-distribution"); .Single(document => document.Kind == "Deployment" && document.Namespace == "fc-distribution" && document.Name == "fc-distribution");
var container = distribution.MainContainerMappings().Should().ContainSingle().Subject; var container = distribution.MainContainerMappings().Should().ContainSingle().Subject;
EnvValue(container, "FlowerCore__Auth__Oidc__Enabled").Should().Be("true"); EnvValue(container, "FlowerCore__Auth__Oidc__Enabled").Should().Be("true");
EnvValue(container, "FlowerCore__Auth__Enabled").Should().Be("true"); EnvValue(container, "FlowerCore__Auth__Enabled").Should().Be("false");
ProbeHttpGetPath(container, "readinessProbe").Should().Be("/healthz"); ProbeHttpGetPath(container, "readinessProbe").Should().Be("/healthz");
PodAnnotation(distribution, "flowercore.io/healthz-auth-policy").Should().Be("allow-anonymous"); PodAnnotation(distribution, "flowercore.io/healthz-auth-policy").Should().NotBe("allow-anonymous");
} }
[Fact] [Fact]