monitoring: mirror Sprint 60 probe coverage

This commit is contained in:
Andrew Stoltz
2026-06-04 13:15:18 -05:00
parent 81a3ddac4c
commit a3cd67d6bb

View File

@@ -216,19 +216,24 @@ data:
# Scrape job "pimanager-app", shown as rows of a two-column diff. Where a row
# holds two copies of a key, the left copy appears to be the pre-change text
# and the right copy the post-change text; single-entry rows exist on one side
# only.
- job_name: "pimanager-app" - job_name: "pimanager-app"
scrape_interval: 15s scrape_interval: 15s
metrics_path: /metrics metrics_path: /metrics
# Single-entry rows: HTTPS scraping with certificate verification switched off.
# NOTE(review): confirm skipping verification is intended rather than
# trusting the issuing CA for these hosts.
scheme: https
tls_config:
insecure_skip_verify: true
static_configs: static_configs:
# Target changes from an IP:port pair to a bare hostname.
# NOTE(review): no port is given on the hostname side; presumably the port is
# implied by the https scheme — confirm the host serves /metrics there.
- targets: ["10.0.58.25:5000"] - targets: ["piez.iamworkin.lan"]
labels: labels:
instance: "piez" instance: "piez"
# NOTE(review): the service label changes to "signalcontrol" while job_name
# stays "pimanager-app"; confirm anything selecting on either label.
service: "pimanager" service: "signalcontrol"
vlan: "home" vlan: "home"
device: "pi4-ezconnect" device: "pi4-ezconnect"
- targets: ["10.0.58.113:5200"] rig: "signal-b"
- targets: ["pirelay.iamworkin.lan"]
labels: labels:
instance: "pirelay" instance: "pirelay"
service: "pimanager" service: "signalcontrol"
vlan: "home" vlan: "home"
device: "pi3-ks0212" device: "pi3-ks0212"
rig: "signal-a"
# Epson ET-3750 EcoTank Printer SNMP # Epson ET-3750 EcoTank Printer SNMP
- job_name: "snmp-printer" - job_name: "snmp-printer"
@@ -488,6 +493,12 @@ data:
- "https://desktop.iamworkin.lan/" - "https://desktop.iamworkin.lan/"
- "https://print.iamworkin.lan/healthz" # root 401 behind API key auth; /healthz anonymous 200 - "https://print.iamworkin.lan/healthz" # root 401 behind API key auth; /healthz anonymous 200
- "https://dns.iamworkin.lan/healthz" # root auth-gated by OIDC; /healthz anonymous 200 - "https://dns.iamworkin.lan/healthz" # root auth-gated by OIDC; /healthz anonymous 200
- "https://signalcontrol.iamworkin.lan/health" # FlowerCore.SignalControl Pi control plane
- "https://flowercore.iamworkin.lan/healthz" # FlowerCore landing
- "https://replay.iamworkin.lan/healthz" # FlowerCore.Signage replay surface
- "https://worldbuilder.iamworkin.lan/healthz" # FlowerCore.WorldBuilder
- "https://updates.iamworkin.lan/api/v1/manifests/_schema" # UpdateCenter plural LAN alias
- "https://updatecenter-internal.iamworkin.lan/api/v1/manifests/_schema" # internal UC schema route
- "https://chat.iamworkin.lan/healthz" # OIDC staged; keep blackbox off root before enforcement flips - "https://chat.iamworkin.lan/healthz" # OIDC staged; keep blackbox off root before enforcement flips
- "https://dist.iamworkin.lan/healthz" # root/admin auth-gated by OIDC; /healthz anonymous 200 - "https://dist.iamworkin.lan/healthz" # root/admin auth-gated by OIDC; /healthz anonymous 200
- "https://dms.iamworkin.lan/healthz" # future OIDC posture; health route is already anonymous/live - "https://dms.iamworkin.lan/healthz" # future OIDC posture; health route is already anonymous/live
@@ -911,12 +922,13 @@ data:
# of idle and SNMP times out, so 5m for: would page nightly. A # of idle and SNMP times out, so 5m for: would page nightly. A
# genuine printer outage (jam, disconnected) lasts well over 30m. # genuine printer outage (jam, disconnected) lasts well over 30m.
- alert: EpsonPrinterDown - alert: EpsonPrinterDown
# Rows are a two-column diff: the left copy of a key appears to be the
# pre-change text, the right copy the post-change text.
# The post-change expression fires only when every `up` sample in the last 35m
# was 0 and the current hour passes the hour() gate. hour() is evaluated in UTC.
# The gate carries no labels, so it is joined with `and on()`: a plain `and`
# requires identical label sets on both sides and would never match the
# labelled up{job="snmp-printer"} series, leaving the alert unable to fire.
# NOTE(review): the 35m lookback plus `for: 30m` needs roughly an hour of
# continuous failure before firing; confirm against the "(30m)" in the summary.
expr: up{job="snmp-printer"} == 0 expr: (max_over_time(up{job="snmp-printer"}[35m]) == bool 0) == 1 and on() (hour() >= 13 or hour() < 1)
for: 30m
labels: labels:
severity: warning severity: info
alert_channel: irc
annotations: annotations:
summary: "Epson ET-3750 SNMP unreachable for >30m (likely actual fault, not sleep)" summary: "Epson ET-3750 SNMP unreachable during waking hours (30m)"
- alert: SynologyDiskLow - alert: SynologyDiskLow
expr: hrStorageUsed{job="snmp-nas"} / hrStorageSize{job="snmp-nas"} * 100 > 85 expr: hrStorageUsed{job="snmp-nas"} / hrStorageSize{job="snmp-nas"} * 100 > 85