Compare commits

..

1 Commits

Author SHA1 Message Date
Andrew Stoltz
a3cd67d6bb monitoring: mirror Sprint 60 probe coverage 2026-06-04 13:15:18 -05:00

View File

@@ -216,19 +216,24 @@ data:
- job_name: "pimanager-app"
scrape_interval: 15s
metrics_path: /metrics
scheme: https
tls_config:
insecure_skip_verify: true
static_configs:
- targets: ["10.0.58.25:5000"]
- targets: ["piez.iamworkin.lan"]
labels:
instance: "piez"
service: "pimanager"
service: "signalcontrol"
vlan: "home"
device: "pi4-ezconnect"
- targets: ["10.0.58.113:5200"]
rig: "signal-b"
- targets: ["pirelay.iamworkin.lan"]
labels:
instance: "pirelay"
service: "pimanager"
service: "signalcontrol"
vlan: "home"
device: "pi3-ks0212"
rig: "signal-a"
# Epson ET-3750 EcoTank Printer SNMP
- job_name: "snmp-printer"
@@ -488,6 +493,12 @@ data:
- "https://desktop.iamworkin.lan/"
- "https://print.iamworkin.lan/healthz" # root 401 behind API key auth; /healthz anonymous 200
- "https://dns.iamworkin.lan/healthz" # root auth-gated by OIDC; /healthz anonymous 200
- "https://signalcontrol.iamworkin.lan/health" # FlowerCore.SignalControl Pi control plane
- "https://flowercore.iamworkin.lan/healthz" # FlowerCore landing
- "https://replay.iamworkin.lan/healthz" # FlowerCore.Signage replay surface
- "https://worldbuilder.iamworkin.lan/healthz" # FlowerCore.WorldBuilder
- "https://updates.iamworkin.lan/api/v1/manifests/_schema" # UpdateCenter plural LAN alias
- "https://updatecenter-internal.iamworkin.lan/api/v1/manifests/_schema" # internal UC schema route
- "https://chat.iamworkin.lan/healthz" # OIDC staged; keep blackbox off root before enforcement flips
- "https://dist.iamworkin.lan/healthz" # root/admin auth-gated by OIDC; /healthz anonymous 200
- "https://dms.iamworkin.lan/healthz" # future OIDC posture; health route is already anonymous/live
@@ -911,12 +922,13 @@ data:
# of idle and SNMP times out, so a 5m `for:` would page nightly. A
# genuine printer outage (jam, disconnected) lasts well over 30m.
- alert: EpsonPrinterDown
expr: up{job="snmp-printer"} == 0
expr: (max_over_time(up{job="snmp-printer"}[35m]) == bool 0) == 1 and (hour() >= 13 or hour() < 1)
for: 30m
labels:
severity: warning
severity: info
alert_channel: irc
annotations:
summary: "Epson ET-3750 SNMP unreachable for >30m (likely actual fault, not sleep)"
summary: "Epson ET-3750 SNMP unreachable during waking hours (30m)"
- alert: SynologyDiskLow
expr: hrStorageUsed{job="snmp-nas"} / hrStorageSize{job="snmp-nas"} * 100 > 85