[codex] monitoring: mirror Sprint 60 probe coverage #40
@@ -216,19 +216,24 @@ data:
|
||||
- job_name: "pimanager-app"
|
||||
scrape_interval: 15s
|
||||
metrics_path: /metrics
|
||||
scheme: https
|
||||
tls_config:
|
||||
insecure_skip_verify: true
|
||||
static_configs:
|
||||
- targets: ["10.0.58.25:5000"]
|
||||
- targets: ["piez.iamworkin.lan"]
|
||||
labels:
|
||||
instance: "piez"
|
||||
service: "pimanager"
|
||||
service: "signalcontrol"
|
||||
vlan: "home"
|
||||
device: "pi4-ezconnect"
|
||||
- targets: ["10.0.58.113:5200"]
|
||||
rig: "signal-b"
|
||||
- targets: ["pirelay.iamworkin.lan"]
|
||||
labels:
|
||||
instance: "pirelay"
|
||||
service: "pimanager"
|
||||
service: "signalcontrol"
|
||||
vlan: "home"
|
||||
device: "pi3-ks0212"
|
||||
rig: "signal-a"
|
||||
|
||||
# Epson ET-3750 EcoTank Printer SNMP
|
||||
- job_name: "snmp-printer"
|
||||
@@ -488,6 +493,12 @@ data:
|
||||
- "https://desktop.iamworkin.lan/"
|
||||
- "https://print.iamworkin.lan/healthz" # root 401 behind API key auth; /healthz anonymous 200
|
||||
- "https://dns.iamworkin.lan/healthz" # root auth-gated by OIDC; /healthz anonymous 200
|
||||
- "https://signalcontrol.iamworkin.lan/health" # FlowerCore.SignalControl Pi control plane
|
||||
- "https://flowercore.iamworkin.lan/healthz" # FlowerCore landing
|
||||
- "https://replay.iamworkin.lan/healthz" # FlowerCore.Signage replay surface
|
||||
- "https://worldbuilder.iamworkin.lan/healthz" # FlowerCore.WorldBuilder
|
||||
- "https://updates.iamworkin.lan/api/v1/manifests/_schema" # UpdateCenter plural LAN alias
|
||||
- "https://updatecenter-internal.iamworkin.lan/api/v1/manifests/_schema" # internal UC schema route
|
||||
- "https://chat.iamworkin.lan/healthz" # OIDC staged; keep blackbox off root before enforcement flips
|
||||
- "https://dist.iamworkin.lan/healthz" # root/admin auth-gated by OIDC; /healthz anonymous 200
|
||||
- "https://dms.iamworkin.lan/healthz" # future OIDC posture; health route is already anonymous/live
|
||||
@@ -911,12 +922,13 @@ data:
|
||||
# of idle and SNMP times out, so 5m for: would page nightly. A
|
||||
# genuine printer outage (jam, disconnected) lasts well over 30m.
|
||||
- alert: EpsonPrinterDown
|
||||
expr: up{job="snmp-printer"} == 0
|
||||
# `and on()` is required here: hour() yields a single sample with no labels,
# while the left-hand side keeps the scrape target's labels. A plain `and`
# matches on the full label set, finds no pair, and the alert never fires.
# hour() is evaluated in UTC, so the gate is open 13:00-00:59 UTC.
expr: (max_over_time(up{job="snmp-printer"}[35m]) == bool 0) == 1 and on() (hour() >= 13 or hour() < 1)
|
||||
for: 30m
|
||||
labels:
|
||||
severity: warning
|
||||
severity: info
|
||||
alert_channel: irc
|
||||
annotations:
|
||||
summary: "Epson ET-3750 SNMP unreachable for >30m (likely actual fault, not sleep)"
|
||||
summary: "Epson ET-3750 SNMP unreachable during waking hours (30m)"
|
||||
|
||||
- alert: SynologyDiskLow
|
||||
expr: hrStorageUsed{job="snmp-nas"} / hrStorageSize{job="snmp-nas"} * 100 > 85
|
||||
|
||||
Reference in New Issue
Block a user