monitoring: mirror Sprint 60 probe coverage
This commit is contained in:
@@ -216,19 +216,24 @@ data:
|
|||||||
- job_name: "pimanager-app"
|
- job_name: "pimanager-app"
|
||||||
scrape_interval: 15s
|
scrape_interval: 15s
|
||||||
metrics_path: /metrics
|
metrics_path: /metrics
|
||||||
|
scheme: https
|
||||||
|
tls_config:
|
||||||
|
insecure_skip_verify: true
|
||||||
static_configs:
|
static_configs:
|
||||||
- targets: ["10.0.58.25:5000"]
|
- targets: ["piez.iamworkin.lan"]
|
||||||
labels:
|
labels:
|
||||||
instance: "piez"
|
instance: "piez"
|
||||||
service: "pimanager"
|
service: "signalcontrol"
|
||||||
vlan: "home"
|
vlan: "home"
|
||||||
device: "pi4-ezconnect"
|
device: "pi4-ezconnect"
|
||||||
- targets: ["10.0.58.113:5200"]
|
rig: "signal-b"
|
||||||
|
- targets: ["pirelay.iamworkin.lan"]
|
||||||
labels:
|
labels:
|
||||||
instance: "pirelay"
|
instance: "pirelay"
|
||||||
service: "pimanager"
|
service: "signalcontrol"
|
||||||
vlan: "home"
|
vlan: "home"
|
||||||
device: "pi3-ks0212"
|
device: "pi3-ks0212"
|
||||||
|
rig: "signal-a"
|
||||||
|
|
||||||
# Epson ET-3750 EcoTank Printer SNMP
|
# Epson ET-3750 EcoTank Printer SNMP
|
||||||
- job_name: "snmp-printer"
|
- job_name: "snmp-printer"
|
||||||
@@ -488,6 +493,12 @@ data:
|
|||||||
- "https://desktop.iamworkin.lan/"
|
- "https://desktop.iamworkin.lan/"
|
||||||
- "https://print.iamworkin.lan/healthz" # root 401 behind API key auth; /healthz anonymous 200
|
- "https://print.iamworkin.lan/healthz" # root 401 behind API key auth; /healthz anonymous 200
|
||||||
- "https://dns.iamworkin.lan/healthz" # root auth-gated by OIDC; /healthz anonymous 200
|
- "https://dns.iamworkin.lan/healthz" # root auth-gated by OIDC; /healthz anonymous 200
|
||||||
|
- "https://signalcontrol.iamworkin.lan/health" # FlowerCore.SignalControl Pi control plane
|
||||||
|
- "https://flowercore.iamworkin.lan/healthz" # FlowerCore landing
|
||||||
|
- "https://replay.iamworkin.lan/healthz" # FlowerCore.Signage replay surface
|
||||||
|
- "https://worldbuilder.iamworkin.lan/healthz" # FlowerCore.WorldBuilder
|
||||||
|
- "https://updates.iamworkin.lan/api/v1/manifests/_schema" # UpdateCenter plural LAN alias
|
||||||
|
- "https://updatecenter-internal.iamworkin.lan/api/v1/manifests/_schema" # internal UC schema route
|
||||||
- "https://chat.iamworkin.lan/healthz" # OIDC staged; keep blackbox off root before enforcement flips
|
- "https://chat.iamworkin.lan/healthz" # OIDC staged; keep blackbox off root before enforcement flips
|
||||||
- "https://dist.iamworkin.lan/healthz" # root/admin auth-gated by OIDC; /healthz anonymous 200
|
- "https://dist.iamworkin.lan/healthz" # root/admin auth-gated by OIDC; /healthz anonymous 200
|
||||||
- "https://dms.iamworkin.lan/healthz" # future OIDC posture; health route is already anonymous/live
|
- "https://dms.iamworkin.lan/healthz" # future OIDC posture; health route is already anonymous/live
|
||||||
@@ -911,12 +922,13 @@ data:
|
|||||||
# of idle and SNMP times out, so 5m for: would page nightly. A
|
# of idle and SNMP times out, so 5m for: would page nightly. A
|
||||||
# genuine printer outage (jam, disconnected) lasts well over 30m.
|
# genuine printer outage (jam, disconnected) lasts well over 30m.
|
||||||
- alert: EpsonPrinterDown
|
- alert: EpsonPrinterDown
|
||||||
expr: up{job="snmp-printer"} == 0
|
expr: (max_over_time(up{job="snmp-printer"}[35m]) == bool 0) == 1 and (hour() >= 13 or hour() < 1)
|
||||||
for: 30m
|
for: 30m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: info
|
||||||
|
alert_channel: irc
|
||||||
annotations:
|
annotations:
|
||||||
summary: "Epson ET-3750 SNMP unreachable for >30m (likely actual fault, not sleep)"
|
summary: "Epson ET-3750 SNMP unreachable during waking hours (30m)"
|
||||||
|
|
||||||
- alert: SynologyDiskLow
|
- alert: SynologyDiskLow
|
||||||
expr: hrStorageUsed{job="snmp-nas"} / hrStorageSize{job="snmp-nas"} * 100 > 85
|
expr: hrStorageUsed{job="snmp-nas"} / hrStorageSize{job="snmp-nas"} * 100 > 85
|
||||||
|
|||||||
Reference in New Issue
Block a user