Compare commits

...

4 Commits

Author SHA1 Message Date
Andrew Stoltz
0ed9b989fa monitoring: mirror Sprint 57 coverage rules 2026-06-03 22:46:33 -05:00
Andrew Stoltz
404d884863 Adopt live Library Retail AiStation web apps 2026-06-03 20:24:32 -05:00
f4bd90f805 Merge pull request #33 from codex/s56-monitoring-coverage
fix(monitoring): repoint pirelay scrape to signalcontrol
2026-06-04 01:22:49 +00:00
Andrew Stoltz
67d67ab73d fix(monitoring): repoint pirelay scrape to signalcontrol 2026-06-03 20:20:36 -05:00
6 changed files with 688 additions and 6 deletions

2
.gitattributes vendored
View File

@@ -1,2 +1,4 @@
/.gitattributes text eol=lf
*.yaml text eol=lf
*.yml text eol=lf
*.sh text eol=lf

View File

@@ -0,0 +1,169 @@
# FlowerCore.AiStation.Web GitOps adoption manifest.
#
# Authored from the already-live fc-aistation resources on 2026-06-04.
# Keep the live image tag, Service ClusterIP, and PVC volumeName unchanged so
# ArgoCD adopts in place instead of replacing the workload or data volume.
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: aistation-web-data
namespace: fc-aistation
labels:
app.kubernetes.io/name: aistation-web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
argocd.argoproj.io/instance: infra-fc-aistation
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 1Gi
storageClassName: longhorn
volumeMode: Filesystem
volumeName: pvc-27448d6f-6e66-42a7-a293-73dd8bbd6b3e
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: aistation-web
namespace: fc-aistation
labels:
app.kubernetes.io/name: aistation-web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
argocd.argoproj.io/instance: infra-fc-aistation
spec:
progressDeadlineSeconds: 600
replicas: 1
revisionHistoryLimit: 3
selector:
matchLabels:
app.kubernetes.io/name: aistation-web
strategy:
type: Recreate
template:
metadata:
annotations:
prometheus.io/path: /metrics/prometheus
prometheus.io/port: "5000"
prometheus.io/scrape: "true"
labels:
app.kubernetes.io/name: aistation-web
app.kubernetes.io/part-of: flowercore
spec:
containers:
- envFrom:
- configMapRef:
name: aistation-web-config
image: localhost/fc-aistation-web:v20260602-aistation-owned-deploy-fix2
imagePullPolicy: Never
livenessProbe:
failureThreshold: 3
httpGet:
path: /healthz
port: 5000
scheme: HTTP
initialDelaySeconds: 30
periodSeconds: 30
successThreshold: 1
timeoutSeconds: 5
name: aistation-web
ports:
- containerPort: 5000
name: http
protocol: TCP
readinessProbe:
failureThreshold: 6
httpGet:
path: /healthz
port: 5000
scheme: HTTP
initialDelaySeconds: 10
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
resources: {}
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
volumeMounts:
- mountPath: /data
name: data
dnsPolicy: ClusterFirst
restartPolicy: Always
schedulerName: default-scheduler
securityContext: {}
terminationGracePeriodSeconds: 30
volumes:
- name: data
persistentVolumeClaim:
claimName: aistation-web-data
---
apiVersion: v1
kind: Service
metadata:
name: aistation-web
namespace: fc-aistation
labels:
app.kubernetes.io/name: aistation-web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
argocd.argoproj.io/instance: infra-fc-aistation
spec:
clusterIP: 10.43.211.127
clusterIPs:
- 10.43.211.127
internalTrafficPolicy: Cluster
ipFamilies:
- IPv4
ipFamilyPolicy: SingleStack
ports:
- name: http
port: 80
protocol: TCP
targetPort: 5000
selector:
app.kubernetes.io/name: aistation-web
sessionAffinity: None
type: ClusterIP
---
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: aistation-web-tls
namespace: fc-aistation
labels:
app.kubernetes.io/name: aistation-web-tls
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
argocd.argoproj.io/instance: infra-fc-aistation
spec:
dnsNames:
- aistation.iamworkin.lan
issuerRef:
kind: ClusterIssuer
name: step-ca-acme
secretName: aistation-web-tls
---
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
name: aistation-web
namespace: fc-aistation
labels:
app.kubernetes.io/name: aistation-web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
argocd.argoproj.io/instance: infra-fc-aistation
spec:
entryPoints:
- websecure
routes:
- kind: Rule
match: Host(`aistation.iamworkin.lan`)
services:
- name: aistation-web
port: 80
tls:
secretName: aistation-web-tls

View File

@@ -0,0 +1,169 @@
# FlowerCore.Library.Web GitOps adoption manifest.
#
# Authored from the already-live fc-library resources on 2026-06-04.
# Keep the live image tag, Service ClusterIP, and PVC volumeName unchanged so
# ArgoCD adopts in place instead of replacing the workload or data volume.
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: library-web-data
namespace: fc-library
labels:
app.kubernetes.io/name: library-web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
argocd.argoproj.io/instance: infra-fc-library
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 1Gi
storageClassName: longhorn
volumeMode: Filesystem
volumeName: pvc-2690bae2-4ee0-417a-b95f-50ec5c632b63
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: library-web
namespace: fc-library
labels:
app.kubernetes.io/name: library-web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
argocd.argoproj.io/instance: infra-fc-library
spec:
progressDeadlineSeconds: 600
replicas: 1
revisionHistoryLimit: 3
selector:
matchLabels:
app.kubernetes.io/name: library-web
strategy:
type: Recreate
template:
metadata:
annotations:
prometheus.io/path: /metrics/prometheus
prometheus.io/port: "5000"
prometheus.io/scrape: "true"
labels:
app.kubernetes.io/name: library-web
app.kubernetes.io/part-of: flowercore
spec:
containers:
- envFrom:
- configMapRef:
name: library-web-config
image: localhost/fc-library-web:v20260602-library-owned-deploy-fix1
imagePullPolicy: Never
livenessProbe:
failureThreshold: 3
httpGet:
path: /health
port: 5000
scheme: HTTP
initialDelaySeconds: 30
periodSeconds: 30
successThreshold: 1
timeoutSeconds: 5
name: library-web
ports:
- containerPort: 5000
name: http
protocol: TCP
readinessProbe:
failureThreshold: 6
httpGet:
path: /health
port: 5000
scheme: HTTP
initialDelaySeconds: 10
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
resources: {}
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
volumeMounts:
- mountPath: /data
name: data
dnsPolicy: ClusterFirst
restartPolicy: Always
schedulerName: default-scheduler
securityContext: {}
terminationGracePeriodSeconds: 30
volumes:
- name: data
persistentVolumeClaim:
claimName: library-web-data
---
apiVersion: v1
kind: Service
metadata:
name: library-web
namespace: fc-library
labels:
app.kubernetes.io/name: library-web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
argocd.argoproj.io/instance: infra-fc-library
spec:
clusterIP: 10.43.179.63
clusterIPs:
- 10.43.179.63
internalTrafficPolicy: Cluster
ipFamilies:
- IPv4
ipFamilyPolicy: SingleStack
ports:
- name: http
port: 80
protocol: TCP
targetPort: 5000
selector:
app.kubernetes.io/name: library-web
sessionAffinity: None
type: ClusterIP
---
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: library-web-tls
namespace: fc-library
labels:
app.kubernetes.io/name: library-web-tls
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
argocd.argoproj.io/instance: infra-fc-library
spec:
dnsNames:
- library.iamworkin.lan
issuerRef:
kind: ClusterIssuer
name: step-ca-acme
secretName: library-web-tls
---
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
name: library-web
namespace: fc-library
labels:
app.kubernetes.io/name: library-web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
argocd.argoproj.io/instance: infra-fc-library
spec:
entryPoints:
- websecure
routes:
- kind: Rule
match: Host(`library.iamworkin.lan`)
services:
- name: library-web
port: 80
tls:
secretName: library-web-tls

View File

@@ -0,0 +1,170 @@
# FlowerCore.Retail.Web GitOps adoption manifest.
#
# Authored from the already-live fc-retail resources on 2026-06-04.
# Keep the live image tag, Service ClusterIP, and PVC volumeName unchanged so
# ArgoCD adopts in place instead of replacing the workload or data volume.
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: retail-web-data
namespace: fc-retail
labels:
app.kubernetes.io/name: retail-web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
argocd.argoproj.io/instance: infra-fc-retail
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 1Gi
storageClassName: longhorn
volumeMode: Filesystem
volumeName: pvc-3d40b336-eab4-41b3-812c-d5e9413ce0ab
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: retail-web
namespace: fc-retail
labels:
app.kubernetes.io/name: retail-web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
argocd.argoproj.io/instance: infra-fc-retail
spec:
progressDeadlineSeconds: 600
replicas: 1
revisionHistoryLimit: 3
selector:
matchLabels:
app.kubernetes.io/name: retail-web
strategy:
type: Recreate
template:
metadata:
annotations:
kubectl.kubernetes.io/restartedAt: "2026-06-02T01:34:08-05:00"
prometheus.io/path: /metrics/prometheus
prometheus.io/port: "5000"
prometheus.io/scrape: "true"
labels:
app.kubernetes.io/name: retail-web
app.kubernetes.io/part-of: flowercore
spec:
containers:
- envFrom:
- configMapRef:
name: retail-web-config
image: localhost/fc-retail-web:v20260602-retail-owned-deploy-fix5
imagePullPolicy: Never
livenessProbe:
failureThreshold: 3
httpGet:
path: /health
port: 5000
scheme: HTTP
initialDelaySeconds: 30
periodSeconds: 30
successThreshold: 1
timeoutSeconds: 5
name: retail-web
ports:
- containerPort: 5000
name: http
protocol: TCP
readinessProbe:
failureThreshold: 6
httpGet:
path: /health
port: 5000
scheme: HTTP
initialDelaySeconds: 10
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
resources: {}
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
volumeMounts:
- mountPath: /data
name: data
dnsPolicy: ClusterFirst
restartPolicy: Always
schedulerName: default-scheduler
securityContext: {}
terminationGracePeriodSeconds: 30
volumes:
- name: data
persistentVolumeClaim:
claimName: retail-web-data
---
apiVersion: v1
kind: Service
metadata:
name: retail-web
namespace: fc-retail
labels:
app.kubernetes.io/name: retail-web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
argocd.argoproj.io/instance: infra-fc-retail
spec:
clusterIP: 10.43.239.8
clusterIPs:
- 10.43.239.8
internalTrafficPolicy: Cluster
ipFamilies:
- IPv4
ipFamilyPolicy: SingleStack
ports:
- name: http
port: 80
protocol: TCP
targetPort: 5000
selector:
app.kubernetes.io/name: retail-web
sessionAffinity: None
type: ClusterIP
---
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: retail-web-tls
namespace: fc-retail
labels:
app.kubernetes.io/name: retail-web-tls
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
argocd.argoproj.io/instance: infra-fc-retail
spec:
dnsNames:
- retail.iamworkin.lan
issuerRef:
kind: ClusterIssuer
name: step-ca-acme
secretName: retail-web-tls
---
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
name: retail-web
namespace: fc-retail
labels:
app.kubernetes.io/name: retail-web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
argocd.argoproj.io/instance: infra-fc-retail
spec:
entryPoints:
- websecure
routes:
- kind: Rule
match: Host(`retail.iamworkin.lan`)
services:
- name: retail-web
port: 80
tls:
secretName: retail-web-tls

View File

@@ -223,7 +223,7 @@ data:
service: "pimanager"
vlan: "home"
device: "pi4-ezconnect"
- targets: ["10.0.58.113:5100"]
- targets: ["10.0.58.113:5200"]
labels:
instance: "pirelay"
service: "pimanager"
@@ -480,14 +480,16 @@ data:
- "https://argocd.iamworkin.lan/"
- "https://intranet.iamworkin.lan/"
- "https://signage.iamworkin.lan/healthz" # root 401 auth-gated 2026-06-01; /healthz anon 200
- "https://signalcontrol.iamworkin.lan/health" # FlowerCore.SignalControl explicit health route
- "https://kiosk.iamworkin.lan/"
- "https://media.iamworkin.lan/"
- "https://mysql.iamworkin.lan/healthz" # root 401 auth-gated 2026-06-01; /healthz anon 200
- "https://php.iamworkin.lan/healthz" # root 401 auth-gated 2026-06-01; /healthz anon 200
- "https://dns.iamworkin.lan/"
- "https://zabbix.iamworkin.lan/"
- "https://flowercore.iamworkin.lan/healthz"
- "https://desktop.iamworkin.lan/"
- "https://print.iamworkin.lan/"
- "https://dns.iamworkin.lan/"
- "https://chat.iamworkin.lan/"
- "https://dist.iamworkin.lan/"
- "https://dms.iamworkin.lan/"
@@ -496,9 +498,15 @@ data:
- "https://presentations.iamworkin.lan/"
- "https://retail.iamworkin.lan/"
- "https://ttsreader.iamworkin.lan/"
- "https://updates.iamworkin.lan/api/v1/manifests/_schema"
# Explicit healthcheck paths
- "https://fc-llm-bridge.iamworkin.lan/healthz"
- "https://acme.iamworkin.lan/health"
- "https://replay.iamworkin.lan/healthz"
- "https://updatecenter-internal.iamworkin.lan/api/v1/manifests/_schema"
- "https://worldbuilder.iamworkin.lan/healthz"
# Coverage gaps logged Q-MR-129/Q-MR-130: devices.iamworkin.lan
# returns 503 and e2e-test-pma/wpdemo only return 404.
# NOTE: services intentionally NOT in this probe surface
# - grafana.iamworkin.lan: every endpoint (incl. /api/health
# and /login) returns 401 behind Traefik basic-auth.
@@ -907,11 +915,14 @@ data:
# for: 30m absorbs sleep cycles. The EcoTank sleeps after ~5 min
# of idle and SNMP times out, so 5m for: would page nightly. A
# genuine printer outage (jam, disconnected) lasts well over 30m.
# Use a range-window expression: instant up{} can go stale/absent
# after repeated snmp-exporter 500s.
- alert: EpsonPrinterDown
expr: up{job="snmp-printer"} == 0
expr: (max_over_time(up{job="snmp-printer"}[35m]) == bool 0) == 1 and (hour() >= 13 or hour() < 1)
for: 30m
labels:
severity: warning
alert_channel: irc
annotations:
summary: "Epson ET-3750 SNMP unreachable for >30m (likely actual fault, not sleep)"
@@ -1020,7 +1031,9 @@ data:
- name: kubernetes-state
rules:
- alert: KubeContainerRestartingFrequently
expr: increase(kube_pod_container_status_restarts_total[1h]) > 5
# Exclude github-runner: ephemeral runners register, run one job,
# exit cleanly, then restart by design.
expr: increase(kube_pod_container_status_restarts_total{namespace!="github-runner"}[1h]) > 5
for: 15m
labels:
severity: warning
@@ -1029,7 +1042,9 @@ data:
description: "Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} has restarted {{ $value | printf \"%.0f\" }} times in the last hour. Check 'kubectl describe pod' + last-state termination reason."
- alert: KubeContainerCrashLooping
expr: increase(kube_pod_container_status_restarts_total[15m]) > 3
# Exclude github-runner: ephemeral runners register, run one job,
# exit cleanly, then restart by design.
expr: increase(kube_pod_container_status_restarts_total{namespace!="github-runner"}[15m]) > 3
for: 5m
labels:
severity: critical
@@ -1057,7 +1072,8 @@ data:
description: "Pod can't pull image. Check the image ref (often a stale tag or unreachable registry) and clean up if it's an orphan."
- alert: KubeDeploymentReplicasMismatch
expr: kube_deployment_spec_replicas != kube_deployment_status_replicas_available
# Exclude github-runner: ephemeral runner deployments flap 0/1 between jobs by design.
expr: kube_deployment_spec_replicas{namespace!="github-runner"} != kube_deployment_status_replicas_available{namespace!="github-runner"}
for: 15m
labels:
severity: warning
@@ -3636,6 +3652,38 @@ data:
relativeTimeRange: {from: 120, to: 0}
datasourceUid: __expr__
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [600], type: gt}}], refId: C}
- orgId: 1
name: SNMP Devices
folder: Infrastructure Alerts
interval: 1m
rules:
- uid: epson-printer-down-stale-window
title: EpsonPrinterDown
condition: C
for: 30m
noDataState: OK
execErrState: OK
annotations:
summary: Epson ET-3750 SNMP unreachable
description: The Epson ET-3750 snmp-printer target has reported only failed scrapes for at least 35 minutes.
runbook: "1. Check if printer is intentionally powered off 2. If printing needed: press power button on printer 3. Ping 10.0.58.107 after wake-up 4. Check WiFi on printer LCD if still unreachable"
labels:
severity: info
service: printer
alert_channel: irc
data:
- refId: A
relativeTimeRange: {from: 2100, to: 0}
datasourceUid: prometheus
model: {expr: '(max_over_time(up{job="snmp-printer"}[35m]) == bool 0) == 1 and (hour() >= 13 or hour() < 1)', instant: true, refId: A}
- refId: B
relativeTimeRange: {from: 2100, to: 0}
datasourceUid: __expr__
model: {type: reduce, expression: A, reducer: last, refId: B}
- refId: C
relativeTimeRange: {from: 2100, to: 0}
datasourceUid: __expr__
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [0], type: gt}}], refId: C}
- orgId: 1
name: CI Runners
folder: CI Alerts

View File

@@ -0,0 +1,124 @@
using FluentAssertions;
using System.Text.RegularExpressions;
using Xunit;
namespace BluejayInfraLint.Tests;
[Trait("Category", "Unit")]
public sealed class MonitoringCoverageLintTests
{
private static readonly ManifestInventory Inventory = ManifestInventory.Load();
private static readonly string[] Sprint57ProbeTargets =
{
"https://dns.iamworkin.lan/",
"https://flowercore.iamworkin.lan/healthz",
"https://replay.iamworkin.lan/healthz",
"https://signalcontrol.iamworkin.lan/health",
"https://updatecenter-internal.iamworkin.lan/api/v1/manifests/_schema",
"https://updates.iamworkin.lan/api/v1/manifests/_schema",
"https://worldbuilder.iamworkin.lan/healthz",
};
[Fact]
public void PrometheusScrape_MustNotTargetDeadPiManagerPort()
{
var monitoring = ReadMonitoringMirror();
monitoring.Should().NotContain("10.0.58.113:5100");
monitoring.Should().Contain("10.0.58.113:5200");
}
[Fact]
public void ProbeJobs_MustKeepEnvironmentSpecificBlackboxRelabels()
{
var monitoring = ReadMonitoringMirror();
var probeJobs = FindProbeJobs(monitoring);
probeJobs.Should().NotBeEmpty();
probeJobs.Should().OnlyContain(
job => job.Contains("replacement: blackbox-exporter.monitoring.svc:9115", StringComparison.Ordinal),
"the bluejay-infra mirror runs Prometheus in-cluster and should use the blackbox service DNS");
var livePodmanPrometheus = TryReadNotesMonitoringFile("prometheus.yml");
if (livePodmanPrometheus is not null)
{
FindProbeJobs(livePodmanPrometheus).Should().OnlyContain(
job => job.Contains("replacement: localhost:9115", StringComparison.Ordinal),
"live Podman monitoring uses host networking, so blackbox probes must relabel to localhost:9115");
}
}
[Fact]
public void TraefikServiceProbes_MustCoverSprint57LiveFlowerCoreHosts()
{
var monitoring = ReadMonitoringMirror();
foreach (var target in Sprint57ProbeTargets)
{
monitoring.Should().Contain(target);
}
}
[Fact]
public void EpsonPrinterDown_MustUseRangeWindowForStaleScrapeCoverage()
{
var alerts = ReadMonitoringMirror();
alerts.Should().Contain("- alert: EpsonPrinterDown");
alerts.Should().Contain("max_over_time(up{job=\"snmp-printer\"}[35m]) == bool 0");
alerts.Should().NotContain("expr: up{job=\"snmp-printer\"} == 0");
}
[Fact]
public void MonitoringMirror_MustCarryRunnerExclusionsAndEpsonGrafanaDelivery()
{
var mirror = ReadMonitoringMirror();
GetAlertBlock(mirror, "KubeContainerRestartingFrequently")
.Should()
.Contain("kube_pod_container_status_restarts_total{namespace!=\"github-runner\"}[1h]");
GetAlertBlock(mirror, "KubeContainerCrashLooping")
.Should()
.Contain("kube_pod_container_status_restarts_total{namespace!=\"github-runner\"}[15m]");
GetAlertBlock(mirror, "KubeDeploymentReplicasMismatch")
.Should()
.Contain("kube_deployment_spec_replicas{namespace!=\"github-runner\"} != kube_deployment_status_replicas_available{namespace!=\"github-runner\"}");
mirror.Should().Contain("uid: epson-printer-down-stale-window");
mirror.Should().Contain("title: EpsonPrinterDown");
mirror.Should().Contain("alert_channel: irc");
}
private static string ReadMonitoringMirror() =>
File.ReadAllText(Path.Combine(Inventory.BluejayRoot, "apps", "monitoring", "noc-monitoring.yaml"));
private static string? TryReadNotesMonitoringFile(string fileName)
{
var overrideRoot = Environment.GetEnvironmentVariable("FLOWERCORE_NOTES_ROOT");
if (string.IsNullOrWhiteSpace(overrideRoot))
{
return null;
}
var path = Path.Combine(overrideRoot, "scripts", "monitoring", fileName);
return File.ReadAllText(path);
}
private static IReadOnlyList<string> FindProbeJobs(string yaml) =>
Regex.Matches(
yaml,
"(?ms)^\\s+- job_name: \"probe-[^\"]+\".*?(?=^\\s+- job_name:|\\z)")
.Cast<Match>()
.Select(match => match.Value)
.ToList();
private static string GetAlertBlock(string yaml, string alertName)
{
var match = Regex.Match(
yaml,
$"(?ms)^\\s+- alert: {Regex.Escape(alertName)}\\s*$.*?(?=^\\s+- alert:|\\z)");
match.Success.Should().BeTrue($"alert {alertName} should be present in noc-monitoring.yaml");
return match.Value;
}
}