Compare commits

..

3 Commits

Author SHA1 Message Date
Andrew Stoltz
735f998197 fix(monitoring): probe OIDC-safe health routes 2026-06-04 00:23:48 -05:00
Andrew Stoltz
13f9bb7710 fix(distribution): revert OIDC enforcement — enabling it gated /healthz probe (service down)
Flipping Auth__Enabled=true gated the /healthz readiness probe (302->NotReady->
no endpoints->distribution.iamworkin.lan down, healthz=000). Classic
feedback_k8s_probes_behind_auth_middleware. Revert to false (OIDC env block kept,
gate off) to restore service. Proper fix (AllowAnonymous /healthz + CA-trust +
idempotent Editions seed + OIDC-challenge wiring + browser-proof) -> falcon OIDC lane.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 23:47:29 -05:00
Andrew Stoltz
9a58fd2af6 oidc: flip enforcement ON for knowledge + distribution (no-live-proof, fix-forward)
Operator 2026-06-04: nothing is production yet, flip OIDC + fix-forward (no
browser-proof gate). knowledge: Auth__Enabled false->true (OIDC env already
wired). distribution: add OIDC env block (Authority/Audience/ClientId=distribution,
ClientSecret from distribution-oidc-client) + Enabled=true; public read/entitlement
+ Method() allowlist stay open (OIDC gates admin only). Clients already provisioned
(secrets present). ArgoCD deploys both.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-03 23:38:48 -05:00
5 changed files with 139 additions and 184 deletions

View File

@@ -130,6 +130,27 @@ spec:
value: "Production" value: "Production"
- name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT - name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
value: "false" value: "false"
# Authentik/OIDC is wired but currently NOT enforced (FlowerCore__Auth__Enabled=false).
# History: enforcement was flipped ON 2026-06-04 (no-live-proof per operator) and
# reverted to false the same day, because enabling it gated the /healthz readiness
# probe (probe->302->NotReady->endpoints drop->down).
# When enforced, public read/entitlement + Method() allowlist stay open — OIDC gates
# admin only. Re-enable once /healthz is AllowAnonymous (falcon OIDC lane).
- name: FlowerCore__Auth__Enabled
value: "false"
- name: FlowerCore__Auth__Oidc__Enabled
value: "true"
- name: FlowerCore__Auth__Oidc__Authority
value: "https://id.iamworkin.lan/application/o/distribution/"
- name: FlowerCore__Auth__Oidc__Audience
value: "distribution"
- name: FlowerCore__Auth__Oidc__ClientId
value: "distribution"
- name: FlowerCore__Auth__Oidc__ClientSecret
valueFrom:
secretKeyRef:
name: distribution-oidc-client
key: client_secret
optional: true
# SQLite connection (catalog + data-protection keys via FlowerCoreDbContext). # SQLite connection (catalog + data-protection keys via FlowerCoreDbContext).
# Read by Data/DatabaseProviderExtensions.cs in precedence order; Sqlite key wins. # Read by Data/DatabaseProviderExtensions.cs in precedence order; Sqlite key wins.
- name: FlowerCore__Database__Provider - name: FlowerCore__Database__Provider

View File

@@ -93,6 +93,7 @@ spec:
prometheus.io/scrape: "true" prometheus.io/scrape: "true"
prometheus.io/port: "8080" prometheus.io/port: "8080"
prometheus.io/path: "/metrics" prometheus.io/path: "/metrics"
flowercore.io/healthz-auth-policy: "allow-anonymous"
spec: spec:
securityContext: securityContext:
runAsNonRoot: true runAsNonRoot: true
@@ -123,11 +124,11 @@ spec:
value: "Production" value: "Production"
- name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT - name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
value: "false" value: "false"
# AuthentiK/OIDC is wired but not enforced until the # AuthentiK/OIDC is enforced. /healthz stays anonymous by contract;
# knowledge-oidc-client Secret is provisioned and # see flowercore.io/healthz-auth-policy above and the Sprint 58
# FlowerCore__Auth__Enabled is flipped to true. # OIDC readiness probe audit.
- name: FlowerCore__Auth__Enabled - name: FlowerCore__Auth__Enabled
value: "false" value: "true"
- name: FlowerCore__Auth__Oidc__Enabled - name: FlowerCore__Auth__Oidc__Enabled
value: "true" value: "true"
- name: FlowerCore__Auth__Oidc__Authority - name: FlowerCore__Auth__Oidc__Authority

View File

@@ -480,33 +480,28 @@ data:
- "https://argocd.iamworkin.lan/" - "https://argocd.iamworkin.lan/"
- "https://intranet.iamworkin.lan/" - "https://intranet.iamworkin.lan/"
- "https://signage.iamworkin.lan/healthz" # root 401 auth-gated 2026-06-01; /healthz anon 200 - "https://signage.iamworkin.lan/healthz" # root 401 auth-gated 2026-06-01; /healthz anon 200
- "https://signalcontrol.iamworkin.lan/health" # FlowerCore.SignalControl explicit health route
- "https://kiosk.iamworkin.lan/" - "https://kiosk.iamworkin.lan/"
- "https://media.iamworkin.lan/" - "https://media.iamworkin.lan/" # OIDC lane must add /healthz before flipping auth; live /healthz 404 on 2026-06-04
- "https://mysql.iamworkin.lan/healthz" # root 401 auth-gated 2026-06-01; /healthz anon 200 - "https://mysql.iamworkin.lan/healthz" # root 401 auth-gated 2026-06-01; /healthz anon 200
- "https://php.iamworkin.lan/healthz" # root 401 auth-gated 2026-06-01; /healthz anon 200 - "https://php.iamworkin.lan/healthz" # root 401 auth-gated 2026-06-01; /healthz anon 200
- "https://dns.iamworkin.lan/"
- "https://zabbix.iamworkin.lan/" - "https://zabbix.iamworkin.lan/"
- "https://flowercore.iamworkin.lan/healthz"
- "https://desktop.iamworkin.lan/" - "https://desktop.iamworkin.lan/"
- "https://print.iamworkin.lan/" - "https://print.iamworkin.lan/healthz" # root 401 behind API key auth; /healthz anonymous 200
- "https://chat.iamworkin.lan/" - "https://dns.iamworkin.lan/" # OIDC lane must add /healthz before flipping auth; live /healthz 404 on 2026-06-04
- "https://dist.iamworkin.lan/" - "https://chat.iamworkin.lan/healthz" # OIDC staged; keep blackbox off root before enforcement flips
- "https://dms.iamworkin.lan/" - "https://dist.iamworkin.lan/healthz" # distribution OIDC flip outage was /healthz gating; probe the anonymous health route
- "https://dms.iamworkin.lan/healthz" # future OIDC posture; health route is already anonymous/live
- "https://menuboard.iamworkin.lan/" - "https://menuboard.iamworkin.lan/"
- "https://messageboard.iamworkin.lan/" - "https://messageboard.iamworkin.lan/"
- "https://presentations.iamworkin.lan/" - "https://presentations.iamworkin.lan/"
- "https://retail.iamworkin.lan/" - "https://retail.iamworkin.lan/"
- "https://ttsreader.iamworkin.lan/" - "https://ttsreader.iamworkin.lan/"
- "https://updates.iamworkin.lan/api/v1/manifests/_schema"
# Explicit healthcheck paths # Explicit healthcheck paths
- "https://library.iamworkin.lan/health"
- "https://aistation.iamworkin.lan/healthz"
- "https://knowledge.iamworkin.lan/healthz"
- "https://fc-llm-bridge.iamworkin.lan/healthz" - "https://fc-llm-bridge.iamworkin.lan/healthz"
- "https://acme.iamworkin.lan/health" - "https://acme.iamworkin.lan/health"
- "https://replay.iamworkin.lan/healthz"
- "https://updatecenter-internal.iamworkin.lan/api/v1/manifests/_schema"
- "https://worldbuilder.iamworkin.lan/healthz"
# Coverage gaps logged Q-MR-129/Q-MR-130: devices.iamworkin.lan
# returns 503 and e2e-test-pma/wpdemo only return 404.
# NOTE: services intentionally NOT in this probe surface # NOTE: services intentionally NOT in this probe surface
# - grafana.iamworkin.lan: every endpoint (incl. /api/health # - grafana.iamworkin.lan: every endpoint (incl. /api/health
# and /login) returns 401 behind Traefik basic-auth. # and /login) returns 401 behind Traefik basic-auth.
@@ -915,14 +910,11 @@ data:
# for: 30m absorbs sleep cycles. The EcoTank sleeps after ~5 min # for: 30m absorbs sleep cycles. The EcoTank sleeps after ~5 min
# of idle and SNMP times out, so 5m for: would page nightly. A # of idle and SNMP times out, so 5m for: would page nightly. A
# genuine printer outage (jam, disconnected) lasts well over 30m. # genuine printer outage (jam, disconnected) lasts well over 30m.
# Use a range-window expression: instant up{} can go stale/absent
# after repeated snmp-exporter 500s.
- alert: EpsonPrinterDown - alert: EpsonPrinterDown
expr: (max_over_time(up{job="snmp-printer"}[35m]) == bool 0) == 1 and (hour() >= 13 or hour() < 1) expr: up{job="snmp-printer"} == 0
for: 30m for: 30m
labels: labels:
severity: warning severity: warning
alert_channel: irc
annotations: annotations:
summary: "Epson ET-3750 SNMP unreachable for >30m (likely actual fault, not sleep)" summary: "Epson ET-3750 SNMP unreachable for >30m (likely actual fault, not sleep)"
@@ -1032,8 +1024,11 @@ data:
rules: rules:
- alert: KubeContainerRestartingFrequently - alert: KubeContainerRestartingFrequently
# Exclude github-runner: ephemeral runners register, run one job, # Exclude github-runner: ephemeral runners register, run one job,
# exit cleanly, then restart by design. # exit cleanly, and restart by design. Also require kube_pod_info so
expr: increase(kube_pod_container_status_restarts_total{namespace!="github-runner"}[1h]) > 5 # deleted rollout pods do not keep firing from retained restart series.
expr: |
increase(kube_pod_container_status_restarts_total{namespace!="github-runner"}[1h]) > 5
and on(namespace, pod) kube_pod_info
for: 15m for: 15m
labels: labels:
severity: warning severity: warning
@@ -1042,9 +1037,12 @@ data:
description: "Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} has restarted {{ $value | printf \"%.0f\" }} times in the last hour. Check 'kubectl describe pod' + last-state termination reason." description: "Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} has restarted {{ $value | printf \"%.0f\" }} times in the last hour. Check 'kubectl describe pod' + last-state termination reason."
- alert: KubeContainerCrashLooping - alert: KubeContainerCrashLooping
# Exclude github-runner: ephemeral runners register, run one job, # Same github-runner/delete-retention exclusions as the hourly
# exit cleanly, then restart by design. # restart rule above; real runner failures are covered by the
expr: increase(kube_pod_container_status_restarts_total{namespace!="github-runner"}[15m]) > 3 # dedicated LinuxRunnerOffline/MacMiniRunnerOffline alerts.
expr: |
increase(kube_pod_container_status_restarts_total{namespace!="github-runner"}[15m]) > 3
and on(namespace, pod) kube_pod_info
for: 5m for: 5m
labels: labels:
severity: critical severity: critical
@@ -1072,7 +1070,9 @@ data:
description: "Pod can't pull image. Check the image ref (often a stale tag or unreachable registry) and clean up if it's an orphan." description: "Pod can't pull image. Check the image ref (often a stale tag or unreachable registry) and clean up if it's an orphan."
- alert: KubeDeploymentReplicasMismatch - alert: KubeDeploymentReplicasMismatch
# Exclude github-runner: ephemeral runner deployments flap 0/1 between jobs by design. # github-runner has explicit runner-offline alerts; the generic
# replica-mismatch rule should not page on intentionally ephemeral
# 0/1 runner churn between CI jobs.
expr: kube_deployment_spec_replicas{namespace!="github-runner"} != kube_deployment_status_replicas_available{namespace!="github-runner"} expr: kube_deployment_spec_replicas{namespace!="github-runner"} != kube_deployment_status_replicas_available{namespace!="github-runner"}
for: 15m for: 15m
labels: labels:
@@ -3652,38 +3652,6 @@ data:
relativeTimeRange: {from: 120, to: 0} relativeTimeRange: {from: 120, to: 0}
datasourceUid: __expr__ datasourceUid: __expr__
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [600], type: gt}}], refId: C} model: {type: threshold, expression: B, conditions: [{evaluator: {params: [600], type: gt}}], refId: C}
- orgId: 1
name: SNMP Devices
folder: Infrastructure Alerts
interval: 1m
rules:
- uid: epson-printer-down-stale-window
title: EpsonPrinterDown
condition: C
for: 30m
noDataState: OK
execErrState: OK
annotations:
summary: Epson ET-3750 SNMP unreachable
description: The Epson ET-3750 snmp-printer target has reported only failed scrapes for at least 35 minutes.
runbook: "1. Check if printer is intentionally powered off 2. If printing needed: press power button on printer 3. Ping 10.0.58.107 after wake-up 4. Check WiFi on printer LCD if still unreachable"
labels:
severity: info
service: printer
alert_channel: irc
data:
- refId: A
relativeTimeRange: {from: 2100, to: 0}
datasourceUid: prometheus
model: {expr: '(max_over_time(up{job="snmp-printer"}[35m]) == bool 0) == 1 and (hour() >= 13 or hour() < 1)', instant: true, refId: A}
- refId: B
relativeTimeRange: {from: 2100, to: 0}
datasourceUid: __expr__
model: {type: reduce, expression: A, reducer: last, refId: B}
- refId: C
relativeTimeRange: {from: 2100, to: 0}
datasourceUid: __expr__
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [0], type: gt}}], refId: C}
- orgId: 1 - orgId: 1
name: CI Runners name: CI Runners
folder: CI Alerts folder: CI Alerts

View File

@@ -424,6 +424,82 @@ public sealed class FleetManifestLintTests
monitoring.Should().Contain("alert_channel: irc"); monitoring.Should().Contain("alert_channel: irc");
} }
/// <summary>
/// Asserts that <c>apps/monitoring/noc-monitoring.yaml</c> contains the
/// <c>github-runner</c>-excluding restart and replica-mismatch expressions, the
/// <c>kube_pod_info</c> join, and the comment pointing at the dedicated runner-offline alerts.
/// </summary>
[Fact]
public void Monitoring_GenericKubernetesAlerts_MustExcludeEphemeralGithubRunnerNamespace()
{
    var manifestPath = Path.Combine(Inventory.BluejayRoot, "apps", "monitoring", "noc-monitoring.yaml");
    var monitoring = File.ReadAllText(manifestPath);

    // Checked in order; the first missing fragment fails the test.
    var requiredFragments = new[]
    {
        "kube_pod_container_status_restarts_total{namespace!=\"github-runner\"}",
        "and on(namespace, pod) kube_pod_info",
        "kube_deployment_spec_replicas{namespace!=\"github-runner\"} != kube_deployment_status_replicas_available{namespace!=\"github-runner\"}",
        "dedicated LinuxRunnerOffline/MacMiniRunnerOffline alerts",
    };

    foreach (var fragment in requiredFragments)
    {
        monitoring.Should().Contain(fragment);
    }
}
/// <summary>
/// Asserts that <c>apps/monitoring/noc-monitoring.yaml</c> lists the explicit health-route
/// probe targets below and no longer contains the quoted root URL for
/// <c>print.iamworkin.lan</c>.
/// </summary>
[Fact]
public void Monitoring_BlackboxTargetsForOidcSensitiveServices_MustUseAnonymousHealthRoutesWhenAvailable()
{
    var manifestPath = Path.Combine(Inventory.BluejayRoot, "apps", "monitoring", "noc-monitoring.yaml");
    var monitoring = File.ReadAllText(manifestPath);

    // Health routes that must be present, checked in order.
    var requiredHealthTargets = new[]
    {
        "https://chat.iamworkin.lan/healthz",
        "https://dist.iamworkin.lan/healthz",
        "https://dms.iamworkin.lan/healthz",
        "https://print.iamworkin.lan/healthz",
        "https://knowledge.iamworkin.lan/healthz",
        "https://library.iamworkin.lan/health",
        "https://aistation.iamworkin.lan/healthz",
    };

    foreach (var target in requiredHealthTargets)
    {
        monitoring.Should().Contain(target);
    }

    // The trailing quote makes this match only the bare root URL, not ".../healthz".
    monitoring.Should().NotContain("https://print.iamworkin.lan/\"");
}
/// <summary>
/// Fleet-wide lint: any Deployment whose main container sets both
/// <c>FlowerCore__Auth__Enabled</c> and <c>FlowerCore__Auth__Oidc__Enabled</c> to "true"
/// (case-insensitive) and points an HTTP liveness, readiness, or startup probe at
/// <c>/healthz</c> must carry the pod-template annotation
/// <c>flowercore.io/healthz-auth-policy: allow-anonymous</c>.
/// </summary>
[Fact]
public void OidcEnforcedDeployments_WithHttpHealthzProbes_MustDeclareAnonymousHealthzContract()
{
    // Liveness is covered alongside readiness/startup: kubelet issues all three probe kinds
    // without credentials, and a failing liveness probe restarts the container, so an
    // auth-gated /healthz is harmful for every probe kind.
    var healthzProbeKeys = new[] { "livenessProbe", "readinessProbe", "startupProbe" };

    var violations = Inventory.Documents
        .Where(document => document.Kind == "Deployment")
        .SelectMany(document => document.MainContainerMappings()
            .Where(container => string.Equals(EnvValue(container, "FlowerCore__Auth__Enabled"), "true", StringComparison.OrdinalIgnoreCase))
            .Where(container => string.Equals(EnvValue(container, "FlowerCore__Auth__Oidc__Enabled"), "true", StringComparison.OrdinalIgnoreCase))
            .Where(container => healthzProbeKeys.Any(probeKey => ProbeHttpGetPath(container, probeKey) == "/healthz"))
            // The annotation value is matched exactly (ordinal, case-sensitive).
            .Where(_ => !string.Equals(
                PodAnnotation(document, "flowercore.io/healthz-auth-policy"),
                "allow-anonymous",
                StringComparison.Ordinal))
            .Select(container =>
            {
                var containerName = ManifestNodeExtensions.Scalar(container, "name") ?? "<unnamed>";
                return $"{document.Descriptor} container '{containerName}' enforces OIDC while probing /healthz but lacks flowercore.io/healthz-auth-policy: allow-anonymous.";
            }))
        .ToList();

    violations.Should().BeEmpty();
}
/// <summary>
/// Pins the <c>knowledge/knowledge-web</c> Deployment: its single main container has both
/// auth flags set to "true", its readiness probe targets <c>/healthz</c>, and the pod
/// template is annotated <c>flowercore.io/healthz-auth-policy: allow-anonymous</c>.
/// </summary>
[Fact]
public void Knowledge_OidcEnforcement_MustKeepHealthzAnonymousContractVisibleInManifest()
{
    static bool IsKnowledgeWebDeployment(ManifestDocument document) =>
        document.Kind == "Deployment"
        && document.Namespace == "knowledge"
        && document.Name == "knowledge-web";

    var knowledge = Inventory.Documents.Single(IsKnowledgeWebDeployment);
    var mainContainers = knowledge.MainContainerMappings();
    var container = mainContainers.Should().ContainSingle().Subject;

    EnvValue(container, "FlowerCore__Auth__Enabled").Should().Be("true");
    EnvValue(container, "FlowerCore__Auth__Oidc__Enabled").Should().Be("true");
    ProbeHttpGetPath(container, "readinessProbe").Should().Be("/healthz");
    PodAnnotation(knowledge, "flowercore.io/healthz-auth-policy").Should().Be("allow-anonymous");
}
/// <summary>
/// Pins the <c>fc-distribution/fc-distribution</c> Deployment in its staged state: OIDC is
/// configured (<c>Oidc__Enabled</c> "true") but enforcement is off (<c>Auth__Enabled</c>
/// "false"), the readiness probe targets <c>/healthz</c>, and the pod template does not
/// declare the <c>allow-anonymous</c> healthz policy.
/// </summary>
[Fact]
public void Distribution_OidcEnforcement_MustStayOffUntilHealthzAllowAnonymousProofLands()
{
    static bool IsDistributionDeployment(ManifestDocument document) =>
        document.Kind == "Deployment"
        && document.Namespace == "fc-distribution"
        && document.Name == "fc-distribution";

    var distribution = Inventory.Documents.Single(IsDistributionDeployment);
    var mainContainers = distribution.MainContainerMappings();
    var container = mainContainers.Should().ContainSingle().Subject;

    EnvValue(container, "FlowerCore__Auth__Oidc__Enabled").Should().Be("true");
    EnvValue(container, "FlowerCore__Auth__Enabled").Should().Be("false");
    ProbeHttpGetPath(container, "readinessProbe").Should().Be("/healthz");
    PodAnnotation(distribution, "flowercore.io/healthz-auth-policy").Should().NotBe("allow-anonymous");
}
[Fact] [Fact]
public void StatefulSets_WithVolumeClaimTemplates_MustDeclareFilesystemDefaults() public void StatefulSets_WithVolumeClaimTemplates_MustDeclareFilesystemDefaults()
{ {
@@ -768,6 +844,19 @@ public sealed class FleetManifestLintTests
.SingleOrDefault(env => string.Equals(ManifestNodeExtensions.Scalar(env, "name"), name, StringComparison.Ordinal)); .SingleOrDefault(env => string.Equals(ManifestNodeExtensions.Scalar(env, "name"), name, StringComparison.Ordinal));
} }
/// <summary>
/// Looks up the scalar at <c>spec.template.metadata.annotations.&lt;name&gt;</c> in
/// <paramref name="document"/>.
/// </summary>
/// <param name="document">Manifest document to read the annotation from.</param>
/// <param name="name">Annotation key.</param>
/// <returns>Whatever <c>document.Scalar</c> yields for that path (nullable).</returns>
private static string? PodAnnotation(ManifestDocument document, string name) =>
    document.Scalar("spec", "template", "metadata", "annotations", name);
/// <summary>
/// Reads <c>&lt;probeKey&gt;.httpGet.path</c> from a container mapping.
/// </summary>
/// <param name="container">Container mapping node to inspect.</param>
/// <param name="probeKey">Probe key on the container, e.g. <c>readinessProbe</c>.</param>
/// <returns>
/// The <c>path</c> scalar, or <c>null</c> when the probe or its <c>httpGet</c> mapping
/// cannot be resolved.
/// </returns>
private static string? ProbeHttpGetPath(YamlMappingNode container, string probeKey)
{
    if (!ManifestNodeExtensions.TryGetMapping(container, probeKey, out var probeNode))
    {
        return null;
    }

    if (!ManifestNodeExtensions.TryGetMapping(probeNode, "httpGet", out var httpGetNode))
    {
        return null;
    }

    return ManifestNodeExtensions.Scalar(httpGetNode, "path");
}
private static IReadOnlyList<ManifestDocument> FcDeviceManagementDocuments() private static IReadOnlyList<ManifestDocument> FcDeviceManagementDocuments()
{ {
return Inventory.Documents return Inventory.Documents

View File

@@ -1,124 +0,0 @@
using FluentAssertions;
using System.Text.RegularExpressions;
using Xunit;
namespace BluejayInfraLint.Tests;
/// <summary>
/// Text-level lint checks over the monitoring mirror
/// (<c>apps/monitoring/noc-monitoring.yaml</c> under the inventory root): scrape targets,
/// blackbox probe relabels, probe coverage, and selected alert expressions.
/// </summary>
[Trait("Category", "Unit")]
public sealed class MonitoringCoverageLintTests
{
    private static readonly ManifestInventory Inventory = ManifestInventory.Load();

    // Probe URLs that TraefikServiceProbes_MustCoverSprint57LiveFlowerCoreHosts requires.
    private static readonly string[] Sprint57ProbeTargets =
    {
        "https://dns.iamworkin.lan/",
        "https://flowercore.iamworkin.lan/healthz",
        "https://replay.iamworkin.lan/healthz",
        "https://signalcontrol.iamworkin.lan/health",
        "https://updatecenter-internal.iamworkin.lan/api/v1/manifests/_schema",
        "https://updates.iamworkin.lan/api/v1/manifests/_schema",
        "https://worldbuilder.iamworkin.lan/healthz",
    };

    /// <summary>The mirror must reference port 5200 on 10.0.58.113 and never port 5100.</summary>
    [Fact]
    public void PrometheusScrape_MustNotTargetDeadPiManagerPort()
    {
        var monitoring = ReadMonitoringMirror();

        monitoring.Should()
            .NotContain("10.0.58.113:5100")
            .And.Contain("10.0.58.113:5200");
    }

    /// <summary>
    /// Every <c>probe-*</c> job in the mirror must relabel to the in-cluster blackbox service;
    /// when <c>FLOWERCORE_NOTES_ROOT</c> is set, the notes copy of <c>prometheus.yml</c> must
    /// relabel to <c>localhost:9115</c> instead.
    /// </summary>
    [Fact]
    public void ProbeJobs_MustKeepEnvironmentSpecificBlackboxRelabels()
    {
        var monitoring = ReadMonitoringMirror();
        var probeJobs = FindProbeJobs(monitoring);

        probeJobs.Should()
            .NotBeEmpty()
            .And.OnlyContain(
                job => job.Contains("replacement: blackbox-exporter.monitoring.svc:9115", StringComparison.Ordinal),
                "the bluejay-infra mirror runs Prometheus in-cluster and should use the blackbox service DNS");

        var livePodmanPrometheus = TryReadNotesMonitoringFile("prometheus.yml");
        if (livePodmanPrometheus is null)
        {
            // No notes checkout configured; only the mirror is checked.
            return;
        }

        FindProbeJobs(livePodmanPrometheus).Should().OnlyContain(
            job => job.Contains("replacement: localhost:9115", StringComparison.Ordinal),
            "live Podman monitoring uses host networking, so blackbox probes must relabel to localhost:9115");
    }

    /// <summary>Each entry of <c>Sprint57ProbeTargets</c> must appear in the mirror.</summary>
    [Fact]
    public void TraefikServiceProbes_MustCoverSprint57LiveFlowerCoreHosts()
    {
        var monitoring = ReadMonitoringMirror();

        Array.ForEach(Sprint57ProbeTargets, target => monitoring.Should().Contain(target));
    }

    /// <summary>
    /// The EpsonPrinterDown alert must exist and use the 35m <c>max_over_time</c> window
    /// rather than the instant <c>up == 0</c> expression.
    /// </summary>
    [Fact]
    public void EpsonPrinterDown_MustUseRangeWindowForStaleScrapeCoverage()
    {
        var alerts = ReadMonitoringMirror();

        alerts.Should()
            .Contain("- alert: EpsonPrinterDown")
            .And.Contain("max_over_time(up{job=\"snmp-printer\"}[35m]) == bool 0")
            .And.NotContain("expr: up{job=\"snmp-printer\"} == 0");
    }

    /// <summary>
    /// The three generic Kubernetes alerts must exclude the <c>github-runner</c> namespace,
    /// and the mirror must carry the Epson Grafana rule markers.
    /// </summary>
    [Fact]
    public void MonitoringMirror_MustCarryRunnerExclusionsAndEpsonGrafanaDelivery()
    {
        var mirror = ReadMonitoringMirror();

        // Alert name paired with the fragment its block must contain; checked in order.
        var expectedAlertFragments = new (string AlertName, string Fragment)[]
        {
            ("KubeContainerRestartingFrequently", "kube_pod_container_status_restarts_total{namespace!=\"github-runner\"}[1h]"),
            ("KubeContainerCrashLooping", "kube_pod_container_status_restarts_total{namespace!=\"github-runner\"}[15m]"),
            ("KubeDeploymentReplicasMismatch", "kube_deployment_spec_replicas{namespace!=\"github-runner\"} != kube_deployment_status_replicas_available{namespace!=\"github-runner\"}"),
        };

        foreach (var (alertName, fragment) in expectedAlertFragments)
        {
            GetAlertBlock(mirror, alertName).Should().Contain(fragment);
        }

        mirror.Should()
            .Contain("uid: epson-printer-down-stale-window")
            .And.Contain("title: EpsonPrinterDown")
            .And.Contain("alert_channel: irc");
    }

    // Reads the monitoring mirror manifest from under the inventory root.
    private static string ReadMonitoringMirror()
    {
        var mirrorPath = Path.Combine(Inventory.BluejayRoot, "apps", "monitoring", "noc-monitoring.yaml");
        return File.ReadAllText(mirrorPath);
    }

    // Returns null when FLOWERCORE_NOTES_ROOT is unset or blank; otherwise reads
    // <root>/scripts/monitoring/<fileName> (File.ReadAllText throws if it cannot be read).
    private static string? TryReadNotesMonitoringFile(string fileName)
    {
        var notesRoot = Environment.GetEnvironmentVariable("FLOWERCORE_NOTES_ROOT");

        return string.IsNullOrWhiteSpace(notesRoot)
            ? null
            : File.ReadAllText(Path.Combine(notesRoot, "scripts", "monitoring", fileName));
    }

    // Extracts each `- job_name: "probe-…"` entry, up to the next job_name entry or end of input.
    private static IReadOnlyList<string> FindProbeJobs(string yaml)
    {
        var jobBlocks = new List<string>();
        var matches = Regex.Matches(
            yaml,
            "(?ms)^\\s+- job_name: \"probe-[^\"]+\".*?(?=^\\s+- job_name:|\\z)");

        foreach (Match jobMatch in matches)
        {
            jobBlocks.Add(jobMatch.Value);
        }

        return jobBlocks;
    }

    // Returns the text of the named alert, up to the next `- alert:` entry or end of input;
    // fails the calling test when the alert is absent.
    private static string GetAlertBlock(string yaml, string alertName)
    {
        var pattern = $"(?ms)^\\s+- alert: {Regex.Escape(alertName)}\\s*$.*?(?=^\\s+- alert:|\\z)";
        var match = Regex.Match(yaml, pattern);

        match.Success.Should().BeTrue($"alert {alertName} should be present in noc-monitoring.yaml");
        return match.Value;
    }
}