monitoring: mirror Sprint 57 coverage rules

This commit is contained in:
Andrew Stoltz
2026-06-03 22:46:33 -05:00
parent 404d884863
commit 0ed9b989fa
2 changed files with 177 additions and 5 deletions

View File

@@ -0,0 +1,124 @@
using FluentAssertions;
using System.Text.RegularExpressions;
using Xunit;
namespace BluejayInfraLint.Tests;
/// <summary>
/// Lint-style tests over the monitoring mirror manifest
/// (<c>apps/monitoring/noc-monitoring.yaml</c> under the bluejay root). Each test
/// reads the manifest as plain text and asserts that specific scrape targets,
/// probe relabels, and alert expressions are present or absent.
/// </summary>
[Trait("Category", "Unit")]
public sealed class MonitoringCoverageLintTests
{
    /// <summary>
    /// Matches one <c>probe-*</c> scrape job: from its quoted <c>job_name</c> line up to
    /// (but not including) the next <c>job_name</c> entry, or to the end of the input.
    /// Only job names wrapped in double quotes are recognized.
    /// </summary>
    private const string ProbeJobPattern =
        "(?ms)^\\s+- job_name: \"probe-[^\"]+\".*?(?=^\\s+- job_name:|\\z)";

    private static readonly ManifestInventory Inventory = ManifestInventory.Load();

    /// <summary>Probe target URLs that the mirror must mention verbatim.</summary>
    private static readonly string[] Sprint57ProbeTargets =
    {
        "https://dns.iamworkin.lan/",
        "https://flowercore.iamworkin.lan/healthz",
        "https://replay.iamworkin.lan/healthz",
        "https://signalcontrol.iamworkin.lan/health",
        "https://updatecenter-internal.iamworkin.lan/api/v1/manifests/_schema",
        "https://updates.iamworkin.lan/api/v1/manifests/_schema",
        "https://worldbuilder.iamworkin.lan/healthz",
    };

    /// <summary>
    /// The mirror must not reference <c>10.0.58.113:5100</c> and must reference
    /// <c>10.0.58.113:5200</c>.
    /// </summary>
    [Fact]
    public void PrometheusScrape_MustNotTargetDeadPiManagerPort()
    {
        var monitoring = ReadMonitoringMirror();

        monitoring.Should().NotContain("10.0.58.113:5100");
        monitoring.Should().Contain("10.0.58.113:5200");
    }

    /// <summary>
    /// Every probe job in the mirror must relabel to the in-cluster blackbox service.
    /// When the optional live Podman <c>prometheus.yml</c> is available, every probe
    /// job in it must relabel to <c>localhost:9115</c> instead.
    /// </summary>
    [Fact]
    public void ProbeJobs_MustKeepEnvironmentSpecificBlackboxRelabels()
    {
        var monitoring = ReadMonitoringMirror();
        var probeJobs = FindProbeJobs(monitoring);

        probeJobs.Should().NotBeEmpty();
        probeJobs.Should().OnlyContain(
            job => job.Contains("replacement: blackbox-exporter.monitoring.svc:9115", StringComparison.Ordinal),
            "the bluejay-infra mirror runs Prometheus in-cluster and should use the blackbox service DNS");

        // The live Podman config is optional: it is only checked when the notes
        // checkout is configured and actually contains the file.
        var livePodmanPrometheus = TryReadNotesMonitoringFile("prometheus.yml");
        if (livePodmanPrometheus is not null)
        {
            var liveProbeJobs = FindProbeJobs(livePodmanPrometheus);

            // Guard against a vacuous pass: OnlyContain can succeed on an empty
            // collection, which would hide a file whose probe jobs were not matched.
            liveProbeJobs.Should().NotBeEmpty(
                "the live Podman prometheus.yml should define at least one probe-* job");
            liveProbeJobs.Should().OnlyContain(
                job => job.Contains("replacement: localhost:9115", StringComparison.Ordinal),
                "live Podman monitoring uses host networking, so blackbox probes must relabel to localhost:9115");
        }
    }

    /// <summary>
    /// Every URL in <see cref="Sprint57ProbeTargets"/> must appear in the mirror.
    /// </summary>
    [Fact]
    public void TraefikServiceProbes_MustCoverSprint57LiveFlowerCoreHosts()
    {
        var monitoring = ReadMonitoringMirror();

        foreach (var target in Sprint57ProbeTargets)
        {
            monitoring.Should().Contain(target);
        }
    }

    /// <summary>
    /// The <c>EpsonPrinterDown</c> alert must exist, the mirror must contain the
    /// 35-minute <c>max_over_time</c> expression, and it must not contain the
    /// instant <c>up == 0</c> expression.
    /// </summary>
    [Fact]
    public void EpsonPrinterDown_MustUseRangeWindowForStaleScrapeCoverage()
    {
        var alerts = ReadMonitoringMirror();

        alerts.Should().Contain("- alert: EpsonPrinterDown");
        alerts.Should().Contain("max_over_time(up{job=\"snmp-printer\"}[35m]) == bool 0");
        alerts.Should().NotContain("expr: up{job=\"snmp-printer\"} == 0");
    }

    /// <summary>
    /// The three kube alerts must exclude the <c>github-runner</c> namespace in their
    /// expressions, and the mirror must carry the Epson rule uid, title, and the
    /// <c>irc</c> alert channel label.
    /// </summary>
    [Fact]
    public void MonitoringMirror_MustCarryRunnerExclusionsAndEpsonGrafanaDelivery()
    {
        var mirror = ReadMonitoringMirror();

        GetAlertBlock(mirror, "KubeContainerRestartingFrequently")
            .Should()
            .Contain("kube_pod_container_status_restarts_total{namespace!=\"github-runner\"}[1h]");
        GetAlertBlock(mirror, "KubeContainerCrashLooping")
            .Should()
            .Contain("kube_pod_container_status_restarts_total{namespace!=\"github-runner\"}[15m]");
        GetAlertBlock(mirror, "KubeDeploymentReplicasMismatch")
            .Should()
            .Contain("kube_deployment_spec_replicas{namespace!=\"github-runner\"} != kube_deployment_status_replicas_available{namespace!=\"github-runner\"}");

        mirror.Should().Contain("uid: epson-printer-down-stale-window");
        mirror.Should().Contain("title: EpsonPrinterDown");
        mirror.Should().Contain("alert_channel: irc");
    }

    /// <summary>
    /// Reads the full text of <c>apps/monitoring/noc-monitoring.yaml</c> relative to
    /// <c>Inventory.BluejayRoot</c>.
    /// </summary>
    private static string ReadMonitoringMirror() =>
        File.ReadAllText(Path.Combine(Inventory.BluejayRoot, "apps", "monitoring", "noc-monitoring.yaml"));

    /// <summary>
    /// Reads <c>scripts/monitoring/<paramref name="fileName"/></c> under the directory
    /// named by the <c>FLOWERCORE_NOTES_ROOT</c> environment variable.
    /// </summary>
    /// <param name="fileName">File name inside the notes <c>scripts/monitoring</c> directory.</param>
    /// <returns>
    /// The file text, or <c>null</c> when the environment variable is unset or blank,
    /// or when the file does not exist under the configured root.
    /// </returns>
    private static string? TryReadNotesMonitoringFile(string fileName)
    {
        var overrideRoot = Environment.GetEnvironmentVariable("FLOWERCORE_NOTES_ROOT");
        if (string.IsNullOrWhiteSpace(overrideRoot))
        {
            return null;
        }

        var path = Path.Combine(overrideRoot, "scripts", "monitoring", fileName);

        // Honor the Try/nullable contract: a configured root without this file
        // means "nothing to check", not a test error.
        return File.Exists(path) ? File.ReadAllText(path) : null;
    }

    /// <summary>
    /// Extracts the text of every <c>probe-*</c> scrape job found in <paramref name="yaml"/>
    /// using <see cref="ProbeJobPattern"/>.
    /// </summary>
    /// <param name="yaml">Raw YAML text to scan.</param>
    /// <returns>One string per matched job, in document order; empty when none match.</returns>
    private static IReadOnlyList<string> FindProbeJobs(string yaml) =>
        Regex.Matches(yaml, ProbeJobPattern)
            .Cast<Match>()
            .Select(match => match.Value)
            .ToList();

    /// <summary>
    /// Returns the text of the named alert rule: from its <c>- alert:</c> line up to
    /// (but not including) the next <c>- alert:</c> entry, or to the end of the input.
    /// Fails the calling test when the alert is not found.
    /// </summary>
    /// <param name="yaml">Raw YAML text to scan.</param>
    /// <param name="alertName">Exact alert name; regex metacharacters are escaped.</param>
    /// <returns>The matched alert block text.</returns>
    private static string GetAlertBlock(string yaml, string alertName)
    {
        var match = Regex.Match(
            yaml,
            $"(?ms)^\\s+- alert: {Regex.Escape(alertName)}\\s*$.*?(?=^\\s+- alert:|\\z)");

        match.Success.Should().BeTrue($"alert {alertName} should be present in noc-monitoring.yaml");
        return match.Value;
    }
}