diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..cc8d52e --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +# .NET build outputs (lint test project) +**/bin/ +**/obj/ + +# Editor / temp +.DS_Store +*.swp diff --git a/README.md b/README.md index 137e184..7a4ae51 100644 --- a/README.md +++ b/README.md @@ -99,8 +99,22 @@ curl -sk -X DELETE https://dns.iamworkin.lan/api/v1/servers//zones/iam - **CoreDNS template + ndots:5 collision**: inside pods, `..svc.cluster.local` with <5 dots gets search-expanded through `iamworkin.lan` FIRST and hits the wildcard template → resolves to Traefik VIP, not the real ClusterIP. Use short service names (``) in K8s manifests. See memory `feedback_coredns_ndots_template_collision.md`. - **Image not on node**: pods stuck `ErrImageNeverPull` means the image wasn't imported to the node Kubernetes scheduled the pod onto. `ctr images import` on all of rke2-server, rke2-agent1, rke2-agent2. - **StatefulSet PVC drift**: `volumeClaimTemplates` needs explicit `volumeMode: Filesystem` or ArgoCD SSA self-heals forever. See memory `feedback_argocd_statefulset_pvc_drift.md`. +- **IngressRoute namespace split**: this RKE2 Traefik install does not allow cross-namespace service refs. Keep the `IngressRoute`, backend `Service`, and TLS secret in the same namespace; if one host is shared across namespaces, duplicate the `Certificate` and move the route next to the destination service. +- **Public read-only hosts**: if a public host fronts a service that also exposes admin writes internally, add a Traefik route match like `Host(...) && (Method(GET) || Method(HEAD))` on the public edge instead of trusting the app to reject unsafe methods. +- **Traefik VIP netpols**: when a `NetworkPolicy` allows `10.0.56.200`, also allow the post-DNAT backend ports (`8443` for TLS plus `8080` or `8000` for HTTP) or Calico will drop the rewritten flow. 
+- **Auth-safe probes**: services behind API-key or global auth middleware should prefer `tcpSocket` probes unless `/health` is explicitly exempted before the middleware runs. - **ArgoCD must use internal Gitea URL**: `http://gitea-clusterip.gitea.svc.cluster.local:3000/bluejay/bluejay-infra.git`, not the external HTTPS URL (step-ca cert isn't trusted by ArgoCD). The `ApplicationSet` and any hand-created `Application` must both use the internal URL. +## Local manifest lint + +The repo now carries a local-first lint pass for the recurring K8s gotchas that have burned the fleet: + +```bash +dotnet test tests/bluejay-infra-lint/BluejayInfraLint.Tests.csproj -c Release +``` + +That test project sweeps `bluejay-infra/apps/**` plus the canonical sibling `FlowerCore.*/k8s` manifests that share the same workspace. Matching `conftest.dev` policy files live under `tests/bluejay-infra-lint/conftest.dev/` for environments that also have `conftest` or `opa`. + ## References - Cert-manager recovery playbook: `FlowerCore.Notes/memory/project_cert_manager_recovery_2026_04_22.md` diff --git a/apps/asterisk/deployment.yaml b/apps/asterisk/deployment.yaml index e81dd9d..e0da3e6 100644 --- a/apps/asterisk/deployment.yaml +++ b/apps/asterisk/deployment.yaml @@ -16,13 +16,25 @@ spec: metadata: labels: app: asterisk - spec: - nodeSelector: - kubernetes.io/hostname: rke2-agent1 - hostNetwork: true - dnsPolicy: ClusterFirstWithHostNet - securityContext: - fsGroup: 0 + spec: + nodeSelector: + kubernetes.io/hostname: rke2-agent1 + hostNetwork: true + # Keep the search list free of iamworkin.lan so CoreDNS's wildcard + # template cannot hijack public egress like downloads.asterisk.org. 
+ dnsPolicy: None + dnsConfig: + nameservers: + - 10.43.0.10 + searches: + - telephony.svc.cluster.local + - svc.cluster.local + - cluster.local + options: + - name: ndots + value: "2" + securityContext: + fsGroup: 0 # CoreDNS in this cluster has an iamworkin.lan wildcard that catches # any unresolved name and returns 10.0.56.200 (Traefik VIP), which # means downloads.asterisk.org inside the pod resolves to Traefik and diff --git a/apps/fc-llm-bridge/fc-llm-bridge.yaml b/apps/fc-llm-bridge/fc-llm-bridge.yaml index 0977e69..476b799 100644 --- a/apps/fc-llm-bridge/fc-llm-bridge.yaml +++ b/apps/fc-llm-bridge/fc-llm-bridge.yaml @@ -87,6 +87,20 @@ spec: prometheus.io/port: "8080" prometheus.io/path: "/metrics" spec: + # Use an explicit DNS policy so external FQDNs like api.anthropic.com are + # resolved directly instead of being expanded through the cluster search + # path that includes iamworkin.lan. + dnsPolicy: None + dnsConfig: + nameservers: + - 10.43.0.10 + searches: + - fc-llm-bridge.svc.cluster.local + - svc.cluster.local + - cluster.local + options: + - name: ndots + value: "2" securityContext: fsGroup: 1654 fsGroupChangePolicy: OnRootMismatch @@ -211,17 +225,6 @@ spec: port: 8080 initialDelaySeconds: 15 periodSeconds: 30 - # Lower ndots so external FQDNs like api.anthropic.com are tried BEFORE - # the ndots:5 default expands them through the cluster search path, which - # includes iamworkin.lan. CoreDNS has a `template IN A iamworkin.lan` - # wildcard that answers `api.anthropic.com.iamworkin.lan` with the - # Traefik VIP, which then serves a TRAEFIK-DEFAULT-CERT TLS cert and - # breaks egress to the real Anthropic API (memory: - # feedback_coredns_ndots_template_collision, generalized to external DNS). 
- dnsConfig: - options: - - name: ndots - value: "2" volumes: - name: data persistentVolumeClaim: diff --git a/apps/fc-messageboard/fc-messageboard.yaml b/apps/fc-messageboard/fc-messageboard.yaml index 67a6f13..502221f 100644 --- a/apps/fc-messageboard/fc-messageboard.yaml +++ b/apps/fc-messageboard/fc-messageboard.yaml @@ -69,16 +69,14 @@ spec: memory: "512Mi" cpu: "500m" livenessProbe: - httpGet: - path: /health + tcpSocket: port: 8080 initialDelaySeconds: 10 periodSeconds: 30 timeoutSeconds: 5 failureThreshold: 3 readinessProbe: - httpGet: - path: /health + tcpSocket: port: 8080 initialDelaySeconds: 10 periodSeconds: 10 diff --git a/apps/fc-signalcontrol/fc-signalcontrol.yaml b/apps/fc-signalcontrol/fc-signalcontrol.yaml index 3e59c2f..492fa55 100644 --- a/apps/fc-signalcontrol/fc-signalcontrol.yaml +++ b/apps/fc-signalcontrol/fc-signalcontrol.yaml @@ -76,15 +76,13 @@ spec: memory: "512Mi" cpu: "500m" livenessProbe: - httpGet: - path: /health + tcpSocket: port: http initialDelaySeconds: 30 periodSeconds: 30 timeoutSeconds: 5 readinessProbe: - httpGet: - path: /health + tcpSocket: port: http initialDelaySeconds: 10 periodSeconds: 10 diff --git a/apps/fc-ttsreader/fc-ttsreader.yaml b/apps/fc-ttsreader/fc-ttsreader.yaml index 6023a49..a1d13ba 100644 --- a/apps/fc-ttsreader/fc-ttsreader.yaml +++ b/apps/fc-ttsreader/fc-ttsreader.yaml @@ -37,6 +37,19 @@ spec: app.kubernetes.io/name: ttsreader-piper app.kubernetes.io/part-of: flowercore spec: + # Bypass CoreDNS's *.iamworkin.lan wildcard so the init container reaches + # huggingface.co directly when it seeds voice models. 
+ dnsPolicy: None + dnsConfig: + nameservers: + - 10.43.0.10 + searches: + - fc-ttsreader.svc.cluster.local + - svc.cluster.local + - cluster.local + options: + - name: ndots + value: "2" initContainers: - name: seed-voices image: rhasspy/wyoming-piper:latest diff --git a/tests/bluejay-infra-lint/BluejayInfraLint.Tests.csproj b/tests/bluejay-infra-lint/BluejayInfraLint.Tests.csproj new file mode 100644 index 0000000..0545d1c --- /dev/null +++ b/tests/bluejay-infra-lint/BluejayInfraLint.Tests.csproj @@ -0,0 +1,24 @@ + + + net10.0 + enable + enable + false + true + + + + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + + + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + + diff --git a/tests/bluejay-infra-lint/FleetManifestLintTests.cs b/tests/bluejay-infra-lint/FleetManifestLintTests.cs new file mode 100644 index 0000000..10c7b97 --- /dev/null +++ b/tests/bluejay-infra-lint/FleetManifestLintTests.cs @@ -0,0 +1,566 @@ +using FluentAssertions; +using System.Text.RegularExpressions; +using Xunit; +using YamlDotNet.Core; +using YamlDotNet.RepresentationModel; + +namespace BluejayInfraLint.Tests; + +[Trait("Category", "Unit")] +public sealed class FleetManifestLintTests +{ + private static readonly ManifestInventory Inventory = ManifestInventory.Load(); + + private static readonly HashSet PublicReadOnlyHosts = new(StringComparer.Ordinal) + { + "dist.flowercore.io", + "dns.iamworkin.lan", + }; + + private static readonly HashSet ApiKeyProtectedDeployments = new(StringComparer.Ordinal) + { + "messageboard-web", + "scoreboard-web", + "segmentdisplay-web", + "signalcontrol-web", + }; + + private static readonly HashSet PublicEgressDeployments = new(StringComparer.Ordinal) + { + "asterisk", + "fc-llm-bridge", + "mysql-web", + "php-web", + "ttsreader-align", + "ttsreader-kokoro", + "ttsreader-modern", + "ttsreader-piper", + }; + + [Fact] + public void IngressRoutes_MustKeepServiceReferencesInTheSameNamespace() + { + 
var violations = Inventory.Documents + .Where(document => document.Kind == "IngressRoute") + .SelectMany(document => + document.MappingSequence("spec", "routes") + .SelectMany(route => + route.MappingSequence("services") + .Select(service => new + { + Document = document, + ServiceName = ManifestNodeExtensions.Scalar(service, "name"), + ServiceNamespace = ManifestNodeExtensions.Scalar(service, "namespace"), + }))) + .Where(entry => !string.IsNullOrWhiteSpace(entry.ServiceNamespace)) + .Where(entry => !string.Equals(entry.ServiceNamespace, entry.Document.Namespace, StringComparison.Ordinal)) + .Select(entry => + $"{entry.Document.Descriptor} references Service '{entry.ServiceName}' in namespace '{entry.ServiceNamespace}'.") + .ToList(); + + violations.Should().BeEmpty(); + } + + [Fact] + public void PublicReadOnlyIngressRoutes_MustExplicitlyAllowOnlyGetAndHead() + { + var violations = Inventory.Documents + .Where(document => document.Kind == "IngressRoute") + .SelectMany(document => + document.MappingSequence("spec", "routes") + .Select(route => new + { + Document = document, + Match = ManifestNodeExtensions.Scalar(route, "match") ?? 
string.Empty, + })) + .Where(entry => PublicReadOnlyHosts.Any(host => entry.Match.Contains($"Host(`{host}`)", StringComparison.Ordinal))) + .Where(entry => !entry.Match.Contains("Method(`GET`)", StringComparison.Ordinal) + || !entry.Match.Contains("Method(`HEAD`)", StringComparison.Ordinal)) + .Select(entry => $"{entry.Document.Descriptor} is missing an explicit GET/HEAD method allowlist.") + .ToList(); + + violations.Should().BeEmpty(); + } + + [Fact] + public void TraefikVipNetworkPolicies_MustAllowPostDnatBackendPorts() + { + var violations = Inventory.Documents + .Where(document => document.Kind == "NetworkPolicy") + .Where(document => document.AllScalars().Any(value => value.Contains("10.0.56.200", StringComparison.Ordinal))) + .SelectMany(document => + { + var ports = document.EgressPorts().ToHashSet(StringComparer.Ordinal); + var localViolations = new List(); + + if (ports.Contains("443") && !ports.Contains("8443")) + { + localViolations.Add($"{document.Descriptor} allows Traefik VIP 443 without backend port 8443."); + } + + if (ports.Contains("80") && !ports.Contains("8000") && !ports.Contains("8080")) + { + localViolations.Add($"{document.Descriptor} allows Traefik VIP 80 without a backend HTTP port (8000/8080)."); + } + + return localViolations; + }) + .ToList(); + + violations.Should().BeEmpty(); + } + + [Fact] + public void ApiKeyProtectedDeployments_MustUseTcpSocketHealthProbes() + { + var violations = Inventory.Documents + .Where(document => document.Kind == "Deployment") + .Where(document => ApiKeyProtectedDeployments.Contains(document.Name)) + .SelectMany(document => document.ContainerMappings().SelectMany(container => + ProbeViolations(document, container, "readinessProbe") + .Concat(ProbeViolations(document, container, "livenessProbe")))) + .ToList(); + + violations.Should().BeEmpty(); + } + + [Fact] + public void StatefulSets_WithVolumeClaimTemplates_MustDeclareFilesystemDefaults() + { + var violations = Inventory.Documents + .Where(document => 
document.Kind == "StatefulSet") + .Where(document => document.MappingSequence("spec", "volumeClaimTemplates").Count > 0) + .SelectMany(document => + { + var localViolations = new List(); + + if (string.IsNullOrWhiteSpace(document.Scalar("spec", "podManagementPolicy"))) + { + localViolations.Add($"{document.Descriptor} is missing spec.podManagementPolicy."); + } + + if (string.IsNullOrWhiteSpace(document.Scalar("spec", "revisionHistoryLimit"))) + { + localViolations.Add($"{document.Descriptor} is missing spec.revisionHistoryLimit."); + } + + foreach (var claimTemplate in document.MappingSequence("spec", "volumeClaimTemplates")) + { + if (!string.Equals( + ManifestNodeExtensions.Scalar(claimTemplate, "spec", "volumeMode"), + "Filesystem", + StringComparison.Ordinal)) + { + var claimName = ManifestNodeExtensions.Scalar(claimTemplate, "metadata", "name") ?? ""; + localViolations.Add($"{document.Descriptor} volumeClaimTemplate '{claimName}' is missing volumeMode: Filesystem."); + } + } + + return localViolations; + }) + .ToList(); + + violations.Should().BeEmpty(); + } + + [Fact] + public void LocallyImportedImages_MustUseLocalhostPrefixAndNeverPullPolicy() + { + var violations = Inventory.Documents + .Where(document => document.PodSpec() is not null) + .SelectMany(document => document.ContainerSpecs() + .Where(container => !string.IsNullOrWhiteSpace(container.Image)) + .Select(container => new + { + Document = document, + Container = container, + })) + .Where(entry => + (entry.Container.Image.StartsWith("localhost/", StringComparison.Ordinal) + && !string.Equals(entry.Container.ImagePullPolicy, "Never", StringComparison.Ordinal)) + || (entry.Container.Image.StartsWith("fc-", StringComparison.Ordinal) + && !entry.Container.Image.Contains('/', StringComparison.Ordinal))) + .Select(entry => + { + if (entry.Container.Image.StartsWith("localhost/", StringComparison.Ordinal)) + { + return $"{entry.Document.Descriptor} container '{entry.Container.Name}' uses 
{entry.Container.Image} without imagePullPolicy: Never."; + } + + return $"{entry.Document.Descriptor} container '{entry.Container.Name}' uses non-local image '{entry.Container.Image}' for a node-imported FlowerCore workload."; + }) + .ToList(); + + violations.Should().BeEmpty(); + } + + [Fact] + public void PublicEgressDeployments_MustOptOutOfIamworkinLanSearchSuffixes() + { + var violations = Inventory.Documents + .Where(document => document.PodSpec() is not null) + .Where(document => PublicEgressDeployments.Contains(document.Name)) + .SelectMany(document => + { + var localViolations = new List(); + var podSpec = document.PodSpec()!; + var dnsPolicy = ManifestNodeExtensions.Scalar(podSpec, "dnsPolicy"); + var searches = ManifestNodeExtensions.ScalarSequence(podSpec, "dnsConfig", "searches").ToList(); + + if (!string.Equals(dnsPolicy, "None", StringComparison.Ordinal)) + { + localViolations.Add($"{document.Descriptor} is missing dnsPolicy: None."); + } + + if (searches.Count == 0) + { + localViolations.Add($"{document.Descriptor} is missing dnsConfig.searches."); + } + else if (searches.Any(search => search.Contains("iamworkin.lan", StringComparison.OrdinalIgnoreCase))) + { + localViolations.Add($"{document.Descriptor} still includes iamworkin.lan in dnsConfig.searches."); + } + + return localViolations; + }) + .ToList(); + + violations.Should().BeEmpty(); + } + + private static IEnumerable ProbeViolations( + ManifestDocument document, + YamlMappingNode container, + string probeKey) + { + if (!ManifestNodeExtensions.TryGetMapping(container, probeKey, out var probe) + || !ManifestNodeExtensions.TryGetMapping(probe, "httpGet", out var httpGet)) + { + return Array.Empty(); + } + + var path = ManifestNodeExtensions.Scalar(httpGet, "path"); + if (!string.Equals(path, "/health", StringComparison.Ordinal)) + { + return Array.Empty(); + } + + var containerName = ManifestNodeExtensions.Scalar(container, "name") ?? 
""; + return new[] + { + $"{document.Descriptor} container '{containerName}' still uses {probeKey}.httpGet on /health.", + }; + } +} + +internal sealed class ManifestInventory +{ + private ManifestInventory(string workspaceRoot, string bluejayRoot, IReadOnlyList documents) + { + WorkspaceRoot = workspaceRoot; + BluejayRoot = bluejayRoot; + Documents = documents; + } + + public string WorkspaceRoot { get; } + + public string BluejayRoot { get; } + + public IReadOnlyList Documents { get; } + + public static ManifestInventory Load() + { + var bluejayRoot = FindBluejayInfraRoot(); + var workspaceRoot = Directory.GetParent(bluejayRoot)?.FullName + ?? throw new DirectoryNotFoundException($"Could not resolve workspace root from '{bluejayRoot}'."); + + var documents = ManifestRoots(workspaceRoot, bluejayRoot) + .SelectMany(LoadDocumentsFromRoot) + .ToList(); + + return new ManifestInventory(workspaceRoot, bluejayRoot, documents); + } + + private static string FindBluejayInfraRoot() + { + var current = new DirectoryInfo(AppContext.BaseDirectory); + while (current is not null) + { + if (Directory.Exists(Path.Combine(current.FullName, "apps")) + && File.Exists(Path.Combine(current.FullName, "README.md"))) + { + return current.FullName; + } + + current = current.Parent; + } + + throw new DirectoryNotFoundException("Could not find the bluejay-infra repository root from the test output directory."); + } + + private static IEnumerable ManifestRoots(string workspaceRoot, string bluejayRoot) + { + var roots = new[] + { + Path.Combine(bluejayRoot, "apps"), + Path.Combine(workspaceRoot, "FlowerCore.Chat", "k8s"), + Path.Combine(workspaceRoot, "FlowerCore.DMS", "k8s"), + Path.Combine(workspaceRoot, "FlowerCore.DNS", "k8s"), + Path.Combine(workspaceRoot, "FlowerCore.Intranet.Web", "k8s"), + Path.Combine(workspaceRoot, "FlowerCore.Kiosk", "k8s"), + Path.Combine(workspaceRoot, "FlowerCore.Media", "k8s"), + Path.Combine(workspaceRoot, "FlowerCore.MenuBoard", "k8s"), + 
Path.Combine(workspaceRoot, "FlowerCore.MessageBoard", "k8s"),
+            Path.Combine(workspaceRoot, "FlowerCore.MySQL", "k8s"),
+            Path.Combine(workspaceRoot, "FlowerCore.PHP", "k8s"),
+            Path.Combine(workspaceRoot, "FlowerCore.Presentations", "k8s"),
+            Path.Combine(workspaceRoot, "FlowerCore.Print.Web", "k8s"),
+            Path.Combine(workspaceRoot, "FlowerCore.RemoteDesktop", "k8s"),
+            Path.Combine(workspaceRoot, "FlowerCore.Scoreboard", "k8s"),
+            Path.Combine(workspaceRoot, "FlowerCore.SegmentDisplay", "k8s"),
+            Path.Combine(workspaceRoot, "FlowerCore.SignalControl", "k8s"),
+            Path.Combine(workspaceRoot, "FlowerCore.TtsReader", "k8s"),
+            Path.Combine(workspaceRoot, "FlowerCore.Updater", "k8s"),
+        };
+
+        return roots.Where(Directory.Exists);
+    }
+
+    private static IEnumerable<ManifestDocument> LoadDocumentsFromRoot(string root)
+    {
+        foreach (var filePath in Directory.EnumerateFiles(root, "*.yaml", SearchOption.AllDirectories))
+        {
+            var fileText = File.ReadAllText(filePath);
+            var segments = SplitManifestDocuments(fileText);
+            // Parse each segment on its own and keep only mapping roots that declare a "kind".
+            for (var index = 0; index < segments.Count; index++)
+            {
+                var yaml = new YamlStream();
+                try
+                {
+                    using var reader = new StringReader(segments[index]);
+                    yaml.Load(reader);
+                }
+                catch (YamlException)
+                {
+                    // Best-effort scan: a segment that is not valid YAML is skipped instead of failing the run.
+                    continue;
+                }
+
+                if (yaml.Documents.Count == 0)
+                {
+                    continue;
+                }
+
+                if (yaml.Documents[0].RootNode is YamlMappingNode mapping
+                    && ManifestNodeExtensions.Scalar(mapping, "kind") is not null)
+                {
+                    yield return new ManifestDocument(root, filePath, index, fileText, mapping);
+                }
+            }
+        }
+    }
+
+    private static IReadOnlyList<string> SplitManifestDocuments(string fileText)
+    {
+        var documents = new List<string>();
+        var currentLines = new List<string>();
+        var seenApiVersion = false;
+        // Only column-0 markers delimit documents; an indented "---" or a nested "apiVersion:" key stays in the current one.
+        foreach (var line in Regex.Split(fileText, @"\r?\n"))
+        {
+            if (Regex.IsMatch(line, @"^---\s*$"))
+            {
+                FlushCurrentDocument();
+                continue;
+            }
+            // A second top-level apiVersion starts a new document even when the "---" separator is missing.
+            if (Regex.IsMatch(line, @"^apiVersion:\s*")
+                && seenApiVersion
+                && currentLines.Any(existing => !string.IsNullOrWhiteSpace(existing)))
+            {
+                FlushCurrentDocument();
+            }
+
+            currentLines.Add(line);
+            if (Regex.IsMatch(line, @"^apiVersion:\s*"))
+            {
+                seenApiVersion = true;
+            }
+        }
+
+        FlushCurrentDocument();
+        return documents;
+        // Local helper: emits the buffered lines as one document (if non-blank) and resets the per-document state.
+        void FlushCurrentDocument()
+        {
+            var text = string.Join(Environment.NewLine, currentLines).Trim();
+            if (!string.IsNullOrWhiteSpace(text))
+            {
+                documents.Add(text);
+            }
+
+            currentLines.Clear();
+            seenApiVersion = false;
+        }
+    }
+}
+
+internal sealed record ManifestDocument(
+    string RootPath,
+    string FilePath,
+    int DocumentIndex,
+    string FileText,
+    YamlMappingNode Root)
+{
+    public string Kind => Scalar("kind") ?? string.Empty;
+
+    public string Name => Scalar("metadata", "name") ?? $"document-{DocumentIndex}";
+
+    public string Namespace => Scalar("metadata", "namespace") ?? string.Empty;
+
+    public string RelativePath => Path.GetRelativePath(RootPath, FilePath).Replace('\\', '/');
+
+    public string Descriptor => $"{Kind} {Namespace}/{Name} [{RelativePath}#{DocumentIndex + 1}]";
+
+    public string? Scalar(params string[] path) => ManifestNodeExtensions.Scalar(Root, path);
+
+    public IReadOnlyList<YamlMappingNode> MappingSequence(params string[] path) => ManifestNodeExtensions.MappingSequence(Root, path);
+
+    public IEnumerable<string> AllScalars() => ManifestNodeExtensions.AllScalars(Root);
+
+    public IReadOnlyList<string> EgressPorts()
+    {
+        return MappingSequence("spec", "egress")
+            .SelectMany(egressRule => ManifestNodeExtensions.MappingSequence(egressRule, "ports"))
+            .Select(portMapping => ManifestNodeExtensions.Scalar(portMapping, "port"))
+            .Where(value => !string.IsNullOrWhiteSpace(value))
+            .Cast<string>()
+            .ToList();
+    }
+
+    public YamlMappingNode?
PodSpec() + { + return Kind switch + { + "Deployment" or "StatefulSet" or "DaemonSet" or "Job" => + ManifestNodeExtensions.Mapping(Root, "spec", "template", "spec"), + "CronJob" => + ManifestNodeExtensions.Mapping(Root, "spec", "jobTemplate", "spec", "template", "spec"), + _ => null, + }; + } + + public IReadOnlyList ContainerMappings() + { + var podSpec = PodSpec(); + if (podSpec is null) + { + return Array.Empty(); + } + + return ManifestNodeExtensions.MappingSequence(podSpec, "containers") + .Concat(ManifestNodeExtensions.MappingSequence(podSpec, "initContainers")) + .ToList(); + } + + public IReadOnlyList ContainerSpecs() + { + return ContainerMappings() + .Select(container => new ContainerSpec( + ManifestNodeExtensions.Scalar(container, "name") ?? "", + ManifestNodeExtensions.Scalar(container, "image") ?? string.Empty, + ManifestNodeExtensions.Scalar(container, "imagePullPolicy") ?? string.Empty)) + .ToList(); + } +} + +internal sealed record ContainerSpec(string Name, string Image, string ImagePullPolicy); + +internal static class ManifestNodeExtensions +{ + public static string? Scalar(this YamlMappingNode mapping, params string[] path) + { + return TryGetNode(mapping, path, out var node) && node is YamlScalarNode scalar + ? scalar.Value + : null; + } + + public static YamlMappingNode? Mapping(this YamlMappingNode mapping, params string[] path) + { + return TryGetNode(mapping, path, out var node) ? node as YamlMappingNode : null; + } + + public static bool TryGetMapping(this YamlMappingNode mapping, string key, out YamlMappingNode result) + { + if (TryGetChild(mapping, key, out var child) && child is YamlMappingNode childMapping) + { + result = childMapping; + return true; + } + + result = null!; + return false; + } + + public static IReadOnlyList MappingSequence(this YamlMappingNode mapping, params string[] path) + { + return TryGetNode(mapping, path, out var node) && node is YamlSequenceNode sequence + ? 
sequence.Children.OfType().ToList() + : Array.Empty(); + } + + public static IReadOnlyList ScalarSequence(this YamlMappingNode mapping, params string[] path) + { + return TryGetNode(mapping, path, out var node) && node is YamlSequenceNode sequence + ? sequence.Children.OfType() + .Select(child => child.Value) + .Where(value => !string.IsNullOrWhiteSpace(value)) + .Cast() + .ToList() + : Array.Empty(); + } + + public static IEnumerable AllScalars(YamlNode node) + { + return node switch + { + YamlScalarNode scalar when !string.IsNullOrWhiteSpace(scalar.Value) => new[] { scalar.Value! }, + YamlSequenceNode sequence => sequence.Children.SelectMany(AllScalars), + YamlMappingNode mapping => mapping.Children.SelectMany(entry => AllScalars(entry.Key).Concat(AllScalars(entry.Value))), + _ => Array.Empty(), + }; + } + + private static bool TryGetNode(YamlMappingNode mapping, IReadOnlyList path, out YamlNode node) + { + YamlNode current = mapping; + foreach (var segment in path) + { + if (current is not YamlMappingNode currentMapping || !TryGetChild(currentMapping, segment, out current)) + { + node = null!; + return false; + } + } + + node = current; + return true; + } + + private static bool TryGetChild(YamlMappingNode mapping, string key, out YamlNode value) + { + foreach (var entry in mapping.Children) + { + if (entry.Key is YamlScalarNode scalar + && string.Equals(scalar.Value, key, StringComparison.Ordinal)) + { + value = entry.Value; + return true; + } + } + + value = null!; + return false; + } +} diff --git a/tests/bluejay-infra-lint/conftest.dev/01_cross_namespace_ingressroute.rego b/tests/bluejay-infra-lint/conftest.dev/01_cross_namespace_ingressroute.rego new file mode 100644 index 0000000..0ca39a3 --- /dev/null +++ b/tests/bluejay-infra-lint/conftest.dev/01_cross_namespace_ingressroute.rego @@ -0,0 +1,12 @@ +package bluejayinfra.cross_namespace_ingressroute + +deny[msg] { + input.kind == "IngressRoute" + ns := object.get(input.metadata, "namespace", "") + route := 
input.spec.routes[_]
+    service := route.services[_]
+    svc_ns := object.get(service, "namespace", "")
+    svc_ns != ""
+    svc_ns != ns
+    msg := sprintf("IngressRoute %s/%s references Service %s in namespace %s", [ns, input.metadata.name, service.name, svc_ns])
+}
diff --git a/tests/bluejay-infra-lint/conftest.dev/02_public_method_allowlist.rego b/tests/bluejay-infra-lint/conftest.dev/02_public_method_allowlist.rego
new file mode 100644
index 0000000..691d08a
--- /dev/null
+++ b/tests/bluejay-infra-lint/conftest.dev/02_public_method_allowlist.rego
@@ -0,0 +1,30 @@
+package bluejayinfra.public_method_allowlist
+
+# Hosts treated as public read-only edges.
+public_hosts := {"dist.flowercore.io", "dns.iamworkin.lan"}
+
+# Deny a route for a public host whose match expression lacks Method(`GET`).
+# The namespace is read with object.get so a manifest without
+# metadata.namespace is still reported instead of leaving the rule undefined.
+deny[msg] {
+    input.kind == "IngressRoute"
+    route := input.spec.routes[_]
+    match := object.get(route, "match", "")
+    host := public_hosts[_]
+    contains(match, sprintf("Host(`%s`)", [host]))
+    not contains(match, "Method(`GET`)")
+    ns := object.get(input.metadata, "namespace", "")
+    msg := sprintf("IngressRoute %s/%s is missing Method(GET) for public read-only host %s", [ns, input.metadata.name, host])
+}
+
+# Deny a route for a public host whose match expression lacks Method(`HEAD`).
+deny[msg] {
+    input.kind == "IngressRoute"
+    route := input.spec.routes[_]
+    match := object.get(route, "match", "")
+    host := public_hosts[_]
+    contains(match, sprintf("Host(`%s`)", [host]))
+    not contains(match, "Method(`HEAD`)")
+    ns := object.get(input.metadata, "namespace", "")
+    msg := sprintf("IngressRoute %s/%s is missing Method(HEAD) for public read-only host %s", [ns, input.metadata.name, host])
+}
diff --git a/tests/bluejay-infra-lint/conftest.dev/03_traefik_vip_backend_ports.rego b/tests/bluejay-infra-lint/conftest.dev/03_traefik_vip_backend_ports.rego
new file mode 100644
index 0000000..3b8c317
--- /dev/null
+++ b/tests/bluejay-infra-lint/conftest.dev/03_traefik_vip_backend_ports.rego
@@ -0,0 +1,36 @@
+package bluejayinfra.traefik_vip_backend_ports
+
+# True when any egress rule targets 10.0.56.200/32 through an ipBlock.
+has_vip {
+    some i
+    some j
+    input.spec.egress[i].to[j].ipBlock.cidr == "10.0.56.200/32"
+}
+
+# True when any egress rule lists the given port value.
+has_port(port) {
+    some i
+    some j
+    input.spec.egress[i].ports[j].port == port
+}
+
+# A policy that allows 443 towards the VIP must also allow backend port 8443.
+deny[msg] {
+    input.kind == "NetworkPolicy"
+    has_vip
+    has_port(443)
+    not has_port(8443)
+    ns := object.get(input.metadata, "namespace", "")
+    msg := sprintf("NetworkPolicy %s/%s allows 10.0.56.200:443 without backend port 8443", [ns, input.metadata.name])
+}
+
+# A policy that allows 80 towards the VIP must also allow backend port 8080 or 8000.
+deny[msg] {
+    input.kind == "NetworkPolicy"
+    has_vip
+    has_port(80)
+    not has_port(8080)
+    not has_port(8000)
+    ns := object.get(input.metadata, "namespace", "")
+    msg := sprintf("NetworkPolicy %s/%s allows 10.0.56.200:80 without backend HTTP port 8080 or 8000", [ns, input.metadata.name])
+}
diff --git a/tests/bluejay-infra-lint/conftest.dev/04_auth_probe_path.rego b/tests/bluejay-infra-lint/conftest.dev/04_auth_probe_path.rego
new file mode 100644
index 0000000..b627d8a
--- /dev/null
+++ b/tests/bluejay-infra-lint/conftest.dev/04_auth_probe_path.rego
@@ -0,0 +1,33 @@
+package bluejayinfra.auth_probe_path
+
+# Deployments that this policy treats as sitting behind API key middleware.
+protected_deployments := {
+    "messageboard-web",
+    "scoreboard-web",
+    "segmentdisplay-web",
+    "signalcontrol-web",
+}
+
+# Deny a readinessProbe that performs httpGet against /health.
+deny[msg] {
+    input.kind == "Deployment"
+    protected_deployments[input.metadata.name]
+    container := input.spec.template.spec.containers[_]
+    probe := object.get(container, "readinessProbe", {})
+    http_get := object.get(probe, "httpGet", {})
+    object.get(http_get, "path", "") == "/health"
+    ns := object.get(input.metadata, "namespace", "")
+    msg := sprintf("Deployment %s/%s must not use readinessProbe.httpGet /health behind API key middleware", [ns, input.metadata.name])
+}
+
+# Deny a livenessProbe that performs httpGet against /health.
+deny[msg] {
+    input.kind == "Deployment"
+    protected_deployments[input.metadata.name]
+    container := input.spec.template.spec.containers[_]
+    probe := object.get(container, "livenessProbe", {})
+    http_get := object.get(probe, "httpGet", {})
+    object.get(http_get, "path", "") == "/health"
+    ns := object.get(input.metadata, "namespace", "")
+    msg := sprintf("Deployment %s/%s must not use livenessProbe.httpGet /health behind API key middleware", [ns, input.metadata.name])
+}
diff --git a/tests/bluejay-infra-lint/conftest.dev/05_statefulset_volumeclaim_defaults.rego
b/tests/bluejay-infra-lint/conftest.dev/05_statefulset_volumeclaim_defaults.rego
new file mode 100644
index 0000000..b74a833
--- /dev/null
+++ b/tests/bluejay-infra-lint/conftest.dev/05_statefulset_volumeclaim_defaults.rego
@@ -0,0 +1,31 @@
+package bluejayinfra.statefulset_volumeclaim_defaults
+
+# A StatefulSet with volumeClaimTemplates must set spec.podManagementPolicy.
+deny[msg] {
+    input.kind == "StatefulSet"
+    count(object.get(input.spec, "volumeClaimTemplates", [])) > 0
+    object.get(input.spec, "podManagementPolicy", "") == ""
+    ns := object.get(input.metadata, "namespace", "")
+    msg := sprintf("StatefulSet %s/%s is missing spec.podManagementPolicy", [ns, input.metadata.name])
+}
+
+# A StatefulSet with volumeClaimTemplates must set spec.revisionHistoryLimit.
+# The lookup defaults to null so that an explicit value of 0 counts as set.
+deny[msg] {
+    input.kind == "StatefulSet"
+    count(object.get(input.spec, "volumeClaimTemplates", [])) > 0
+    object.get(input.spec, "revisionHistoryLimit", null) == null
+    ns := object.get(input.metadata, "namespace", "")
+    msg := sprintf("StatefulSet %s/%s is missing spec.revisionHistoryLimit", [ns, input.metadata.name])
+}
+
+# Every volumeClaimTemplate must declare volumeMode: Filesystem; a template without spec or metadata is still reported.
+deny[msg] {
+    input.kind == "StatefulSet"
+    claim := input.spec.volumeClaimTemplates[_]
+    claim_spec := object.get(claim, "spec", {})
+    object.get(claim_spec, "volumeMode", "") != "Filesystem"
+    claim_name := object.get(object.get(claim, "metadata", {}), "name", "")
+    ns := object.get(input.metadata, "namespace", "")
+    msg := sprintf("StatefulSet %s/%s volumeClaimTemplate %s is missing volumeMode: Filesystem", [ns, input.metadata.name, claim_name])
+}
diff --git a/tests/bluejay-infra-lint/conftest.dev/06_localhost_image_pull_policy.rego b/tests/bluejay-infra-lint/conftest.dev/06_localhost_image_pull_policy.rego
new file mode 100644
index 0000000..9821e12
--- /dev/null
+++ b/tests/bluejay-infra-lint/conftest.dev/06_localhost_image_pull_policy.rego
@@ -0,0 +1,67 @@
+package bluejayinfra.localhost_image_pull_policy
+
+# pod_spec returns the pod template spec for each supported workload kind.
+pod_spec(spec) = pod {
+    input.kind == "Deployment"
+    pod := spec.template.spec
+}
+
+pod_spec(spec) = pod {
+    input.kind == "StatefulSet"
+    pod := spec.template.spec
+}
+
+pod_spec(spec) = pod {
+    input.kind == "DaemonSet"
+    pod := spec.template.spec
+}
+
+pod_spec(spec) = pod {
+    input.kind == "Job"
+    pod := spec.template.spec
+}
+
+pod_spec(spec) = pod {
+    input.kind == "CronJob"
+    pod := spec.jobTemplate.spec.template.spec
+}
+
+# A container that runs a localhost/ image must set imagePullPolicy: Never.
+deny[msg] {
+    pod := pod_spec(input.spec)
+    container := pod.containers[_]
+    startswith(object.get(container, "image", ""), "localhost/")
+    object.get(container, "imagePullPolicy", "") != "Never"
+    ns := object.get(input.metadata, "namespace", "")
+    msg := sprintf("%s/%s container %s uses a localhost image without imagePullPolicy: Never", [ns, input.metadata.name, container.name])
+}
+
+# Same imagePullPolicy check for initContainers.
+deny[msg] {
+    pod := pod_spec(input.spec)
+    container := pod.initContainers[_]
+    startswith(object.get(container, "image", ""), "localhost/")
+    object.get(container, "imagePullPolicy", "") != "Never"
+    ns := object.get(input.metadata, "namespace", "")
+    msg := sprintf("%s/%s initContainer %s uses a localhost image without imagePullPolicy: Never", [ns, input.metadata.name, container.name])
+}
+
+# A bare fc-* image reference (no "/" anywhere in the name) is rejected for containers.
+deny[msg] {
+    pod := pod_spec(input.spec)
+    container := pod.containers[_]
+    startswith(object.get(container, "image", ""), "fc-")
+    not contains(object.get(container, "image", ""), "/")
+    ns := object.get(input.metadata, "namespace", "")
+    msg := sprintf("%s/%s container %s uses a non-localhost FlowerCore image reference %s", [ns, input.metadata.name, container.name, container.image])
+}
+
+# Same bare fc-* image check for initContainers.
+deny[msg] {
+    pod := pod_spec(input.spec)
+    container := pod.initContainers[_]
+    startswith(object.get(container, "image", ""), "fc-")
+    not contains(object.get(container, "image", ""), "/")
+    ns := object.get(input.metadata, "namespace", "")
+    msg := sprintf("%s/%s initContainer %s uses a non-localhost FlowerCore image reference %s", [ns, input.metadata.name, container.name, container.image])
+}
diff --git a/tests/bluejay-infra-lint/conftest.dev/07_public_egress_dns_none.rego b/tests/bluejay-infra-lint/conftest.dev/07_public_egress_dns_none.rego
new file mode 100644
index 0000000..a0164aa
--- /dev/null
+++ b/tests/bluejay-infra-lint/conftest.dev/07_public_egress_dns_none.rego
@@ -0,0 +1,42 @@
+package bluejayinfra.public_egress_dns_none
+
+# Deployments that this policy checks for an explicit public-egress DNS configuration.
+public_egress_workloads := {
+    "asterisk",
+    "fc-llm-bridge",
+    "mysql-web",
+    "php-web",
+    "ttsreader-align",
+    "ttsreader-kokoro",
+    "ttsreader-modern",
+    "ttsreader-piper",
+}
+
+# The pod spec must set dnsPolicy: None.
+deny[msg] {
+    input.kind == "Deployment"
+    public_egress_workloads[input.metadata.name]
+    object.get(input.spec.template.spec, "dnsPolicy", "") != "None"
+    ns := object.get(input.metadata, "namespace", "")
+    msg := sprintf("Deployment %s/%s must set dnsPolicy: None for public-internet egress", [ns, input.metadata.name])
+}
+
+# A missing or empty dnsConfig.searches list is reported as well.
+deny[msg] {
+    input.kind == "Deployment"
+    public_egress_workloads[input.metadata.name]
+    dns_config := object.get(input.spec.template.spec, "dnsConfig", {})
+    count(object.get(dns_config, "searches", [])) == 0
+    ns := object.get(input.metadata, "namespace", "")
+    msg := sprintf("Deployment %s/%s must set dnsConfig.searches for public-internet egress", [ns, input.metadata.name])
+}
+
+# No search suffix may contain iamworkin.lan (compared case-insensitively).
+deny[msg] {
+    input.kind == "Deployment"
+    public_egress_workloads[input.metadata.name]
+    search := object.get(object.get(input.spec.template.spec, "dnsConfig", {}), "searches", [])[_]
+    contains(lower(search), "iamworkin.lan")
+    ns := object.get(input.metadata, "namespace", "")
+    msg := sprintf("Deployment %s/%s must not include iamworkin.lan in dnsConfig.searches", [ns, input.metadata.name])
+}