Compare commits
3 Commits
codex/s58-
...
81a3ddac4c
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
81a3ddac4c | ||
| 300f8ad546 | |||
| fe38c2641f |
@@ -109,6 +109,7 @@ spec:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "8080"
|
||||
prometheus.io/path: "/metrics"
|
||||
flowercore.io/healthz-auth-policy: "allow-anonymous"
|
||||
spec:
|
||||
# Synology NFS export `/volume1/kubernetes` ACL only allows rke2-server
|
||||
# (10.0.56.11) right now. Until the ACL is widened in DSM (admin only),
|
||||
|
||||
@@ -101,6 +101,7 @@ spec:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "5320"
|
||||
prometheus.io/path: "/metrics/prometheus"
|
||||
flowercore.io/healthz-auth-policy: "allow-anonymous"
|
||||
spec:
|
||||
serviceAccountName: dns-web
|
||||
securityContext:
|
||||
|
||||
@@ -131,6 +131,7 @@ spec:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "5200"
|
||||
prometheus.io/path: "/metrics"
|
||||
flowercore.io/healthz-auth-policy: "allow-anonymous"
|
||||
spec:
|
||||
nodeSelector:
|
||||
kubernetes.io/hostname: rke2-server
|
||||
|
||||
@@ -93,6 +93,7 @@ spec:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "8080"
|
||||
prometheus.io/path: "/metrics"
|
||||
flowercore.io/healthz-auth-policy: "allow-anonymous"
|
||||
spec:
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
@@ -123,9 +124,9 @@ spec:
|
||||
value: "Production"
|
||||
- name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
|
||||
value: "false"
|
||||
# AuthentiK/OIDC is wired but not enforced until the
|
||||
# knowledge-oidc-client Secret is provisioned and
|
||||
# FlowerCore__Auth__Enabled is flipped to true.
|
||||
# AuthentiK/OIDC is enforced. /healthz stays anonymous by contract;
|
||||
# see flowercore.io/healthz-auth-policy above and the Sprint 58
|
||||
# OIDC readiness probe audit.
|
||||
- name: FlowerCore__Auth__Enabled
|
||||
value: "true"
|
||||
- name: FlowerCore__Auth__Oidc__Enabled
|
||||
|
||||
@@ -481,22 +481,25 @@ data:
|
||||
- "https://intranet.iamworkin.lan/"
|
||||
- "https://signage.iamworkin.lan/healthz" # root 401 auth-gated 2026-06-01; /healthz anon 200
|
||||
- "https://kiosk.iamworkin.lan/"
|
||||
- "https://media.iamworkin.lan/healthz" # root auth-gated by OIDC; /healthz anon 200
|
||||
- "https://media.iamworkin.lan/healthz" # root auth-gated by OIDC; /healthz anonymous 200
|
||||
- "https://mysql.iamworkin.lan/healthz" # root 401 auth-gated 2026-06-01; /healthz anon 200
|
||||
- "https://php.iamworkin.lan/healthz" # root 401 auth-gated 2026-06-01; /healthz anon 200
|
||||
- "https://zabbix.iamworkin.lan/"
|
||||
- "https://desktop.iamworkin.lan/"
|
||||
- "https://print.iamworkin.lan/"
|
||||
- "https://dns.iamworkin.lan/healthz" # root auth-gated by OIDC; /healthz anon 200
|
||||
- "https://chat.iamworkin.lan/"
|
||||
- "https://dist.iamworkin.lan/healthz" # root/admin auth-gated by OIDC; /healthz anon 200
|
||||
- "https://dms.iamworkin.lan/"
|
||||
- "https://print.iamworkin.lan/healthz" # root 401 behind API key auth; /healthz anonymous 200
|
||||
- "https://dns.iamworkin.lan/healthz" # root auth-gated by OIDC; /healthz anonymous 200
|
||||
- "https://chat.iamworkin.lan/healthz" # OIDC staged; keep blackbox off root before enforcement flips
|
||||
- "https://dist.iamworkin.lan/healthz" # root/admin auth-gated by OIDC; /healthz anonymous 200
|
||||
- "https://dms.iamworkin.lan/healthz" # future OIDC posture; health route is already anonymous/live
|
||||
- "https://menuboard.iamworkin.lan/"
|
||||
- "https://messageboard.iamworkin.lan/"
|
||||
- "https://presentations.iamworkin.lan/"
|
||||
- "https://retail.iamworkin.lan/"
|
||||
- "https://ttsreader.iamworkin.lan/"
|
||||
# Explicit healthcheck paths
|
||||
- "https://library.iamworkin.lan/health"
|
||||
- "https://aistation.iamworkin.lan/healthz"
|
||||
- "https://knowledge.iamworkin.lan/healthz"
|
||||
- "https://fc-llm-bridge.iamworkin.lan/healthz"
|
||||
- "https://acme.iamworkin.lan/health"
|
||||
# NOTE: services intentionally NOT in this probe surface
|
||||
@@ -1020,7 +1023,12 @@ data:
|
||||
- name: kubernetes-state
|
||||
rules:
|
||||
- alert: KubeContainerRestartingFrequently
|
||||
expr: increase(kube_pod_container_status_restarts_total[1h]) > 5
|
||||
# Exclude github-runner: ephemeral runners register, run one job,
|
||||
# exit cleanly, and restart by design. Also require kube_pod_info so
|
||||
# deleted rollout pods do not keep firing from retained restart series.
|
||||
expr: |
|
||||
increase(kube_pod_container_status_restarts_total{namespace!="github-runner"}[1h]) > 5
|
||||
and on(namespace, pod) kube_pod_info
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
@@ -1029,7 +1037,12 @@ data:
|
||||
description: "Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} has restarted {{ $value | printf \"%.0f\" }} times in the last hour. Check 'kubectl describe pod' + last-state termination reason."
|
||||
|
||||
- alert: KubeContainerCrashLooping
|
||||
expr: increase(kube_pod_container_status_restarts_total[15m]) > 3
|
||||
# Same github-runner/delete-retention exclusions as the hourly
|
||||
# restart rule above; real runner failures are covered by the
|
||||
# dedicated LinuxRunnerOffline/MacMiniRunnerOffline alerts.
|
||||
expr: |
|
||||
increase(kube_pod_container_status_restarts_total{namespace!="github-runner"}[15m]) > 3
|
||||
and on(namespace, pod) kube_pod_info
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
@@ -1057,7 +1070,10 @@ data:
|
||||
description: "Pod can't pull image. Check the image ref (often a stale tag or unreachable registry) and clean up if it's an orphan."
|
||||
|
||||
- alert: KubeDeploymentReplicasMismatch
|
||||
expr: kube_deployment_spec_replicas != kube_deployment_status_replicas_available
|
||||
# github-runner has explicit runner-offline alerts; the generic
|
||||
# replica-mismatch rule should not page on intentionally ephemeral
|
||||
# 0/1 runner churn between CI jobs.
|
||||
expr: kube_deployment_spec_replicas{namespace!="github-runner"} != kube_deployment_status_replicas_available{namespace!="github-runner"}
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
|
||||
@@ -423,6 +423,82 @@ public sealed class FleetManifestLintTests
|
||||
monitoring.Should().Contain("alert_channel: irc");
|
||||
}
|
||||
|
||||
/// <summary>
/// Checks that the monitoring manifest text still contains the github-runner
/// exclusions on the generic Kubernetes restart and replica-mismatch alert
/// expressions, the kube_pod_info join, and the comment pointing at the
/// dedicated runner-offline alerts.
/// </summary>
[Fact]
public void Monitoring_GenericKubernetesAlerts_MustExcludeEphemeralGithubRunnerNamespace()
{
    var manifestPath = Path.Combine(Inventory.BluejayRoot, "apps", "monitoring", "noc-monitoring.yaml");
    var manifestText = File.ReadAllText(manifestPath);

    // Checked in order; the first missing fragment fails the test.
    string[] requiredFragments =
    {
        "kube_pod_container_status_restarts_total{namespace!=\"github-runner\"}",
        "and on(namespace, pod) kube_pod_info",
        "kube_deployment_spec_replicas{namespace!=\"github-runner\"} != kube_deployment_status_replicas_available{namespace!=\"github-runner\"}",
        "dedicated LinuxRunnerOffline/MacMiniRunnerOffline alerts",
    };

    foreach (var fragment in requiredFragments)
    {
        manifestText.Should().Contain(fragment);
    }
}
|
||||
|
||||
/// <summary>
/// Checks that the monitoring manifest text lists the health-route URLs for
/// the services named below, and that the bare print root URL (followed by a
/// closing quote) is no longer present.
/// </summary>
[Fact]
public void Monitoring_BlackboxTargetsForOidcSensitiveServices_MustUseAnonymousHealthRoutesWhenAvailable()
{
    var manifestPath = Path.Combine(Inventory.BluejayRoot, "apps", "monitoring", "noc-monitoring.yaml");
    var manifestText = File.ReadAllText(manifestPath);

    // Checked in order; the first missing URL fails the test.
    string[] expectedHealthTargets =
    {
        "https://chat.iamworkin.lan/healthz",
        "https://dist.iamworkin.lan/healthz",
        "https://dms.iamworkin.lan/healthz",
        "https://print.iamworkin.lan/healthz",
        "https://knowledge.iamworkin.lan/healthz",
        "https://library.iamworkin.lan/health",
        "https://aistation.iamworkin.lan/healthz",
    };

    foreach (var target in expectedHealthTargets)
    {
        manifestText.Should().Contain(target);
    }

    // The trailing quote makes this match only the root URL, not the /healthz target.
    manifestText.Should().NotContain("https://print.iamworkin.lan/\"");
}
|
||||
|
||||
/// <summary>
/// Lints every Deployment in the inventory: a main container that sets both
/// <c>FlowerCore__Auth__Enabled</c> and <c>FlowerCore__Auth__Oidc__Enabled</c>
/// to "true" (case-insensitive) and points any HTTP probe at <c>/healthz</c>
/// must carry the pod annotation
/// <c>flowercore.io/healthz-auth-policy: allow-anonymous</c> (exact match).
/// </summary>
[Fact]
public void OidcEnforcedDeployments_WithHttpHealthzProbes_MustDeclareAnonymousHealthzContract()
{
    // All three kubelet probe kinds are checked. A liveness probe that is
    // rejected by auth restarts the container, so it needs the same
    // anonymous-/healthz contract as readiness and startup probes.
    string[] probeKeys = { "readinessProbe", "startupProbe", "livenessProbe" };

    var violations = Inventory.Documents
        .Where(document => document.Kind == "Deployment")
        .SelectMany(document => document.MainContainerMappings()
            .Where(container => string.Equals(EnvValue(container, "FlowerCore__Auth__Enabled"), "true", StringComparison.OrdinalIgnoreCase))
            .Where(container => string.Equals(EnvValue(container, "FlowerCore__Auth__Oidc__Enabled"), "true", StringComparison.OrdinalIgnoreCase))
            .Where(container => probeKeys.Any(probeKey => ProbeHttpGetPath(container, probeKey) == "/healthz"))
            .Where(_ => !string.Equals(
                PodAnnotation(document, "flowercore.io/healthz-auth-policy"),
                "allow-anonymous",
                StringComparison.Ordinal))
            .Select(container =>
            {
                var containerName = ManifestNodeExtensions.Scalar(container, "name") ?? "<unnamed>";
                return $"{document.Descriptor} container '{containerName}' enforces OIDC while probing /healthz but lacks flowercore.io/healthz-auth-policy: allow-anonymous.";
            }))
        .ToList();

    violations.Should().BeEmpty();
}
|
||||
|
||||
/// <summary>
/// Pins the knowledge-web Deployment: auth and OIDC flags are "true", the
/// readiness probe targets <c>/healthz</c>, and the pod template carries the
/// <c>flowercore.io/healthz-auth-policy: allow-anonymous</c> annotation.
/// </summary>
[Fact]
public void Knowledge_OidcEnforcement_MustKeepHealthzAnonymousContractVisibleInManifest()
{
    var deployment = Inventory.Documents
        .Where(candidate => candidate.Kind == "Deployment"
            && candidate.Namespace == "knowledge"
            && candidate.Name == "knowledge-web")
        .Single();
    var mainContainer = deployment.MainContainerMappings().Should().ContainSingle().Subject;

    EnvValue(mainContainer, "FlowerCore__Auth__Enabled")
        .Should().Be("true");
    EnvValue(mainContainer, "FlowerCore__Auth__Oidc__Enabled")
        .Should().Be("true");
    ProbeHttpGetPath(mainContainer, "readinessProbe")
        .Should().Be("/healthz");
    PodAnnotation(deployment, "flowercore.io/healthz-auth-policy")
        .Should().Be("allow-anonymous");
}
|
||||
|
||||
/// <summary>
/// Pins the fc-distribution Deployment: OIDC and auth flags are "true", the
/// readiness probe targets <c>/healthz</c>, and the pod template carries the
/// <c>flowercore.io/healthz-auth-policy: allow-anonymous</c> annotation.
/// </summary>
[Fact]
public void Distribution_OidcEnforcement_MustKeepHealthzAnonymousContractVisibleInManifest()
{
    var deployment = Inventory.Documents
        .Where(candidate => candidate.Kind == "Deployment"
            && candidate.Namespace == "fc-distribution"
            && candidate.Name == "fc-distribution")
        .Single();
    var mainContainer = deployment.MainContainerMappings().Should().ContainSingle().Subject;

    EnvValue(mainContainer, "FlowerCore__Auth__Oidc__Enabled")
        .Should().Be("true");
    EnvValue(mainContainer, "FlowerCore__Auth__Enabled")
        .Should().Be("true");
    ProbeHttpGetPath(mainContainer, "readinessProbe")
        .Should().Be("/healthz");
    PodAnnotation(deployment, "flowercore.io/healthz-auth-policy")
        .Should().Be("allow-anonymous");
}
|
||||
|
||||
[Fact]
|
||||
public void StatefulSets_WithVolumeClaimTemplates_MustDeclareFilesystemDefaults()
|
||||
{
|
||||
@@ -926,6 +1002,19 @@ public sealed class FleetManifestLintTests
|
||||
.SingleOrDefault(env => string.Equals(ManifestNodeExtensions.Scalar(env, "name"), name, StringComparison.Ordinal));
|
||||
}
|
||||
|
||||
/// <summary>
/// Looks up the annotation <paramref name="name"/> under
/// <c>spec.template.metadata.annotations</c> of <paramref name="document"/>.
/// </summary>
/// <returns>
/// Whatever <c>document.Scalar</c> yields for that path; presumably
/// <c>null</c> when the annotation is absent (confirm against
/// <c>ManifestDocument.Scalar</c>).
/// </returns>
private static string? PodAnnotation(ManifestDocument document, string name)
    => document.Scalar("spec", "template", "metadata", "annotations", name);
|
||||
|
||||
/// <summary>
/// Reads <c>&lt;probeKey&gt;.httpGet.path</c> from a container mapping.
/// </summary>
/// <param name="container">Container mapping node to inspect.</param>
/// <param name="probeKey">Probe key on the container, e.g. <c>readinessProbe</c>.</param>
/// <returns>
/// The scalar at <c>httpGet.path</c>, or <c>null</c> when the probe mapping
/// or its <c>httpGet</c> mapping cannot be obtained.
/// </returns>
private static string? ProbeHttpGetPath(YamlMappingNode container, string probeKey)
{
    if (!ManifestNodeExtensions.TryGetMapping(container, probeKey, out var probeNode))
    {
        return null;
    }

    if (!ManifestNodeExtensions.TryGetMapping(probeNode, "httpGet", out var httpGetNode))
    {
        return null;
    }

    return ManifestNodeExtensions.Scalar(httpGetNode, "path");
}
|
||||
|
||||
private static IReadOnlyList<ManifestDocument> FcDeviceManagementDocuments()
|
||||
{
|
||||
return Inventory.Documents
|
||||
|
||||
Reference in New Issue
Block a user