Compare commits
7 Commits
2a66109f13
...
codex/s60-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a3cd67d6bb | ||
|
|
81a3ddac4c | ||
| 300f8ad546 | |||
| fe38c2641f | |||
|
|
3b40dfb185 | ||
| 103878671c | |||
|
|
36039c1335 |
@@ -109,6 +109,7 @@ spec:
|
|||||||
prometheus.io/scrape: "true"
|
prometheus.io/scrape: "true"
|
||||||
prometheus.io/port: "8080"
|
prometheus.io/port: "8080"
|
||||||
prometheus.io/path: "/metrics"
|
prometheus.io/path: "/metrics"
|
||||||
|
flowercore.io/healthz-auth-policy: "allow-anonymous"
|
||||||
spec:
|
spec:
|
||||||
# Synology NFS export `/volume1/kubernetes` ACL only allows rke2-server
|
# Synology NFS export `/volume1/kubernetes` ACL only allows rke2-server
|
||||||
# (10.0.56.11) right now. Until the ACL is widened in DSM (admin only),
|
# (10.0.56.11) right now. Until the ACL is widened in DSM (admin only),
|
||||||
@@ -126,7 +127,7 @@ spec:
|
|||||||
# dotnet.exe publish -c Release -o deploy/app \
|
# dotnet.exe publish -c Release -o deploy/app \
|
||||||
# src/FlowerCore.Distribution.Web/FlowerCore.Distribution.Web.csproj
|
# src/FlowerCore.Distribution.Web/FlowerCore.Distribution.Web.csproj
|
||||||
# podman build -t localhost/fc-distribution:v<tag> -f deploy/Dockerfile.deploy deploy
|
# podman build -t localhost/fc-distribution:v<tag> -f deploy/Dockerfile.deploy deploy
|
||||||
image: localhost/fc-distribution:v202605061948
|
image: localhost/fc-distribution:v20260604-oidc-root-anon
|
||||||
imagePullPolicy: Never
|
imagePullPolicy: Never
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 8080
|
- containerPort: 8080
|
||||||
|
|||||||
@@ -101,6 +101,7 @@ spec:
|
|||||||
prometheus.io/scrape: "true"
|
prometheus.io/scrape: "true"
|
||||||
prometheus.io/port: "5320"
|
prometheus.io/port: "5320"
|
||||||
prometheus.io/path: "/metrics/prometheus"
|
prometheus.io/path: "/metrics/prometheus"
|
||||||
|
flowercore.io/healthz-auth-policy: "allow-anonymous"
|
||||||
spec:
|
spec:
|
||||||
serviceAccountName: dns-web
|
serviceAccountName: dns-web
|
||||||
securityContext:
|
securityContext:
|
||||||
|
|||||||
@@ -131,6 +131,7 @@ spec:
|
|||||||
prometheus.io/scrape: "true"
|
prometheus.io/scrape: "true"
|
||||||
prometheus.io/port: "5200"
|
prometheus.io/port: "5200"
|
||||||
prometheus.io/path: "/metrics"
|
prometheus.io/path: "/metrics"
|
||||||
|
flowercore.io/healthz-auth-policy: "allow-anonymous"
|
||||||
spec:
|
spec:
|
||||||
nodeSelector:
|
nodeSelector:
|
||||||
kubernetes.io/hostname: rke2-server
|
kubernetes.io/hostname: rke2-server
|
||||||
|
|||||||
@@ -93,6 +93,7 @@ spec:
|
|||||||
prometheus.io/scrape: "true"
|
prometheus.io/scrape: "true"
|
||||||
prometheus.io/port: "8080"
|
prometheus.io/port: "8080"
|
||||||
prometheus.io/path: "/metrics"
|
prometheus.io/path: "/metrics"
|
||||||
|
flowercore.io/healthz-auth-policy: "allow-anonymous"
|
||||||
spec:
|
spec:
|
||||||
securityContext:
|
securityContext:
|
||||||
runAsNonRoot: true
|
runAsNonRoot: true
|
||||||
@@ -123,9 +124,9 @@ spec:
|
|||||||
value: "Production"
|
value: "Production"
|
||||||
- name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
|
- name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
|
||||||
value: "false"
|
value: "false"
|
||||||
# AuthentiK/OIDC is wired but not enforced until the
|
# AuthentiK/OIDC is enforced. /healthz stays anonymous by contract;
|
||||||
# knowledge-oidc-client Secret is provisioned and
|
# see flowercore.io/healthz-auth-policy above and the Sprint 58
|
||||||
# FlowerCore__Auth__Enabled is flipped to true.
|
# OIDC readiness probe audit.
|
||||||
- name: FlowerCore__Auth__Enabled
|
- name: FlowerCore__Auth__Enabled
|
||||||
value: "true"
|
value: "true"
|
||||||
- name: FlowerCore__Auth__Oidc__Enabled
|
- name: FlowerCore__Auth__Oidc__Enabled
|
||||||
|
|||||||
@@ -216,19 +216,24 @@ data:
|
|||||||
- job_name: "pimanager-app"
|
- job_name: "pimanager-app"
|
||||||
scrape_interval: 15s
|
scrape_interval: 15s
|
||||||
metrics_path: /metrics
|
metrics_path: /metrics
|
||||||
|
scheme: https
|
||||||
|
tls_config:
|
||||||
|
insecure_skip_verify: true
|
||||||
static_configs:
|
static_configs:
|
||||||
- targets: ["10.0.58.25:5000"]
|
- targets: ["piez.iamworkin.lan"]
|
||||||
labels:
|
labels:
|
||||||
instance: "piez"
|
instance: "piez"
|
||||||
service: "pimanager"
|
service: "signalcontrol"
|
||||||
vlan: "home"
|
vlan: "home"
|
||||||
device: "pi4-ezconnect"
|
device: "pi4-ezconnect"
|
||||||
- targets: ["10.0.58.113:5200"]
|
rig: "signal-b"
|
||||||
|
- targets: ["pirelay.iamworkin.lan"]
|
||||||
labels:
|
labels:
|
||||||
instance: "pirelay"
|
instance: "pirelay"
|
||||||
service: "pimanager"
|
service: "signalcontrol"
|
||||||
vlan: "home"
|
vlan: "home"
|
||||||
device: "pi3-ks0212"
|
device: "pi3-ks0212"
|
||||||
|
rig: "signal-a"
|
||||||
|
|
||||||
# Epson ET-3750 EcoTank Printer SNMP
|
# Epson ET-3750 EcoTank Printer SNMP
|
||||||
- job_name: "snmp-printer"
|
- job_name: "snmp-printer"
|
||||||
@@ -481,22 +486,31 @@ data:
|
|||||||
- "https://intranet.iamworkin.lan/"
|
- "https://intranet.iamworkin.lan/"
|
||||||
- "https://signage.iamworkin.lan/healthz" # root 401 auth-gated 2026-06-01; /healthz anon 200
|
- "https://signage.iamworkin.lan/healthz" # root 401 auth-gated 2026-06-01; /healthz anon 200
|
||||||
- "https://kiosk.iamworkin.lan/"
|
- "https://kiosk.iamworkin.lan/"
|
||||||
- "https://media.iamworkin.lan/healthz" # root auth-gated by OIDC; /healthz anon 200
|
- "https://media.iamworkin.lan/healthz" # root auth-gated by OIDC; /healthz anonymous 200
|
||||||
- "https://mysql.iamworkin.lan/healthz" # root 401 auth-gated 2026-06-01; /healthz anon 200
|
- "https://mysql.iamworkin.lan/healthz" # root 401 auth-gated 2026-06-01; /healthz anon 200
|
||||||
- "https://php.iamworkin.lan/healthz" # root 401 auth-gated 2026-06-01; /healthz anon 200
|
- "https://php.iamworkin.lan/healthz" # root 401 auth-gated 2026-06-01; /healthz anon 200
|
||||||
- "https://zabbix.iamworkin.lan/"
|
- "https://zabbix.iamworkin.lan/"
|
||||||
- "https://desktop.iamworkin.lan/"
|
- "https://desktop.iamworkin.lan/"
|
||||||
- "https://print.iamworkin.lan/"
|
- "https://print.iamworkin.lan/healthz" # root 401 behind API key auth; /healthz anonymous 200
|
||||||
- "https://dns.iamworkin.lan/healthz" # root auth-gated by OIDC; /healthz anon 200
|
- "https://dns.iamworkin.lan/healthz" # root auth-gated by OIDC; /healthz anonymous 200
|
||||||
- "https://chat.iamworkin.lan/"
|
- "https://signalcontrol.iamworkin.lan/health" # FlowerCore.SignalControl Pi control plane
|
||||||
- "https://dist.iamworkin.lan/healthz" # root/admin auth-gated by OIDC; /healthz anon 200
|
- "https://flowercore.iamworkin.lan/healthz" # FlowerCore landing
|
||||||
- "https://dms.iamworkin.lan/"
|
- "https://replay.iamworkin.lan/healthz" # FlowerCore.Signage replay surface
|
||||||
|
- "https://worldbuilder.iamworkin.lan/healthz" # FlowerCore.WorldBuilder
|
||||||
|
- "https://updates.iamworkin.lan/api/v1/manifests/_schema" # UpdateCenter plural LAN alias
|
||||||
|
- "https://updatecenter-internal.iamworkin.lan/api/v1/manifests/_schema" # internal UC schema route
|
||||||
|
- "https://chat.iamworkin.lan/healthz" # OIDC staged; keep blackbox off root before enforcement flips
|
||||||
|
- "https://dist.iamworkin.lan/healthz" # root/admin auth-gated by OIDC; /healthz anonymous 200
|
||||||
|
- "https://dms.iamworkin.lan/healthz" # future OIDC posture; health route is already anonymous/live
|
||||||
- "https://menuboard.iamworkin.lan/"
|
- "https://menuboard.iamworkin.lan/"
|
||||||
- "https://messageboard.iamworkin.lan/"
|
- "https://messageboard.iamworkin.lan/"
|
||||||
- "https://presentations.iamworkin.lan/"
|
- "https://presentations.iamworkin.lan/"
|
||||||
- "https://retail.iamworkin.lan/"
|
- "https://retail.iamworkin.lan/"
|
||||||
- "https://ttsreader.iamworkin.lan/"
|
- "https://ttsreader.iamworkin.lan/"
|
||||||
# Explicit healthcheck paths
|
# Explicit healthcheck paths
|
||||||
|
- "https://library.iamworkin.lan/health"
|
||||||
|
- "https://aistation.iamworkin.lan/healthz"
|
||||||
|
- "https://knowledge.iamworkin.lan/healthz"
|
||||||
- "https://fc-llm-bridge.iamworkin.lan/healthz"
|
- "https://fc-llm-bridge.iamworkin.lan/healthz"
|
||||||
- "https://acme.iamworkin.lan/health"
|
- "https://acme.iamworkin.lan/health"
|
||||||
# NOTE: services intentionally NOT in this probe surface
|
# NOTE: services intentionally NOT in this probe surface
|
||||||
@@ -908,12 +922,13 @@ data:
|
|||||||
# of idle and SNMP times out, so 5m for: would page nightly. A
|
# of idle and SNMP times out, so 5m for: would page nightly. A
|
||||||
# genuine printer outage (jam, disconnected) lasts well over 30m.
|
# genuine printer outage (jam, disconnected) lasts well over 30m.
|
||||||
- alert: EpsonPrinterDown
|
- alert: EpsonPrinterDown
|
||||||
expr: up{job="snmp-printer"} == 0
|
# hour() yields a single sample with no labels, while the left-hand side keeps
# the scrape labels (job, instance, ...). A bare `and` only keeps elements whose
# label sets match exactly, so the time-of-day gate must ignore labels via on().
expr: (max_over_time(up{job="snmp-printer"}[35m]) == bool 0) == 1 and on() (hour() >= 13 or hour() < 1)
|
||||||
for: 30m
|
for: 30m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: info
|
||||||
|
alert_channel: irc
|
||||||
annotations:
|
annotations:
|
||||||
summary: "Epson ET-3750 SNMP unreachable for >30m (likely actual fault, not sleep)"
|
summary: "Epson ET-3750 SNMP unreachable during waking hours (30m)"
|
||||||
|
|
||||||
- alert: SynologyDiskLow
|
- alert: SynologyDiskLow
|
||||||
expr: hrStorageUsed{job="snmp-nas"} / hrStorageSize{job="snmp-nas"} * 100 > 85
|
expr: hrStorageUsed{job="snmp-nas"} / hrStorageSize{job="snmp-nas"} * 100 > 85
|
||||||
@@ -1020,7 +1035,12 @@ data:
|
|||||||
- name: kubernetes-state
|
- name: kubernetes-state
|
||||||
rules:
|
rules:
|
||||||
- alert: KubeContainerRestartingFrequently
|
- alert: KubeContainerRestartingFrequently
|
||||||
expr: increase(kube_pod_container_status_restarts_total[1h]) > 5
|
# Exclude github-runner: ephemeral runners register, run one job,
|
||||||
|
# exit cleanly, and restart by design. Also require kube_pod_info so
|
||||||
|
# deleted rollout pods do not keep firing from retained restart series.
|
||||||
|
expr: |
|
||||||
|
increase(kube_pod_container_status_restarts_total{namespace!="github-runner"}[1h]) > 5
|
||||||
|
and on(namespace, pod) kube_pod_info
|
||||||
for: 15m
|
for: 15m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
@@ -1029,7 +1049,12 @@ data:
|
|||||||
description: "Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} has restarted {{ $value | printf \"%.0f\" }} times in the last hour. Check 'kubectl describe pod' + last-state termination reason."
|
description: "Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} has restarted {{ $value | printf \"%.0f\" }} times in the last hour. Check 'kubectl describe pod' + last-state termination reason."
|
||||||
|
|
||||||
- alert: KubeContainerCrashLooping
|
- alert: KubeContainerCrashLooping
|
||||||
expr: increase(kube_pod_container_status_restarts_total[15m]) > 3
|
# Same github-runner/delete-retention exclusions as the hourly
|
||||||
|
# restart rule above; real runner failures are covered by the
|
||||||
|
# dedicated LinuxRunnerOffline/MacMiniRunnerOffline alerts.
|
||||||
|
expr: |
|
||||||
|
increase(kube_pod_container_status_restarts_total{namespace!="github-runner"}[15m]) > 3
|
||||||
|
and on(namespace, pod) kube_pod_info
|
||||||
for: 5m
|
for: 5m
|
||||||
labels:
|
labels:
|
||||||
severity: critical
|
severity: critical
|
||||||
@@ -1057,7 +1082,10 @@ data:
|
|||||||
description: "Pod can't pull image. Check the image ref (often a stale tag or unreachable registry) and clean up if it's an orphan."
|
description: "Pod can't pull image. Check the image ref (often a stale tag or unreachable registry) and clean up if it's an orphan."
|
||||||
|
|
||||||
- alert: KubeDeploymentReplicasMismatch
|
- alert: KubeDeploymentReplicasMismatch
|
||||||
expr: kube_deployment_spec_replicas != kube_deployment_status_replicas_available
|
# github-runner has explicit runner-offline alerts; the generic
|
||||||
|
# replica-mismatch rule should not page on intentionally ephemeral
|
||||||
|
# 0/1 runner churn between CI jobs.
|
||||||
|
expr: kube_deployment_spec_replicas{namespace!="github-runner"} != kube_deployment_status_replicas_available{namespace!="github-runner"}
|
||||||
for: 15m
|
for: 15m
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
|
|||||||
@@ -423,6 +423,82 @@ public sealed class FleetManifestLintTests
|
|||||||
monitoring.Should().Contain("alert_channel: irc");
|
monitoring.Should().Contain("alert_channel: irc");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Verifies that noc-monitoring.yaml contains the github-runner exclusions on the
/// generic restart and replica-mismatch rules, the kube_pod_info join, and the
/// comment pointing at the dedicated runner-offline alerts.
/// </summary>
[Fact]
public void Monitoring_GenericKubernetesAlerts_MustExcludeEphemeralGithubRunnerNamespace()
{
    var manifestPath = Path.Combine(Inventory.BluejayRoot, "apps", "monitoring", "noc-monitoring.yaml");
    var monitoring = File.ReadAllText(manifestPath);

    // Checked in order; the first missing fragment fails the test.
    var requiredFragments = new[]
    {
        "kube_pod_container_status_restarts_total{namespace!=\"github-runner\"}",
        "and on(namespace, pod) kube_pod_info",
        "kube_deployment_spec_replicas{namespace!=\"github-runner\"} != kube_deployment_status_replicas_available{namespace!=\"github-runner\"}",
        "dedicated LinuxRunnerOffline/MacMiniRunnerOffline alerts",
    };

    foreach (var fragment in requiredFragments)
    {
        monitoring.Should().Contain(fragment);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Verifies that noc-monitoring.yaml lists the expected health-route URLs and
/// no longer contains the bare print root URL as a quoted target.
/// </summary>
[Fact]
public void Monitoring_BlackboxTargetsForOidcSensitiveServices_MustUseAnonymousHealthRoutesWhenAvailable()
{
    var manifestPath = Path.Combine(Inventory.BluejayRoot, "apps", "monitoring", "noc-monitoring.yaml");
    var monitoring = File.ReadAllText(manifestPath);

    // Health endpoints that must be present, asserted in this order.
    var expectedHealthTargets = new[]
    {
        "https://chat.iamworkin.lan/healthz",
        "https://dist.iamworkin.lan/healthz",
        "https://dms.iamworkin.lan/healthz",
        "https://print.iamworkin.lan/healthz",
        "https://knowledge.iamworkin.lan/healthz",
        "https://library.iamworkin.lan/health",
        "https://aistation.iamworkin.lan/healthz",
    };

    foreach (var target in expectedHealthTargets)
    {
        monitoring.Should().Contain(target);
    }

    // The trailing quote pins the match to the root URL entry, so the
    // "/healthz" entry asserted above does not trip this check.
    monitoring.Should().NotContain("https://print.iamworkin.lan/\"");
}
|
||||||
|
|
||||||
|
/// <summary>
/// Collects every Deployment main container that has both auth flags set to
/// "true" (case-insensitive), probes "/healthz" via its readiness or startup
/// probe, and whose pod template lacks the
/// flowercore.io/healthz-auth-policy: allow-anonymous annotation.
/// The test passes only when no such container exists.
/// </summary>
[Fact]
public void OidcEnforcedDeployments_WithHttpHealthzProbes_MustDeclareAnonymousHealthzContract()
{
    const string healthzPath = "/healthz";
    const string policyAnnotation = "flowercore.io/healthz-auth-policy";

    // Filters are kept as separate clauses in the original order so each
    // helper is only evaluated for containers that passed the previous checks.
    var violations = (
        from document in Inventory.Documents
        where document.Kind == "Deployment"
        from container in document.MainContainerMappings()
        where string.Equals(EnvValue(container, "FlowerCore__Auth__Enabled"), "true", StringComparison.OrdinalIgnoreCase)
        where string.Equals(EnvValue(container, "FlowerCore__Auth__Oidc__Enabled"), "true", StringComparison.OrdinalIgnoreCase)
        where ProbeHttpGetPath(container, "readinessProbe") == healthzPath
            || ProbeHttpGetPath(container, "startupProbe") == healthzPath
        where !string.Equals(PodAnnotation(document, policyAnnotation), "allow-anonymous", StringComparison.Ordinal)
        let containerName = ManifestNodeExtensions.Scalar(container, "name") ?? "<unnamed>"
        select $"{document.Descriptor} container '{containerName}' enforces OIDC while probing /healthz but lacks flowercore.io/healthz-auth-policy: allow-anonymous."
    ).ToList();

    violations.Should().BeEmpty();
}
|
||||||
|
|
||||||
|
/// <summary>
/// Pins the knowledge/knowledge-web Deployment: exactly one main container,
/// both auth flags "true", a readiness probe on "/healthz", and the
/// allow-anonymous healthz annotation on the pod template.
/// </summary>
[Fact]
public void Knowledge_OidcEnforcement_MustKeepHealthzAnonymousContractVisibleInManifest()
{
    var deployment = Inventory.Documents.Single(candidate =>
        candidate.Kind == "Deployment"
        && candidate.Namespace == "knowledge"
        && candidate.Name == "knowledge-web");
    var mainContainer = deployment.MainContainerMappings().Should().ContainSingle().Subject;

    var authEnabled = EnvValue(mainContainer, "FlowerCore__Auth__Enabled");
    authEnabled.Should().Be("true");

    var oidcEnabled = EnvValue(mainContainer, "FlowerCore__Auth__Oidc__Enabled");
    oidcEnabled.Should().Be("true");

    var readinessPath = ProbeHttpGetPath(mainContainer, "readinessProbe");
    readinessPath.Should().Be("/healthz");

    var healthzPolicy = PodAnnotation(deployment, "flowercore.io/healthz-auth-policy");
    healthzPolicy.Should().Be("allow-anonymous");
}
|
||||||
|
|
||||||
|
/// <summary>
/// Pins the fc-distribution/fc-distribution Deployment: exactly one main
/// container, both auth flags "true", a readiness probe on "/healthz", and the
/// allow-anonymous healthz annotation on the pod template.
/// </summary>
[Fact]
public void Distribution_OidcEnforcement_MustKeepHealthzAnonymousContractVisibleInManifest()
{
    var deployment = Inventory.Documents.Single(candidate =>
        candidate.Kind == "Deployment"
        && candidate.Namespace == "fc-distribution"
        && candidate.Name == "fc-distribution");
    var mainContainer = deployment.MainContainerMappings().Should().ContainSingle().Subject;

    // The OIDC flag is asserted before the general auth flag, as in the original.
    var oidcEnabled = EnvValue(mainContainer, "FlowerCore__Auth__Oidc__Enabled");
    oidcEnabled.Should().Be("true");

    var authEnabled = EnvValue(mainContainer, "FlowerCore__Auth__Enabled");
    authEnabled.Should().Be("true");

    var readinessPath = ProbeHttpGetPath(mainContainer, "readinessProbe");
    readinessPath.Should().Be("/healthz");

    var healthzPolicy = PodAnnotation(deployment, "flowercore.io/healthz-auth-policy");
    healthzPolicy.Should().Be("allow-anonymous");
}
|
||||||
|
|
||||||
[Fact]
|
[Fact]
|
||||||
public void StatefulSets_WithVolumeClaimTemplates_MustDeclareFilesystemDefaults()
|
public void StatefulSets_WithVolumeClaimTemplates_MustDeclareFilesystemDefaults()
|
||||||
{
|
{
|
||||||
@@ -790,6 +866,12 @@ public sealed class FleetManifestLintTests
|
|||||||
"/volume1/kubernetes/fc-media-inbox",
|
"/volume1/kubernetes/fc-media-inbox",
|
||||||
"/volume1/video",
|
"/volume1/video",
|
||||||
});
|
});
|
||||||
|
|
||||||
|
var distributionDeployment = AppDocuments("fc-distribution")
|
||||||
|
.Single(document => document.Kind == "Deployment" && document.Name == "fc-distribution");
|
||||||
|
var distributionContainer = distributionDeployment.MainContainerMappings().Should().ContainSingle().Subject;
|
||||||
|
|
||||||
|
ManifestNodeExtensions.Scalar(distributionContainer, "image").Should().Be("localhost/fc-distribution:v20260604-oidc-root-anon");
|
||||||
}
|
}
|
||||||
|
|
||||||
[Fact]
|
[Fact]
|
||||||
@@ -920,6 +1002,19 @@ public sealed class FleetManifestLintTests
|
|||||||
.SingleOrDefault(env => string.Equals(ManifestNodeExtensions.Scalar(env, "name"), name, StringComparison.Ordinal));
|
.SingleOrDefault(env => string.Equals(ManifestNodeExtensions.Scalar(env, "name"), name, StringComparison.Ordinal));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Looks up the scalar at spec.template.metadata.annotations.&lt;name&gt; in the given document.
/// </summary>
/// <param name="document">Manifest document to read from.</param>
/// <param name="name">Annotation key to look up.</param>
/// <returns>Whatever <c>document.Scalar</c> yields for that path (nullable).</returns>
private static string? PodAnnotation(ManifestDocument document, string name)
    => document.Scalar("spec", "template", "metadata", "annotations", name);
|
||||||
|
|
||||||
|
/// <summary>
/// Reads <c>&lt;probeKey&gt;.httpGet.path</c> from a container mapping.
/// </summary>
/// <param name="container">Container mapping node to inspect.</param>
/// <param name="probeKey">Probe key on the container, e.g. "readinessProbe".</param>
/// <returns>
/// The scalar at <c>httpGet.path</c>, or <c>null</c> when the probe mapping or
/// its <c>httpGet</c> mapping cannot be obtained.
/// </returns>
private static string? ProbeHttpGetPath(YamlMappingNode container, string probeKey)
{
    if (!ManifestNodeExtensions.TryGetMapping(container, probeKey, out var probeMapping))
    {
        return null;
    }

    if (!ManifestNodeExtensions.TryGetMapping(probeMapping, "httpGet", out var httpGetMapping))
    {
        return null;
    }

    return ManifestNodeExtensions.Scalar(httpGetMapping, "path");
}
|
||||||
|
|
||||||
private static IReadOnlyList<ManifestDocument> FcDeviceManagementDocuments()
|
private static IReadOnlyList<ManifestDocument> FcDeviceManagementDocuments()
|
||||||
{
|
{
|
||||||
return Inventory.Documents
|
return Inventory.Documents
|
||||||
|
|||||||
Reference in New Issue
Block a user