Compare commits
1 Commits
sprint39/c
...
sprint39/c
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
55729a24f9 |
@@ -103,7 +103,6 @@ curl -sk -X DELETE https://dns.iamworkin.lan/api/v1/servers/<serverId>/zones/iam
|
|||||||
- **Public read-only hosts**: if a public host fronts a service that also exposes admin writes internally, add a Traefik route match like `Host(...) && (Method(GET) || Method(HEAD))` on the public edge instead of trusting the app to reject unsafe methods.
|
- **Public read-only hosts**: if a public host fronts a service that also exposes admin writes internally, add a Traefik route match like `Host(...) && (Method(GET) || Method(HEAD))` on the public edge instead of trusting the app to reject unsafe methods.
|
||||||
- **Public read-write allowlist hosts**: if a public host accepts a tightly bounded write surface (e.g. bootstrap-JWT POST), pin the allowlist as `(Method(GET) || Method(HEAD) || Method(POST) || Method(OPTIONS))`. PUT/PATCH/DELETE must still 404 at the route. Track A's `updatecenter.iamworkin.lan` / `updates.iamworkin.lan` are the canonical example. The lint test enforces this invariant.
|
- **Public read-write allowlist hosts**: if a public host accepts a tightly bounded write surface (e.g. bootstrap-JWT POST), pin the allowlist as `(Method(GET) || Method(HEAD) || Method(POST) || Method(OPTIONS))`. PUT/PATCH/DELETE must still 404 at the route. Track A's `updatecenter.iamworkin.lan` / `updates.iamworkin.lan` are the canonical example. The lint test enforces this invariant.
|
||||||
- **Traefik VIP netpols**: when a `NetworkPolicy` allows `10.0.56.200`, also allow the post-DNAT backend ports (`8443` for TLS plus `8080` or `8000` for HTTP) or Calico will drop the rewritten flow.
|
- **Traefik VIP netpols**: when a `NetworkPolicy` allows `10.0.56.200`, also allow the post-DNAT backend ports (`8443` for TLS plus `8080` or `8000` for HTTP) or Calico will drop the rewritten flow.
|
||||||
- **RemoteDesktop isolation**: `apps/fc-desktop/network-policies.yaml` intentionally keeps desktop pod egress to named CoreDNS, `intranet-web:5300/TCP`, and noc1 step-ca `10.0.56.10:9000/9443` only. Guacamole display egress is owned separately by `apps/guacamole/guacamole.yaml` through `guacd-desktop-egress` on `5901/TCP`.
|
|
||||||
- **Auth-safe probes**: services behind API-key or global auth middleware should prefer `tcpSocket` probes unless `/health` is explicitly exempted before the middleware runs.
|
- **Auth-safe probes**: services behind API-key or global auth middleware should prefer `tcpSocket` probes unless `/health` is explicitly exempted before the middleware runs.
|
||||||
- **ArgoCD must use internal Gitea URL**: `http://gitea-clusterip.gitea.svc.cluster.local:3000/bluejay/bluejay-infra.git`, not the external HTTPS URL (step-ca cert isn't trusted by ArgoCD). The `ApplicationSet` and any hand-created `Application` must both use the internal URL.
|
- **ArgoCD must use internal Gitea URL**: `http://gitea-clusterip.gitea.svc.cluster.local:3000/bluejay/bluejay-infra.git`, not the external HTTPS URL (step-ca cert isn't trusted by ArgoCD). The `ApplicationSet` and any hand-created `Application` must both use the internal URL.
|
||||||
|
|
||||||
|
|||||||
@@ -20,12 +20,9 @@
|
|||||||
# 1) desktop-isolation — Browser Lab session pods.
|
# 1) desktop-isolation — Browser Lab session pods.
|
||||||
#
|
#
|
||||||
# Locks down pods labeled `app.kubernetes.io/name=remote-desktop` (every
|
# Locks down pods labeled `app.kubernetes.io/name=remote-desktop` (every
|
||||||
# session pod regardless of template). Allows guacd ingress for the display
|
# session pod regardless of template). Allows guacd ingress for the VNC/RDP
|
||||||
# lane and remotedesktop-web's pre-handoff probing. Egress is deliberately
|
# display lane and remotedesktop-web's pre-handoff probing. Egress: NFS to
|
||||||
# narrow: named CoreDNS, direct Intranet web, and noc1 step-ca only. There is
|
# Synology, DNS, Traefik (cluster + LB VIP), Intranet (Browser Lab home).
|
||||||
# no broad Traefik/VIP or internet egress from desktop sessions. If a future
|
|
||||||
# Browser Lab path needs a public-style host, prefer an explicit Service rule
|
|
||||||
# or include the post-DNAT backend port per the Traefik VIP lint.
|
|
||||||
apiVersion: networking.k8s.io/v1
|
apiVersion: networking.k8s.io/v1
|
||||||
kind: NetworkPolicy
|
kind: NetworkPolicy
|
||||||
metadata:
|
metadata:
|
||||||
@@ -68,22 +65,51 @@ spec:
|
|||||||
- port: 5901
|
- port: 5901
|
||||||
protocol: TCP
|
protocol: TCP
|
||||||
egress:
|
egress:
|
||||||
# CoreDNS only. The old to: [] DNS rule accidentally allowed any DNS
|
# NFS to Synology
|
||||||
# listener in any namespace or routed network.
|
|
||||||
- to:
|
- to:
|
||||||
- namespaceSelector:
|
- ipBlock:
|
||||||
matchLabels:
|
cidr: 10.0.58.3/32
|
||||||
kubernetes.io/metadata.name: kube-system
|
ports:
|
||||||
podSelector:
|
- port: 2049
|
||||||
matchLabels:
|
protocol: TCP
|
||||||
k8s-app: kube-dns
|
- port: 2049
|
||||||
|
protocol: UDP
|
||||||
|
- port: 111
|
||||||
|
protocol: TCP
|
||||||
|
- port: 111
|
||||||
|
protocol: UDP
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.58.3/32
|
||||||
|
ports:
|
||||||
|
- port: 445
|
||||||
|
protocol: TCP
|
||||||
|
- to: []
|
||||||
ports:
|
ports:
|
||||||
- port: 53
|
- port: 53
|
||||||
protocol: UDP
|
protocol: UDP
|
||||||
- port: 53
|
- port: 53
|
||||||
protocol: TCP
|
protocol: TCP
|
||||||
# Browser Lab home / internal docs target. Use the real service port
|
- to:
|
||||||
# directly rather than public Traefik host aliases.
|
- ipBlock:
|
||||||
|
cidr: 10.0.56.200/32
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.43.33.87/32
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: traefik-system
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: traefik
|
||||||
|
ports:
|
||||||
|
- port: 80
|
||||||
|
protocol: TCP
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8000
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8443
|
||||||
|
protocol: TCP
|
||||||
- to:
|
- to:
|
||||||
- namespaceSelector:
|
- namespaceSelector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
@@ -94,17 +120,6 @@ spec:
|
|||||||
ports:
|
ports:
|
||||||
- port: 5300
|
- port: 5300
|
||||||
protocol: TCP
|
protocol: TCP
|
||||||
# noc1 step-ca ACME endpoint. The lane brief called out 9000/TCP; the live
|
|
||||||
# ACME directory currently answers on 9443/TCP, so both stay pinned to the
|
|
||||||
# same host rather than reopening Traefik or internet egress.
|
|
||||||
- to:
|
|
||||||
- ipBlock:
|
|
||||||
cidr: 10.0.56.10/32
|
|
||||||
ports:
|
|
||||||
- port: 9000
|
|
||||||
protocol: TCP
|
|
||||||
- port: 9443
|
|
||||||
protocol: TCP
|
|
||||||
---
|
---
|
||||||
# 2) fc-desktop-default-deny — namespace-wide catch-all.
|
# 2) fc-desktop-default-deny — namespace-wide catch-all.
|
||||||
#
|
#
|
||||||
@@ -315,11 +330,3 @@ spec:
|
|||||||
protocol: UDP
|
protocol: UDP
|
||||||
- port: 53
|
- port: 53
|
||||||
protocol: TCP
|
protocol: TCP
|
||||||
- to:
|
|
||||||
- ipBlock:
|
|
||||||
cidr: 10.0.56.10/32
|
|
||||||
ports:
|
|
||||||
- port: 9000
|
|
||||||
protocol: TCP
|
|
||||||
- port: 9443
|
|
||||||
protocol: TCP
|
|
||||||
|
|||||||
@@ -254,68 +254,6 @@ spec:
|
|||||||
targetPort: 4822
|
targetPort: 4822
|
||||||
name: guacd
|
name: guacd
|
||||||
---
|
---
|
||||||
# Guacd display egress isolation.
|
|
||||||
#
|
|
||||||
# Guacamole web talks to guacd on TCP/4822. Guacd then opens the desktop
|
|
||||||
# display connection to the per-session pod. Keep that second hop at raw VNC
|
|
||||||
# 5901/TCP for the current RemoteDesktop Browser Lab/openSUSE images. Do not
|
|
||||||
# grant guacd broad fc-desktop namespace egress; desktop-to-desktop lateral
|
|
||||||
# paths remain blocked by apps/fc-desktop/network-policies.yaml.
|
|
||||||
apiVersion: networking.k8s.io/v1
|
|
||||||
kind: NetworkPolicy
|
|
||||||
metadata:
|
|
||||||
name: guacd-desktop-egress
|
|
||||||
namespace: guacamole
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/part-of: remotedesktop
|
|
||||||
app.kubernetes.io/component: display-isolation
|
|
||||||
spec:
|
|
||||||
podSelector:
|
|
||||||
matchLabels:
|
|
||||||
app: guacd
|
|
||||||
policyTypes:
|
|
||||||
- Ingress
|
|
||||||
- Egress
|
|
||||||
ingress:
|
|
||||||
- from:
|
|
||||||
- podSelector:
|
|
||||||
matchLabels:
|
|
||||||
app: guacamole
|
|
||||||
ports:
|
|
||||||
- port: 4822
|
|
||||||
protocol: TCP
|
|
||||||
egress:
|
|
||||||
- to:
|
|
||||||
- namespaceSelector:
|
|
||||||
matchLabels:
|
|
||||||
kubernetes.io/metadata.name: kube-system
|
|
||||||
podSelector:
|
|
||||||
matchLabels:
|
|
||||||
k8s-app: kube-dns
|
|
||||||
ports:
|
|
||||||
- port: 53
|
|
||||||
protocol: UDP
|
|
||||||
- port: 53
|
|
||||||
protocol: TCP
|
|
||||||
# kubectl-proxy sidecar reaches the Kubernetes API; keep it explicit
|
|
||||||
# because this NetworkPolicy selects the whole guacd pod.
|
|
||||||
- to: []
|
|
||||||
ports:
|
|
||||||
- port: 443
|
|
||||||
protocol: TCP
|
|
||||||
- port: 6443
|
|
||||||
protocol: TCP
|
|
||||||
- to:
|
|
||||||
- namespaceSelector:
|
|
||||||
matchLabels:
|
|
||||||
kubernetes.io/metadata.name: fc-desktop
|
|
||||||
podSelector:
|
|
||||||
matchLabels:
|
|
||||||
app.kubernetes.io/name: remote-desktop
|
|
||||||
ports:
|
|
||||||
- port: 5901
|
|
||||||
protocol: TCP
|
|
||||||
---
|
|
||||||
# Guacamole Web Application
|
# Guacamole Web Application
|
||||||
apiVersion: apps/v1
|
apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
|
|||||||
@@ -824,6 +824,53 @@ data:
|
|||||||
summary: "desktop.iamworkin.lan TLS cert expires within 2 days"
|
summary: "desktop.iamworkin.lan TLS cert expires within 2 days"
|
||||||
description: "The desktop.iamworkin.lan cert is inside the 2-day renewal window and cert-manager has not renewed. Check cert-manager logs, step-ca reachability, and pfSense DNS overrides per the ACME DNS-01 gate."
|
description: "The desktop.iamworkin.lan cert is inside the 2-day renewal window and cert-manager has not renewed. Check cert-manager logs, step-ca reachability, and pfSense DNS overrides per the ACME DNS-01 gate."
|
||||||
|
|
||||||
|
- alert: LonghornPVCGrowthRapid
|
||||||
|
expr: |
|
||||||
|
(
|
||||||
|
(
|
||||||
|
(
|
||||||
|
longhorn_volume_actual_size_bytes
|
||||||
|
- (longhorn_volume_actual_size_bytes offset 1h)
|
||||||
|
)
|
||||||
|
/ clamp_min(longhorn_volume_actual_size_bytes offset 1h, 1)
|
||||||
|
)
|
||||||
|
* on(volume) group_left(namespace, persistentvolumeclaim) (
|
||||||
|
(
|
||||||
|
label_replace(kube_persistentvolumeclaim_info{storageclass="longhorn"}, "volume", "$1", "volumename", "(.+)")
|
||||||
|
* on(namespace, persistentvolumeclaim) group_left()
|
||||||
|
kube_persistentvolumeclaim_labels{label_flowercore_io_managed_by="remotedesktop"}
|
||||||
|
)
|
||||||
|
or
|
||||||
|
label_replace(kube_persistentvolumeclaim_info{namespace="fc-desktop", storageclass="longhorn", persistentvolumeclaim=~"fc-profile-.*|remotedesktop-data"}, "volume", "$1", "volumename", "(.+)")
|
||||||
|
)
|
||||||
|
) > 0.20
|
||||||
|
or
|
||||||
|
(
|
||||||
|
(
|
||||||
|
longhorn_volume_actual_size_bytes
|
||||||
|
/ on(volume) clamp_min(longhorn_volume_capacity_bytes, 1)
|
||||||
|
)
|
||||||
|
* on(volume) group_left(namespace, persistentvolumeclaim) (
|
||||||
|
(
|
||||||
|
label_replace(kube_persistentvolumeclaim_info{storageclass="longhorn"}, "volume", "$1", "volumename", "(.+)")
|
||||||
|
* on(namespace, persistentvolumeclaim) group_left()
|
||||||
|
kube_persistentvolumeclaim_labels{label_flowercore_io_managed_by="remotedesktop"}
|
||||||
|
)
|
||||||
|
or
|
||||||
|
label_replace(kube_persistentvolumeclaim_info{namespace="fc-desktop", storageclass="longhorn", persistentvolumeclaim=~"fc-profile-.*|remotedesktop-data"}, "volume", "$1", "volumename", "(.+)")
|
||||||
|
)
|
||||||
|
) > 0.80
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
alert_channel: thermal_print
|
||||||
|
service: remotedesktop
|
||||||
|
annotations:
|
||||||
|
summary: "RemoteDesktop Longhorn PVC {{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }} growing rapidly"
|
||||||
|
description: "Longhorn volume {{ $labels.volume }} backing RemoteDesktop PVC {{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }} grew more than 20% in 1h or is over 80% capacity. Check for runaway SQLite/user-profile growth; this alert was added after the 2026-05-16 RemoteDesktop web SQLite Error 13 incident."
|
||||||
|
runbook: "1. kubectl -n {{ $labels.namespace }} describe pvc {{ $labels.persistentvolumeclaim }} 2. Open Longhorn UI volume {{ $labels.volume }} 3. Check RemoteDesktop web/user-volume SQLite files for permission or runaway growth 4. Expand PVC only after confirming the writer is healthy"
|
||||||
|
todo: "2026-05-19 metric gate: live noc1 Prometheus currently exposes kube_persistentvolumeclaim_info and kube_persistentvolumeclaim_resource_requests_storage_bytes, but not longhorn_volume_actual_size_bytes, longhorn_volume_capacity_bytes, kube_persistentvolumeclaim_labels, or kubelet_volume_stats_used_bytes. Keep the fc-desktop PVC fallback until kube-state-metrics label allowlist exposes flowercore.io/managed-by=remotedesktop."
|
||||||
|
|
||||||
- name: pi-fleet
|
- name: pi-fleet
|
||||||
rules:
|
rules:
|
||||||
- alert: PiManagerDown
|
- alert: PiManagerDown
|
||||||
|
|||||||
@@ -1,93 +0,0 @@
|
|||||||
using FluentAssertions;
|
|
||||||
using Xunit;
|
|
||||||
|
|
||||||
namespace BluejayInfraLint.Tests;
|
|
||||||
|
|
||||||
[Trait("Category", "Unit")]
|
|
||||||
public sealed class RemoteDesktopNetworkPolicyTests
|
|
||||||
{
|
|
||||||
private static readonly ManifestInventory Inventory = ManifestInventory.Load();
|
|
||||||
|
|
||||||
[Fact]
|
|
||||||
public void LiveDesktopIsolation_AllowsOnlyCoreDnsIntranetAndStepCaEgress()
|
|
||||||
{
|
|
||||||
var policy = NetworkPolicy("fc-desktop", "desktop-isolation");
|
|
||||||
var ports = policy.EgressPorts().ToHashSet(StringComparer.Ordinal);
|
|
||||||
|
|
||||||
ports.Should().BeEquivalentTo("53", "5300", "9000", "9443");
|
|
||||||
policy.AllScalars().Should().Contain(new[]
|
|
||||||
{
|
|
||||||
"kube-system",
|
|
||||||
"kube-dns",
|
|
||||||
"intranet",
|
|
||||||
"intranet-web",
|
|
||||||
"10.0.56.10/32"
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
[Fact]
|
|
||||||
public void LiveDesktopIsolation_RemovesInternetNfsAndTraefikEgress()
|
|
||||||
{
|
|
||||||
var policy = NetworkPolicy("fc-desktop", "desktop-isolation");
|
|
||||||
var scalars = policy.AllScalars().ToList();
|
|
||||||
var ports = policy.EgressPorts().ToHashSet(StringComparer.Ordinal);
|
|
||||||
|
|
||||||
scalars.Should().NotContain(new[] { "10.0.58.3/32", "10.0.56.200/32", "10.43.33.87/32", "traefik-system" });
|
|
||||||
ports.Should().NotContain(new[] { "80", "443", "445", "111", "2049", "8000", "8080", "8443" });
|
|
||||||
policy.MappingSequence("spec", "egress")
|
|
||||||
.Should()
|
|
||||||
.NotContain(rule => EgressRuleHasEmptyTo(rule), "desktop sessions must not use to: [] internet-style egress");
|
|
||||||
}
|
|
||||||
|
|
||||||
[Fact]
|
|
||||||
public void LiveGuacdIsolation_AllowsRawVncToDesktopPodsOnly()
|
|
||||||
{
|
|
||||||
var policy = NetworkPolicy("guacamole", "guacd-desktop-egress");
|
|
||||||
var scalars = policy.AllScalars().ToList();
|
|
||||||
var ports = policy.EgressPorts().ToHashSet(StringComparer.Ordinal);
|
|
||||||
|
|
||||||
ports.Should().Contain("5901");
|
|
||||||
scalars.Should().Contain(new[] { "fc-desktop", "remote-desktop" });
|
|
||||||
ports.Should().NotContain(new[] { "3000", "3001", "3389", "80", "8080", "8443" });
|
|
||||||
}
|
|
||||||
|
|
||||||
[Fact]
|
|
||||||
public void LiveGuacdIsolation_KeepsGuacamoleWebIngressOnGuacdPort()
|
|
||||||
{
|
|
||||||
var policy = NetworkPolicy("guacamole", "guacd-desktop-egress");
|
|
||||||
|
|
||||||
policy.Scalar("spec", "podSelector", "matchLabels", "app").Should().Be("guacd");
|
|
||||||
policy.AllScalars().Should().Contain(new[] { "guacamole", "4822" });
|
|
||||||
}
|
|
||||||
|
|
||||||
[Fact]
|
|
||||||
public void HelperSmoke_FindsExpectedRemoteDesktopPolicies()
|
|
||||||
{
|
|
||||||
NetworkPolicy("fc-desktop", "desktop-isolation").Name.Should().Be("desktop-isolation");
|
|
||||||
NetworkPolicy("guacamole", "guacd-desktop-egress").Name.Should().Be("guacd-desktop-egress");
|
|
||||||
}
|
|
||||||
|
|
||||||
[Fact]
|
|
||||||
public void HelperSmoke_EgressPortExtractionKeepsDistinctPorts()
|
|
||||||
{
|
|
||||||
var ports = NetworkPolicy("fc-desktop", "desktop-isolation")
|
|
||||||
.EgressPorts()
|
|
||||||
.ToHashSet(StringComparer.Ordinal);
|
|
||||||
|
|
||||||
ports.Should().HaveCount(4);
|
|
||||||
ports.Should().Contain(new[] { "53", "5300", "9000", "9443" });
|
|
||||||
}
|
|
||||||
|
|
||||||
private static ManifestDocument NetworkPolicy(string ns, string name)
|
|
||||||
=> Inventory.Documents.Single(document =>
|
|
||||||
document.Kind == "NetworkPolicy"
|
|
||||||
&& string.Equals(document.Namespace, ns, StringComparison.Ordinal)
|
|
||||||
&& string.Equals(document.Name, name, StringComparison.Ordinal));
|
|
||||||
|
|
||||||
private static bool EgressRuleHasEmptyTo(YamlDotNet.RepresentationModel.YamlMappingNode rule)
|
|
||||||
=> rule.Children.Any(entry =>
|
|
||||||
entry.Key is YamlDotNet.RepresentationModel.YamlScalarNode key
|
|
||||||
&& string.Equals(key.Value, "to", StringComparison.Ordinal)
|
|
||||||
&& entry.Value is YamlDotNet.RepresentationModel.YamlSequenceNode sequence
|
|
||||||
&& sequence.Children.Count == 0);
|
|
||||||
}
|
|
||||||
Reference in New Issue
Block a user