Compare commits

..

1 Commits

Author SHA1 Message Date
Andrew Stoltz
6382582090 fc-desktop: add remotedesktop warm pool intent 2026-05-19 12:27:47 -05:00
4 changed files with 139 additions and 50 deletions

View File

@@ -1,10 +1,13 @@
# FlowerCore Remote Desktop — TLS + Ingress # FlowerCore Remote Desktop — TLS + Ingress
# #
# Source-of-truth split: # Source-of-truth split:
# - bluejay-infra OWNS: Certificate, IngressRoute, all NetworkPolicies
#   (see network-policies.yaml in this directory), and the explicit
#   RemoteDesktopPoolCrd warm-pool intent in remotedesktop-pools.yaml.
# - FlowerCore.RemoteDesktop scripts/deploy-web.sh OWNS: Deployment + # - FlowerCore.RemoteDesktop OWNS: CRD definition/operator Deployment and
# Service. Reason: image refs like `localhost/fc-desktop:linux-xfce` # scripts/deploy-web.sh Deployment + Service. Reason: image refs like
# `localhost/fc-desktop:linux-xfce`
# only exist on each node's containerd after a manual import, so a # only exist on each node's containerd after a manual import, so a
# Deployment manifest in bluejay-infra would race the image-import # Deployment manifest in bluejay-infra would race the image-import
# step and crash-loop. # step and crash-loop.

View File

@@ -0,0 +1,101 @@
# FlowerCore RemoteDesktop warm-pool intent.
#
# These CRDs are deliberately explicit. The RemoteDesktop warmup loop no
# longer scans template defaults to decide what to warm; every enabled pool
# here represents operator/GitOps intent and prevents a repeat of the
# orphan-pool leak from 2026-05-08.
---
# Warm-pool intent for the `browser-only` template.
# userVolumeMode is deliberately absent: the manifest lint test
# RemoteDesktopPoolCrds_MustExplicitlyOptInHookReadyTemplates expects it to be
# unset for this template and `LateAttach` for the other pools in this file.
apiVersion: flowercore.io/v1
kind: RemoteDesktopPoolCrd
metadata:
  name: browser-lab-pool
  namespace: fc-desktop
  labels:
    app.kubernetes.io/name: remotedesktop-pool
    app.kubernetes.io/part-of: flowercore-remotedesktop
    app.kubernetes.io/managed-by: bluejay-infra
spec:
  templateSlug: browser-only
  # NOTE(review): presumably the number of instances kept warm — confirm against the CRD schema.
  desiredSize: 1
  enabled: true
  # NOTE(review): reads like a one-shot trigger stored as standing state; confirm the
  # operator treats a permanent `true` as idempotent.
  reconcileNow: true
---
# Warm-pool intent for the `opensuse-xfce` template.
# The manifest lint test requires this pool to set userVolumeMode: LateAttach.
apiVersion: flowercore.io/v1
kind: RemoteDesktopPoolCrd
metadata:
  name: opensuse-xfce-pool
  namespace: fc-desktop
  labels:
    app.kubernetes.io/name: remotedesktop-pool
    app.kubernetes.io/part-of: flowercore-remotedesktop
    app.kubernetes.io/managed-by: bluejay-infra
spec:
  templateSlug: opensuse-xfce
  # NOTE(review): presumably the number of instances kept warm — confirm against the CRD schema.
  desiredSize: 1
  enabled: true
  # NOTE(review): presumably the user volume is attached only once a warm instance is
  # claimed — confirm against the operator's documentation.
  userVolumeMode: LateAttach
  # NOTE(review): confirm the operator treats a standing `true` as idempotent.
  reconcileNow: true
---
# Warm-pool intent for the `dev-workstation` template.
# The manifest lint test requires this pool to set userVolumeMode: LateAttach.
apiVersion: flowercore.io/v1
kind: RemoteDesktopPoolCrd
metadata:
  name: dev-workstation-pool
  namespace: fc-desktop
  labels:
    app.kubernetes.io/name: remotedesktop-pool
    app.kubernetes.io/part-of: flowercore-remotedesktop
    app.kubernetes.io/managed-by: bluejay-infra
spec:
  templateSlug: dev-workstation
  # NOTE(review): presumably the number of instances kept warm — confirm against the CRD schema.
  desiredSize: 1
  enabled: true
  # NOTE(review): presumably the user volume is attached only once a warm instance is
  # claimed — confirm against the operator's documentation.
  userVolumeMode: LateAttach
  # NOTE(review): confirm the operator treats a standing `true` as idempotent.
  reconcileNow: true
---
# Warm-pool intent for the `ai-station` template.
# The manifest lint test requires this pool to set userVolumeMode: LateAttach.
apiVersion: flowercore.io/v1
kind: RemoteDesktopPoolCrd
metadata:
  name: ai-station-pool
  namespace: fc-desktop
  labels:
    app.kubernetes.io/name: remotedesktop-pool
    app.kubernetes.io/part-of: flowercore-remotedesktop
    app.kubernetes.io/managed-by: bluejay-infra
spec:
  templateSlug: ai-station
  # NOTE(review): presumably the number of instances kept warm — confirm against the CRD schema.
  desiredSize: 1
  enabled: true
  # NOTE(review): presumably the user volume is attached only once a warm instance is
  # claimed — confirm against the operator's documentation.
  userVolumeMode: LateAttach
  # NOTE(review): confirm the operator treats a standing `true` as idempotent.
  reconcileNow: true
---
# Warm-pool intent for the `linux-xfce` template.
# The manifest lint test requires this pool to set userVolumeMode: LateAttach.
apiVersion: flowercore.io/v1
kind: RemoteDesktopPoolCrd
metadata:
  name: linux-xfce-pool
  namespace: fc-desktop
  labels:
    app.kubernetes.io/name: remotedesktop-pool
    app.kubernetes.io/part-of: flowercore-remotedesktop
    app.kubernetes.io/managed-by: bluejay-infra
spec:
  templateSlug: linux-xfce
  # NOTE(review): presumably the number of instances kept warm — confirm against the CRD schema.
  desiredSize: 1
  enabled: true
  # NOTE(review): presumably the user volume is attached only once a warm instance is
  # claimed — confirm against the operator's documentation.
  userVolumeMode: LateAttach
  # NOTE(review): confirm the operator treats a standing `true` as idempotent.
  reconcileNow: true
---
# Warm-pool intent for the `linux-xfce-rdp` template.
# The manifest lint test requires this pool to set userVolumeMode: LateAttach.
apiVersion: flowercore.io/v1
kind: RemoteDesktopPoolCrd
metadata:
  name: linux-xfce-rdp-pool
  namespace: fc-desktop
  labels:
    app.kubernetes.io/name: remotedesktop-pool
    app.kubernetes.io/part-of: flowercore-remotedesktop
    app.kubernetes.io/managed-by: bluejay-infra
spec:
  templateSlug: linux-xfce-rdp
  # NOTE(review): presumably the number of instances kept warm — confirm against the CRD schema.
  desiredSize: 1
  enabled: true
  # NOTE(review): presumably the user volume is attached only once a warm instance is
  # claimed — confirm against the operator's documentation.
  userVolumeMode: LateAttach
  # NOTE(review): confirm the operator treats a standing `true` as idempotent.
  reconcileNow: true

View File

@@ -824,53 +824,6 @@ data:
summary: "desktop.iamworkin.lan TLS cert expires within 2 days" summary: "desktop.iamworkin.lan TLS cert expires within 2 days"
description: "The desktop.iamworkin.lan cert is inside the 2-day renewal window and cert-manager has not renewed. Check cert-manager logs, step-ca reachability, and pfSense DNS overrides per the ACME DNS-01 gate." description: "The desktop.iamworkin.lan cert is inside the 2-day renewal window and cert-manager has not renewed. Check cert-manager logs, step-ca reachability, and pfSense DNS overrides per the ACME DNS-01 gate."
# LonghornPVCGrowthRapid: warns when a Longhorn volume backing a RemoteDesktop PVC
# grew more than 20% over the last hour, or sits above 80% of its capacity, for 5m.
# PVCs are selected either through the flowercore.io/managed-by=remotedesktop label
# (kube_persistentvolumeclaim_labels) or, as a fallback, by name inside fc-desktop
# (fc-profile-* and remotedesktop-data).
# NOTE(review): the `todo` annotation records that the longhorn_volume_* and
# kube_persistentvolumeclaim_labels metrics were not exposed on 2026-05-19, so this
# rule presumably cannot fire until they are — confirm before relying on it.
- alert: LonghornPVCGrowthRapid
  expr: |
    (
      (
        (
          longhorn_volume_actual_size_bytes
          - (longhorn_volume_actual_size_bytes offset 1h)
        )
        / clamp_min(longhorn_volume_actual_size_bytes offset 1h, 1)
      )
      * on(volume) group_left(namespace, persistentvolumeclaim) (
        (
          label_replace(kube_persistentvolumeclaim_info{storageclass="longhorn"}, "volume", "$1", "volumename", "(.+)")
          * on(namespace, persistentvolumeclaim) group_left()
          kube_persistentvolumeclaim_labels{label_flowercore_io_managed_by="remotedesktop"}
        )
        or
        label_replace(kube_persistentvolumeclaim_info{namespace="fc-desktop", storageclass="longhorn", persistentvolumeclaim=~"fc-profile-.*|remotedesktop-data"}, "volume", "$1", "volumename", "(.+)")
      )
    ) > 0.20
    or
    (
      (
        longhorn_volume_actual_size_bytes
        / on(volume) clamp_min(longhorn_volume_capacity_bytes, 1)
      )
      * on(volume) group_left(namespace, persistentvolumeclaim) (
        (
          label_replace(kube_persistentvolumeclaim_info{storageclass="longhorn"}, "volume", "$1", "volumename", "(.+)")
          * on(namespace, persistentvolumeclaim) group_left()
          kube_persistentvolumeclaim_labels{label_flowercore_io_managed_by="remotedesktop"}
        )
        or
        label_replace(kube_persistentvolumeclaim_info{namespace="fc-desktop", storageclass="longhorn", persistentvolumeclaim=~"fc-profile-.*|remotedesktop-data"}, "volume", "$1", "volumename", "(.+)")
      )
    ) > 0.80
  # The condition must hold for five minutes before the alert fires.
  for: 5m
  labels:
    severity: warning
    alert_channel: thermal_print
    service: remotedesktop
  annotations:
    summary: "RemoteDesktop Longhorn PVC {{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }} growing rapidly"
    description: "Longhorn volume {{ $labels.volume }} backing RemoteDesktop PVC {{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }} grew more than 20% in 1h or is over 80% capacity. Check for runaway SQLite/user-profile growth; this alert was added after the 2026-05-16 RemoteDesktop web SQLite Error 13 incident."
    runbook: "1. kubectl -n {{ $labels.namespace }} describe pvc {{ $labels.persistentvolumeclaim }} 2. Open Longhorn UI volume {{ $labels.volume }} 3. Check RemoteDesktop web/user-volume SQLite files for permission or runaway growth 4. Expand PVC only after confirming the writer is healthy"
    # Records which metrics were missing from live Prometheus when the rule was written.
    todo: "2026-05-19 metric gate: live noc1 Prometheus currently exposes kube_persistentvolumeclaim_info and kube_persistentvolumeclaim_resource_requests_storage_bytes, but not longhorn_volume_actual_size_bytes, longhorn_volume_capacity_bytes, kube_persistentvolumeclaim_labels, or kubelet_volume_stats_used_bytes. Keep the fc-desktop PVC fallback until kube-state-metrics label allowlist exposes flowercore.io/managed-by=remotedesktop."
- name: pi-fleet - name: pi-fleet
rules: rules:
- alert: PiManagerDown - alert: PiManagerDown

View File

@@ -387,6 +387,38 @@ public sealed class FleetManifestLintTests
violations.Should().BeEmpty(); violations.Should().BeEmpty();
} }
/// <summary>
/// Pins the warm-pool intent declared in <c>fc-desktop/remotedesktop-pools.yaml</c>:
/// exactly one <c>RemoteDesktopPoolCrd</c> per expected template slug, each in the
/// <c>fc-desktop</c> namespace with <c>desiredSize: 1</c>, <c>enabled: true</c>,
/// <c>reconcileNow: true</c>, and the expected <c>userVolumeMode</c>. An expected
/// mode of <c>null</c> means <c>Scalar</c> must return <c>null</c> for that key.
/// </summary>
[Fact]
public void RemoteDesktopPoolCrds_MustExplicitlyOptInHookReadyTemplates()
{
    // Template slug -> required spec.userVolumeMode (null = no value may be reported).
    var expectedModes = new Dictionary<string, string?>(StringComparer.Ordinal)
    {
        ["browser-only"] = null,
        ["opensuse-xfce"] = "LateAttach",
        ["dev-workstation"] = "LateAttach",
        ["ai-station"] = "LateAttach",
        ["linux-xfce"] = "LateAttach",
        ["linux-xfce-rdp"] = "LateAttach",
    };

    // Group first instead of going straight to ToDictionary: two manifests that share a
    // slug (or both omit it and collapse to "") would otherwise abort the test with a
    // bare ArgumentException that does not name the colliding slug.
    var poolsBySlug = Inventory.Documents
        .Where(document => document.Kind == "RemoteDesktopPoolCrd")
        .Where(document => document.RelativePath == "fc-desktop/remotedesktop-pools.yaml")
        .GroupBy(
            document => document.Scalar("spec", "templateSlug") ?? string.Empty,
            StringComparer.Ordinal)
        .ToList();

    poolsBySlug
        .Where(group => group.Count() > 1)
        .Select(group => group.Key)
        .Should().BeEmpty("each templateSlug must be declared by exactly one pool manifest");

    var pools = poolsBySlug.ToDictionary(
        group => group.Key,
        group => group.First(),
        StringComparer.Ordinal);

    // Fails on both missing and unexpected slugs, so the indexer below cannot miss.
    pools.Keys.Should().BeEquivalentTo(expectedModes.Keys);

    foreach (var expected in expectedModes)
    {
        var slug = expected.Key;
        var pool = pools[slug];

        // The "because" arguments name the offending pool in the failure message.
        pool.Namespace.Should().Be("fc-desktop", "pool '{0}' must live in the fc-desktop namespace", slug);
        pool.Scalar("spec", "desiredSize").Should().Be("1", "pool '{0}' must declare desiredSize 1", slug);
        pool.Scalar("spec", "enabled").Should().Be("true", "pool '{0}' must be enabled", slug);
        pool.Scalar("spec", "reconcileNow").Should().Be("true", "pool '{0}' must set reconcileNow", slug);
        pool.Scalar("spec", "userVolumeMode").Should().Be(expected.Value, "pool '{0}' must match the expected userVolumeMode", slug);
    }
}
[Fact] [Fact]
public void PublicEgressDeployments_MustOptOutOfIamworkinLanSearchSuffixes() public void PublicEgressDeployments_MustOptOutOfIamworkinLanSearchSuffixes()
{ {