Compare commits
1 Commits
sprint40/c
...
sprint39/c
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
55729a24f9 |
@@ -1,24 +0,0 @@
|
|||||||
# FlowerCore Remote Desktop - Namespace ResourceQuota (GitOps-managed)
|
|
||||||
#
|
|
||||||
# Codifies the live cap applied on 2026-05-19 after disabling automatic
|
|
||||||
# desktop pool prewarm: no more than 15 pods and no more than 8 CPU / 16Gi
|
|
||||||
# memory requested across the fc-desktop namespace.
|
|
||||||
#
|
|
||||||
# ArgoCD adoption note: this manifest uses the same kind/name/namespace as
|
|
||||||
# the live ResourceQuota and avoids hook, force, or replace annotations, so
|
|
||||||
# automated sync should patch/adopt the existing object in place instead of
|
|
||||||
# recreating it.
|
|
||||||
apiVersion: v1
|
|
||||||
kind: ResourceQuota
|
|
||||||
metadata:
|
|
||||||
name: fc-desktop-cap
|
|
||||||
namespace: fc-desktop
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/part-of: remotedesktop
|
|
||||||
app.kubernetes.io/component: capacity-guard
|
|
||||||
app.kubernetes.io/managed-by: argocd
|
|
||||||
spec:
|
|
||||||
hard:
|
|
||||||
count/pods: "15"
|
|
||||||
cpu: "8"
|
|
||||||
memory: 16Gi
|
|
||||||
@@ -824,6 +824,53 @@ data:
|
|||||||
summary: "desktop.iamworkin.lan TLS cert expires within 2 days"
|
summary: "desktop.iamworkin.lan TLS cert expires within 2 days"
|
||||||
description: "The desktop.iamworkin.lan cert is inside the 2-day renewal window and cert-manager has not renewed. Check cert-manager logs, step-ca reachability, and pfSense DNS overrides per the ACME DNS-01 gate."
|
description: "The desktop.iamworkin.lan cert is inside the 2-day renewal window and cert-manager has not renewed. Check cert-manager logs, step-ca reachability, and pfSense DNS overrides per the ACME DNS-01 gate."
|
||||||
|
|
||||||
|
- alert: LonghornPVCGrowthRapid
|
||||||
|
expr: |
|
||||||
|
(
|
||||||
|
(
|
||||||
|
(
|
||||||
|
longhorn_volume_actual_size_bytes
|
||||||
|
- (longhorn_volume_actual_size_bytes offset 1h)
|
||||||
|
)
|
||||||
|
/ clamp_min(longhorn_volume_actual_size_bytes offset 1h, 1)
|
||||||
|
)
|
||||||
|
* on(volume) group_left(namespace, persistentvolumeclaim) (
|
||||||
|
(
|
||||||
|
label_replace(kube_persistentvolumeclaim_info{storageclass="longhorn"}, "volume", "$1", "volumename", "(.+)")
|
||||||
|
* on(namespace, persistentvolumeclaim) group_left()
|
||||||
|
kube_persistentvolumeclaim_labels{label_flowercore_io_managed_by="remotedesktop"}
|
||||||
|
)
|
||||||
|
or
|
||||||
|
label_replace(kube_persistentvolumeclaim_info{namespace="fc-desktop", storageclass="longhorn", persistentvolumeclaim=~"fc-profile-.*|remotedesktop-data"}, "volume", "$1", "volumename", "(.+)")
|
||||||
|
)
|
||||||
|
) > 0.20
|
||||||
|
or
|
||||||
|
(
|
||||||
|
(
|
||||||
|
longhorn_volume_actual_size_bytes
|
||||||
|
/ on(volume) clamp_min(longhorn_volume_capacity_bytes, 1)
|
||||||
|
)
|
||||||
|
* on(volume) group_left(namespace, persistentvolumeclaim) (
|
||||||
|
(
|
||||||
|
label_replace(kube_persistentvolumeclaim_info{storageclass="longhorn"}, "volume", "$1", "volumename", "(.+)")
|
||||||
|
* on(namespace, persistentvolumeclaim) group_left()
|
||||||
|
kube_persistentvolumeclaim_labels{label_flowercore_io_managed_by="remotedesktop"}
|
||||||
|
)
|
||||||
|
or
|
||||||
|
label_replace(kube_persistentvolumeclaim_info{namespace="fc-desktop", storageclass="longhorn", persistentvolumeclaim=~"fc-profile-.*|remotedesktop-data"}, "volume", "$1", "volumename", "(.+)")
|
||||||
|
)
|
||||||
|
) > 0.80
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
alert_channel: thermal_print
|
||||||
|
service: remotedesktop
|
||||||
|
annotations:
|
||||||
|
summary: "RemoteDesktop Longhorn PVC {{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }} growing rapidly"
|
||||||
|
description: "Longhorn volume {{ $labels.volume }} backing RemoteDesktop PVC {{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }} grew more than 20% in 1h or is over 80% capacity. Check for runaway SQLite/user-profile growth; this alert was added after the 2026-05-16 RemoteDesktop web SQLite Error 13 incident."
|
||||||
|
runbook: "1. kubectl -n {{ $labels.namespace }} describe pvc {{ $labels.persistentvolumeclaim }} 2. Open Longhorn UI volume {{ $labels.volume }} 3. Check RemoteDesktop web/user-volume SQLite files for permission or runaway growth 4. Expand PVC only after confirming the writer is healthy"
|
||||||
|
todo: "2026-05-19 metric gate: live noc1 Prometheus currently exposes kube_persistentvolumeclaim_info and kube_persistentvolumeclaim_resource_requests_storage_bytes, but not longhorn_volume_actual_size_bytes, longhorn_volume_capacity_bytes, kube_persistentvolumeclaim_labels, or kubelet_volume_stats_used_bytes. Keep the fc-desktop PVC fallback until kube-state-metrics label allowlist exposes flowercore.io/managed-by=remotedesktop."
|
||||||
|
|
||||||
- name: pi-fleet
|
- name: pi-fleet
|
||||||
rules:
|
rules:
|
||||||
- alert: PiManagerDown
|
- alert: PiManagerDown
|
||||||
|
|||||||
@@ -421,35 +421,6 @@ public sealed class FleetManifestLintTests
|
|||||||
violations.Should().BeEmpty();
|
violations.Should().BeEmpty();
|
||||||
}
|
}
|
||||||
|
|
||||||
[Fact]
|
|
||||||
[Trait("Question", "Q-RD-DESKTOP-CAP-4")]
|
|
||||||
public void FcDesktop_ResourceQuotaMustCodifyLivePodCpuMemoryCap()
|
|
||||||
{
|
|
||||||
var quota = FcDesktopDocuments()
|
|
||||||
.Single(document => document.Kind == "ResourceQuota" && document.Name == "fc-desktop-cap");
|
|
||||||
|
|
||||||
quota.RelativePath.Should().Be("fc-desktop/resourcequota.yaml");
|
|
||||||
quota.Namespace.Should().Be("fc-desktop");
|
|
||||||
quota.Scalar("spec", "hard", "count/pods").Should().Be("15");
|
|
||||||
quota.Scalar("spec", "hard", "cpu").Should().Be("8");
|
|
||||||
quota.Scalar("spec", "hard", "memory").Should().Be("16Gi");
|
|
||||||
}
|
|
||||||
|
|
||||||
[Fact]
|
|
||||||
[Trait("Question", "Q-RD-DESKTOP-CAP-5")]
|
|
||||||
public void FcDesktop_ResourceQuotaMustBeArgoCdAdoptableInPlace()
|
|
||||||
{
|
|
||||||
var quota = FcDesktopDocuments()
|
|
||||||
.Single(document => document.Kind == "ResourceQuota" && document.Name == "fc-desktop-cap");
|
|
||||||
|
|
||||||
quota.RelativePath.Should().StartWith("fc-desktop/");
|
|
||||||
quota.Scalar("metadata", "annotations", "argocd.argoproj.io/hook").Should().BeNull();
|
|
||||||
|
|
||||||
var syncOptions = quota.Scalar("metadata", "annotations", "argocd.argoproj.io/sync-options") ?? string.Empty;
|
|
||||||
syncOptions.Should().NotContain("Force=true");
|
|
||||||
syncOptions.Should().NotContain("Replace=true");
|
|
||||||
}
|
|
||||||
|
|
||||||
[Fact]
|
[Fact]
|
||||||
public void FcDeviceManagement_MustShipExpectedManifestSet()
|
public void FcDeviceManagement_MustShipExpectedManifestSet()
|
||||||
{
|
{
|
||||||
@@ -696,13 +667,6 @@ public sealed class FleetManifestLintTests
|
|||||||
.Where(document => document.RelativePath.StartsWith("fc-devicemgmt/", StringComparison.Ordinal))
|
.Where(document => document.RelativePath.StartsWith("fc-devicemgmt/", StringComparison.Ordinal))
|
||||||
.ToList();
|
.ToList();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static IReadOnlyList<ManifestDocument> FcDesktopDocuments()
|
|
||||||
{
|
|
||||||
return Inventory.Documents
|
|
||||||
.Where(document => document.RelativePath.StartsWith("fc-desktop/", StringComparison.Ordinal))
|
|
||||||
.ToList();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
internal sealed class ManifestInventory
|
internal sealed class ManifestInventory
|
||||||
|
|||||||
Reference in New Issue
Block a user