Compare commits
1 Commits
sprint39/c
...
sprint39/c
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
30e16bfcfb |
2
.gitattributes
vendored
Normal file
2
.gitattributes
vendored
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
*.yaml text eol=lf
|
||||||
|
*.yml text eol=lf
|
||||||
30
apps/fc-desktop/remotedesktop-pools.yaml
Normal file
30
apps/fc-desktop/remotedesktop-pools.yaml
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
# FlowerCore RemoteDesktop warm-pool posture.
|
||||||
|
#
|
||||||
|
# The RemoteDesktop Web and Operator Deployments remain owned by
|
||||||
|
# FlowerCore.RemoteDesktop. bluejay-infra owns these GitOps pool intents so
|
||||||
|
# rebuilds preserve the operational posture without baking it into service code.
|
||||||
|
---
|
||||||
|
apiVersion: flowercore.io/v1
|
||||||
|
kind: RemoteDesktopPoolCrd
|
||||||
|
metadata:
|
||||||
|
name: qt-sdk-pool
|
||||||
|
namespace: fc-desktop
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: remotedesktop-pool
|
||||||
|
app.kubernetes.io/component: warm-pool
|
||||||
|
app.kubernetes.io/part-of: flowercore-remotedesktop
|
||||||
|
flowercore.io/template: dev-workstation
|
||||||
|
flowercore.io/image: localhost-fc-desktop-qt-sdk
|
||||||
|
annotations:
|
||||||
|
flowercore.io/deficit-tolerance: "0"
|
||||||
|
flowercore.io/scale-mode: ManualScaleOnDemand
|
||||||
|
flowercore.io/image-ref: localhost/fc-desktop:qt-sdk
|
||||||
|
flowercore.io/image-pull-policy: Never
|
||||||
|
spec:
|
||||||
|
templateSlug: dev-workstation
|
||||||
|
desiredSize: 0
|
||||||
|
enabled: false
|
||||||
|
userVolumeMode: LateAttach
|
||||||
|
deficitTolerance: 0
|
||||||
|
scaleMode: ManualScaleOnDemand
|
||||||
|
reconcileNow: false
|
||||||
@@ -824,53 +824,6 @@ data:
|
|||||||
summary: "desktop.iamworkin.lan TLS cert expires within 2 days"
|
summary: "desktop.iamworkin.lan TLS cert expires within 2 days"
|
||||||
description: "The desktop.iamworkin.lan cert is inside the 2-day renewal window and cert-manager has not renewed. Check cert-manager logs, step-ca reachability, and pfSense DNS overrides per the ACME DNS-01 gate."
|
description: "The desktop.iamworkin.lan cert is inside the 2-day renewal window and cert-manager has not renewed. Check cert-manager logs, step-ca reachability, and pfSense DNS overrides per the ACME DNS-01 gate."
|
||||||
|
|
||||||
- alert: LonghornPVCGrowthRapid
|
|
||||||
expr: |
|
|
||||||
(
|
|
||||||
(
|
|
||||||
(
|
|
||||||
longhorn_volume_actual_size_bytes
|
|
||||||
- (longhorn_volume_actual_size_bytes offset 1h)
|
|
||||||
)
|
|
||||||
/ clamp_min(longhorn_volume_actual_size_bytes offset 1h, 1)
|
|
||||||
)
|
|
||||||
* on(volume) group_left(namespace, persistentvolumeclaim) (
|
|
||||||
(
|
|
||||||
label_replace(kube_persistentvolumeclaim_info{storageclass="longhorn"}, "volume", "$1", "volumename", "(.+)")
|
|
||||||
* on(namespace, persistentvolumeclaim) group_left()
|
|
||||||
kube_persistentvolumeclaim_labels{label_flowercore_io_managed_by="remotedesktop"}
|
|
||||||
)
|
|
||||||
or
|
|
||||||
label_replace(kube_persistentvolumeclaim_info{namespace="fc-desktop", storageclass="longhorn", persistentvolumeclaim=~"fc-profile-.*|remotedesktop-data"}, "volume", "$1", "volumename", "(.+)")
|
|
||||||
)
|
|
||||||
) > 0.20
|
|
||||||
or
|
|
||||||
(
|
|
||||||
(
|
|
||||||
longhorn_volume_actual_size_bytes
|
|
||||||
/ on(volume) clamp_min(longhorn_volume_capacity_bytes, 1)
|
|
||||||
)
|
|
||||||
* on(volume) group_left(namespace, persistentvolumeclaim) (
|
|
||||||
(
|
|
||||||
label_replace(kube_persistentvolumeclaim_info{storageclass="longhorn"}, "volume", "$1", "volumename", "(.+)")
|
|
||||||
* on(namespace, persistentvolumeclaim) group_left()
|
|
||||||
kube_persistentvolumeclaim_labels{label_flowercore_io_managed_by="remotedesktop"}
|
|
||||||
)
|
|
||||||
or
|
|
||||||
label_replace(kube_persistentvolumeclaim_info{namespace="fc-desktop", storageclass="longhorn", persistentvolumeclaim=~"fc-profile-.*|remotedesktop-data"}, "volume", "$1", "volumename", "(.+)")
|
|
||||||
)
|
|
||||||
) > 0.80
|
|
||||||
for: 5m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
alert_channel: thermal_print
|
|
||||||
service: remotedesktop
|
|
||||||
annotations:
|
|
||||||
summary: "RemoteDesktop Longhorn PVC {{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }} growing rapidly"
|
|
||||||
description: "Longhorn volume {{ $labels.volume }} backing RemoteDesktop PVC {{ $labels.namespace }}/{{ $labels.persistentvolumeclaim }} grew more than 20% in 1h or is over 80% capacity. Check for runaway SQLite/user-profile growth; this alert was added after the 2026-05-16 RemoteDesktop web SQLite Error 13 incident."
|
|
||||||
runbook: "1. kubectl -n {{ $labels.namespace }} describe pvc {{ $labels.persistentvolumeclaim }} 2. Open Longhorn UI volume {{ $labels.volume }} 3. Check RemoteDesktop web/user-volume SQLite files for permission or runaway growth 4. Expand PVC only after confirming the writer is healthy"
|
|
||||||
todo: "2026-05-19 metric gate: live noc1 Prometheus currently exposes kube_persistentvolumeclaim_info and kube_persistentvolumeclaim_resource_requests_storage_bytes, but not longhorn_volume_actual_size_bytes, longhorn_volume_capacity_bytes, kube_persistentvolumeclaim_labels, or kubelet_volume_stats_used_bytes. Keep the fc-desktop PVC fallback until kube-state-metrics label allowlist exposes flowercore.io/managed-by=remotedesktop."
|
|
||||||
|
|
||||||
- name: pi-fleet
|
- name: pi-fleet
|
||||||
rules:
|
rules:
|
||||||
- alert: PiManagerDown
|
- alert: PiManagerDown
|
||||||
|
|||||||
Reference in New Issue
Block a user