Compare commits
1 Commits
sprint40/c
...
sprint39/c
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6382582090 |
@@ -1,10 +1,13 @@
|
||||
# FlowerCore Remote Desktop — TLS + Ingress
|
||||
#
|
||||
# Source-of-truth split:
|
||||
# - bluejay-infra OWNS: Certificate, IngressRoute, all NetworkPolicies
|
||||
# - bluejay-infra OWNS: Certificate, IngressRoute, all NetworkPolicies,
|
||||
# and the explicit RemoteDesktopPoolCrd warm-pool intent in
|
||||
# remotedesktop-pools.yaml.
|
||||
# (NetworkPolicies: see network-policies.yaml in this directory.)
|
||||
# - FlowerCore.RemoteDesktop scripts/deploy-web.sh OWNS: Deployment +
|
||||
# Service. Reason: image refs like `localhost/fc-desktop:linux-xfce`
|
||||
# - FlowerCore.RemoteDesktop OWNS: CRD definition/operator Deployment and
|
||||
# scripts/deploy-web.sh Deployment + Service. Reason: image refs like
|
||||
# `localhost/fc-desktop:linux-xfce`
|
||||
# only exist on each node's containerd after a manual import, so a
|
||||
# Deployment manifest in bluejay-infra would race the image-import
|
||||
# step and crash-loop.
|
||||
|
||||
101
apps/fc-desktop/remotedesktop-pools.yaml
Normal file
101
apps/fc-desktop/remotedesktop-pools.yaml
Normal file
@@ -0,0 +1,101 @@
|
||||
# FlowerCore RemoteDesktop warm-pool intent.
|
||||
#
|
||||
# These CRDs are deliberately explicit. The RemoteDesktop warmup loop no
|
||||
# longer scans template defaults to decide what to warm; every enabled pool
|
||||
# here represents operator/GitOps intent and prevents a repeat of the
|
||||
# orphan-pool leak from 2026-05-08.
|
||||
---
|
||||
apiVersion: flowercore.io/v1
|
||||
kind: RemoteDesktopPoolCrd
|
||||
metadata:
|
||||
name: browser-lab-pool
|
||||
namespace: fc-desktop
|
||||
labels:
|
||||
app.kubernetes.io/name: remotedesktop-pool
|
||||
app.kubernetes.io/part-of: flowercore-remotedesktop
|
||||
app.kubernetes.io/managed-by: bluejay-infra
|
||||
spec:
|
||||
templateSlug: browser-only
|
||||
desiredSize: 1
|
||||
enabled: true
|
||||
reconcileNow: true
|
||||
---
|
||||
apiVersion: flowercore.io/v1
|
||||
kind: RemoteDesktopPoolCrd
|
||||
metadata:
|
||||
name: opensuse-xfce-pool
|
||||
namespace: fc-desktop
|
||||
labels:
|
||||
app.kubernetes.io/name: remotedesktop-pool
|
||||
app.kubernetes.io/part-of: flowercore-remotedesktop
|
||||
app.kubernetes.io/managed-by: bluejay-infra
|
||||
spec:
|
||||
templateSlug: opensuse-xfce
|
||||
desiredSize: 1
|
||||
enabled: true
|
||||
userVolumeMode: LateAttach
|
||||
reconcileNow: true
|
||||
---
|
||||
apiVersion: flowercore.io/v1
|
||||
kind: RemoteDesktopPoolCrd
|
||||
metadata:
|
||||
name: dev-workstation-pool
|
||||
namespace: fc-desktop
|
||||
labels:
|
||||
app.kubernetes.io/name: remotedesktop-pool
|
||||
app.kubernetes.io/part-of: flowercore-remotedesktop
|
||||
app.kubernetes.io/managed-by: bluejay-infra
|
||||
spec:
|
||||
templateSlug: dev-workstation
|
||||
desiredSize: 1
|
||||
enabled: true
|
||||
userVolumeMode: LateAttach
|
||||
reconcileNow: true
|
||||
---
|
||||
apiVersion: flowercore.io/v1
|
||||
kind: RemoteDesktopPoolCrd
|
||||
metadata:
|
||||
name: ai-station-pool
|
||||
namespace: fc-desktop
|
||||
labels:
|
||||
app.kubernetes.io/name: remotedesktop-pool
|
||||
app.kubernetes.io/part-of: flowercore-remotedesktop
|
||||
app.kubernetes.io/managed-by: bluejay-infra
|
||||
spec:
|
||||
templateSlug: ai-station
|
||||
desiredSize: 1
|
||||
enabled: true
|
||||
userVolumeMode: LateAttach
|
||||
reconcileNow: true
|
||||
---
|
||||
apiVersion: flowercore.io/v1
|
||||
kind: RemoteDesktopPoolCrd
|
||||
metadata:
|
||||
name: linux-xfce-pool
|
||||
namespace: fc-desktop
|
||||
labels:
|
||||
app.kubernetes.io/name: remotedesktop-pool
|
||||
app.kubernetes.io/part-of: flowercore-remotedesktop
|
||||
app.kubernetes.io/managed-by: bluejay-infra
|
||||
spec:
|
||||
templateSlug: linux-xfce
|
||||
desiredSize: 1
|
||||
enabled: true
|
||||
userVolumeMode: LateAttach
|
||||
reconcileNow: true
|
||||
---
|
||||
apiVersion: flowercore.io/v1
|
||||
kind: RemoteDesktopPoolCrd
|
||||
metadata:
|
||||
name: linux-xfce-rdp-pool
|
||||
namespace: fc-desktop
|
||||
labels:
|
||||
app.kubernetes.io/name: remotedesktop-pool
|
||||
app.kubernetes.io/part-of: flowercore-remotedesktop
|
||||
app.kubernetes.io/managed-by: bluejay-infra
|
||||
spec:
|
||||
templateSlug: linux-xfce-rdp
|
||||
desiredSize: 1
|
||||
enabled: true
|
||||
userVolumeMode: LateAttach
|
||||
reconcileNow: true
|
||||
@@ -656,15 +656,14 @@ data:
|
||||
summary: "Print queue backlog on edge2 ({{ $value }} active jobs)"
|
||||
description: "CUPS has {{ $value }} active jobs queued. Possible printer jam, USB disconnect, or paper out."
|
||||
|
||||
# Printer hardware and paper-roll lifecycle alerts.
|
||||
# print_printer_online: 1 when the transport is reachable/selected.
|
||||
# print_printer_state enum: 0 unknown, 1 online, 2 offline,
|
||||
# 3 paper_depleted, 4 jam, 5 head_error, 6 cover_open.
|
||||
# Offline/jam/cover alerts stay IRC-only. Paper depleted and head
|
||||
# error may route to the thermal digest only when the printer is
|
||||
# online enough to make that useful.
|
||||
# Paper roll lifecycle alerts (XL Track I, 2026-04-26).
|
||||
# Source-of-truth gauge: print_paper_remaining_percent (Print.Web OTEL,
|
||||
# hydrated on startup from the active PaperRoll row).
|
||||
# alert_channel=thermal_print routes through irc-notify -> Print.Web
|
||||
# /api/print/alert so the printer announces its own paper-out warning
|
||||
# on its remaining paper. Self-referential humor + operator nudge.
|
||||
- alert: PrintPaperRollLow
|
||||
expr: (print_paper_remaining_percent{job="printweb-otel"} < 10 and print_paper_remaining_percent{job="printweb-otel"} > 5) and print_printer_online{job="printweb-otel"} == 1
|
||||
expr: print_paper_remaining_percent{job="printweb-otel"} < 10 and print_paper_remaining_percent{job="printweb-otel"} > 5
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
@@ -673,59 +672,15 @@ data:
|
||||
summary: "Print roll low on edge2 ({{ $value | printf \"%.1f\" }}% remaining)"
|
||||
description: "NuPrint 210 paper roll has {{ $value | printf \"%.1f\" }}% remaining. Operator should load a fresh roll soon. Run /api/paper/status for the precise mm + estimated jobs left."
|
||||
|
||||
- alert: PrinterOfflineWarning
|
||||
expr: print_printer_state{job="printweb-otel"} == 2
|
||||
for: 2m
|
||||
labels:
|
||||
severity: warning
|
||||
service: print-web
|
||||
alert_channel: irc
|
||||
annotations:
|
||||
summary: "Print.Web printer offline on edge2"
|
||||
description: "Print.Web reports the NuPrint 210 transport is offline or unreachable. IRC-only by design: do not thermal-print an alert when the thermal printer itself is offline."
|
||||
|
||||
- alert: PrintPaperRollCritical
|
||||
expr: print_printer_state{job="printweb-otel"} == 3 and print_printer_online{job="printweb-otel"} == 1
|
||||
expr: print_paper_remaining_percent{job="printweb-otel"} <= 5
|
||||
for: 2m
|
||||
labels:
|
||||
severity: critical
|
||||
alert_channel: thermal_print
|
||||
annotations:
|
||||
summary: "Print paper depleted on edge2"
|
||||
description: "NuPrint 210 reports paper depleted while the printer is still online. Load a new roll, drain the hardware buffer if needed, then replay DeadLetter jobs from /print-log."
|
||||
|
||||
- alert: PrinterJamWarning
|
||||
expr: print_printer_state{job="printweb-otel"} == 4
|
||||
for: 2m
|
||||
labels:
|
||||
severity: warning
|
||||
service: print-web
|
||||
alert_channel: irc
|
||||
annotations:
|
||||
summary: "Print.Web printer jam on edge2"
|
||||
description: "Print.Web reports a paper/cutter jam state. IRC-only: clear the jam, drain the hardware buffer if bytes were queued, then retry affected jobs."
|
||||
|
||||
- alert: PrinterHeadErrorCritical
|
||||
expr: print_printer_state{job="printweb-otel"} == 5
|
||||
for: 2m
|
||||
labels:
|
||||
severity: critical
|
||||
service: print-web
|
||||
alert_channel: thermal_print
|
||||
annotations:
|
||||
summary: "Print.Web printer head error on edge2"
|
||||
description: "Print.Web reports a thermal head or unrecoverable printer error. Critical routing may enter the thermal digest per existing policy; IRC remains the primary triage stream."
|
||||
|
||||
- alert: PrinterCoverOpenWarning
|
||||
expr: print_printer_state{job="printweb-otel"} == 6
|
||||
for: 2m
|
||||
labels:
|
||||
severity: warning
|
||||
service: print-web
|
||||
alert_channel: irc
|
||||
annotations:
|
||||
summary: "Print.Web printer cover open on edge2"
|
||||
description: "Print.Web reports the printer cover/lid is open. IRC-only: close the cover and verify /api/print/status before retrying jobs."
|
||||
summary: "Print roll critical on edge2 ({{ $value | printf \"%.1f\" }}% remaining)"
|
||||
description: "NuPrint 210 paper roll at {{ $value | printf \"%.1f\" }}% — load a new roll NOW. The 50ft roll has a ~12% red-stripe zone; once paper passes that, the printer can run dry mid-job."
|
||||
|
||||
- alert: PrintJobDeadLetter
|
||||
expr: increase(print_jobs_dead_letter_total[15m]) > 0
|
||||
@@ -3680,146 +3635,6 @@ data:
|
||||
relativeTimeRange: {from: 120, to: 0}
|
||||
datasourceUid: __expr__
|
||||
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [600], type: gt}}], refId: C}
|
||||
- orgId: 1
|
||||
name: Print Services
|
||||
folder: Print Alerts
|
||||
interval: 1m
|
||||
rules:
|
||||
- uid: printer-offline-warning
|
||||
title: PrinterOfflineWarning
|
||||
condition: C
|
||||
for: 2m
|
||||
noDataState: OK
|
||||
execErrState: OK
|
||||
annotations:
|
||||
summary: "Print.Web printer offline on edge2"
|
||||
description: "Print.Web reports the NuPrint 210 transport is offline or unreachable. IRC-only by design: do not thermal-print an alert when the thermal printer itself is offline."
|
||||
runbook: "1. Check edge2 power/network 2. Check USB/CUPS queue 3. Open https://print.iamworkin.lan/admin 4. Do not force thermal routing for offline alerts."
|
||||
labels:
|
||||
severity: warning
|
||||
service: print-web
|
||||
alert_channel: irc
|
||||
data:
|
||||
- refId: A
|
||||
relativeTimeRange: {from: 120, to: 0}
|
||||
datasourceUid: prometheus
|
||||
model: {expr: 'print_printer_state{job="printweb-otel"} == 2', instant: true, refId: A}
|
||||
- refId: B
|
||||
relativeTimeRange: {from: 120, to: 0}
|
||||
datasourceUid: __expr__
|
||||
model: {type: reduce, expression: A, reducer: last, refId: B}
|
||||
- refId: C
|
||||
relativeTimeRange: {from: 120, to: 0}
|
||||
datasourceUid: __expr__
|
||||
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [0], type: gt}}], refId: C}
|
||||
- uid: print-paper-roll-critical
|
||||
title: PrintPaperRollCritical
|
||||
condition: C
|
||||
for: 2m
|
||||
noDataState: OK
|
||||
execErrState: OK
|
||||
annotations:
|
||||
summary: "Print paper depleted on edge2"
|
||||
description: "NuPrint 210 reports paper depleted while the printer is still online. Load a new roll, drain the hardware buffer if needed, then replay DeadLetter jobs from /print-log."
|
||||
runbook: "1. Load a fresh roll 2. Drain the hardware buffer if paper-out happened mid-job 3. Open https://print.iamworkin.lan/print-log 4. Retry DeadLetter jobs after the state clears."
|
||||
labels:
|
||||
severity: critical
|
||||
service: print-web
|
||||
alert_channel: thermal_print
|
||||
data:
|
||||
- refId: A
|
||||
relativeTimeRange: {from: 120, to: 0}
|
||||
datasourceUid: prometheus
|
||||
model: {expr: 'print_printer_state{job="printweb-otel"} == 3 and print_printer_online{job="printweb-otel"} == 1', instant: true, refId: A}
|
||||
- refId: B
|
||||
relativeTimeRange: {from: 120, to: 0}
|
||||
datasourceUid: __expr__
|
||||
model: {type: reduce, expression: A, reducer: last, refId: B}
|
||||
- refId: C
|
||||
relativeTimeRange: {from: 120, to: 0}
|
||||
datasourceUid: __expr__
|
||||
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [0], type: gt}}], refId: C}
|
||||
- uid: printer-jam-warning
|
||||
title: PrinterJamWarning
|
||||
condition: C
|
||||
for: 2m
|
||||
noDataState: OK
|
||||
execErrState: OK
|
||||
annotations:
|
||||
summary: "Print.Web printer jam on edge2"
|
||||
description: "Print.Web reports a paper/cutter jam state. IRC-only: clear the jam, drain the hardware buffer if bytes were queued, then retry affected jobs."
|
||||
runbook: "1. Clear paper/cutter path 2. Drain hardware buffer if CUPS queued bytes 3. Verify /api/print/status 4. Retry affected jobs."
|
||||
labels:
|
||||
severity: warning
|
||||
service: print-web
|
||||
alert_channel: irc
|
||||
data:
|
||||
- refId: A
|
||||
relativeTimeRange: {from: 120, to: 0}
|
||||
datasourceUid: prometheus
|
||||
model: {expr: 'print_printer_state{job="printweb-otel"} == 4', instant: true, refId: A}
|
||||
- refId: B
|
||||
relativeTimeRange: {from: 120, to: 0}
|
||||
datasourceUid: __expr__
|
||||
model: {type: reduce, expression: A, reducer: last, refId: B}
|
||||
- refId: C
|
||||
relativeTimeRange: {from: 120, to: 0}
|
||||
datasourceUid: __expr__
|
||||
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [0], type: gt}}], refId: C}
|
||||
- uid: printer-head-error-critical
|
||||
title: PrinterHeadErrorCritical
|
||||
condition: C
|
||||
for: 2m
|
||||
noDataState: OK
|
||||
execErrState: OK
|
||||
annotations:
|
||||
summary: "Print.Web printer head error on edge2"
|
||||
description: "Print.Web reports a thermal head or unrecoverable printer error. Critical routing may enter the thermal digest per existing policy; IRC remains the primary triage stream."
|
||||
runbook: "1. Let the printer cool if overheated 2. Power-cycle only after checking queued jobs 3. Verify /api/print/status 4. Retry jobs after the state clears."
|
||||
labels:
|
||||
severity: critical
|
||||
service: print-web
|
||||
alert_channel: thermal_print
|
||||
data:
|
||||
- refId: A
|
||||
relativeTimeRange: {from: 120, to: 0}
|
||||
datasourceUid: prometheus
|
||||
model: {expr: 'print_printer_state{job="printweb-otel"} == 5', instant: true, refId: A}
|
||||
- refId: B
|
||||
relativeTimeRange: {from: 120, to: 0}
|
||||
datasourceUid: __expr__
|
||||
model: {type: reduce, expression: A, reducer: last, refId: B}
|
||||
- refId: C
|
||||
relativeTimeRange: {from: 120, to: 0}
|
||||
datasourceUid: __expr__
|
||||
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [0], type: gt}}], refId: C}
|
||||
- uid: printer-cover-open-warning
|
||||
title: PrinterCoverOpenWarning
|
||||
condition: C
|
||||
for: 2m
|
||||
noDataState: OK
|
||||
execErrState: OK
|
||||
annotations:
|
||||
summary: "Print.Web printer cover open on edge2"
|
||||
description: "Print.Web reports the printer cover/lid is open. IRC-only: close the cover and verify /api/print/status before retrying jobs."
|
||||
runbook: "1. Close the printer cover 2. Verify /api/print/status returns online 3. Retry affected jobs only after the state clears."
|
||||
labels:
|
||||
severity: warning
|
||||
service: print-web
|
||||
alert_channel: irc
|
||||
data:
|
||||
- refId: A
|
||||
relativeTimeRange: {from: 120, to: 0}
|
||||
datasourceUid: prometheus
|
||||
model: {expr: 'print_printer_state{job="printweb-otel"} == 6', instant: true, refId: A}
|
||||
- refId: B
|
||||
relativeTimeRange: {from: 120, to: 0}
|
||||
datasourceUid: __expr__
|
||||
model: {type: reduce, expression: A, reducer: last, refId: B}
|
||||
- refId: C
|
||||
relativeTimeRange: {from: 120, to: 0}
|
||||
datasourceUid: __expr__
|
||||
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [0], type: gt}}], refId: C}
|
||||
- orgId: 1
|
||||
name: CI Runners
|
||||
folder: CI Alerts
|
||||
|
||||
@@ -304,7 +304,7 @@ public sealed class FleetManifestLintTests
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Monitoring_MustIncludeRequiredAlertRoutingGuards()
|
||||
public void Monitoring_MustAlertWhenLinuxRunnerDeploymentIsUnavailable()
|
||||
{
|
||||
var monitoring = File.ReadAllText(Path.Combine(Inventory.BluejayRoot, "apps", "monitoring", "noc-monitoring.yaml"));
|
||||
|
||||
@@ -315,15 +315,6 @@ public sealed class FleetManifestLintTests
|
||||
monitoring.Should().Contain("folder: CI Alerts");
|
||||
monitoring.Should().Contain("uid: linux-runner-offline");
|
||||
monitoring.Should().Contain("alert_channel: irc");
|
||||
|
||||
monitoring.Should().Contain("PrinterOfflineWarning");
|
||||
monitoring.Should().Contain("expr: print_printer_state{job=\"printweb-otel\"} == 2");
|
||||
monitoring.Should().Contain("IRC-only by design: do not thermal-print an alert when the thermal printer itself is offline.");
|
||||
monitoring.Should().Contain("PrintPaperRollCritical");
|
||||
monitoring.Should().Contain("expr: print_printer_state{job=\"printweb-otel\"} == 3 and print_printer_online{job=\"printweb-otel\"} == 1");
|
||||
monitoring.Should().Contain("PrinterJamWarning");
|
||||
monitoring.Should().Contain("PrinterHeadErrorCritical");
|
||||
monitoring.Should().Contain("PrinterCoverOpenWarning");
|
||||
}
|
||||
|
||||
[Fact]
|
||||
@@ -396,6 +387,38 @@ public sealed class FleetManifestLintTests
|
||||
violations.Should().BeEmpty();
|
||||
}
|
||||
|
||||
/// <summary>
/// Lints the RemoteDesktopPoolCrd documents in fc-desktop/remotedesktop-pools.yaml:
/// there must be exactly one pool per expected template slug, and each pool must
/// carry the pinned namespace, desiredSize, enabled, reconcileNow and
/// userVolumeMode values.
/// </summary>
[Fact]
public void RemoteDesktopPoolCrds_MustExplicitlyOptInHookReadyTemplates()
{
    // Expected spec.userVolumeMode keyed by spec.templateSlug. A null entry means
    // the lookup of spec.userVolumeMode must come back null for that pool
    // (presumably: the key is absent from the manifest; confirm against Scalar()).
    var expectedModes = new Dictionary<string, string?>(StringComparer.Ordinal)
    {
        ["browser-only"] = null,
        ["opensuse-xfce"] = "LateAttach",
        ["dev-workstation"] = "LateAttach",
        ["ai-station"] = "LateAttach",
        ["linux-xfce"] = "LateAttach",
        ["linux-xfce-rdp"] = "LateAttach",
    };

    var poolDocuments = Inventory.Documents
        .Where(document => document.Kind == "RemoteDesktopPoolCrd")
        .Where(document => document.RelativePath == "fc-desktop/remotedesktop-pools.yaml")
        .ToList();

    // Check uniqueness before building the dictionary: ToDictionary throws a bare
    // ArgumentException on a duplicate key (two pools sharing a slug, or two pools
    // missing templateSlug and both mapping to ""), which hides the real problem.
    var slugs = poolDocuments
        .Select(document => document.Scalar("spec", "templateSlug") ?? string.Empty)
        .ToList();
    slugs.Should().OnlyHaveUniqueItems(
        "every pool document must declare its own distinct spec.templateSlug");

    var pools = poolDocuments.ToDictionary(
        document => document.Scalar("spec", "templateSlug") ?? string.Empty,
        StringComparer.Ordinal);

    // Guards the indexer below: after this passes, every expected slug is present.
    pools.Keys.Should().BeEquivalentTo(expectedModes.Keys);

    foreach (var (slug, expectedMode) in expectedModes)
    {
        var pool = pools[slug];

        // The slug is included in each assertion so a failure names the pool.
        pool.Namespace.Should().Be("fc-desktop", "that is the namespace pinned for the '{0}' pool", slug);
        pool.Scalar("spec", "desiredSize").Should().Be("1", "that is the desiredSize pinned for the '{0}' pool", slug);
        pool.Scalar("spec", "enabled").Should().Be("true", "the '{0}' pool is expected to be enabled", slug);
        pool.Scalar("spec", "reconcileNow").Should().Be("true", "the '{0}' pool is expected to set reconcileNow", slug);
        pool.Scalar("spec", "userVolumeMode").Should().Be(expectedMode, "that is the userVolumeMode pinned for the '{0}' pool", slug);
    }
}
|
||||
|
||||
[Fact]
|
||||
public void PublicEgressDeployments_MustOptOutOfIamworkinLanSearchSuffixes()
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user