github-runner: add DM and WorldBuilder runners

2026-05-18 17:44:29 -05:00
12 changed files with 549 additions and 557 deletions
--- a/apps/brochure/README.md
+++ b/apps/brochure/README.md
@@ -0,0 +1,27 @@
 # FlowerCore Brochure
 `apps/brochure` hosts the public Brochure web app that was carved out of `FlowerCore.Intranet.Web`.
 ArgoCD's `apps/*` ApplicationSet will create `infra-brochure` after this
 directory lands on `main`.
 ## Runtime
 - Host: `https://brochure.flowercore.io`
 - Namespace: `brochure`
 - Deployment: `brochure-web`
 - Image: `localhost/fc-brochure-web:v20260524-sprint32`
 - Port: `8080`
 - Public route method allowlist: `GET` and `HEAD`
 ## Operator Actions
 1. Publish and import `localhost/fc-brochure-web:v20260524-sprint32` to every
   RKE2 node before sync, using the same podman save + `ctr images import`
   flow as the Intranet deployment.
 2. Create the Cloudflare DNS record for `brochure.flowercore.io` pointing at
   the FlowerCore public edge.
 3. Verify `infra-brochure` appears in ArgoCD, the certificate becomes Ready,
   and `GET https://brochure.flowercore.io/` returns `200`.
 The route intentionally does not expose `/ops/*` or `/admin/*`; the Brochure
 web app returns `404` for those paths and Traefik only forwards read methods.
--- a/apps/brochure/brochure.yaml
+++ b/apps/brochure/brochure.yaml
@@ -0,0 +1,131 @@
 # FlowerCore Brochure public host
 #
 # Thin Blazor host for public What's New, walkthrough, and gallery content
 # carved out of FlowerCore.Intranet.Web. The ApplicationSet creates
 # infra-brochure from this directory after merge.
 ---
 apiVersion: v1
 kind: Namespace
 metadata:
  name: brochure
  labels:
    app.kubernetes.io/part-of: flowercore
 ---
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: brochure-web
  namespace: brochure
  labels:
    app: brochure-web
    app.kubernetes.io/name: brochure-web
    app.kubernetes.io/part-of: flowercore
 spec:
  replicas: 1
  revisionHistoryLimit: 3
  selector:
    matchLabels:
      app: brochure-web
  template:
    metadata:
      labels:
        app: brochure-web
        app.kubernetes.io/name: brochure-web
        app.kubernetes.io/part-of: flowercore
    spec:
      containers:
        - name: brochure-web
          image: localhost/fc-brochure-web:v20260524-sprint32
          imagePullPolicy: Never
          ports:
            - containerPort: 8080
              name: http
          env:
            - name: ASPNETCORE_ENVIRONMENT
              value: Production
            - name: ASPNETCORE_URLS
              value: "http://+:8080"
          resources:
            requests:
              cpu: "25m"
              memory: "128Mi"
            limits:
              cpu: "500m"
              memory: "512Mi"
          readinessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 10
            periodSeconds: 10
          livenessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 30
            periodSeconds: 30
          securityContext:
            runAsNonRoot: true
            runAsUser: 1654
            runAsGroup: 1654
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
            capabilities:
              drop:
                - ALL
          volumeMounts:
            - name: tmp
              mountPath: /tmp
      volumes:
        - name: tmp
          emptyDir: {}
 ---
 apiVersion: v1
 kind: Service
 metadata:
  name: brochure-web
  namespace: brochure
  labels:
    app: brochure-web
    app.kubernetes.io/name: brochure-web
    app.kubernetes.io/part-of: flowercore
 spec:
  type: ClusterIP
  selector:
    app: brochure-web
  ports:
    - name: http
      port: 8080
      targetPort: http
 ---
 # TLS certificate for the brochure host. The resulting secret
 # (`brochure-web-tls`) is the one referenced by the `brochure-web-public`
 # IngressRoute's `tls.secretName` in this same file.
 apiVersion: cert-manager.io/v1
 kind: Certificate
 metadata:
  name: brochure-web-tls
  namespace: brochure
 spec:
  secretName: brochure-web-tls
  issuerRef:
    name: step-ca-acme
    kind: ClusterIssuer
  dnsNames:
    - brochure.flowercore.io
  # 720h = 30-day certificate lifetime; 240h = renew 10 days before expiry.
  duration: 720h
  renewBefore: 240h
 ---
 # Public Traefik route for the brochure host on the `websecure` entry point.
 apiVersion: traefik.io/v1alpha1
 kind: IngressRoute
 metadata:
  name: brochure-web-public
  namespace: brochure
 spec:
  entryPoints:
    - websecure
  routes:
    # The method allowlist is enforced by the matcher itself: only GET and HEAD
    # requests for this host match the rule, so other methods are not forwarded
    # to the brochure-web Service by this route.
    - match: Host(`brochure.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
      kind: Rule
      services:
        - name: brochure-web
          port: 8080
  tls:
    # Secret produced by the `brochure-web-tls` Certificate in this namespace.
    secretName: brochure-web-tls
--- a/apps/fc-build-windows/README.md
+++ b/apps/fc-build-windows/README.md
@@ -1,263 +0,0 @@
 # fc-build-windows runner gate
 Status: OPEN-WITH-OPERATOR-ACTION as of 2026-05-20.
 This directory is intentionally not a live runner deployment. It records the
 exact gate for bringing up the Windows self-hosted runner fleet without faking
 capacity in GitHub or Kubernetes.
 ## Lane evidence
 - `D:\git\FlowerCore\FlowerCore.Notes\docs\dashboards\decisions-waiting.html`
  lines 15078-15085: Q-MR-82 says the Updater Windows Sandbox E2E run is
  queued and `bluejay-ws-sandbox-1` is offline.
 - `D:\git\FlowerCore\FlowerCore.Notes\memory\project_morning_routine_8_2026_05_20.md`:
  Morning Routine #8 carries Q-MR-82 as the fleet-wide Windows runner gap.
 - `D:\git\FlowerCore\FlowerCore.Notes\docs\standards\sprint-37-codex-dispatch-log-2026-05-19.md`
  lines 76, 84-85, and 97: keep BLUEJAY-WS out of runner plans, merge Linux
  runner expansion separately, and keep true Windows-only workflows parked on
  the Windows runner host substrate path.
 - `D:\git\FlowerCore\FlowerCore.Notes\docs\ai-agents\codex-prompts\2026-05-20-xxxxl-sprint-42-orchestrator-briefs.md`
  lane Cx-5: land a deployment only if a Windows runner image/substrate is
  ready; otherwise commit an operator-action gate.
 - `D:\git\FlowerCore\FlowerCore.Notes\memory\feedback_bluejay_ws_never_a_github_runner.md`:
  BLUEJAY-WS is operator-only territory; Windows runners belong on a dedicated
  KubeVirt Windows VM such as `ci1` or a sibling VM.
 ## Live probe summary
 Commands run on 2026-05-20 from `D:\git\FlowerCore\bluejay-infra`:
 ```powershell
 $env:KUBECONFIG="$env:USERPROFILE\.kube\rke2.yaml"
 kubectl get nodes -o jsonpath='{range .items[*]}{.metadata.name}{"`t"}{.metadata.labels.kubernetes\.io/os}{"`n"}{end}'
 ```
 Result: `rke2-agent1`, `rke2-agent2`, and `rke2-server` all report
 `kubernetes.io/os=linux`. There is no Windows Kubernetes node, so Windows
 containers on RKE2 cannot satisfy `fc-build-windows`.
 ```powershell
 kubectl -n kubevirt-vms get vm,vmi,pods -o wide
 ```
 Result: KubeVirt is healthy and `ci1` is `Running` / `Ready=True` on
 `rke2-agent1` with VMI IP `10.42.103.35`.
 ```powershell
 virtctl --kubeconfig $env:USERPROFILE\.kube\rke2.yaml port-forward vm/ci1.kubevirt-vms 15985:5985
 ```
 Result during port tests: `dial tcp 10.42.103.35:5985: connect: no route to
 host`. The same result was seen for RDP 3389 and SSH 22. The VM exists, but it
 is not remotely reachable for runner bootstrap from this lane.
 ```powershell
 gh api /repos/astoltz/FlowerCore.Updater/actions/runners `
  --jq '.runners[]? | {name,status,busy,labels:[.labels[].name]}'
 gh run list --repo astoltz/FlowerCore.Updater `
  --workflow "Updater Windows Sandbox E2E" --limit 5
 ```
 Result: GitHub has one Updater runner, `bluejay-ws-sandbox-1`, with
 `status=offline`; run `26150689447` is still `queued`.
 ## Feasibility classification
 ### Option A: Windows containers on RKE2
 Not feasible without operator-physical infrastructure work. Kubernetes Windows
 containers require a Windows node. The current cluster has Linux-only RKE2
 nodes.
 ### Option B: KubeVirt Windows VM
 Partially present, not deployable from this lane.
 `apps/kubevirt-vms/ci1.yaml` already defines a Windows Server 2025 KubeVirt VM
 using `localhost/fc-win-server-2025:v1`, and the live VM is running. However:
 - the guest is not reachable over RDP, WinRM, or SSH through `virtctl
  port-forward`;
 - the current root disk is a `containerDisk`, so runner installation inside the
  running guest is not a durable fleet state unless the first-boot automation
  re-registers on every boot or the VM is moved to a persistent PVC-backed
  disk;
 - FC.Updater `Updater Windows Sandbox E2E` uses
  `[self-hosted, windows, windows-sandbox]`, while `fc-build-windows` build jobs
  use `[self-hosted, windows, fc-build-windows]`. Do not advertise
  `windows-sandbox` until Windows Sandbox has been proven in the guest.
 ### Option C: bluejay-ws-sandbox-1
 Operator-only emergency fallback. GitHub shows it registered but offline. The
 current memory says BLUEJAY-WS must not be a fleet runner host, so this lane
 does not start or re-register it. If the operator deliberately overrides the
 policy to drain an emergency queue, start the existing visible runner console
 from the BLUEJAY-WS desktop and treat that as temporary break-glass, not the
 permanent Q-MR-82 closure.
 ## Operator action plan
 ### 1. Pick the Windows host class
 Use `ci1` or a sibling Windows Server 2025 VM for WPF build/test jobs that need
 `fc-build-windows`.
 Use a Windows 11 Pro/Enterprise KubeVirt VM for Updater or WorldBuilder
 Windows Sandbox gates, unless Windows Sandbox support is explicitly proven on
 the selected guest. The workflow labels must match the real capability:
 - WPF build runner: `self-hosted,windows,fc-build-windows,ci1`
 - Sandbox runner: `self-hosted,windows,windows-sandbox,ci-sandbox1`
 ### 2. Make the VM reachable and durable
 From BLUEJAY-WS:
 ```powershell
 $env:KUBECONFIG="$env:USERPROFILE\.kube\rke2.yaml"
 kubectl -n kubevirt-vms get vm,vmi,pods -o wide
 virtctl --kubeconfig $env:KUBECONFIG vnc ci1 -n kubevirt-vms
 virtctl --kubeconfig $env:KUBECONFIG port-forward vm/ci1.kubevirt-vms 13389:3389
 virtctl --kubeconfig $env:KUBECONFIG port-forward vm/ci1.kubevirt-vms 15985:5985
 ```
 Before runner registration, fix the current port-forward failure. The expected
 state is that RDP or WinRM accepts a connection through the control plane.
 For durability, either:
 - move the runner VM to a persistent PVC-backed root disk; or
 - keep `containerDisk` and bake first-boot runner registration into the sysprep
  flow, minting a fresh registration token on every boot from a durable
  credential source instead of relying on a stored one-hour token.
 Do not install a runner by hand into a transient VM and call Q-MR-82 closed.
 ### 3. Install runner prerequisites inside the VM
 Run in an elevated PowerShell session in the Windows runner guest:
 ```powershell
 winget install Microsoft.DotNet.SDK.10 --silent
 winget install Microsoft.DotNet.DesktopRuntime.8 --silent
 winget install Microsoft.PowerShell --silent
 winget install Git.Git --silent
 winget install Microsoft.VisualStudio.2022.BuildTools --silent
 winget install Google.Chrome --silent
 ```
 For a Sandbox-capable runner only:
 ```powershell
 Enable-WindowsOptionalFeature -Online -FeatureName Containers-DisposableClientVM -All
 Restart-Computer -Force
 ```
 After reboot:
 ```powershell
 Get-CimInstance -ClassName Win32_OptionalFeature -Filter "Name='Containers-DisposableClientVM'"
 Test-Path C:\Windows\System32\WindowsSandbox.exe
 ```
 ### 4. Register repo-scoped GitHub runners
 The `astoltz` account uses repo-scoped runners. Generate a fresh one-hour
 registration token per repo immediately before `config.cmd`.
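 Run the following on the Windows runner guest itself (it installs the runner
 under `C:\fc-ghr` on the local machine), from a trusted shell where `gh` is
 authenticated: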
 ```powershell
 # Registers one repo-scoped fc-build-windows runner per repository on this
 # machine and installs each one as a Windows service.
 $repos = @(
  "FlowerCore.Updater",
  "FlowerCore.WorldBuilder",
  "FlowerCore.DeviceManagement"
 )
 foreach ($repo in $repos) {
  # Mint the registration token immediately before config.cmd for each repo.
  $token = gh api -X POST "/repos/astoltz/$repo/actions/runners/registration-token" --jq .token
  if (-not $token) {
    # Stop here instead of handing config.cmd an empty token.
    throw "Could not mint a registration token for astoltz/$repo; check gh authentication."
  }
  $repoSlug = $repo.ToLowerInvariant().Replace("flowercore.", "").Replace(".", "-")
  $runnerDir = "C:\fc-ghr\$repoSlug-fc-build-windows"
  New-Item -ItemType Directory -Force -Path $runnerDir | Out-Null
  Set-Location $runnerDir
  # Download and unpack the runner package only when it is not already present.
  if (-not (Test-Path ".\config.cmd")) {
    Invoke-WebRequest `
      -Uri "https://github.com/actions/runner/releases/download/v2.323.0/actions-runner-win-x64-2.323.0.zip" `
      -OutFile "actions-runner.zip"
    Add-Type -AssemblyName System.IO.Compression.FileSystem
    [System.IO.Compression.ZipFile]::ExtractToDirectory((Resolve-Path actions-runner.zip), $runnerDir)
  }
  # --runasservice makes config.cmd install and start the Windows service.
  # The Windows runner package ships no svc.ps1 (svc.sh exists only on
  # Linux/macOS), so the service must be created during configuration.
  # Requires an elevated session.
  .\config.cmd `
    --url "https://github.com/astoltz/$repo" `
    --token $token `
    --name "ci1-$repoSlug-fc-build-windows" `
    --labels "self-hosted,windows,fc-build-windows,ci1" `
    --work "_work" `
    --unattended `
    --replace `
    --runasservice
 }
 ```
 For Updater Sandbox E2E, register only after the guest proves Sandbox support,
 and use `windows-sandbox` labels:
 ```powershell
 $token = gh api -X POST "/repos/astoltz/FlowerCore.Updater/actions/runners/registration-token" --jq .token
 .\config.cmd `
  --url "https://github.com/astoltz/FlowerCore.Updater" `
  --token $token `
  --name "ci-sandbox1-updater" `
  --labels "self-hosted,windows,windows-sandbox,ci-sandbox1" `
  --work "_work" `
  --unattended `
  --replace
 ```
 Keep registration tokens out of Git and logs. The durable credential source for
 automation should be the existing 1Password item named `GitHub PAT (Runner
 Registration)`, used only to mint short-lived repo registration tokens.
 ### 5. Verify GitHub and workflow pickup
 ```powershell
 gh api /repos/astoltz/FlowerCore.Updater/actions/runners `
  --jq '.runners[] | select(.labels[].name == "windows-sandbox") | {name,status,busy,labels:[.labels[].name]}'
 gh api /repos/astoltz/FlowerCore.DeviceManagement/actions/runners `
  --jq '.runners[] | select(.labels[].name == "fc-build-windows") | {name,status,busy,labels:[.labels[].name]}'
 gh run list --repo astoltz/FlowerCore.Updater `
  --workflow "Updater Windows Sandbox E2E" --limit 3
 ```
 Q-MR-82 can be marked resolved only after the Updater run moves from `queued` to
 `in_progress` or `completed` on an online runner, or after the affected WPF
 build repos show online `fc-build-windows` repo-scoped runners and their queued
 jobs start.
 ## Break-glass BLUEJAY-WS command
 Only if the operator explicitly overrides the "BLUEJAY-WS is not a runner"
 policy to drain a queue:
 ```powershell
 Set-Location C:\fc-ghr\updater-sandbox
 .\run.cmd
 ```
 If a Windows service exists:
 ```powershell
 Get-Service 'actions.runner.*'
 Start-Service 'actions.runner.*'
 ```
 This does not close Q-MR-82 permanently. It is a temporary queue drain until a
 dedicated VM runner is online.
--- a/apps/fc-build-windows/kustomization.yaml
+++ b/apps/fc-build-windows/kustomization.yaml
@@ -1,4 +0,0 @@
 apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 resources:
  - operator-gate-configmap.yaml
--- a/apps/fc-build-windows/operator-gate-configmap.yaml
+++ b/apps/fc-build-windows/operator-gate-configmap.yaml
@@ -1,61 +0,0 @@
 apiVersion: v1
 kind: ConfigMap
 metadata:
  name: fc-build-windows-operator-gate
  namespace: kubevirt-vms
  labels:
    app.kubernetes.io/name: fc-build-windows
    app.kubernetes.io/component: operator-gate
    app.kubernetes.io/part-of: github-runner
    flowercore.io/q-card: Q-MR-82
  annotations:
    flowercore.io/outcome: OPEN-WITH-OPERATOR-ACTION
    flowercore.io/live-runner: "false"
 data:
  outcome: OPEN-WITH-OPERATOR-ACTION
  gate.md: |
    Do not treat this ConfigMap as runner capacity.
    Current probe, 2026-05-20:
    - RKE2 nodes are linux-only; Windows containers require a Windows node.
    - KubeVirt `ci1` is Running/Ready, but RDP 3389, WinRM 5985, and SSH 22
      through `virtctl port-forward` return `connect: no route to host`.
    - GitHub Updater runner list has only `bluejay-ws-sandbox-1`, status
      offline. Updater Windows Sandbox E2E run 26150689447 remains queued.
    Required operator action:
    1. Make a dedicated Windows VM reachable and durable.
    2. Install .NET 10 SDK, .NET 8 Desktop Runtime, Git, VS Build Tools, and
       PowerShell 7.
    3. Register repo-scoped runners with short-lived GitHub registration tokens.
    4. Add `fc-build-windows` labels only to WPF build-capable guests.
    5. Add `windows-sandbox` labels only after Sandbox support is proven.
  registration-token-pattern.ps1: |
    $repo = "FlowerCore.Updater"
    $token = gh api -X POST "/repos/astoltz/$repo/actions/runners/registration-token" --jq .token
    $runnerDir = "C:\fc-ghr\updater-fc-build-windows"
    New-Item -ItemType Directory -Force -Path $runnerDir | Out-Null
    Set-Location $runnerDir
    # Install the Actions runner package here if config.cmd is absent.
    # --runasservice makes config.cmd install and start the Windows service.
    # The Windows runner package ships no svc.ps1 (svc.sh is Linux/macOS only).
    .\config.cmd `
      --url "https://github.com/astoltz/$repo" `
      --token $token `
      --name "ci1-updater-fc-build-windows" `
      --labels "self-hosted,windows,fc-build-windows,ci1" `
      --work "_work" `
      --unattended `
      --replace `
      --runasservice
  verification.ps1: |
    gh api /repos/astoltz/FlowerCore.Updater/actions/runners `
      --jq '.runners[] | {name,status,busy,labels:[.labels[].name]}'
    gh run list --repo astoltz/FlowerCore.Updater `
      --workflow "Updater Windows Sandbox E2E" --limit 3
    $env:KUBECONFIG="$env:USERPROFILE\.kube\rke2.yaml"
    kubectl -n kubevirt-vms get vm,vmi,pods -o wide
--- a/apps/fc-devicemgmt/argocd-application.yaml
+++ b/apps/fc-devicemgmt/argocd-application.yaml
@@ -0,0 +1,33 @@
 # Explicit ArgoCD Application shape for bootstrap/review.
 #
 # The live bluejay-infra ApplicationSet already discovers apps/* directories
 # and creates this same Application name (`infra-fc-devicemgmt`) automatically.
 # Keep repoURL on the internal Gitea ClusterIP URL; ArgoCD does not trust the
 # external step-ca HTTPS endpoint.
 #
 # NOTE(review): this manifest lives in apps/fc-devicemgmt, which is also the
 # `spec.source.path` it syncs, and it uses the same name as the
 # ApplicationSet-generated Application. If ArgoCD applies it as part of that
 # directory, the two definitions may contend over one resource — confirm the
 # file is excluded from sync or is intentionally identical to the generated
 # spec.
 apiVersion: argoproj.io/v1alpha1
 kind: Application
 metadata:
  name: infra-fc-devicemgmt
  namespace: argocd
  labels:
    app.kubernetes.io/name: fc-devicemgmt
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
 spec:
  project: default
  source:
    repoURL: http://gitea-clusterip.gitea.svc.cluster.local:3000/bluejay/bluejay-infra.git
    targetRevision: main
    path: apps/fc-devicemgmt
  destination:
    server: https://kubernetes.default.svc
    namespace: fc-devicemgmt
  syncPolicy:
    # Automated sync with prune + selfHeal: resources removed from Git are
    # deleted and manual drift in the cluster is reverted.
    automated:
      prune: true
      selfHeal: true
    syncOptions:
      - CreateNamespace=true
      - ServerSideApply=true
--- a/apps/fc-devicemgmt/deployment-operator.yaml
+++ b/apps/fc-devicemgmt/deployment-operator.yaml
@@ -47,7 +47,7 @@ spec:
        fsGroupChangePolicy: OnRootMismatch
      containers:
        - name: operator
-          image: localhost/fc-devicemgmt-operator:v20260519-sp34cl3-fix
+          image: localhost/fc-devicemgmt-operator:v20260512-cx5
          imagePullPolicy: Never
          ports:
            - name: metrics
--- a/apps/fc-devicemgmt/deployment-web.yaml
+++ b/apps/fc-devicemgmt/deployment-web.yaml
@@ -4,22 +4,6 @@
 # Sprint 9+ lane. This manifest is static-valid without requiring the image to
 # exist yet; import localhost/fc-devicemgmt-web:<tag> to all schedulable RKE2
 # nodes before letting ArgoCD sync a live rollout.
 #
 # SCALED TO 0 — 2026-05-19 morning-routine cleanup.
 # The Web pod cannot start until TWO upstream gaps close:
 #   1. MySQL DB instance `flowercore_devicemgmt` (user `fc_devicemgmt`) is
 #      provisioned via fc-mysql Manager. The cluster currently has ZERO
 #      MySqlInstanceCrds and no `mysql.fc-mysql.svc:3306` Service, so the
 #      deployment-web container env `FlowerCore__Database__Host=mysql.fc-mysql.svc`
 #      points at nothing. Provision via the fc-mysql Manager UI/REST/MCP.
 #   2. 1Password vault item `IAmWorkin/FlowerCore DeviceManagement Runtime`
 #      with 5 fields (DB-Password, mtls-ca.pem, mtls-client.crt, mtls-client.key,
 #      mtls-chain.pem) — see apps/fc-devicemgmt/1password-item.yaml. Mint mTLS
 #      from step-ca-agent ClusterIssuer per ADR-126; DB-Password must match the
 #      password configured for the MySQL user.
 # Re-enable: change replicas back to 2 after both gaps close. The image tag
 # in this file (v20260512-cx5) MAY also need a refresh — it predates the
 # Sprint 34 Cl-3 operator fix; Web may have an analogous bug.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
@@ -36,7 +20,7 @@ metadata:
  annotations:
    flowercore.io/traceability-standard: k8s-pod-ownership-and-traceability-standard
 spec:
-  replicas: 0
+  replicas: 2
  revisionHistoryLimit: 3
  selector:
    matchLabels:
--- a/apps/github-runner/README.md
+++ b/apps/github-runner/README.md
@@ -28,6 +28,10 @@ Sprint 32 final long-tail wave adds 16 two-replica Deployments:
 `FlowerCore.Provisioning`, `FlowerCore.Redis`, `FlowerCore.MessageBoard`, and
 `FlowerCore.MenuBoard`.
 Sprint 37 Cx-2 closes the audited Linux runner gaps for
 `FlowerCore.DeviceManagement` and `FlowerCore.WorldBuilder` with the same
 two-replica `emptyDir` pattern.
 ## Post-Merge Proof
 After the PR is merged and ArgoCD syncs, verify the runner fleet:
@@ -47,7 +51,7 @@ for repo in FlowerCore.Common FlowerCore.Shared.Pos FlowerCore.Puppet FlowerCore
            FlowerCore.Distribution FlowerCore.Scoreboard FlowerCore.SegmentDisplay \
            FlowerCore.Signage.Contracts FlowerCore.SignalControl FlowerCore.Intranet.Web \
            FlowerCore.Provisioning FlowerCore.Redis FlowerCore.MessageBoard \
-            FlowerCore.MenuBoard; do
+            FlowerCore.MenuBoard FlowerCore.DeviceManagement FlowerCore.WorldBuilder; do
  echo "=== $repo ==="
  gh api "/repos/astoltz/$repo/actions/runners" \
    --jq '.runners[] | select(.labels[].name == "fc-build-linux") | {name,status,busy,labels:[.labels[].name]}'
@@ -64,6 +68,20 @@ gh run list --repo astoltz/FlowerCore.Shared.Pos \
 If the latest run is still queued after runner registration, rerun the workflow
 from GitHub Actions and verify it lands on an `rke2-linux-*` runner.
 ## Sprint 37 Cx-2 Gap Audit
 The 2026-05-18 GitHub workflow scan found these remaining repos with
 `runs-on: [self-hosted, linux, fc-build-linux]` but no K8s runner Deployment:
 `FlowerCore.AiStation.Linux`, `FlowerCore.PHP`, `FlowerCore.PiManager`,
 `FlowerCore.Shared.Barcodes`, `FlowerCore.Shared.Lookup`,
 `FlowerCore.Shared.Nodes`, `FlowerCore.Shared.PrintClient`,
 `FlowerCore.Shared.Relay`, `FlowerCore.Shared.ShowRunner`, and
 `FlowerCore.Shared.Storage`.
 Mixed/platform repos also have Linux workflow legs but need owner review before
 adding Linux runner Deployments: `FlowerCore.Library.Mac`,
 `FlowerCore.Signage.Agent.AppleTv`, and `FlowerCore.Signage.Player.Wpf`.
 ## Failure Notes
 - `actions/setup-dotnet` permission error at `/usr/share/dotnet`: check that
--- a/apps/github-runner/github-runner.yaml
+++ b/apps/github-runner/github-runner.yaml
@@ -16,6 +16,8 @@
 #   DNS, Distribution, Scoreboard, SegmentDisplay, Signage.Contracts,
 #   SignalControl, Intranet.Web, Provisioning, Redis, MessageBoard, MenuBoard
 #   (Sprint 32 final long-tail wave; two replicas each, emptyDir cache)
 #   FlowerCore.DeviceManagement, WorldBuilder (Sprint 37 Cx-2 runner gap
 #   closure; two replicas each, emptyDir cache)
 #
 # Non-root CI safety:
 #   Runner pods run as uid 1001. HOME, DOTNET_INSTALL_DIR, DOTNET_CLI_HOME,
@@ -3767,9 +3769,271 @@ spec:
        - name: tmp
          emptyDir: {}
      restartPolicy: Always
 ---
 # Runner for FlowerCore.DeviceManagement. Added 2026-05-18 (Sprint 37 Cx-2)
 # to close the Linux CI capacity gap for the DM service-tier workflows. Mirrors
 # the Sprint 32 long-tail emptyDir pattern: two replicas, shared
 # 1Password-backed ACCESS_TOKEN, and the common ServiceAccount.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: github-runner-devicemgmt
  namespace: github-runner
  labels:
    app.kubernetes.io/name: github-runner-devicemgmt
    app.kubernetes.io/component: runner
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/created-by: argocd
    flowercore.io/runner-repo: devicemgmt
    flowercore.io/github-repo: FlowerCore.DeviceManagement
 spec:
  replicas: 2
  selector:
    matchLabels:
      app.kubernetes.io/name: github-runner-devicemgmt
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app.kubernetes.io/name: github-runner-devicemgmt
        app.kubernetes.io/component: runner
        app.kubernetes.io/part-of: flowercore
        flowercore.io/created-by: argocd
        flowercore.io/runner-repo: devicemgmt
        flowercore.io/github-repo: FlowerCore.DeviceManagement
    spec:
      serviceAccountName: github-runner
      securityContext:
        runAsNonRoot: true
        runAsUser: 1001
        runAsGroup: 1001
        fsGroup: 1001
      # Prepares the runner-home volume before the runner container starts:
      # creates the .dotnet and .nuget directories and hands them to uid/gid
      # 1001, the user set in the pod-level securityContext.
      # NOTE: the runner container mounts the separate nuget-cache volume at
      # /home/runner/.nuget/packages, so the `packages` directory created here
      # is shadowed by that mount at runtime.
      initContainers:
        - name: setup-runner-home
          image: busybox:1.36
          command:
            - sh
            - -c
            - |
              set -e
              mkdir -p /home/runner/.dotnet /home/runner/.nuget/packages /home/runner/.nuget/NuGet
              chown -R 1001:1001 /home/runner/.dotnet /home/runner/.nuget
              chmod -R 755 /home/runner/.dotnet /home/runner/.nuget
          # Overrides the pod-level runAsNonRoot/runAsUser for this container
          # only: it runs as uid 0 to perform the chown above.
          securityContext:
            runAsUser: 0
            runAsNonRoot: false
          volumeMounts:
            - name: runner-home
              mountPath: /home/runner
      containers:
        - name: runner
          image: myoung34/github-runner:latest
          imagePullPolicy: Always
          env:
            - name: REPO_URL
              value: "https://github.com/astoltz/FlowerCore.DeviceManagement"
            - name: RUNNER_NAME_PREFIX
              value: "rke2-linux-devicemgmt"
            - name: RUNNER_WORKDIR
              value: "/tmp/runner/work"
            - name: EPHEMERAL
              value: "true"
            - name: LABELS
              value: "self-hosted,linux,fc-build-linux"
            - name: HOME
              value: "/home/runner"
            - name: DOTNET_INSTALL_DIR
              value: "/home/runner/.dotnet"
            - name: DOTNET_CLI_TELEMETRY_OPTOUT
              value: "1"
            - name: DOTNET_NOLOGO
              value: "1"
            - name: DOTNET_GENERATE_ASPNET_CERTIFICATE
              value: "false"
            - name: DOTNET_CLI_HOME
              value: "/home/runner"
            - name: NUGET_PACKAGES
              value: "/home/runner/.nuget/packages"
            - name: XDG_CACHE_HOME
              value: "/home/runner/.cache"
            - name: RUNNER_TOOL_CACHE
              value: "/home/runner/_tool"
            - name: ACCESS_TOKEN
              valueFrom:
                secretKeyRef:
                  name: github-runner-token
                  key: credential
            - name: RUN_AS_ROOT
              value: "false"
          resources:
            requests:
              cpu: "500m"
              memory: "1Gi"
            limits:
              cpu: "2000m"
              memory: "4Gi"
          volumeMounts:
            - name: runner-home
              mountPath: /home/runner
            - name: nuget-cache
              mountPath: /home/runner/.nuget/packages
            - name: tmp
              mountPath: /tmp
          livenessProbe:
            exec:
              command:
                - /bin/sh
                - -c
                - "pgrep -f Runner.Listener > /dev/null"
            initialDelaySeconds: 30
            periodSeconds: 30
            failureThreshold: 3
      volumes:
        - name: runner-home
          emptyDir: {}
        - name: nuget-cache
          emptyDir:
            sizeLimit: 2Gi
        - name: tmp
          emptyDir: {}
      restartPolicy: Always
 ---
 # Runner for FlowerCore.WorldBuilder. Added 2026-05-18 (Sprint 37 Cx-2)
 # to unblock WorldBuilder Linux CI jobs after the runner fleet audit found no
 # repo-scoped deployment for the GitHub repo. Mirrors the Sprint 32 long-tail
 # emptyDir pattern: two replicas, shared 1Password-backed ACCESS_TOKEN, and
 # the common ServiceAccount.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: github-runner-worldbuilder
  namespace: github-runner
  labels:
    app.kubernetes.io/name: github-runner-worldbuilder
    app.kubernetes.io/component: runner
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/created-by: argocd
    flowercore.io/runner-repo: worldbuilder
    flowercore.io/github-repo: FlowerCore.WorldBuilder
 spec:
  replicas: 2
  selector:
    matchLabels:
      app.kubernetes.io/name: github-runner-worldbuilder
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app.kubernetes.io/name: github-runner-worldbuilder
        app.kubernetes.io/component: runner
        app.kubernetes.io/part-of: flowercore
        flowercore.io/created-by: argocd
        flowercore.io/runner-repo: worldbuilder
        flowercore.io/github-repo: FlowerCore.WorldBuilder
    spec:
      serviceAccountName: github-runner
      securityContext:
        runAsNonRoot: true
        runAsUser: 1001
        runAsGroup: 1001
        fsGroup: 1001
      # Prepares the runner-home volume before the runner container starts:
      # creates the .dotnet and .nuget directories and hands them to uid/gid
      # 1001, the user set in the pod-level securityContext.
      # NOTE: the runner container mounts the separate nuget-cache volume at
      # /home/runner/.nuget/packages, so the `packages` directory created here
      # is shadowed by that mount at runtime.
      initContainers:
        - name: setup-runner-home
          image: busybox:1.36
          command:
            - sh
            - -c
            - |
              set -e
              mkdir -p /home/runner/.dotnet /home/runner/.nuget/packages /home/runner/.nuget/NuGet
              chown -R 1001:1001 /home/runner/.dotnet /home/runner/.nuget
              chmod -R 755 /home/runner/.dotnet /home/runner/.nuget
          # Overrides the pod-level runAsNonRoot/runAsUser for this container
          # only: it runs as uid 0 to perform the chown above.
          securityContext:
            runAsUser: 0
            runAsNonRoot: false
          volumeMounts:
            - name: runner-home
              mountPath: /home/runner
      containers:
        - name: runner
          image: myoung34/github-runner:latest
          imagePullPolicy: Always
          env:
            - name: REPO_URL
              value: "https://github.com/astoltz/FlowerCore.WorldBuilder"
            - name: RUNNER_NAME_PREFIX
              value: "rke2-linux-worldbuilder"
            - name: RUNNER_WORKDIR
              value: "/tmp/runner/work"
            - name: EPHEMERAL
              value: "true"
            - name: LABELS
              value: "self-hosted,linux,fc-build-linux"
            - name: HOME
              value: "/home/runner"
            - name: DOTNET_INSTALL_DIR
              value: "/home/runner/.dotnet"
            - name: DOTNET_CLI_TELEMETRY_OPTOUT
              value: "1"
            - name: DOTNET_NOLOGO
              value: "1"
            - name: DOTNET_GENERATE_ASPNET_CERTIFICATE
              value: "false"
            - name: DOTNET_CLI_HOME
              value: "/home/runner"
            - name: NUGET_PACKAGES
              value: "/home/runner/.nuget/packages"
            - name: XDG_CACHE_HOME
              value: "/home/runner/.cache"
            - name: RUNNER_TOOL_CACHE
              value: "/home/runner/_tool"
            - name: ACCESS_TOKEN
              valueFrom:
                secretKeyRef:
                  name: github-runner-token
                  key: credential
            - name: RUN_AS_ROOT
              value: "false"
          resources:
            requests:
              cpu: "500m"
              memory: "1Gi"
            limits:
              cpu: "2000m"
              memory: "4Gi"
          volumeMounts:
            - name: runner-home
              mountPath: /home/runner
            - name: nuget-cache
              mountPath: /home/runner/.nuget/packages
            - name: tmp
              mountPath: /tmp
          livenessProbe:
            exec:
              command:
                - /bin/sh
                - -c
                - "pgrep -f Runner.Listener > /dev/null"
            initialDelaySeconds: 30
            periodSeconds: 30
            failureThreshold: 3
      volumes:
        - name: runner-home
          emptyDir: {}
        - name: nuget-cache
          emptyDir:
            sizeLimit: 2Gi
        - name: tmp
          emptyDir: {}
      restartPolicy: Always
 # Long-tail runner pattern:
 #
-# Sprint 32 added the final 16 long-tail repo-scoped Deployments above. Keep
+# Sprint 32 added the final 16 long-tail repo-scoped Deployments, and Sprint 37
-# Common as the only PVC-backed runner at replicas: 1. Any future multi-replica
+# added the DM + WorldBuilder runner gap closures above. Keep Common as the
-# runner must use per-pod emptyDir caches, not a shared ReadWriteOnce PVC.
+# only PVC-backed runner at replicas: 1. Any future multi-replica runner must
 # use per-pod emptyDir caches, not a shared ReadWriteOnce PVC.
--- a/apps/monitoring/noc-monitoring.yaml
+++ b/apps/monitoring/noc-monitoring.yaml
@@ -729,7 +729,7 @@ data:
            expr: |
              kube_deployment_status_replicas_ready{
                namespace="github-runner",
-                deployment=~"github-runner(|-(sharedpos|puppet|signage|dms|telephony|print-web|chat|mysql|kiosk-linux))"
+                deployment=~"github-runner(|-.+)"
              } == 0
            for: 5m
            labels:
@@ -1273,55 +1273,24 @@ metadata:
 data:
  notify.py: |
    #!/usr/bin/env python3
-    """HTTP->IRC alert relay with thermal-printer DIGEST forwarding.
+    """HTTP->IRC alert relay with thermal printer forwarding for Grafana webhooks.
-
+    Listens on :9119, posts to #alerts on UnrealIRCd via raw IRC protocol.
-    Listens on :9119, posts to #alerts on UnrealIRCd, forwards to Print.Web
+    Alerts tagged alert_channel=thermal_print also POST to Print.Web /api/print/alert.
    /api/print/alert. Thermal printing is BATCHED into hourly digests by
    default so the printer no longer spam-fires per Grafana webhook.
    Routing (per Grafana webhook alert):
      - IRC: always per-event (operator likes the stream)
      - Thermal printer:
          * severity in {critical,disaster,page} OR
            label alert_channel=thermal_print_immediate -> print NOW
          * label alert_channel=thermal_print -> enqueue into hourly digest
          * everything else -> IRC only
      - RESOLVED webhooks remove the alert from the digest buffer
    Env vars (defaults preserve old behavior on first deploy):
      THERMAL_PRINT_ENABLED  default "true"   - master kill switch
      BATCH_INTERVAL_MIN     default "60"     - minutes between digest prints
      BATCH_MAX_PENDING      default "50"     - force-flush threshold
    HTTP surface:
      POST /         - Grafana webhook entry
      POST /flush    - manual digest flush (idempotent)
      GET  /         - status + config + buffer depth + stats
    """
-    import json, os, socket, sys, threading, time
+    import json, socket, sys, time
    from collections import defaultdict
    from datetime import datetime, timezone
    from http.server import HTTPServer, BaseHTTPRequestHandler
    from urllib.request import Request, urlopen
    from urllib.error import URLError
-    THERMAL_PRINT_ENABLED = os.environ.get("THERMAL_PRINT_ENABLED", "true").lower() == "true"
+    IRC_HOST = "unrealircd.irc.svc"  # short name: CoreDNS ndots:5 + iamworkin.lan template hijacks full .cluster.local (see memory)
-    BATCH_INTERVAL_MIN    = int(os.environ.get("BATCH_INTERVAL_MIN", "60"))
+    IRC_PORT = 6667
-    BATCH_MAX_PENDING     = int(os.environ.get("BATCH_MAX_PENDING", "50"))
+    IRC_NICK = "grafana-bot"
-
+    IRC_CHANNEL = "#alerts"
-    IRC_HOST      = os.environ.get("IRC_HOST", "unrealircd.irc.svc")
+    PRINT_WEB_URL = "http://10.0.57.16:5200/api/print/alert"
-    IRC_PORT      = int(os.environ.get("IRC_PORT", "6667"))
+    PRINT_ENABLED = True
    IRC_NICK      = os.environ.get("IRC_NICK", "grafana-bot")
    IRC_CHANNEL   = os.environ.get("IRC_CHANNEL", "#alerts")
    PRINT_WEB_URL = os.environ.get("PRINT_WEB_URL", "http://10.0.57.16:5200/api/print/alert")
    _buffer_lock = threading.Lock()
    _buffer = {}   # fingerprint -> {"alert": dict, "first_seen": float, "last_seen": float}
    _last_flush_time = time.time()
    _stats = {"webhooks_received": 0, "irc_sent": 0, "print_immediate": 0,
              "digest_flushed": 0, "buffer_dedup": 0, "buffer_added": 0,
              "buffer_resolved": 0, "started_at": time.time()}
    def send_irc(message):
        """Connect, handle PING, join, send, quit."""
        try:
            sock = socket.create_connection((IRC_HOST, IRC_PORT), timeout=15)
            sock.sendall(f"NICK {IRC_NICK}\r\n".encode())
@@ -1354,137 +1323,52 @@ data:
            time.sleep(0.5)
            sock.sendall(b"QUIT :alert delivered\r\n")
            sock.close()
            _stats["irc_sent"] += 1
            return True
        except Exception as e:
            print(f"[irc-notify] IRC send failed: {e}", file=sys.stderr)
            return False
-    def post_thermal(payload, kind):
+    def send_thermal_print(alert):
-        if not THERMAL_PRINT_ENABLED:
+        if not PRINT_ENABLED: return
-            print(f"[irc-notify] thermal disabled; skip {kind} ({payload.get('title','?')[:40]})", file=sys.stderr)
+        labels = alert.get("labels", {})
-            return False
+        annotations = alert.get("annotations", {})
        status = alert.get("status", "firing").upper()
        summary = annotations.get("summary", "")
        description = annotations.get("description", "")
        runbook = annotations.get("runbook", "")
        # Build a useful message: summary + description + runbook steps
        parts = []
        if summary: parts.append(summary)
        if description and description != summary: parts.append(description)
        if runbook: parts.append("STEPS: " + runbook)
        message = " | ".join(parts) if parts else labels.get("alertname", "Unknown alert")
        payload = {
            "title": labels.get("alertname", "Unknown"),
            "severity": labels.get("severity", "warning").capitalize(),
            "host": labels.get("instance", labels.get("host", "unknown")),
            "message": message,
            "eventId": alert.get("fingerprint", ""),
            "source": "Grafana",
            "status": "RESOLVED" if status == "RESOLVED" else "PROBLEM",
            "acknowledged": False
        }
        try:
            req = Request(PRINT_WEB_URL, data=json.dumps(payload).encode("utf-8"),
                          headers={"Content-Type": "application/json"}, method="POST")
            resp = urlopen(req, timeout=10)
-            if kind == "immediate": _stats["print_immediate"] += 1
+            print(f"[irc-notify] Thermal print sent: {resp.read().decode()}", file=sys.stderr)
            print(f"[irc-notify] thermal {kind} sent: {payload.get('title','?')[:50]}", file=sys.stderr)
            return True
        except Exception as e:
-            print(f"[irc-notify] thermal {kind} failed: {e}", file=sys.stderr)
+            print(f"[irc-notify] Thermal print failed: {e}", file=sys.stderr)
            return False
-    def fingerprint_of(alert):
+    def should_print(alert):
        fp = alert.get("fingerprint", "")
        if fp: return fp
        labels = alert.get("labels", {})
-        target = labels.get("pod") or labels.get("instance") or labels.get("deployment") or labels.get("statefulset") or labels.get("namespace") or ""
+        if labels.get("alert_channel") == "thermal_print": return True
-        return f"{labels.get('alertname','?')}/{labels.get('namespace','')}/{target}"
+        if labels.get("severity", "").lower() in ("critical", "disaster"): return True
-
+        if alert.get("status", "").upper() == "RESOLVED": return False
-    def is_critical(alert):
+        return False
        return alert.get("labels", {}).get("severity", "").lower() in ("critical", "disaster", "page")
    def is_immediate_label(alert):
        """Return True when the alert's ``alert_channel`` label is ``thermal_print_immediate``.

        A missing ``labels`` mapping or a missing ``alert_channel`` label yields False.
        """
        labels = alert.get("labels", {})
        channel = labels.get("alert_channel")
        return channel == "thermal_print_immediate"
    def is_batched_label(alert):
        """Return True when the alert's ``alert_channel`` label is exactly ``thermal_print``.

        A missing ``labels`` mapping or a missing ``alert_channel`` label yields False.
        """
        labels = alert.get("labels", {})
        channel = labels.get("alert_channel")
        return channel == "thermal_print"
    def add_to_digest(alert):
        """Record one webhook alert in the digest buffer.

        Returns True only when a fingerprint that was not yet buffered is
        inserted. Returns False when thermal printing is disabled, when the
        alert is a resolution (any buffered entry for it is removed), or when
        the fingerprint is already buffered (its entry is refreshed).
        """
        if not THERMAL_PRINT_ENABLED:
            return False
        key = fingerprint_of(alert)
        is_resolved = alert.get("status", "firing").lower() == "resolved"
        with _buffer_lock:
            already_buffered = key in _buffer
            if is_resolved:
                # A resolution never grows the buffer; it only evicts a known entry.
                if already_buffered:
                    del _buffer[key]
                    _stats["buffer_resolved"] += 1
                return False
            if already_buffered:
                # Repeat webhook for a known fingerprint: keep first_seen, refresh the rest.
                entry = _buffer[key]
                entry["last_seen"] = time.time()
                entry["alert"] = alert
                _stats["buffer_dedup"] += 1
                return False
            _buffer[key] = {"alert": alert, "first_seen": time.time(), "last_seen": time.time()}
            _stats["buffer_added"] += 1
            return True
    def build_digest_payload():
        """Build the print payload summarising every alert currently buffered.

        Returns None when the buffer is empty. Otherwise returns a dict with
        the keys title, severity, host, message, eventId, source, status and
        acknowledged. The message holds one line per alertname (sorted by
        name) listing the severities seen, the number of buffered alerts and
        up to five targets. The buffer itself is not modified.
        """
        # Copy the entries under the lock; all formatting happens on the copy.
        with _buffer_lock:
            snapshot = list(_buffer.values())
        if len(snapshot) == 0:
            return None

        def target_of(entry):
            # First non-empty label in this order identifies the target, else "?".
            entry_labels = entry["alert"].get("labels", {})
            for key in ("pod", "instance", "deployment", "statefulset", "namespace"):
                value = entry_labels.get(key)
                if value:
                    return value
            return "?"

        grouped = defaultdict(list)
        for entry in snapshot:
            alertname = entry["alert"].get("labels", {}).get("alertname", "Unknown")
            grouped[alertname].append(entry)

        summary_lines = []
        for name, group in sorted(grouped.items()):
            count = len(group)
            shown_targets = [target_of(entry) for entry in group[:5]]
            overflow = f" (+{count-5})" if count > 5 else ""
            severities = sorted({entry["alert"].get("labels", {}).get("severity", "warning") for entry in group})
            summary_lines.append(f"[{'/'.join(severities)}] {name} x{count}: {', '.join(shown_targets)}{overflow}")

        now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
        title = f"Alert digest: {len(snapshot)} firing"
        message_parts = [f"=== {title} ===", f"as of {now}", ""]
        message_parts.extend(summary_lines)
        message_parts.extend([
            "",
            "Stream: #alerts (IRC)  |  Triage: grafana-noc1.iamworkin.lan",
            "Force-flush: POST irc-notify.monitoring.svc:9119/flush",
        ])
        body = "\n".join(message_parts)
        return {
            "title": title,
            "severity": "Warning",
            "host": "monitoring",
            "message": body,
            "eventId": f"digest-{int(time.time())}",
            "source": "Grafana digest",
            "status": "PROBLEM",
            "acknowledged": False,
        }
    def flush_digest():
        """Send the buffered alerts as one digest and drop them from the buffer.

        Returns False when the buffer is empty (nothing is sent); otherwise
        returns whatever ``post_thermal(payload, "digest")`` returns.

        Only fingerprints that were already buffered when the flush started
        are removed. The print call runs without the buffer lock held, so a
        webhook can add alerts while it is in flight; clearing the whole
        buffer afterwards would silently discard alerts that were never
        printed. Entries that arrive during the flush stay buffered for the
        next digest.
        """
        # Record the pending fingerprints *before* the payload snapshot, so the
        # worst case for a racing insert is a repeat in the next digest rather
        # than a lost alert.
        with _buffer_lock:
            pending = list(_buffer)
        payload = build_digest_payload()
        if payload is None:
            print("[irc-notify] flush: buffer empty, no digest sent", file=sys.stderr)
            return False
        sent = post_thermal(payload, "digest")
        # As before, flushed entries are dropped even when the print failed:
        # a failed digest is not retried.
        with _buffer_lock:
            for fp in pending:
                _buffer.pop(fp, None)
        if sent: _stats["digest_flushed"] += 1
        return sent
    def digest_loop():
        """Run forever, flushing the digest on a timer or when the buffer is full.

        Every 15 seconds the loop flushes when at least BATCH_INTERVAL_MIN
        minutes have passed since the last flush, or otherwise when the buffer
        holds BATCH_MAX_PENDING or more entries. Any exception is logged to
        stderr and followed by a 60 second pause before the loop resumes.
        """
        global _last_flush_time
        while True:
            try:
                tick_started = time.time()
                # The interval check takes precedence over the buffer-size check.
                reason = None
                if tick_started - _last_flush_time >= BATCH_INTERVAL_MIN * 60:
                    reason = f"[irc-notify] digest tick: interval reached ({BATCH_INTERVAL_MIN}m); buffer={len(_buffer)}"
                elif len(_buffer) >= BATCH_MAX_PENDING:
                    reason = f"[irc-notify] digest tick: buffer full ({len(_buffer)}); force flush"
                if reason is not None:
                    print(reason, file=sys.stderr)
                    flush_digest()
                    # The timer restarts from the start of this tick, not from
                    # the moment the flush finished.
                    _last_flush_time = tick_started
                time.sleep(15)
            except Exception as e:
                print(f"[irc-notify] digest loop error: {e}", file=sys.stderr)
                time.sleep(60)
    class Handler(BaseHTTPRequestHandler):
        def do_POST(self):
            if self.path == "/flush":
                ok = flush_digest()
                self.send_response(200); self.send_header("Content-Type", "application/json"); self.end_headers()
                self.wfile.write(json.dumps({"flushed": ok, "buffer_after": len(_buffer)}).encode())
                return
            _stats["webhooks_received"] += 1
            length = int(self.headers.get("Content-Length", 0))
            body = json.loads(self.rfile.read(length)) if length else {}
            for alert in body.get("alerts", []):
@@ -1499,56 +1383,22 @@ data:
                msg = f"{icon}{sev_tag} {name}: {summary}"
                if desc: msg += f"\n  {desc}"
                send_irc(msg)
-                # Thermal routing — EVERYTHING (including criticals) goes into
+                if should_print(alert): send_thermal_print(alert)
-                # the hourly digest. Only the explicit `alert_channel=thermal_print_immediate`
+            self.send_response(200)
-                # label bypasses, and even that flushes-the-current-digest rather
+            self.send_header("Content-Type", "application/json")
-                # than printing a standalone job, so the same fingerprint can't
+            self.end_headers()
                # spam the printer per webhook cycle.
                if status == "RESOLVED":
                    add_to_digest(alert)  # removes from buffer
                    continue
                if is_immediate_label(alert):
                    # Explicit opt-in for "paper this NOW" — first arrival of a
                    # new fingerprint triggers an immediate digest flush; repeat
                    # webhooks for the same fingerprint dedupe in the buffer
                    # until the next interval or until the alert resolves.
                    new_in_buffer = add_to_digest(alert)
                    if new_in_buffer:
                        global _last_flush_time
                        flush_digest()
                        _last_flush_time = time.time()
                elif is_critical(alert) or is_batched_label(alert):
                    add_to_digest(alert)
                # else: IRC-only (warnings without thermal_print label)
            self.send_response(200); self.send_header("Content-Type", "application/json"); self.end_headers()
            self.wfile.write(b'{"status":"ok"}')
        def do_GET(self):
-            self.send_response(200); self.send_header("Content-Type", "application/json"); self.end_headers()
+            self.send_response(200)
-            with _buffer_lock:
+            self.send_header("Content-Type", "application/json")
-                alertnames = sorted({it["alert"].get("labels", {}).get("alertname", "?") for it in _buffer.values()})
+            self.end_headers()
-                depth = len(_buffer)
+            self.wfile.write(json.dumps({"service":"irc-notify","thermal_print":PRINT_ENABLED}).encode())
            info = {
                "service": "irc-notify",
                "config": {"thermal_print_enabled": THERMAL_PRINT_ENABLED,
                           "batch_interval_min": BATCH_INTERVAL_MIN,
                           "batch_max_pending": BATCH_MAX_PENDING,
                           "irc_target": f"{IRC_HOST}:{IRC_PORT} {IRC_CHANNEL}",
                           "print_web_url": PRINT_WEB_URL},
                "buffer": {"depth": depth, "alertnames": alertnames,
                           "seconds_since_last_flush": int(time.time() - _last_flush_time),
                           "seconds_until_next_flush": max(0, int(BATCH_INTERVAL_MIN*60 - (time.time() - _last_flush_time)))},
                "stats": _stats,
            }
            self.wfile.write(json.dumps(info, indent=2).encode())
        def log_message(self, format, *args):
            print(f"[irc-notify] {args[0]}", file=sys.stderr)
    if __name__ == "__main__":
        threading.Thread(target=digest_loop, daemon=True).start()
        server = HTTPServer(("0.0.0.0", 9119), Handler)
-        print(f"[irc-notify] :9119 -> IRC {IRC_HOST}:{IRC_PORT} {IRC_CHANNEL} | thermal={'ON' if THERMAL_PRINT_ENABLED else 'OFF'} | digest={BATCH_INTERVAL_MIN}m max={BATCH_MAX_PENDING}", file=sys.stderr)
+        print(f"IRC alert relay :9119 -> {IRC_HOST}:{IRC_PORT} {IRC_CHANNEL} (thermal: {PRINT_ENABLED})")
        server.serve_forever()
 # =============================================================================
@@ -3659,7 +3509,7 @@ data:
              - refId: A
                relativeTimeRange: {from: 300, to: 0}
                datasourceUid: prometheus
-                model: {expr: 'kube_deployment_status_replicas_ready{namespace="github-runner",deployment=~"github-runner(|-(sharedpos|puppet|signage|dms|telephony|print-web|chat|mysql|kiosk-linux))"} == 0', instant: true, refId: A}
+                model: {expr: 'kube_deployment_status_replicas_ready{namespace="github-runner",deployment=~"github-runner(|-.+)"} == 0', instant: true, refId: A}
              - refId: B
                relativeTimeRange: {from: 300, to: 0}
                datasourceUid: __expr__
--- a/tests/bluejay-infra-lint/FleetManifestLintTests.cs
+++ b/tests/bluejay-infra-lint/FleetManifestLintTests.cs
@@ -67,6 +67,8 @@ public sealed class FleetManifestLintTests
        ["github-runner-chat"] = "https://github.com/astoltz/FlowerCore.Chat",
        ["github-runner-mysql"] = "https://github.com/astoltz/FlowerCore.MySQL",
        ["github-runner-kiosk-linux"] = "https://github.com/astoltz/FlowerCore.Kiosk.Linux",
        ["github-runner-devicemgmt"] = "https://github.com/astoltz/FlowerCore.DeviceManagement",
        ["github-runner-worldbuilder"] = "https://github.com/astoltz/FlowerCore.WorldBuilder",
    };
    private static readonly HashSet<string> ScaledLinuxRunnerDeployments = new(StringComparer.Ordinal)
@@ -80,6 +82,8 @@ public sealed class FleetManifestLintTests
        "github-runner-chat",
        "github-runner-mysql",
        "github-runner-kiosk-linux",
        "github-runner-devicemgmt",
        "github-runner-worldbuilder",
    };
    private static readonly IReadOnlyDictionary<string, string> WritableRunnerEnv = new Dictionary<string, string>(StringComparer.Ordinal)
@@ -234,7 +238,7 @@ public sealed class FleetManifestLintTests
        {
            deployments.Should().ContainKey(expectedRunner.Key);
-            var container = deployments[expectedRunner.Key].ContainerMappings().Should().ContainSingle().Subject;
+            var container = RunnerContainer(deployments[expectedRunner.Key]);
            EnvValue(container, "REPO_URL").Should().Be(expectedRunner.Value);
            EnvValue(container, "EPHEMERAL").Should().Be("true");
            EnvValue(container, "LABELS").Should().Be("self-hosted,linux,fc-build-linux");
@@ -250,7 +254,7 @@ public sealed class FleetManifestLintTests
    {
        foreach (var deployment in GitHubRunnerDeployments().Values)
        {
-            var container = deployment.ContainerMappings().Should().ContainSingle().Subject;
+            var container = RunnerContainer(deployment);
            foreach (var expectedEnv in WritableRunnerEnv)
            {
@@ -311,7 +315,7 @@ public sealed class FleetManifestLintTests
        monitoring.Should().Contain("MacMiniRunnerOffline");
        monitoring.Should().Contain("LinuxRunnerOffline");
        monitoring.Should().Contain("kube_deployment_status_replicas_ready");
-        monitoring.Should().Contain("github-runner(|-(sharedpos|puppet|signage|dms|telephony|print-web|chat|mysql|kiosk-linux))");
+        monitoring.Should().Contain("github-runner(|-.+)");
        monitoring.Should().Contain("folder: CI Alerts");
        monitoring.Should().Contain("uid: linux-runner-offline");
        monitoring.Should().Contain("alert_channel: irc");
@@ -641,6 +645,15 @@ public sealed class FleetManifestLintTests
        return EnvMapping(container, name) is { } env ? ManifestNodeExtensions.Scalar(env, "value") : null;
    }
    /// <summary>
    /// Returns the container mapping named <c>runner</c> from <paramref name="deployment"/>.
    /// The assertion fails unless exactly one container carries that name.
    /// </summary>
    /// <param name="deployment">Deployment manifest whose container mappings are searched.</param>
    /// <returns>The single container mapping whose <c>name</c> scalar equals <c>runner</c> (ordinal comparison).</returns>
    private static YamlMappingNode RunnerContainer(ManifestDocument deployment)
    {
        // Filter by name so the check is on the "runner" container specifically,
        // not on the total number of containers.
        var runnerContainers = deployment.ContainerMappings()
            .Where(candidate => string.Equals(ManifestNodeExtensions.Scalar(candidate, "name"), "runner", StringComparison.Ordinal));

        return runnerContainers
            .Should()
            .ContainSingle($"{deployment.Name} must keep exactly one main runner container")
            .Subject;
    }
    private static string? EnvSecretName(YamlMappingNode container, string name)
    {
        return EnvMapping(container, name) is { } env