runners: right-size replica counts per 14d CI activity (#24)

runners: right-size replica counts per 14d CI activity data
Drop 2 → 1 for 10 deploys based on trailing-14d run counts: - LlmBridge, Media, Knowledge, Intranet.Web, DNS (0 runs each) - Presentations (6), Redis (3), Provisioning (3), MessageBoard (3), MenuBoard (3) Bump 2 → 3 for Print.Web: 12 runs in trailing 5d, and the help-screenshots AAT job holds a runner 30+ min, creating head-of-line blocking for parallel PRs. Net change: -9 replicas (≈ -9 GiB committed memory). Aligns with Sprint 33 morning-routine capacity audit. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-26 00:01:47 +00:00 · 2026-05-25 18:55:47 -05:00 · 2026-05-25 16:03:03 -05:00 · 2026-05-25 15:58:35 -05:00 · 2026-05-25 15:50:10 -05:00 · 2026-05-20 14:52:02 -05:00
56 changed files with 7499 additions and 496 deletions
--- a/README.md
+++ b/README.md
@@ -118,6 +118,7 @@ That test project sweeps `bluejay-infra/apps/**` plus the canonical sibling `Flo
 ## References
 - OpenVox noc1 durability runbook: `docs/runbooks/openvoxserver-quadlet-durability.md`
 - Cert-manager recovery playbook: `FlowerCore.Notes/memory/project_cert_manager_recovery_2026_04_22.md`
 - Why pfSense DNS is required: `FlowerCore.Notes/memory/feedback_pfsense_dns_required_for_acme.md`
 - Public DNS operator host: `https://dns.iamworkin.lan`
--- a/apps/authentik/authentik.yaml
+++ b/apps/authentik/authentik.yaml
@@ -0,0 +1,448 @@
 # Authentik OIDC backend
 # ArgoCD-managed. BlueJay Lab.
 #
 # Stack:
 #   - PostgreSQL 16 StatefulSet (single replica, Longhorn RWO 5Gi)
 #   - Redis 7 Deployment (no persistence — session/cache only)
 #   - Authentik server + worker Deployments (image ghcr.io/goauthentik/server:2024.12.3)
 #   - Media PVC shared between server + worker (Longhorn RWO 2Gi)
 #   - Certificate via step-ca-acme ClusterIssuer
 #   - Traefik IngressRoute at id.iamworkin.lan
 #
 # Secrets come from 1Password item "authentik-credentials" (IAmWorkin vault, id y6i74ch22q5wvm7znquq4nhhcu)
 # via the OnePasswordItem CRD, materialized into k8s Secret authentik/authentik-credentials.
 #
 # Why the discovery URL is /application/o/pimanager/ : Authentik issues per-application OIDC providers.
 # The pimanager OIDC application/provider is created after the cluster pods are healthy (manual or
 # via API once the bootstrap token is available — see Notes substrate).
 ---
 apiVersion: v1
 kind: Namespace
 metadata:
  name: authentik
  labels:
    app.kubernetes.io/part-of: bluejay-infra
 ---
 # 1Password operator pulls the authentik-credentials item into a k8s Secret of the same name.
 # Field labels in 1P become Secret keys: AUTHENTIK_SECRET_KEY, POSTGRES_PASSWORD, REDIS_PASSWORD,
 # BOOTSTRAP_ADMIN_PASSWORD, BOOTSTRAP_ADMIN_TOKEN, BOOTSTRAP_ADMIN_EMAIL.
 apiVersion: onepassword.com/v1
 kind: OnePasswordItem
 metadata:
  name: authentik-credentials
  namespace: authentik
 spec:
  itemPath: "vaults/IAmWorkin/items/authentik-credentials"
 ---
 # Shared media volume for server + worker pods.
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
  name: authentik-media
  namespace: authentik
 spec:
  storageClassName: longhorn
  accessModes: [ReadWriteOnce]
  resources:
    requests:
      storage: 2Gi
 ---
 # PostgreSQL 16 StatefulSet — Authentik's primary store.
 apiVersion: apps/v1
 kind: StatefulSet
 metadata:
  name: authentik-postgres
  namespace: authentik
  labels:
    app: authentik-postgres
    argocd.argoproj.io/instance: infra-authentik
 spec:
  persistentVolumeClaimRetentionPolicy:
    whenDeleted: Retain
    whenScaled: Retain
  podManagementPolicy: OrderedReady
  serviceName: authentik-postgres
  replicas: 1
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      app: authentik-postgres
  template:
    metadata:
      labels:
        app: authentik-postgres
    spec:
      containers:
        - name: postgres
          image: postgres:16-alpine
          ports:
            - containerPort: 5432
              name: postgres
          env:
            - name: POSTGRES_USER
              value: authentik
            - name: POSTGRES_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: authentik-credentials
                  key: POSTGRES_PASSWORD
            - name: POSTGRES_DB
              value: authentik
            - name: POSTGRES_INITDB_ARGS
              value: "--encoding=UTF-8 --lc-collate=C --lc-ctype=C"
            - name: PGDATA
              value: /var/lib/postgresql/data/pgdata
          readinessProbe:
            exec:
              command: ["pg_isready", "-U", "authentik"]
            initialDelaySeconds: 5
            periodSeconds: 5
          livenessProbe:
            exec:
              command: ["pg_isready", "-U", "authentik"]
            initialDelaySeconds: 30
            periodSeconds: 30
          resources:
            requests: { cpu: 100m, memory: 256Mi }
            limits: { cpu: 1000m, memory: 1Gi }
          volumeMounts:
            - name: pgdata
              mountPath: /var/lib/postgresql/data
  volumeClaimTemplates:
    - metadata:
        name: pgdata
      spec:
        storageClassName: longhorn
        accessModes: [ReadWriteOnce]
        volumeMode: Filesystem
        resources:
          requests:
            storage: 5Gi
 ---
 # Headless Service (clusterIP: None) for the authentik-postgres StatefulSet.
 # Its name matches the StatefulSet's serviceName and is the hostname the
 # Authentik server and worker use via AUTHENTIK_POSTGRESQL__HOST.
 apiVersion: v1
 kind: Service
 metadata:
  name: authentik-postgres
  namespace: authentik
 spec:
  clusterIP: None
  selector:
    app: authentik-postgres
  ports:
    - name: postgres
      port: 5432
      targetPort: 5432
 ---
 # Redis 7 — session storage + Celery broker. No persistence needed (cache).
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: authentik-redis
  namespace: authentik
  labels:
    app: authentik-redis
    argocd.argoproj.io/instance: infra-authentik
 spec:
  replicas: 1
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: authentik-redis
  template:
    metadata:
      labels:
        app: authentik-redis
    spec:
      containers:
        - name: redis
          image: redis:7-alpine
          args:
            - "--save"
            - ""
            - "--appendonly"
            - "no"
            - "--requirepass"
            - "$(REDIS_PASSWORD)"
          env:
            - name: REDIS_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: authentik-credentials
                  key: REDIS_PASSWORD
          ports:
            - containerPort: 6379
              name: redis
          readinessProbe:
            tcpSocket: { port: 6379 }
            initialDelaySeconds: 5
            periodSeconds: 5
          livenessProbe:
            tcpSocket: { port: 6379 }
            initialDelaySeconds: 30
            periodSeconds: 30
          resources:
            requests: { cpu: 50m, memory: 64Mi }
            limits: { cpu: 500m, memory: 256Mi }
 ---
 apiVersion: v1
 kind: Service
 metadata:
  name: authentik-redis
  namespace: authentik
 spec:
  selector:
    app: authentik-redis
  ports:
    - name: redis
      port: 6379
      targetPort: 6379
 ---
 # Authentik server Deployment — HTTP frontend on :9000.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: authentik-server
  namespace: authentik
  labels:
    app: authentik-server
    argocd.argoproj.io/instance: infra-authentik
 spec:
  replicas: 1
  strategy:
    type: Recreate  # shares /media RWO PVC with worker
  selector:
    matchLabels:
      app: authentik-server
  template:
    metadata:
      labels:
        app: authentik-server
    spec:
      securityContext:
        # Authentik image runs as uid 1000 "authentik" but the Longhorn PVC mounts
        # root:root by default. fsGroup recursively chgrp + chmod g+rwx so the
        # non-root container can mkdir /media/public during the tenant_files migration.
        fsGroup: 1000
      containers:
        - name: server
          image: ghcr.io/goauthentik/server:2024.12.3
          args: ["server"]
          ports:
            - containerPort: 9000
              name: http
            - containerPort: 9443
              name: https
          env:
            - name: AUTHENTIK_SECRET_KEY
              valueFrom:
                secretKeyRef:
                  name: authentik-credentials
                  key: AUTHENTIK_SECRET_KEY
            - name: AUTHENTIK_REDIS__HOST
              value: authentik-redis
            - name: AUTHENTIK_REDIS__PASSWORD
              valueFrom:
                secretKeyRef:
                  name: authentik-credentials
                  key: REDIS_PASSWORD
            - name: AUTHENTIK_POSTGRESQL__HOST
              value: authentik-postgres
            - name: AUTHENTIK_POSTGRESQL__NAME
              value: authentik
            - name: AUTHENTIK_POSTGRESQL__USER
              value: authentik
            - name: AUTHENTIK_POSTGRESQL__PASSWORD
              valueFrom:
                secretKeyRef:
                  name: authentik-credentials
                  key: POSTGRES_PASSWORD
            - name: AUTHENTIK_BOOTSTRAP_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: authentik-credentials
                  key: BOOTSTRAP_ADMIN_PASSWORD
            - name: AUTHENTIK_BOOTSTRAP_TOKEN
              valueFrom:
                secretKeyRef:
                  name: authentik-credentials
                  key: BOOTSTRAP_ADMIN_TOKEN
            - name: AUTHENTIK_BOOTSTRAP_EMAIL
              valueFrom:
                secretKeyRef:
                  name: authentik-credentials
                  key: BOOTSTRAP_ADMIN_EMAIL
            - name: AUTHENTIK_DISABLE_UPDATE_CHECK
              value: "true"
            - name: AUTHENTIK_ERROR_REPORTING__ENABLED
              value: "false"
            - name: AUTHENTIK_LOG_LEVEL
              value: info
          # First-boot Authentik can take 3+ min on the migration phase
          # (waiting on DB lock while worker also runs migrations). Initial
          # delays are generous so kubelet doesn't kill the pod mid-migration;
          # periodSeconds keeps post-startup probing responsive.
          readinessProbe:
            httpGet:
              path: /-/health/ready/
              port: 9000
            initialDelaySeconds: 60
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 12
          livenessProbe:
            httpGet:
              path: /-/health/live/
              port: 9000
            initialDelaySeconds: 300
            periodSeconds: 30
            timeoutSeconds: 10
            failureThreshold: 3
          startupProbe:
            httpGet:
              path: /-/health/live/
              port: 9000
            initialDelaySeconds: 30
            periodSeconds: 15
            timeoutSeconds: 10
            failureThreshold: 40  # 30s + 40*15s = 10.5 min budget
          resources:
            requests: { cpu: 150m, memory: 512Mi }
            limits: { cpu: 1500m, memory: 1Gi }
          volumeMounts:
            - name: media
              mountPath: /media
      volumes:
        - name: media
          persistentVolumeClaim:
            claimName: authentik-media
 ---
 # Authentik worker Deployment — runs Celery background tasks.
 #
 # Two things to know about this pod:
 #   - Per the Authentik automated-install docs, the AUTHENTIK_BOOTSTRAP_*
 #     variables are consumed by the worker container on first startup, so they
 #     are set here (the server pod also carries them, which is harmless).
 #   - /media is a ReadWriteOnce PVC that the server pod mounts too. RWO volumes
 #     attach to a single node, so the worker is pinned to whichever node runs
 #     the server via required podAffinity; otherwise it can hang in
 #     ContainerCreating with a Multi-Attach error.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: authentik-worker
  namespace: authentik
  labels:
    app: authentik-worker
    argocd.argoproj.io/instance: infra-authentik
 spec:
  replicas: 1
  strategy:
    type: Recreate  # shares /media RWO PVC with server
  selector:
    matchLabels:
      app: authentik-worker
  template:
    metadata:
      labels:
        app: authentik-worker
    spec:
      affinity:
        # Co-locate with the server pod: both mount the RWO authentik-media PVC,
        # which can only be attached to one node at a time. The server pod is
        # the anchor; this pod stays Pending until the server has been scheduled.
        podAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            - labelSelector:
                matchLabels:
                  app: authentik-server
              topologyKey: kubernetes.io/hostname
      securityContext:
        # Same as server pod — non-root uid 1000 needs PVC group write.
        fsGroup: 1000
      containers:
        - name: worker
          image: ghcr.io/goauthentik/server:2024.12.3
          args: ["worker"]
          env:
            - name: AUTHENTIK_SECRET_KEY
              valueFrom:
                secretKeyRef:
                  name: authentik-credentials
                  key: AUTHENTIK_SECRET_KEY
            - name: AUTHENTIK_REDIS__HOST
              value: authentik-redis
            - name: AUTHENTIK_REDIS__PASSWORD
              valueFrom:
                secretKeyRef:
                  name: authentik-credentials
                  key: REDIS_PASSWORD
            - name: AUTHENTIK_POSTGRESQL__HOST
              value: authentik-postgres
            - name: AUTHENTIK_POSTGRESQL__NAME
              value: authentik
            - name: AUTHENTIK_POSTGRESQL__USER
              value: authentik
            - name: AUTHENTIK_POSTGRESQL__PASSWORD
              valueFrom:
                secretKeyRef:
                  name: authentik-credentials
                  key: POSTGRES_PASSWORD
            # Bootstrap admin credentials: only honoured on the very first
            # startup against an empty database, and read by the worker.
            - name: AUTHENTIK_BOOTSTRAP_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: authentik-credentials
                  key: BOOTSTRAP_ADMIN_PASSWORD
            - name: AUTHENTIK_BOOTSTRAP_TOKEN
              valueFrom:
                secretKeyRef:
                  name: authentik-credentials
                  key: BOOTSTRAP_ADMIN_TOKEN
            - name: AUTHENTIK_BOOTSTRAP_EMAIL
              valueFrom:
                secretKeyRef:
                  name: authentik-credentials
                  key: BOOTSTRAP_ADMIN_EMAIL
            - name: AUTHENTIK_DISABLE_UPDATE_CHECK
              value: "true"
            - name: AUTHENTIK_ERROR_REPORTING__ENABLED
              value: "false"
            - name: AUTHENTIK_LOG_LEVEL
              value: info
          resources:
            requests: { cpu: 100m, memory: 256Mi }
            limits: { cpu: 1000m, memory: 768Mi }
          volumeMounts:
            - name: media
              mountPath: /media
      volumes:
        - name: media
          persistentVolumeClaim:
            claimName: authentik-media
 ---
 # ClusterIP Service in front of the authentik-server pods.
 # Port 9000 (http) is the backend the Traefik IngressRoute below targets;
 # port 9443 (https) is exposed as well but that IngressRoute does not use it.
 apiVersion: v1
 kind: Service
 metadata:
  name: authentik-server
  namespace: authentik
 spec:
  selector:
    app: authentik-server
  ports:
    - name: http
      port: 9000
      targetPort: 9000
    - name: https
      port: 9443
      targetPort: 9443
 ---
 # step-ca leaf certificate for id.iamworkin.lan.
 # step-ca container resolver uses pfSense Unbound, so the public A record for id.iamworkin.lan
 # MUST exist before this Certificate is applied (cert-manager HTTP-01 will silently 2h-backoff
 # otherwise). Added 2026-05-25 via scripts/pfsense-add-id-host.py.
 apiVersion: cert-manager.io/v1
 kind: Certificate
 metadata:
  name: authentik-tls
  namespace: authentik
 spec:
  secretName: authentik-tls
  dnsNames:
    - id.iamworkin.lan
  issuerRef:
    name: step-ca-acme
    kind: ClusterIssuer
 ---
 # Traefik route for id.iamworkin.lan on the websecure entrypoint.
 # TLS uses the authentik-tls Secret (the secretName of the Certificate above);
 # requests are forwarded to Service authentik-server on port 9000, the port
 # that Service names "http".
 apiVersion: traefik.io/v1alpha1
 kind: IngressRoute
 metadata:
  name: authentik
  namespace: authentik
 spec:
  entryPoints: [websecure]
  routes:
    - match: Host(`id.iamworkin.lan`)
      kind: Rule
      services:
        - name: authentik-server
          port: 9000
  tls:
    secretName: authentik-tls
--- a/apps/fc-chat/fc-chat.yaml
+++ b/apps/fc-chat/fc-chat.yaml
@@ -30,3 +30,41 @@ spec:
          port: 80
  tls:
    secretName: chat-web-tls
 ---
 # Public host profile marker. The app treats this header as authoritative for
 # the public twin, while the internal chat.iamworkin.lan route does not attach
 # it and keeps the operator-oriented UI.
 apiVersion: traefik.io/v1alpha1
 kind: Middleware
 metadata:
  name: chat-public-profile-header
  namespace: fc-chat
 spec:
  headers:
    customRequestHeaders:
      X-FC-Chat-Host-Profile: "public"
 ---
 # Public Cloudflare-fronted twin for the anonymous chat surface. Operator
 # paths are intentionally absent from the allowlist below, so /admin,
 # /operator, /console, /ops, /api/operator, and /operatorhub miss this route
 # and return Traefik 404 before reaching the pod. Operator action still needed:
 # create/verify Cloudflare DNS chat.flowercore.io -> public Traefik endpoint
 # and mirror the cf-origin-flowercore-io TLS secret into namespace fc-chat.
 apiVersion: traefik.io/v1alpha1
 kind: IngressRoute
 metadata:
  name: chat-web-public
  namespace: fc-chat
 spec:
  entryPoints:
    - websecure
  routes:
    - match: Host(`chat.flowercore.io`) && (Path(`/`) || Path(`/chat`) || PathPrefix(`/_blazor`) || PathPrefix(`/_framework`) || PathPrefix(`/_content`) || PathPrefix(`/avatars`) || PathPrefix(`/css`) || PathPrefix(`/js`) || PathPrefix(`/favicon`) || PathPrefix(`/chathub`)) && (Method(`GET`) || Method(`HEAD`) || Method(`POST`) || Method(`OPTIONS`))
      kind: Rule
      middlewares:
        - name: chat-public-profile-header
      services:
        - name: chat-web
          port: 80
  tls:
    secretName: cf-origin-flowercore-io
--- a/apps/fc-devicemgmt/1password-item.yaml
+++ b/apps/fc-devicemgmt/1password-item.yaml
@@ -0,0 +1,26 @@
 # Runtime secrets for FlowerCore.DeviceManagement.
 #
 # OnePasswordItem operator syncs this item into a Kubernetes Secret with the
 # same name. Expected fields:
 #   DB-Password
 #   mtls-ca.pem
 #   mtls-client.crt
 #   mtls-client.key
 #   mtls-chain.pem
 #
 # Do not add literal secret values to this repo. Runtime pods consume the
 # synced Secret through env vars and read-only mounts.
 apiVersion: onepassword.com/v1
 kind: OnePasswordItem
 metadata:
  name: fc-devicemgmt-runtime
  namespace: fc-devicemgmt
  labels:
    app.kubernetes.io/name: fc-devicemgmt
    app.kubernetes.io/component: secrets
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
 spec:
  itemPath: "vaults/IAmWorkin/items/FlowerCore DeviceManagement Runtime"
--- a/apps/fc-devicemgmt/certificate-web.yaml
+++ b/apps/fc-devicemgmt/certificate-web.yaml
@@ -0,0 +1,30 @@
 # Certificate for devices.iamworkin.lan.
 #
 # Preflight gate: FlowerCore.DNS / pfSense must contain an explicit A record:
 #   devices.iamworkin.lan -> 10.0.56.200
 # before this Certificate is synced. step-ca ACME cannot see the CoreDNS
 # wildcard, so missing pfSense DNS produces cert-manager HTTP-01 backoff
 # (feedback_pfsense_dns_required_for_acme).
 apiVersion: cert-manager.io/v1
 kind: Certificate
 metadata:
  name: fc-devicemgmt-web-tls
  namespace: fc-devicemgmt
  labels:
    app.kubernetes.io/name: fc-devicemgmt-web
    app.kubernetes.io/component: web
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
  annotations:
    flowercore.io/dns-preflight: "devices.iamworkin.lan must resolve to 10.0.56.200 before ACME sync"
 spec:
  secretName: fc-devicemgmt-web-tls
  issuerRef:
    name: step-ca-acme
    kind: ClusterIssuer
  dnsNames:
    - devices.iamworkin.lan
  duration: 720h
  renewBefore: 240h
--- a/apps/fc-devicemgmt/clusterrole-operator.yaml
+++ b/apps/fc-devicemgmt/clusterrole-operator.yaml
@@ -0,0 +1,81 @@
 # Cluster-wide permissions for the FlowerCore.DeviceManagement operator.
 # Bound to ServiceAccount fc-devicemgmt/fc-devicemgmt-operator by the
 # ClusterRoleBinding of the same name.
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
 metadata:
  name: fc-devicemgmt-operator
  labels:
    app.kubernetes.io/name: fc-devicemgmt-operator
    app.kubernetes.io/component: operator
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
 rules:
  # Full CRUD on every resource in the operator's own API group.
  - apiGroups:
      - devices.flowercore.io
    resources:
      - '*'
    verbs:
      - get
      - list
      - watch
      - create
      - update
      - patch
      - delete
  # Explicit status/finalizers subresource access for each custom resource.
  # NOTE(review): this looks redundant with the '*' rule above, which should
  # already match subresources — confirm before removing.
  - apiGroups:
      - devices.flowercore.io
    resources:
      - devices/status
      - devices/finalizers
      - devicegroups/status
      - devicegroups/finalizers
      - devicepolicies/status
      - devicepolicies/finalizers
      - remotecommands/status
      - remotecommands/finalizers
    verbs:
      - get
      - update
      - patch
  # Read-only "get" on Deployments, which the operator Deployment manifest says
  # is needed so the process can resolve its own Deployment UID.
  - apiGroups:
      - apps
    resources:
      - deployments
    verbs:
      - get
  # Full CRUD on core objects, including Secrets.
  # NOTE(review): granted through a ClusterRoleBinding, so this applies in
  # every namespace — confirm the operator needs Secret write access that widely.
  - apiGroups:
      - ""
    resources:
      - pods
      - services
      - configmaps
      - secrets
      - events
    verbs:
      - get
      - list
      - watch
      - create
      - update
      - patch
      - delete
  # Full CRUD on batch Jobs.
  - apiGroups:
      - batch
    resources:
      - jobs
    verbs:
      - get
      - list
      - watch
      - create
      - update
      - patch
      - delete
  # NetworkPolicies are read-only for the operator (get/list/watch only).
  - apiGroups:
      - networking.k8s.io
    resources:
      - networkpolicies
    verbs:
      - get
      - list
      - watch
--- a/apps/fc-devicemgmt/clusterrolebinding-operator.yaml
+++ b/apps/fc-devicemgmt/clusterrolebinding-operator.yaml
@@ -0,0 +1,19 @@
 # Grants ClusterRole fc-devicemgmt-operator to the ServiceAccount of the same
 # name in namespace fc-devicemgmt — the account named by serviceAccountName in
 # the operator Deployment. Because this is a ClusterRoleBinding, the role's
 # rules apply across all namespaces.
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRoleBinding
 metadata:
  name: fc-devicemgmt-operator
  labels:
    app.kubernetes.io/name: fc-devicemgmt-operator
    app.kubernetes.io/component: operator
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
 roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: fc-devicemgmt-operator
 subjects:
  - kind: ServiceAccount
    name: fc-devicemgmt-operator
    namespace: fc-devicemgmt
--- a/apps/fc-devicemgmt/deployment-operator.yaml
+++ b/apps/fc-devicemgmt/deployment-operator.yaml
@@ -0,0 +1,109 @@
 # FlowerCore.DeviceManagement Operator.
 #
 # KubeOps controller for devices.flowercore.io resources. Operator-created
 # children must set OwnerReferences + traceability labels/annotations per
 # k8s-pod-ownership-and-traceability-standard.md. RBAC below grants
 # apps/deployments/get so the process can resolve its own Deployment UID.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: fc-devicemgmt-operator
  namespace: fc-devicemgmt
  labels:
    app: fc-devicemgmt-operator
    app.kubernetes.io/name: fc-devicemgmt-operator
    app.kubernetes.io/component: operator
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
  annotations:
    flowercore.io/traceability-standard: k8s-pod-ownership-and-traceability-standard
 spec:
  replicas: 1
  revisionHistoryLimit: 3
  selector:
    matchLabels:
      app: fc-devicemgmt-operator
  template:
    metadata:
      labels:
        app: fc-devicemgmt-operator
        app.kubernetes.io/name: fc-devicemgmt-operator
        app.kubernetes.io/component: operator
        app.kubernetes.io/part-of: flowercore
        app.kubernetes.io/managed-by: argocd
        flowercore.io/tenant-id: system
        flowercore.io/created-by: bluejay-infra
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8080"
        prometheus.io/path: "/metrics"
        flowercore.io/audit-trace-id: "runtime-activity-trace"
    spec:
      serviceAccountName: fc-devicemgmt-operator
      securityContext:
        fsGroup: 1654
        fsGroupChangePolicy: OnRootMismatch
      containers:
        - name: operator
          image: localhost/fc-devicemgmt-operator:v20260519-sp34cl3-fix
          imagePullPolicy: Never
          ports:
            - name: metrics
              containerPort: 8080
          env:
            - name: ASPNETCORE_ENVIRONMENT
              value: "Production"
            - name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
              value: "false"
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            - name: FLOWERCORE_KUBERNETES_OWNER_DEPLOYMENT
              value: "fc-devicemgmt-operator"
            - name: FlowerCore__Service__Name
              value: "FlowerCore.DeviceManagement.Operator"
            - name: FlowerCore__DeviceManagement__DefaultTenantId
              value: "system"
          resources:
            requests:
              cpu: 50m
              memory: 128Mi
            limits:
              cpu: 500m
              memory: 512Mi
          readinessProbe:
            tcpSocket:
              port: 8080
            initialDelaySeconds: 5
            periodSeconds: 10
          livenessProbe:
            tcpSocket:
              port: 8080
            initialDelaySeconds: 20
            periodSeconds: 30
          securityContext:
            runAsNonRoot: true
            runAsUser: 1654
            runAsGroup: 1654
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
            capabilities:
              drop:
                - ALL
          volumeMounts:
            - name: tmp
              mountPath: /tmp
            - name: logs
              mountPath: /app/logs
      volumes:
        - name: tmp
          emptyDir: {}
        - name: logs
          emptyDir: {}
--- a/apps/fc-devicemgmt/deployment-web.yaml
+++ b/apps/fc-devicemgmt/deployment-web.yaml
@@ -0,0 +1,151 @@
 # FlowerCore.DeviceManagement Web.
 #
 # Source repo is expected to ship FlowerCore.DeviceManagement.Web in a later
 # Sprint 9+ lane. This manifest is static-valid without requiring the image to
 # exist yet; import localhost/fc-devicemgmt-web:<tag> to all schedulable RKE2
 # nodes before letting ArgoCD sync a live rollout.
 #
 # SCALED TO 0 — 2026-05-19 morning-routine cleanup.
 # The Web pod cannot start until TWO upstream gaps close:
 #   1. MySQL DB instance `flowercore_devicemgmt` (user `fc_devicemgmt`) is
 #      provisioned via fc-mysql Manager. The cluster currently has ZERO
 #      MySqlInstanceCrds and no `mysql.fc-mysql.svc:3306` Service, so the
 #      deployment-web container env `FlowerCore__Database__Host=mysql.fc-mysql.svc`
 #      points at nothing. Provision via the fc-mysql Manager UI/REST/MCP.
 #   2. 1Password vault item `IAmWorkin/FlowerCore DeviceManagement Runtime`
 #      with 5 fields (DB-Password, mtls-ca.pem, mtls-client.crt, mtls-client.key,
 #      mtls-chain.pem) — see apps/fc-devicemgmt/1password-item.yaml. Mint mTLS
 #      from step-ca-agent ClusterIssuer per ADR-126; DB-Password must match the
 #      password configured for the MySQL user.
 # Re-enable: change replicas back to 2 after both gaps close. The image tag
 # in this file (v20260512-cx5) MAY also need a refresh — it predates the
 # Sprint 34 Cl-3 operator fix; Web may have an analogous bug.
 # Deployment for the DeviceManagement Web frontend (currently scaled to 0).
 # The fc-devicemgmt-runtime Secret is consumed two ways: DB-Password as an env
 # var, and the mTLS material as read-only files under /secrets/devicemgmt-mtls.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: fc-devicemgmt-web
  namespace: fc-devicemgmt
  labels:
    app: fc-devicemgmt-web
    app.kubernetes.io/name: fc-devicemgmt-web
    app.kubernetes.io/component: web
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
  annotations:
    flowercore.io/traceability-standard: k8s-pod-ownership-and-traceability-standard
 spec:
  replicas: 0
  revisionHistoryLimit: 3
  selector:
    matchLabels:
      app: fc-devicemgmt-web
  template:
    metadata:
      labels:
        app: fc-devicemgmt-web
        app.kubernetes.io/name: fc-devicemgmt-web
        app.kubernetes.io/component: web
        app.kubernetes.io/part-of: flowercore
        app.kubernetes.io/managed-by: argocd
        flowercore.io/tenant-id: system
        flowercore.io/created-by: bluejay-infra
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8080"
        prometheus.io/path: "/metrics"
        flowercore.io/audit-trace-id: "runtime-activity-trace"
    spec:
      securityContext:
        fsGroup: 1654
        fsGroupChangePolicy: OnRootMismatch
      containers:
        - name: web
          image: localhost/fc-devicemgmt-web:v20260512-cx5
          imagePullPolicy: Never
          ports:
            - name: http
              containerPort: 8080
          env:
            - name: ASPNETCORE_URLS
              value: "http://+:8080"
            - name: ASPNETCORE_ENVIRONMENT
              value: "Production"
            - name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
              value: "false"
            - name: FlowerCore__Service__Name
              value: "FlowerCore.DeviceManagement.Web"
            - name: FlowerCore__DeviceManagement__DefaultTenantId
              value: "system"
            - name: FlowerCore__Database__Provider
              value: "MySql"
            - name: FlowerCore__Database__Host
              value: "mysql.fc-mysql.svc"
            - name: FlowerCore__Database__Database
              value: "flowercore_devicemgmt"
            - name: FlowerCore__Database__User
              value: "fc_devicemgmt"
            - name: FlowerCore__Database__Password
              valueFrom:
                secretKeyRef:
                  name: fc-devicemgmt-runtime
                  key: DB-Password
            # These paths must match the `items[].path` values of the
            # devicemgmt-mtls volume at the bottom of this manifest.
            - name: FlowerCore__DeviceManagement__AgentMtls__CaPath
              value: "/secrets/devicemgmt-mtls/mtls-ca.pem"
            - name: FlowerCore__DeviceManagement__AgentMtls__ClientCertificatePath
              value: "/secrets/devicemgmt-mtls/mtls-client.crt"
            - name: FlowerCore__DeviceManagement__AgentMtls__ClientKeyPath
              value: "/secrets/devicemgmt-mtls/mtls-client.key"
            - name: FlowerCore__EventBus__Redis__Configuration
              value: "redis.fc-redis.svc:6379"
          resources:
            requests:
              cpu: 100m
              memory: 256Mi
            limits:
              cpu: 1000m
              memory: 768Mi
          # Startup budget: 5s + 30 * 5s = 155s before the kubelet restarts the
          # container; readiness/liveness take over once startup succeeds.
          startupProbe:
            tcpSocket:
              port: 8080
            initialDelaySeconds: 5
            periodSeconds: 5
            failureThreshold: 30
          readinessProbe:
            tcpSocket:
              port: 8080
            periodSeconds: 10
            failureThreshold: 3
          livenessProbe:
            tcpSocket:
              port: 8080
            initialDelaySeconds: 30
            periodSeconds: 30
            failureThreshold: 3
          securityContext:
            runAsNonRoot: true
            runAsUser: 1654
            runAsGroup: 1654
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
            capabilities:
              drop:
                - ALL
          volumeMounts:
            # Root filesystem is read-only, so writable scratch and log
            # directories are provided as emptyDir volumes.
            - name: tmp
              mountPath: /tmp
            - name: logs
              mountPath: /app/logs
            - name: devicemgmt-mtls
              mountPath: /secrets/devicemgmt-mtls
              readOnly: true
      volumes:
        - name: tmp
          emptyDir: {}
        - name: logs
          emptyDir: {}
        - name: devicemgmt-mtls
          secret:
            secretName: fc-devicemgmt-runtime
            defaultMode: 0400
            # Project only the mTLS material. Without `items`, every key of the
            # Secret — including DB-Password — is written as a file into the
            # mTLS directory. The password is already delivered via env var.
            items:
              - key: mtls-ca.pem
                path: mtls-ca.pem
              - key: mtls-client.crt
                path: mtls-client.crt
              - key: mtls-client.key
                path: mtls-client.key
              - key: mtls-chain.pem
                path: mtls-chain.pem
--- a/apps/fc-devicemgmt/ingressroute-web.yaml
+++ b/apps/fc-devicemgmt/ingressroute-web.yaml
@@ -0,0 +1,55 @@
 # LAN ingress for FlowerCore.DeviceManagement Web.
 #
 # RKE2 Traefik has no built-in ACME resolver configured. Keep TLS certificate
 # ownership in cert-manager Certificate/fc-devicemgmt-web-tls.
 apiVersion: traefik.io/v1alpha1
 kind: IngressRoute
 metadata:
  name: fc-devicemgmt-web
  namespace: fc-devicemgmt
  labels:
    app.kubernetes.io/name: fc-devicemgmt-web
    app.kubernetes.io/component: web
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
 spec:
  entryPoints:
    - websecure
  routes:
    - match: Host(`devices.iamworkin.lan`)
      kind: Rule
      services:
        - name: fc-devicemgmt-web
          port: 80
  tls:
    secretName: fc-devicemgmt-web-tls
 # Future public agent/update host gate (OFF by default):
 #
 # Do not enable `update.flowercore.io` here until Authentik OIDC Q-OIDC-1
 # resolves the public-device-management auth model and route ownership with
 # UpdateCenter. When enabled, use a separate public IngressRoute with an
 # explicit Method allowlist, public-host auth middleware, and public TLS
 # certificate strategy. Leaving this as comments keeps ArgoCD from stealing
 # live UpdateCenter traffic.
 #
 # apiVersion: traefik.io/v1alpha1
 # kind: IngressRoute
 # metadata:
 #   name: fc-devicemgmt-web-public
 #   namespace: fc-devicemgmt
 #   annotations:
 #     flowercore.io/public-host-gate: "disabled-until-Q-OIDC-1"
 # spec:
 #   entryPoints:
 #     - websecure
 #   routes:
 #     - match: Host(`update.flowercore.io`) && (Method(`GET`) || Method(`HEAD`) || Method(`POST`) || Method(`OPTIONS`))
 #       kind: Rule
 #       services:
 #         - name: fc-devicemgmt-web
 #           port: 80
 #   tls:
 #     secretName: fc-devicemgmt-public-tls
--- a/apps/fc-devicemgmt/namespace.yaml
+++ b/apps/fc-devicemgmt/namespace.yaml
@@ -0,0 +1,13 @@
 # FlowerCore.DeviceManagement namespace.
 #
 # ArgoCD discovers this directory as Application `infra-fc-devicemgmt`.
 # Namespace that holds every fc-devicemgmt resource in this directory
 # (IngressRoute, NetworkPolicies, Service, ServiceAccount).
 apiVersion: v1
 kind: Namespace
 metadata:
  name: fc-devicemgmt
  labels:
    app.kubernetes.io/name: fc-devicemgmt
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    # Same tenant/creator labels as the other fc-devicemgmt manifests.
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
--- a/apps/fc-devicemgmt/network-policy.yaml
+++ b/apps/fc-devicemgmt/network-policy.yaml
@@ -0,0 +1,224 @@
 # FlowerCore.DeviceManagement NetworkPolicies.
 #
 # NetworkPolicies belong in bluejay-infra so ArgoCD owns rebuild state.
 # Rules include Traefik post-DNAT backend ports per
 # feedback_netpol_dnat_backend_port and Synology NFS egress for the requested
 # cold-tier / future artifact path.
 ---
 # Isolation policy for pods labelled app=fc-devicemgmt-web. Both Ingress and
 # Egress are listed in policyTypes, so only the traffic enumerated below is
 # permitted for those pods.
 apiVersion: networking.k8s.io/v1
 kind: NetworkPolicy
 metadata:
  name: fc-devicemgmt-web-isolation
  namespace: fc-devicemgmt
  labels:
    app.kubernetes.io/name: fc-devicemgmt-web
    app.kubernetes.io/component: web
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
 spec:
  podSelector:
    matchLabels:
      app: fc-devicemgmt-web
  policyTypes:
    - Ingress
    - Egress
  ingress:
    # LAN edge: only cluster Traefik should reach the Web pod for
    # devices.iamworkin.lan.
    # Port 8080 is the pod-side targetPort of Service/fc-devicemgmt-web
    # (service port 80 -> targetPort 8080 in service-web.yaml).
    - from:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: traefik-system
          podSelector:
            matchLabels:
              app.kubernetes.io/name: traefik
      ports:
        - port: 8080
          protocol: TCP
    # Direct LAN diagnostics are allowed only from FlowerCore LAN/VPN ranges.
    - from:
        - ipBlock:
            cidr: 10.0.56.0/24
        - ipBlock:
            cidr: 10.0.57.0/24
        - ipBlock:
            cidr: 10.0.58.0/24
        - ipBlock:
            cidr: 10.0.68.0/27
      ports:
        - port: 8080
          protocol: TCP
  egress:
    # CoreDNS.
    - to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: kube-system
          podSelector:
            matchLabels:
              k8s-app: kube-dns
      ports:
        - port: 53
          protocol: UDP
        - port: 53
          protocol: TCP
    # Database namespace.
    # No podSelector: every pod in fc-mysql is reachable on 3306.
    - to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: fc-mysql
      ports:
        - port: 3306
          protocol: TCP
    # Redis backplane for multi-replica SignalR / live-status fan-out.
    # No podSelector: every pod in fc-redis is reachable on 6379.
    - to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: fc-redis
      ports:
        - port: 6379
          protocol: TCP
    # Traefik VIP / in-cluster Traefik for self-callbacks and public URL
    # generation tests. Include post-DNAT backend ports 8443 + 8080.
    # NOTE(review): 10.0.56.200/32 lies inside 10.0.56.0/24, which the agent
    # egress rule below already allows on these same four ports; the explicit
    # VIP entry documents intent rather than widening access.
    - to:
        - ipBlock:
            cidr: 10.0.56.200/32
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: traefik-system
          podSelector:
            matchLabels:
              app.kubernetes.io/name: traefik
      ports:
        - port: 80
          protocol: TCP
        - port: 443
          protocol: TCP
        - port: 8080
          protocol: TCP
        - port: 8443
          protocol: TCP
    # Agent egress: LAN/VPN devices may run DM Agent in Generic, Kiosk, Pi,
    # ThinClient, or Server mode. Keep this private-range only.
    - to:
        - ipBlock:
            cidr: 10.0.56.0/24
        - ipBlock:
            cidr: 10.0.57.0/24
        - ipBlock:
            cidr: 10.0.58.0/24
        - ipBlock:
            cidr: 10.0.68.0/27
      ports:
        - port: 80
          protocol: TCP
        - port: 443
          protocol: TCP
        - port: 8080
          protocol: TCP
        - port: 8443
          protocol: TCP
        - port: 5000
          protocol: TCP
        - port: 5001
          protocol: TCP
    # Synology NFS cold-tier / artifact mount allowance.
    # Single host 10.0.58.3; 2049 and 111 are opened over both TCP and UDP.
    - to:
        - ipBlock:
            cidr: 10.0.58.3/32
      ports:
        - port: 2049
          protocol: TCP
        - port: 2049
          protocol: UDP
        - port: 111
          protocol: TCP
        - port: 111
          protocol: UDP
 ---
 # Isolation policy for pods labelled app=fc-devicemgmt-operator. Both Ingress
 # and Egress are listed in policyTypes, so only the traffic enumerated below
 # is permitted for those pods.
 apiVersion: networking.k8s.io/v1
 kind: NetworkPolicy
 metadata:
  name: fc-devicemgmt-operator-isolation
  namespace: fc-devicemgmt
  labels:
    app.kubernetes.io/name: fc-devicemgmt-operator
    app.kubernetes.io/component: operator
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
 spec:
  podSelector:
    matchLabels:
      app: fc-devicemgmt-operator
  policyTypes:
    - Ingress
    - Egress
  ingress:
    # Only the monitoring namespace may reach the operator, on TCP 8080
    # (presumably a metrics/health endpoint — confirm against the operator
    # Deployment, which is not part of this file).
    - from:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: monitoring
      ports:
        - port: 8080
          protocol: TCP
  egress:
    # CoreDNS.
    - to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: kube-system
          podSelector:
            matchLabels:
              k8s-app: kube-dns
      ports:
        - port: 53
          protocol: UDP
        - port: 53
          protocol: TCP
    # Kubernetes API for KubeOps reconciliation and Deployment UID lookup.
    # NOTE(review): an empty `to` list places no restriction on the
    # destination, so this rule appears to allow TCP 443/6443 to any address,
    # not only the API server. That would also make the 443 entry in the
    # agent egress rule below redundant — confirm this breadth is intended.
    - to: []
      ports:
        - port: 443
          protocol: TCP
        - port: 6443
          protocol: TCP
    # Agent egress for operator-initiated probes / fallback command dispatch.
    # Same CIDR and port list as the agent egress rule of the web policy.
    - to:
        - ipBlock:
            cidr: 10.0.56.0/24
        - ipBlock:
            cidr: 10.0.57.0/24
        - ipBlock:
            cidr: 10.0.58.0/24
        - ipBlock:
            cidr: 10.0.68.0/27
      ports:
        - port: 80
          protocol: TCP
        - port: 443
          protocol: TCP
        - port: 8080
          protocol: TCP
        - port: 8443
          protocol: TCP
        - port: 5000
          protocol: TCP
        - port: 5001
          protocol: TCP
    # Synology NFS allowance for future cold-tier/audit archival jobs.
    - to:
        - ipBlock:
            cidr: 10.0.58.3/32
      ports:
        - port: 2049
          protocol: TCP
        - port: 2049
          protocol: UDP
        - port: 111
          protocol: TCP
        - port: 111
          protocol: UDP
--- a/apps/fc-devicemgmt/service-web.yaml
+++ b/apps/fc-devicemgmt/service-web.yaml
@@ -0,0 +1,22 @@
 # ClusterIP Service in front of the fc-devicemgmt-web pods. The IngressRoute
 # in ingressroute-web.yaml targets this Service on port 80.
 apiVersion: v1
 kind: Service
 metadata:
  name: fc-devicemgmt-web
  namespace: fc-devicemgmt
  labels:
    app: fc-devicemgmt-web
    app.kubernetes.io/name: fc-devicemgmt-web
    app.kubernetes.io/component: web
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
 spec:
  type: ClusterIP
  # Same `app` label the web NetworkPolicy uses as its podSelector.
  selector:
    app: fc-devicemgmt-web
  ports:
    # Service port 80 forwards to pod port 8080, the port the NetworkPolicy
    # ingress rules open.
    - name: http
      port: 80
      targetPort: 8080
      protocol: TCP
--- a/apps/fc-devicemgmt/serviceaccount-operator.yaml
+++ b/apps/fc-devicemgmt/serviceaccount-operator.yaml
@@ -0,0 +1,12 @@
 # ServiceAccount for the fc-devicemgmt operator component. No RBAC bindings
 # are defined in this file.
 apiVersion: v1
 kind: ServiceAccount
 metadata:
  name: fc-devicemgmt-operator
  namespace: fc-devicemgmt
  labels:
    app.kubernetes.io/name: fc-devicemgmt-operator
    app.kubernetes.io/component: operator
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
--- a/apps/fc-redis/fc-redis.yaml
+++ b/apps/fc-redis/fc-redis.yaml
@@ -0,0 +1,171 @@
 # fc-redis — SignalR backplane for cross-product event bus
 #
 # Lands per Q-SO-1 resolution (2026-05-11 PM): SignalR backplane in Phase A,
 # not Phase C as originally drafted. Operator directive: "Redis can be
 # deployed just fine as it's another FlowerCore technology we'll want to
 # manage."
 #
 # Phase A scope (this file):
 #   - Single Redis 7.x Alpine pod
 #   - 1Gi Longhorn RWO PVC for AOF persistence
 #   - ClusterIP Service at `redis.fc-redis.svc.cluster.local:6379`
 #   - No AUTH (in-cluster only; not exposed externally)
 #   - No IngressRoute (backplane is server-to-server only)
 #
 # Consumers (Phase A IMPL across FC services):
 #   - FlowerCore.Signage.Web (OpsConsoleHub)
 #   - FlowerCore.Scoreboard.Web (ScoreboardHub)
 #   - FlowerCore.SignalControl.Web
 #   - FlowerCore.DMS.Web
 #   - Any other product joining the cross-product event bus
 #
 # Each consumer adds:
 #   services.AddSignalR()
 #           .AddStackExchangeRedis(
 #               "redis.fc-redis.svc.cluster.local:6379",
 #               opts => opts.Configuration.ChannelPrefix =
 #                   StackExchange.Redis.RedisChannel.Literal("fc-opsconsole"));
 #
 # Phase B / C follow-ons (out of scope here):
 #   - Redis Sentinel for HA (3-node)
 #   - AUTH password from 1Password Connect (rotate via /rotate-password)
 #   - redis_exporter sidecar for Prometheus scrape
 #   - Network policies restricting which namespaces can dial 6379
 #
 # Design: docs/signage/operations-console-phase-2-design.md §3.5
 # Decision: Q-SO-1 (RESOLVED 2026-05-11 PM)
 # Memory: feedback_blooming_ui_pattern_no_iframes
 ---
 # Namespace for the Redis SignalR backplane. Its name is what the
 # fc-devicemgmt web NetworkPolicy selects for TCP 6379 egress.
 apiVersion: v1
 kind: Namespace
 metadata:
  name: fc-redis
  labels:
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
 ---
 # 1Gi Longhorn volume for Redis AOF data; mounted at /data by the fc-redis
 # Deployment below.
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
  name: fc-redis-data
  namespace: fc-redis
 spec:
  # ReadWriteOnce is why the Deployment uses the Recreate strategy.
  accessModes:
    - ReadWriteOnce
  storageClassName: longhorn
  resources:
    requests:
      storage: 1Gi
 ---
 # Redis server configuration. The fc-redis Deployment mounts this ConfigMap
 # read-only at /etc/redis and starts redis-server with /etc/redis/redis.conf.
 apiVersion: v1
 kind: ConfigMap
 metadata:
  name: fc-redis-config
  namespace: fc-redis
 # Everything under `redis.conf: |` is a literal block handed to Redis
 # verbatim, including its `#` comment lines.
 data:
  redis.conf: |
    # Phase A — minimal config; no AUTH, no replication.
    bind 0.0.0.0
    protected-mode no
    port 6379
    tcp-backlog 511
    timeout 0
    tcp-keepalive 300
    # Persistence: AOF (fsync every second is the standard SignalR-backplane
    # durability sweet spot — the backplane only needs to survive Redis
    # restarts, not absolute zero loss).
    appendonly yes
    appendfsync everysec
    auto-aof-rewrite-percentage 100
    auto-aof-rewrite-min-size 64mb
    # Reasonable defaults — let Redis pick most things.
    maxmemory-policy allkeys-lru
    maxmemory 256mb
    # Logging
    loglevel notice
 ---
 # Single-replica Redis server for the SignalR backplane. Runs as a non-root
 # user with a read-only root filesystem; /data (PVC) is the only mount not
 # marked read-only.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: fc-redis
  namespace: fc-redis
  labels:
    app: fc-redis
 spec:
  replicas: 1
  strategy:
    type: Recreate           # RWO PVC; do not do rolling update
  selector:
    matchLabels:
      app: fc-redis
  template:
    metadata:
      labels:
        app: fc-redis
    spec:
      securityContext:
        runAsNonRoot: true
        runAsUser: 999       # redis:7-alpine default uid
        runAsGroup: 999
        # fsGroup matches runAsGroup (presumably so the PVC is group-writable
        # for the Redis process).
        fsGroup: 999
      containers:
        - name: redis
          image: redis:7-alpine
          imagePullPolicy: IfNotPresent
          # Start Redis with the config file mounted from ConfigMap
          # fc-redis-config.
          command: ["redis-server", "/etc/redis/redis.conf"]
          ports:
            - name: redis
              containerPort: 6379
          # The 384Mi memory limit sits above redis.conf `maxmemory 256mb`,
          # leaving headroom beyond the dataset cap.
          resources:
            requests:
              cpu: "50m"
              memory: "128Mi"
            limits:
              cpu: "500m"
              memory: "384Mi"
          volumeMounts:
            - name: data
              mountPath: /data
            - name: config
              mountPath: /etc/redis
              readOnly: true
          # Liveness only checks that the TCP port accepts connections.
          livenessProbe:
            tcpSocket:
              port: 6379
            initialDelaySeconds: 5
            periodSeconds: 10
          # Readiness runs `redis-cli ping` inside the container; no host or
          # port is passed, so it relies on redis-cli's default target.
          readinessProbe:
            exec:
              command: ["redis-cli", "ping"]
            initialDelaySeconds: 2
            periodSeconds: 5
          securityContext:
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
            capabilities:
              drop: [ALL]
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: fc-redis-data
        - name: config
          configMap:
            name: fc-redis-config
 ---
 # ClusterIP Service for the backplane. Name `redis` in namespace `fc-redis`
 # gives the `redis.fc-redis.svc.cluster.local:6379` address quoted in the
 # file header.
 apiVersion: v1
 kind: Service
 metadata:
  name: redis
  namespace: fc-redis
 spec:
  type: ClusterIP
  # Matches the pod label set by the fc-redis Deployment template.
  selector:
    app: fc-redis
  ports:
    - name: redis
      port: 6379
      targetPort: 6379
      protocol: TCP
--- a/apps/fc-signage-appletv/README.md
+++ b/apps/fc-signage-appletv/README.md
@@ -0,0 +1,14 @@
 # fc-signage-appletv
 Apple TV signage is a sealed appliance running the `FlowerCore.Signage.Agent.AppleTv` tvOS app per ADR-134.
 This ApplicationSet entry is documentation and inventory metadata only. It intentionally creates no `Deployment`, `Service`, or `Pod`.
 The Apple TV app connects outbound to existing FC.Signage.Web surfaces:
 - `https://signage.iamworkin.lan/hub/signage` for SignalR live status.
 - `GET /api/v1/nodes/{nodeId}/state` for the 30 second polling fallback.
 - `POST /api/v1/nodes/register` and `POST /api/v1/nodes/{nodeId}/enroll` for pairing and mTLS enrollment.
 - `POST /api/v1/nodes/{nodeId}/heartbeat` for metrics, current content identity, and local audit excerpts.
 Distribution is via Apple Developer Enterprise Program or TestFlight plus FC.Distribution / UpdateCenter publishing once Apple credentials are available.
--- a/apps/fc-signage-appletv/kustomization.yaml
+++ b/apps/fc-signage-appletv/kustomization.yaml
@@ -0,0 +1,5 @@
 # Kustomize entry point for the docs-only Apple TV signage app; the single
 # resource is the ConfigMap in manifest.yaml.
 apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 resources:
  - manifest.yaml
--- a/apps/fc-signage-appletv/manifest.yaml
+++ b/apps/fc-signage-appletv/manifest.yaml
@@ -0,0 +1,26 @@
 # Apple TV signage is a sealed tvOS appliance. This ArgoCD app intentionally
 # carries documentation metadata only; no Deployment, Service, or Pod resources
 # are created for the player.
 ---
 # Documentation-only ConfigMap: the sole Kubernetes object for the Apple TV
 # signage app. It carries a README string and no runtime configuration.
 # NOTE(review): the sibling README.md also lists `POST /api/v1/nodes/register`
 # for pairing; the README text embedded below omits it — confirm whether the
 # two should match.
 apiVersion: v1
 kind: ConfigMap
 metadata:
  name: fc-signage-appletv-docs
  namespace: fc-signage
  labels:
    app.kubernetes.io/name: fc-signage-appletv
    app.kubernetes.io/part-of: flowercore-signage
    # Marks this manifest as documentation/inventory metadata only.
    flowercore.io/manifest-kind: docs-only
 data:
  README: |
    FlowerCore.Signage.Agent.AppleTv is distributed through Apple Developer
    Enterprise Program or TestFlight, not Kubernetes.
    The app connects outbound to FC.Signage.Web:
    - SignalR: https://signage.iamworkin.lan/hub/signage
    - Polling fallback: GET /api/v1/nodes/{nodeId}/state
    - Enrollment: POST /api/v1/nodes/{nodeId}/enroll
    - Heartbeat: POST /api/v1/nodes/{nodeId}/heartbeat
    This placeholder gives ArgoCD and inventory dashboards a first-class
    Apple TV signage app entry without creating runtime pods.
--- a/apps/fc-signage-pi-player/README.md
+++ b/apps/fc-signage-pi-player/README.md
@@ -0,0 +1,17 @@
 # FlowerCore Signage Pi Player
 Phase 1 Raspberry Pi signage player packaging for Chromium kiosk deployments.
 This bundle is intentionally air-gap friendly: systemd units, shell scripts,
 udev rules, and Chromium managed policy are all checked into the repo and are
 installed by `FlowerCore.Puppet`.
 ## Scope
 - Bootstrap a stable node identity and mTLS client certificate.
 - Launch Chromium in kiosk mode against `FC.Signage.Web` player routes.
 - Restart the kiosk on HDMI hotplug.
 - Renew mTLS certificates daily when fewer than 30 days remain.
 - Detect display capabilities at boot, daily, and on HDMI hotplug.
 Phase 2 native Avalonia rendering is documented separately in Notes and remains
 deferred.
--- a/apps/fc-signage-pi-player/chromium-policies/flowercore-signage.json
+++ b/apps/fc-signage-pi-player/chromium-policies/flowercore-signage.json
@@ -0,0 +1,15 @@
 {
  "AutofillAddressEnabled": false,
  "AutofillCreditCardEnabled": false,
  "PasswordManagerEnabled": false,
  "BrowserSignin": 0,
  "MetricsReportingEnabled": false,
  "SafeBrowsingProtectionLevel": 0,
  "DefaultNotificationsSetting": 2,
  "DefaultPopupsSetting": 2,
  "BackgroundModeEnabled": false,
  "DefaultBrowserSettingEnabled": false,
  "PromotionalTabsEnabled": false,
  "CommandLineFlagSecurityWarningsEnabled": false,
  "ExtensionInstallBlocklist": ["*"]
 }
--- a/apps/fc-signage-pi-player/scripts/fc-signage-detect-display
+++ b/apps/fc-signage-pi-player/scripts/fc-signage-detect-display
@@ -0,0 +1,132 @@
 #!/usr/bin/env bash
 # Detect the first connected HDMI display, build a capability profile, and POST
 # it to FC.Signage.Web using the node's mTLS client certificate.
 #
 # Inputs:  /etc/flowercore/signage-node.json (nodeId), client.crt/client.key in
 #          /etc/fc-signage-player, optional FC_SIGNAGE_URL override.
 # Outputs: the profile JSON on stdout; a result line (and, on failure, the
 #          profile itself) appended to /var/log/fc-signage-player/capabilities.log.
 # Exit:    non-zero when the node has no nodeId yet; otherwise 0 even when no
 #          endpoint accepts the profile (it is logged locally instead).
 set -euo pipefail
 NODE_JSON="/etc/flowercore/signage-node.json"
 CERT_DIR="/etc/fc-signage-player"
 SIGNAGE_URL="${FC_SIGNAGE_URL:-https://signage.iamworkin.lan}"
 # `// empty` keeps a missing nodeId from turning into the literal string
 # "null" inside the endpoint URLs built below.
 NODE_ID=$(jq -r '.nodeId // empty' "$NODE_JSON")
 if [[ -z "$NODE_ID" ]]; then
  echo "[$(date -Is)] capability declare: $NODE_JSON has no nodeId; node is not registered yet" >&2
  exit 1
 fi
 # Private scratch file for the HTTP response body instead of a predictable
 # /tmp name; removed on exit.
 RESPONSE_FILE=$(mktemp)
 trap 'rm -f "$RESPONSE_FILE"' EXIT
 # Collect every HDMI connector whose sysfs status reads "connected".
 CONNECTORS=()
 for dir in /sys/class/drm/card*-HDMI-A-*; do
  [[ -e "$dir/status" ]] || continue
  if [[ "$(cat "$dir/status")" == "connected" ]]; then
    CONNECTORS+=("$(basename "$dir")")
  fi
 done
 if [[ ${#CONNECTORS[@]} -eq 0 ]]; then
  CAPABILITIES_JSON=$(jq -n --arg id "$NODE_ID" '{
    nodeId: $id,
    platform: "linux-arm64-pi",
    displayConnected: false,
    detectedAt: (now | todate),
    note: "No HDMI display detected"
  }')
 else
  # Only the first connected connector is profiled.
  PRIMARY="${CONNECTORS[0]}"
  EDID_PATH="/sys/class/drm/${PRIMARY}/edid"
  WIDTH=0
  HEIGHT=0
  REFRESH=60
  HDR=false
  AUDIO_HDMI=false
  MFG=""
  MODEL=""
  PHYSICAL_SIZE=null
  # Preferred source: decode the EDID blob when edid-decode is installed.
  if [[ -s "$EDID_PATH" ]] && command -v edid-decode >/dev/null 2>&1; then
    EDID_INFO=$(edid-decode < "$EDID_PATH" 2>/dev/null || true)
    MFG=$(echo "$EDID_INFO" | grep -m1 -oP 'Manufacturer:\s*\K\S+' || true)
    MODEL=$(echo "$EDID_INFO" | grep -m1 -oP 'Model:\s*\K\S+' || true)
    # First "WxH @ R Hz" timing in the decode output is taken as the mode.
    PREF=$(echo "$EDID_INFO" | grep -m1 -oP '\d+x\d+\s*@\s*\d+(?:\.\d+)?\s*Hz' || true)
    if [[ -n "$PREF" ]]; then
      WIDTH=$(echo "$PREF" | grep -oP '^\d+')
      HEIGHT=$(echo "$PREF" | grep -oP 'x\K\d+')
      # Truncate a fractional refresh rate (e.g. 59.94) to its integer part.
      REFRESH=$(echo "$PREF" | grep -oP '@\s*\K[\d.]+' | cut -d. -f1)
    fi
    if echo "$EDID_INFO" | grep -qiE 'HDR (Static|Dynamic) Metadata Block'; then HDR=true; fi
    if echo "$EDID_INFO" | grep -qiE 'CEA Audio Block|Audio Format Descriptor'; then AUDIO_HDMI=true; fi
    PH_W=$(echo "$EDID_INFO" | grep -m1 -oP 'Maximum image size:\s*\K\d+\s*cm\s*x\s*\d+' || true)
    if [[ -n "$PH_W" ]]; then
      PH_CM_W=$(echo "$PH_W" | grep -oP '^\d+')
      PH_CM_H=$(echo "$PH_W" | grep -oP 'x\s*\K\d+')
      if (( PH_CM_W > 0 && PH_CM_H > 0 )); then
        # Diagonal in inches from the width/height given in centimetres.
        PHYSICAL_SIZE=$(awk -v w="$PH_CM_W" -v h="$PH_CM_H" 'BEGIN { printf "%.1f", sqrt(w*w + h*h)/2.54 }')
      fi
    fi
  fi
  # Fallback: ask kmsprint for the resolution when EDID gave none.
  if [[ "$WIDTH" == "0" ]] && command -v kmsprint >/dev/null 2>&1; then
    # kmsprint labels connectors as "HDMI-A-1", without the sysfs "cardN-"
    # prefix carried by $PRIMARY, so strip the prefix before matching.
    KMS_CONNECTOR="${PRIMARY#card*-}"
    KMS=$(kmsprint 2>/dev/null | grep -F -A2 -- "$KMS_CONNECTOR" | grep -oP '\d+x\d+' | head -1 || true)
    if [[ -n "$KMS" ]]; then
      WIDTH=$(echo "$KMS" | grep -oP '^\d+')
      HEIGHT=$(echo "$KMS" | grep -oP 'x\K\d+')
    fi
  fi
  AUDIO_ALSA=false
  if aplay -l 2>/dev/null | grep -qi 'card.*HDMI'; then AUDIO_ALSA=true; fi
  # Audio is reported only when both the EDID and ALSA agree.
  HAS_AUDIO=false
  if [[ "$AUDIO_HDMI" == "true" && "$AUDIO_ALSA" == "true" ]]; then HAS_AUDIO=true; fi
  CAPABILITIES_JSON=$(jq -n \
    --arg id "$NODE_ID" \
    --argjson w "$WIDTH" \
    --argjson h "$HEIGHT" \
    --argjson r "$REFRESH" \
    --argjson hdr "$HDR" \
    --argjson audio "$HAS_AUDIO" \
    --arg connector "$PRIMARY" \
    --arg mfg "$MFG" \
    --arg model "$MODEL" \
    --argjson size "$PHYSICAL_SIZE" \
    '{
      nodeId: $id,
      platform: "linux-arm64-pi",
      displayConnected: true,
      detectedAt: (now | todate),
      hardware: {
        maxResolution: { width: $w, height: $h },
        nativeResolution: { width: $w, height: $h },
        refreshRateHz: $r,
        colorDepth: ($hdr | if . then "Color30Hdr" else "Color24" end),
        hasAudioOutput: $audio,
        audioChannelCount: ($audio | if . then 2 else 0 end),
        physicalSizeInches: $size,
        connector: $connector,
        manufacturer: $mfg,
        modelName: $model
      },
      render: { codecs: ["h264", "vp9", "mp4"] }
    }')
 fi
 # Endpoints are tried in order; the first 200/201/204 wins.
 ENDPOINT_CANDIDATES=(
  "${SIGNAGE_URL}/api/v1/nodes/${NODE_ID}/capabilities"
  "${SIGNAGE_URL}/api/v1/displays/${NODE_ID}/capability-profile"
 )
 SUCCESS=false
 for url in "${ENDPOINT_CANDIDATES[@]}"; do
  # curl still emits the -w value ("000") when the transfer fails, so only
  # swallow the exit status here; echoing a second "000" would yield "000000".
  HTTP_STATUS=$(curl -sk -o "$RESPONSE_FILE" -w "%{http_code}" \
    --max-time 10 \
    --cert "$CERT_DIR/client.crt" --key "$CERT_DIR/client.key" \
    -X POST "$url" \
    -H "Content-Type: application/json" \
    -d "$CAPABILITIES_JSON" || true)
  HTTP_STATUS="${HTTP_STATUS:-000}"
  if [[ "$HTTP_STATUS" == "200" || "$HTTP_STATUS" == "201" || "$HTTP_STATUS" == "204" ]]; then
    SUCCESS=true
    break
  fi
 done
 mkdir -p /var/log/fc-signage-player
 if [[ "$SUCCESS" != "true" ]]; then
  echo "[$(date -Is)] capability declare: no endpoint accepted the profile; logging locally" \
    | tee -a /var/log/fc-signage-player/capabilities.log
  echo "$CAPABILITIES_JSON" | tee -a /var/log/fc-signage-player/capabilities.log
 else
  echo "[$(date -Is)] capability declare: ok ($url)" | tee -a /var/log/fc-signage-player/capabilities.log
 fi
 echo "$CAPABILITIES_JSON"
--- a/apps/fc-signage-pi-player/scripts/flowercore-signage-bootstrap.sh
+++ b/apps/fc-signage-pi-player/scripts/flowercore-signage-bootstrap.sh
@@ -0,0 +1,144 @@
 #!/usr/bin/env bash
 # First-boot identity + mTLS enrollment for a FlowerCore Signage Pi.
 #
 # Flow: create (or reuse) the node identity file, register with
 # FC.Signage.Web, optionally self-approve with a setup code, wait up to 30
 # minutes for approval, generate a key + CSR, enroll, build a PKCS#12 bundle,
 # then queue the display-detect and kiosk units.
 #
 # Exit codes: 0 enrolled (or already enrolled), 2 registration failed,
 #             3 approval not granted in time, 4 enrollment failed.
 set -euo pipefail
 NODE_JSON="/etc/flowercore/signage-node.json"
 CERT_DIR="/etc/fc-signage-player"
 SIGNAGE_URL="${FC_SIGNAGE_URL:-https://signage.iamworkin.lan}"
 SETUP_CODE_FILE="/etc/flowercore/signage-setup-code"
 mkdir -p /etc/flowercore "$CERT_DIR" /var/log/fc-signage-player
 chown fc-signage:fc-signage /etc/flowercore "$CERT_DIR" /var/log/fc-signage-player
 chmod 0750 "$CERT_DIR"
 # Idempotence guard: a node file with enrolledAt plus an existing PKCS#12
 # bundle means a previous run completed.
 if [[ -s "$NODE_JSON" && -s "$CERT_DIR/client.p12" ]]; then
  ENROLLED=$(jq -r '.enrolledAt // empty' "$NODE_JSON")
  if [[ -n "$ENROLLED" ]]; then
    echo "[$(date -Is)] bootstrap: already enrolled at $ENROLLED; skipping"
    exit 0
  fi
 fi
 # Private scratch directory for HTTP response bodies. This script chowns
 # system paths, so it is expected to run privileged; predictable /tmp file
 # names are avoided for that reason.
 WORK_DIR=$(mktemp -d)
 trap 'rm -rf "$WORK_DIR"' EXIT
 REGISTER_RESPONSE="$WORK_DIR/register-response.json"
 ENROLL_RESPONSE="$WORK_DIR/enroll-response.json"
 if [[ -s "$NODE_JSON" ]]; then
  NODE_UUID=$(jq -r '.nodeUuid // empty' "$NODE_JSON")
  MACHINE_ID=$(jq -r '.machineId // empty' "$NODE_JSON")
 else
  NODE_UUID=$(uuidgen)
  # machineId is the first 16 hex characters of the UUID with dashes removed.
  MACHINE_ID=$(echo "$NODE_UUID" | tr -d '-' | cut -c1-16)
  jq -n --arg uuid "$NODE_UUID" --arg machine "$MACHINE_ID" --arg host "$(hostname -f)" --arg ts "$(date -Is)" \
    '{nodeUuid: $uuid, machineId: $machine, hostname: $host, platform: "linux-arm64-pi", createdAt: $ts}' \
    > "$NODE_JSON"
  chmod 0640 "$NODE_JSON"
  chown fc-signage:fc-signage "$NODE_JSON"
 fi
 SETUP_CODE=""
 if [[ -s "$SETUP_CODE_FILE" ]]; then
  SETUP_CODE=$(tr -d '\r\n\t ' < "$SETUP_CODE_FILE")
 fi
 MODEL=$(tr -d '\0' < /sys/firmware/devicetree/base/model 2>/dev/null || echo Unknown)
 REG_PAYLOAD=$(jq -n \
  --arg machine "$MACHINE_ID" \
  --arg name "$(hostname -f)" \
  --arg setup "$SETUP_CODE" \
  --arg resolution "1920x1080" \
  --arg model "$MODEL" \
  '{
    machineId: $machine,
    name: $name,
    setupCode: ($setup | if . == "" then null else . end),
    resolution: $resolution,
    hardwareModel: $model,
    platform: "linux-arm64-pi"
  }')
 HTTP_STATUS="000"
 for attempt in 1 2; do
  # curl still emits the -w value ("000") when the transfer fails, so only
  # swallow the exit status; echoing a second "000" would log "000000".
  HTTP_STATUS=$(curl -sk -o "$REGISTER_RESPONSE" -w "%{http_code}" \
    --max-time 15 \
    -X POST "${SIGNAGE_URL}/api/v1/nodes/register" \
    -H "Content-Type: application/json" \
    -d "$REG_PAYLOAD" || true)
  HTTP_STATUS="${HTTP_STATUS:-000}"
  if [[ "$HTTP_STATUS" == "200" || "$HTTP_STATUS" == "201" ]]; then
    break
  fi
  echo "[$(date -Is)] bootstrap: register attempt $attempt returned $HTTP_STATUS" >&2
  sleep 5
 done
 if [[ "$HTTP_STATUS" != "200" && "$HTTP_STATUS" != "201" ]]; then
  echo "[$(date -Is)] bootstrap: register failed after 2 attempts" >&2
  exit 2
 fi
 NODE_ID=$(jq -r '.nodeId // empty' "$REGISTER_RESPONSE")
 if [[ -z "$NODE_ID" ]]; then
  echo "[$(date -Is)] bootstrap: register response did not include nodeId" >&2
  exit 2
 fi
 jq --arg id "$NODE_ID" '.nodeId = $id' "$NODE_JSON" > "${NODE_JSON}.tmp" && mv "${NODE_JSON}.tmp" "$NODE_JSON"
 # Best-effort self-approval. The body is built with jq so a setup code
 # containing quotes or backslashes cannot break the JSON.
 if [[ -s "$SETUP_CODE_FILE" ]]; then
  curl -sk --max-time 15 -X POST "${SIGNAGE_URL}/api/v1/nodes/${NODE_ID}/approve-via-setup-code" \
    -H "Content-Type: application/json" \
    -d "$(jq -nc --arg setup "$SETUP_CODE" '{setupCode: $setup}')" \
    -o /dev/null || true
 fi
 # Poll for approval every 15 s for up to 30 minutes.
 STATUS=""
 DEADLINE=$(( $(date +%s) + 1800 ))
 while (( $(date +%s) < DEADLINE )); do
  # `|| true`: with set -e + pipefail a single failed poll (network blip,
  # non-JSON body) would otherwise abort the whole bootstrap.
  STATUS=$(curl -sk --max-time 5 "${SIGNAGE_URL}/api/v1/nodes/${NODE_ID}/status" | jq -r '.status // empty' 2>/dev/null || true)
  if [[ "$STATUS" == "Approved" || "$STATUS" == "Enrolled" || "$STATUS" == "Online" ]]; then
    break
  fi
  sleep 15
 done
 if [[ "$STATUS" != "Approved" && "$STATUS" != "Enrolled" && "$STATUS" != "Online" ]]; then
  echo "[$(date -Is)] bootstrap: approval not granted within 30min budget" >&2
  exit 3
 fi
 KEY_PATH="${CERT_DIR}/client.key"
 CSR_PATH="${CERT_DIR}/client.csr"
 # Create the private key under a restrictive umask so it is never
 # world-readable, even before the chmod further down.
 (
  umask 077
  openssl ecparam -genkey -name prime256v1 -out "$KEY_PATH"
 )
 openssl req -new -key "$KEY_PATH" -out "$CSR_PATH" \
  -subj "/CN=${NODE_ID}/O=FlowerCore/OU=SignagePlayer-Pi"
 ENROLL_PAYLOAD=$(jq -n --arg csr "$(cat "$CSR_PATH")" '{certificateSigningRequest: $csr}')
 # `|| true` lets a transport failure reach the explicit exit-4 path below
 # instead of aborting silently under set -e.
 HTTP_STATUS=$(curl -sk -o "$ENROLL_RESPONSE" -w "%{http_code}" \
  --max-time 15 \
  -X POST "${SIGNAGE_URL}/api/v1/nodes/${NODE_ID}/enroll" \
  -H "Content-Type: application/json" \
  -d "$ENROLL_PAYLOAD" || true)
 HTTP_STATUS="${HTTP_STATUS:-000}"
 if [[ "$HTTP_STATUS" != "200" && "$HTTP_STATUS" != "201" ]]; then
  echo "[$(date -Is)] bootstrap: enroll failed with HTTP $HTTP_STATUS" >&2
  exit 4
 fi
 # `// empty` leaves the file empty rather than containing "null" when the
 # response has no certificate field.
 jq -r '.clientCertificatePem // .signedCertificatePem // empty' "$ENROLL_RESPONSE" > "${CERT_DIR}/client.crt"
 jq -r '.caCertificatePem // empty' "$ENROLL_RESPONSE" > "${CERT_DIR}/ca-chain.pem"
 if [[ ! -s "${CERT_DIR}/client.crt" || ! -s "${CERT_DIR}/ca-chain.pem" ]]; then
  echo "[$(date -Is)] bootstrap: enroll response did not include certificate material" >&2
  exit 4
 fi
 P12_PASS=$(openssl rand -hex 24)
 # Password file and bundle are created under umask 077; final modes are set
 # explicitly below.
 (
  umask 077
  echo -n "$P12_PASS" > "${CERT_DIR}/client.p12.pass"
  openssl pkcs12 -export \
    -inkey "$KEY_PATH" \
    -in "${CERT_DIR}/client.crt" \
    -certfile "${CERT_DIR}/ca-chain.pem" \
    -out "${CERT_DIR}/client.p12" \
    -password "pass:${P12_PASS}"
 )
 chown fc-signage:fc-signage "${CERT_DIR}"/* "$NODE_JSON"
 chmod 0640 "${CERT_DIR}/client.p12" "${CERT_DIR}/client.crt" "${CERT_DIR}/ca-chain.pem" "$KEY_PATH"
 chmod 0600 "${CERT_DIR}/client.p12.pass"
 EXPIRY=$(openssl x509 -in "${CERT_DIR}/client.crt" -enddate -noout | sed 's/notAfter=//')
 jq --arg ts "$(date -Is)" --arg exp "$EXPIRY" \
  '.enrolledAt = $ts | .certExpiry = $exp' "$NODE_JSON" > "${NODE_JSON}.tmp" \
  && mv "${NODE_JSON}.tmp" "$NODE_JSON"
 # The tmp+mv rewrite replaces the file, so restore the ownership and mode it
 # was given when first created.
 chown fc-signage:fc-signage "$NODE_JSON"
 chmod 0640 "$NODE_JSON"
 # --no-block: this script runs inside flowercore-signage-bootstrap.service,
 # which is ordered Before= the player unit and which the detect-display unit
 # is ordered After=. A blocking `systemctl start` would wait on jobs that
 # cannot run until this script exits, stalling until TimeoutStartSec. Queue
 # the jobs instead; they start once this service finishes.
 systemctl start --no-block flowercore-signage-detect-display.service || true
 systemctl start --no-block flowercore-signage-player-pi.service || true
 echo "[$(date -Is)] bootstrap: enrolled and kiosk started (NodeId=${NODE_ID})"
--- a/apps/fc-signage-pi-player/scripts/flowercore-signage-hdmi-respond.sh
+++ b/apps/fc-signage-pi-player/scripts/flowercore-signage-hdmi-respond.sh
@@ -0,0 +1,6 @@
 #!/usr/bin/env bash
 # HDMI hotplug responder: pause briefly, redeclare display capabilities
 # (best effort), then restart the kiosk unit.
 set -euo pipefail
 SETTLE_SECONDS=2
 DETECT_UNIT="flowercore-signage-detect-display.service"
 PLAYER_UNIT="flowercore-signage-player-pi.service"
 sleep "$SETTLE_SECONDS"
 # A failed capability redeclaration must not prevent the kiosk restart.
 if ! systemctl start "$DETECT_UNIT"; then
  :
 fi
 systemctl restart "$PLAYER_UNIT"
--- a/apps/fc-signage-pi-player/scripts/flowercore-signage-launch.sh
+++ b/apps/fc-signage-pi-player/scripts/flowercore-signage-launch.sh
@@ -0,0 +1,44 @@
 #!/usr/bin/env bash
 # Launch the Chromium kiosk against the FC.Signage.Web player route.
 #
 # The player URL carries the SHA-256 fingerprint of the node's client
 # certificate as `token`. The /embed route is probed first; any status other
 # than 200/301/302 is logged and the plain /player/<nodeId> route is used.
 set -euo pipefail
 NODE_JSON="/etc/flowercore/signage-node.json"
 NODE_ID=$(jq -r '.nodeId' "$NODE_JSON")
 SIGNAGE_URL="${FC_SIGNAGE_URL:-https://signage.iamworkin.lan}"
 CERT_DIR="/etc/fc-signage-player"
 # -clcerts limits the PKCS#12 dump to the client certificate, so the
 # fingerprint cannot be taken from a CA certificate that happens to come
 # first in the bundle (the prelaunch script extracts the cert the same way).
 # Colons are stripped from the fingerprint.
 CERT_THUMB=$(openssl pkcs12 -in "$CERT_DIR/client.p12" -passin file:"$CERT_DIR/client.p12.pass" -nodes -nokeys -clcerts 2>/dev/null \
  | openssl x509 -fingerprint -sha256 -noout \
  | sed 's/.*=//' \
  | tr -d ':')
 PLAYER_URL="${SIGNAGE_URL}/player/${NODE_ID}/embed?token=${CERT_THUMB}"
 # curl still emits the -w value ("000") when the transfer fails, so only
 # swallow the exit status; echoing a second "000" would log "000000".
 HTTP_STATUS=$(curl -sk -o /dev/null -w "%{http_code}" --max-time 5 \
  --cert-type P12 --cert "$CERT_DIR/client.p12:$(cat "$CERT_DIR/client.p12.pass")" \
  "$PLAYER_URL" || true)
 HTTP_STATUS="${HTTP_STATUS:-000}"
 mkdir -p /var/log/fc-signage-player
 if [[ "$HTTP_STATUS" != "200" && "$HTTP_STATUS" != "301" && "$HTTP_STATUS" != "302" ]]; then
  echo "[$(date -Is)] /embed returned $HTTP_STATUS; falling back to /player/${NODE_ID}" \
    >> /var/log/fc-signage-player/url-divergence.log
  PLAYER_URL="${SIGNAGE_URL}/player/${NODE_ID}?token=${CERT_THUMB}"
 fi
 # exec replaces this shell so the service's main process is Chromium itself.
 exec chromium-browser \
  --kiosk \
  --noerrdialogs \
  --disable-infobars \
  --disable-translate \
  --disable-features=TranslateUI,InfiniteSessionRestore \
  --autoplay-policy=no-user-gesture-required \
  --password-store=basic \
  --user-data-dir=/var/lib/fc-signage-player/profile \
  --disk-cache-dir=/var/lib/fc-signage-player/cache \
  --disk-cache-size=104857600 \
  --no-first-run \
  --no-default-browser-check \
  --check-for-update-interval=2592000 \
  --enable-features=OverlayScrollbar \
  --start-fullscreen \
  --window-position=0,0 \
  --window-size=1920,1080 \
  "$PLAYER_URL"
--- a/apps/fc-signage-pi-player/scripts/flowercore-signage-prelaunch.sh
+++ b/apps/fc-signage-pi-player/scripts/flowercore-signage-prelaunch.sh
@@ -0,0 +1,20 @@
 #!/usr/bin/env bash
 # Pre-launch gate for the kiosk: fail when a required identity file is
 # missing or unreadable, warn (without failing) when the client-certificate
 # check against a 7-day window does not pass, then record an "ok" line.
 set -euo pipefail
 LOG_DIR="/var/log/fc-signage-player"
 P12_FILE="/etc/fc-signage-player/client.p12"
 P12_PASS_FILE="/etc/fc-signage-player/client.p12.pass"
 WARN_WINDOW_SECONDS=$((7*24*3600))
 REQUIRED_FILES=(/etc/flowercore/signage-node.json "$P12_FILE" "$P12_PASS_FILE")
 mkdir -p "$LOG_DIR"
 for required in "${REQUIRED_FILES[@]}"; do
  if [[ -r "$required" ]]; then
    continue
  fi
  echo "[$(date -Is)] prelaunch: missing or unreadable $required" >&2
  exit 1
 done
 # Warning only: the script continues to the "ok" line either way.
 if ! openssl pkcs12 -in "$P12_FILE" -passin "file:$P12_PASS_FILE" -nokeys -clcerts 2>/dev/null \
   | openssl x509 -checkend "$WARN_WINDOW_SECONDS" -noout; then
  echo "[$(date -Is)] prelaunch: client cert expires within 7 days" >&2
 fi
 echo "[$(date -Is)] prelaunch: ok" | tee -a "$LOG_DIR/prelaunch.log"
--- a/apps/fc-signage-pi-player/scripts/flowercore-signage-renew-cert.sh
+++ b/apps/fc-signage-pi-player/scripts/flowercore-signage-renew-cert.sh
@@ -0,0 +1,46 @@
 #!/usr/bin/env bash
 # Renew the node's mTLS client certificate when fewer than 30 days remain.
 #
 # A fresh key + CSR is generated and POSTed to /api/v1/nodes/<nodeId>/renew,
 # authenticated with the current certificate. New files are staged as *.new
 # and only moved into place once the PKCS#12 bundle has been built, so a
 # failure at any step leaves the existing certificate untouched.
 #
 # Exit codes: 0 nothing to do or renewed, 5 renewal request failed.
 set -euo pipefail
 CERT_DIR="/etc/fc-signage-player"
 NODE_JSON="/etc/flowercore/signage-node.json"
 SIGNAGE_URL="${FC_SIGNAGE_URL:-https://signage.iamworkin.lan}"
 [[ -s "$CERT_DIR/client.crt" ]] || { echo "no cert to renew"; exit 0; }
 # -checkend succeeds when the cert is still valid 30 days from now.
 if openssl x509 -in "$CERT_DIR/client.crt" -checkend $((30*24*3600)) -noout; then
  exit 0
 fi
 NODE_ID=$(jq -r '.nodeId' "$NODE_JSON")
 NEW_KEY="$CERT_DIR/client.key.new"
 NEW_CSR="$CERT_DIR/client.csr.new"
 NEW_CRT="$CERT_DIR/client.crt.new"
 NEW_CA="$CERT_DIR/ca-chain.pem.new"
 NEW_P12="$CERT_DIR/client.p12.new"
 # Private scratch file for the response body; staged *.new files are removed
 # on every exit path so a failed run leaves nothing behind.
 RESPONSE_FILE=$(mktemp)
 cleanup() {
  rm -f "$RESPONSE_FILE" "$NEW_KEY" "$NEW_CSR" "$NEW_CRT" "$NEW_CA" "$NEW_P12"
 }
 trap cleanup EXIT
 # Everything created from here on is key/certificate material: keep it
 # private until the explicit chmod below.
 umask 077
 openssl ecparam -genkey -name prime256v1 -out "$NEW_KEY"
 openssl req -new -key "$NEW_KEY" -out "$NEW_CSR" \
  -subj "/CN=${NODE_ID}/O=FlowerCore/OU=SignagePlayer-Pi"
 # curl still emits the -w value ("000") when the transfer fails; `|| true`
 # routes transport failures to the explicit exit-5 path instead of a silent
 # set -e abort.
 HTTP_STATUS=$(curl -sk -o "$RESPONSE_FILE" -w "%{http_code}" \
  --max-time 15 \
  --cert "$CERT_DIR/client.crt" --key "$CERT_DIR/client.key" \
  -X POST "${SIGNAGE_URL}/api/v1/nodes/${NODE_ID}/renew" \
  -H "Content-Type: application/json" \
  -d "$(jq -n --arg csr "$(cat "$NEW_CSR")" '{certificateSigningRequest: $csr}')" || true)
 HTTP_STATUS="${HTTP_STATUS:-000}"
 if [[ "$HTTP_STATUS" != "200" && "$HTTP_STATUS" != "201" ]]; then
  echo "[$(date -Is)] renew: failed HTTP $HTTP_STATUS; leaving old cert in place" >&2
  exit 5
 fi
 jq -r '.clientCertificatePem // .signedCertificatePem // empty' "$RESPONSE_FILE" > "$NEW_CRT"
 jq -r '.caCertificatePem // empty' "$RESPONSE_FILE" > "$NEW_CA"
 if [[ ! -s "$NEW_CRT" || ! -s "$NEW_CA" ]]; then
  echo "[$(date -Is)] renew: response did not include certificate material; leaving old cert in place" >&2
  exit 5
 fi
 # The existing bundle password is reused.
 P12_PASS=$(cat "$CERT_DIR/client.p12.pass")
 openssl pkcs12 -export -inkey "$NEW_KEY" -in "$NEW_CRT" \
  -certfile "$NEW_CA" \
  -out "$NEW_P12" -password "pass:${P12_PASS}"
 # Match the ownership and modes the bootstrap script applies, before the
 # files become live: the kiosk runs as fc-signage and must be able to read
 # them, and the key must not be world-readable.
 chown fc-signage:fc-signage "$NEW_KEY" "$NEW_CRT" "$NEW_CA" "$NEW_P12"
 chmod 0640 "$NEW_KEY" "$NEW_CRT" "$NEW_CA" "$NEW_P12"
 mv "$NEW_KEY" "$CERT_DIR/client.key"
 mv "$NEW_CRT" "$CERT_DIR/client.crt"
 mv "$NEW_CA" "$CERT_DIR/ca-chain.pem"
 mv "$NEW_P12" "$CERT_DIR/client.p12"
 chown fc-signage:fc-signage "$CERT_DIR"/client.*
 systemctl restart flowercore-signage-player-pi.service
--- a/apps/fc-signage-pi-player/systemd/99-flowercore-signage-hdmi.rules
+++ b/apps/fc-signage-pi-player/systemd/99-flowercore-signage-hdmi.rules
@@ -0,0 +1,2 @@
 # On an HDMI "change" uevent, queue the hotplug responder unit. The responder
 # sleeps 2s, redeclares display capabilities, then restarts the Chromium kiosk.
 # --no-block: the responder is a oneshot that sleeps and makes network calls;
 # a blocking `systemctl start` would hold the udev worker for its whole run.
 # NOTE(review): confirm this kernel emits "change" uevents on the connector
 # device (cardN-HDMI-A-N) and not only on the parent cardN device.
 SUBSYSTEM=="drm", KERNEL=="card?-HDMI-A-?", ACTION=="change", RUN+="/usr/bin/systemctl --no-block start flowercore-signage-player-pi-hdmi.service"
--- a/apps/fc-signage-pi-player/systemd/flowercore-signage-bootstrap.service
+++ b/apps/fc-signage-pi-player/systemd/flowercore-signage-bootstrap.service
@@ -0,0 +1,16 @@
 # Runs the first-boot bootstrap script once the network is online and before
 # the kiosk player unit.
 [Unit]
 Description=FlowerCore Signage Pi: first-boot identity + mTLS enrollment
 Wants=network-online.target
 After=network-online.target
 # The kiosk unit must not start until enrollment has finished.
 Before=flowercore-signage-player-pi.service
 [Service]
 Type=oneshot
 ExecStart=/usr/local/bin/flowercore-signage-bootstrap.sh
 # Keep the unit "active" after the script exits.
 RemainAfterExit=yes
 StandardOutput=journal
 StandardError=journal
 # The script waits up to 1800s for operator approval; 2100s leaves headroom
 # for the registration and enrollment calls around that wait.
 TimeoutStartSec=2100
 [Install]
 WantedBy=multi-user.target
--- a/apps/fc-signage-pi-player/systemd/flowercore-signage-detect-display.service
+++ b/apps/fc-signage-pi-player/systemd/flowercore-signage-detect-display.service
@@ -0,0 +1,8 @@
 # Oneshot worker that runs the display-detection script as the unprivileged
 # fc-signage user. It has no [Install] section, so it only runs when something
 # else starts it (e.g. the same-named timer).
 [Unit]
 Description=FlowerCore Signage Pi: detect connected display + declare capabilities
 # Ordering only: if both units are queued, wait for enrollment to finish first.
 After=flowercore-signage-bootstrap.service
 [Service]
 Type=oneshot
 User=fc-signage
 ExecStart=/usr/local/bin/fc-signage-detect-display
--- a/apps/fc-signage-pi-player/systemd/flowercore-signage-detect-display.timer
+++ b/apps/fc-signage-pi-player/systemd/flowercore-signage-detect-display.timer
@@ -0,0 +1,11 @@
 # Schedules flowercore-signage-detect-display.service. No Unit= is set, so the
 # timer activates the service that shares its name.
 [Unit]
 Description=Daily FlowerCore Signage Pi display capability redeclaration
 [Timer]
 # Once a day, with up to one hour of random delay added to each run.
 OnCalendar=daily
 RandomizedDelaySec=1h
 # Run at next start-up if the scheduled time passed while the Pi was off.
 Persistent=true
 # Additionally fire 30 seconds after every boot.
 OnBootSec=30s
 [Install]
 WantedBy=timers.target
--- a/apps/fc-signage-pi-player/systemd/flowercore-signage-player-pi-hdmi.service
+++ b/apps/fc-signage-pi-player/systemd/flowercore-signage-player-pi-hdmi.service
@@ -0,0 +1,7 @@
 # Oneshot started by the HDMI udev rule; runs the hotplug responder script.
 # No [Install] section: it is never enabled, only started on demand.
 [Unit]
 Description=FlowerCore Signage Pi Player HDMI hotplug responder
 # Opts out of systemd's implicit default dependencies.
 # NOTE(review): presumably so hotplug events can be serviced regardless of
 # boot stage - confirm this is intentional.
 DefaultDependencies=no
 [Service]
 Type=oneshot
 ExecStart=/usr/local/bin/flowercore-signage-hdmi-respond.sh
--- a/apps/fc-signage-pi-player/systemd/flowercore-signage-player-pi.service
+++ b/apps/fc-signage-pi-player/systemd/flowercore-signage-player-pi.service
@@ -0,0 +1,30 @@
 # Chromium kiosk service for the signage Pi. Runs as the unprivileged
 # fc-signage user with a sandboxed filesystem and a memory ceiling.
 [Unit]
 Description=FlowerCore Digital Signage Pi Player (Chromium kiosk)
 Documentation=https://github.com/astoltz/FlowerCore.Notes/blob/master/docs/standards/appletv-pi-signage-agents-design.md
 Wants=network-online.target
 After=network-online.target graphical.target
 # Skip (rather than fail) start-up until enrollment has produced both the node
 # identity file and the client certificate bundle.
 ConditionPathExists=/etc/flowercore/signage-node.json
 ConditionPathExists=/etc/fc-signage-player/client.p12
 # Start rate limiting is a [Unit] setting. systemd ignores
 # StartLimitIntervalSec= when it appears under [Service] (only the legacy
 # StartLimitInterval= spelling is accepted there), which would leave the
 # default interval in force. Keep both keys together here so the intended
 # "5 starts per 300s" limit actually applies.
 StartLimitBurst=5
 StartLimitIntervalSec=300s
 [Service]
 Type=simple
 User=fc-signage
 Group=fc-signage
 WorkingDirectory=/var/lib/fc-signage-player
 # Leading "-" makes the environment file optional.
 EnvironmentFile=-/etc/flowercore/signage-player.env
 ExecStartPre=/usr/local/bin/flowercore-signage-prelaunch.sh
 ExecStart=/usr/local/bin/flowercore-signage-launch.sh
 # Always restart after exit, waiting 10s between attempts.
 Restart=always
 RestartSec=10s
 # MemoryHigh throttles/reclaims above 1500M; MemoryMax is the hard 2G cap.
 MemoryMax=2G
 MemoryHigh=1500M
 # Whole filesystem is read-only for the service except the paths listed in
 # ReadWritePaths; /home, /root and /run/user are inaccessible.
 ProtectSystem=strict
 ProtectHome=true
 ReadWritePaths=/var/lib/fc-signage-player /var/log/fc-signage-player
 PrivateTmp=true
 NoNewPrivileges=true
 [Install]
 WantedBy=graphical.target
--- a/apps/fc-signage-pi-player/systemd/flowercore-signage-renew.service
+++ b/apps/fc-signage-pi-player/systemd/flowercore-signage-renew.service
@@ -0,0 +1,6 @@
 # Oneshot worker that runs the certificate renewal script. No User= is set,
 # so it runs with the system manager's default (root) identity; no [Install]
 # section, so it only runs when started by something else (e.g. the
 # same-named timer).
 [Unit]
 Description=FlowerCore Signage Pi: cert renewal worker
 [Service]
 Type=oneshot
 ExecStart=/usr/local/bin/flowercore-signage-renew-cert.sh
--- a/apps/fc-signage-pi-player/systemd/flowercore-signage-renew.timer
+++ b/apps/fc-signage-pi-player/systemd/flowercore-signage-renew.timer
@@ -0,0 +1,10 @@
 # Schedules flowercore-signage-renew.service. No Unit= is set, so the timer
 # activates the service that shares its name.
 [Unit]
 Description=Daily check for FlowerCore Signage Pi cert renewal
 [Timer]
 # Once a day, with up to two hours of random delay added to each run.
 OnCalendar=daily
 RandomizedDelaySec=2h
 # Run at next start-up if the scheduled time passed while the Pi was off.
 Persistent=true
 [Install]
 WantedBy=timers.target
--- a/apps/fc-signage-pi-player/tests/display_capability.bats
+++ b/apps/fc-signage-pi-player/tests/display_capability.bats
@@ -0,0 +1,22 @@
 #!/usr/bin/env bats
 # Static content checks for the display-detection script. Nothing is executed:
 # each test only asserts that required literals appear in the script text.
 setup() {
  APP_ROOT="$(cd "$BATS_TEST_DIRNAME/.." && pwd)"
  DETECT="$APP_ROOT/scripts/fc-signage-detect-display"
 }
 # has_text HAYSTACK NEEDLE: succeeds when NEEDLE occurs literally in HAYSTACK.
 has_text() {
  [[ "$1" == *"$2"* ]]
 }
@test "display detection emits graceful disconnected profile when no hdmi connector is present" {
  detect_src="$(cat "$DETECT")"
  has_text "$detect_src" "displayConnected: false"
  has_text "$detect_src" "No HDMI display detected"
 }
@test "display detection parses edid, falls back to kmsprint, and logs endpoint failures locally" {
  detect_src="$(cat "$DETECT")"
  for needle in \
   "edid-decode" \
   "HDR (Static|Dynamic) Metadata Block" \
   "kmsprint" \
   "/api/v1/nodes/\${NODE_ID}/capabilities" \
   "/api/v1/displays/\${NODE_ID}/capability-profile" \
   "capabilities.log"; do
   has_text "$detect_src" "$needle"
  done
 }
--- a/apps/fc-signage-pi-player/tests/identity_bootstrap.bats
+++ b/apps/fc-signage-pi-player/tests/identity_bootstrap.bats
@@ -0,0 +1,64 @@
 #!/usr/bin/env bats
 # Static content checks for the enrollment (bootstrap) and renewal scripts.
 # Nothing is executed: tests assert that required literals appear in the text.
 setup() {
  APP_ROOT="$(cd "$BATS_TEST_DIRNAME/.." && pwd)"
  BOOTSTRAP="$APP_ROOT/scripts/flowercore-signage-bootstrap.sh"
  RENEW="$APP_ROOT/scripts/flowercore-signage-renew-cert.sh"
 }
 # has_text HAYSTACK NEEDLE: succeeds when NEEDLE occurs literally in HAYSTACK.
 has_text() {
  [[ "$1" == *"$2"* ]]
 }
@test "bootstrap is idempotent when node is already enrolled" {
  bootstrap_src="$(cat "$BOOTSTRAP")"
  has_text "$bootstrap_src" '[[ -s "$NODE_JSON" && -s "$CERT_DIR/client.p12" ]]'
  has_text "$bootstrap_src" "already enrolled"
  has_text "$bootstrap_src" "exit 0"
 }
@test "bootstrap generates a stable node uuid and machine id" {
  bootstrap_src="$(cat "$BOOTSTRAP")"
  for needle in "uuidgen" "nodeUuid" "machineId" "cut -c1-16"; do
   has_text "$bootstrap_src" "$needle"
  done
 }
@test "bootstrap posts to the canonical register endpoint" {
  for pattern in '/api/v1/nodes/register' '"linux-arm64-pi"'; do
   grep -q "$pattern" "$BOOTSTRAP"
  done
 }
@test "bootstrap retries registration once for first-call races" {
  bootstrap_src="$(cat "$BOOTSTRAP")"
  has_text "$bootstrap_src" "for attempt in 1 2"
  has_text "$bootstrap_src" "register attempt \$attempt returned"
  has_text "$bootstrap_src" "sleep 5"
 }
@test "bootstrap supports setup-code approval with manual polling fallback" {
  bootstrap_src="$(cat "$BOOTSTRAP")"
  for needle in "signage-setup-code" "approve-via-setup-code" "+ 1800" "sleep 15"; do
   has_text "$bootstrap_src" "$needle"
  done
 }
@test "bootstrap generates an ecdsa p256 csr for the signage pi subject" {
  bootstrap_src="$(cat "$BOOTSTRAP")"
  has_text "$bootstrap_src" "ecparam -genkey -name prime256v1"
  has_text "$bootstrap_src" '/CN=${NODE_ID}/O=FlowerCore/OU=SignagePlayer-Pi'
 }
@test "bootstrap writes pkcs12 bundle with restrictive permissions" {
  bootstrap_src="$(cat "$BOOTSTRAP")"
  for needle in "openssl pkcs12 -export" "client.p12.pass" "chmod 0640" "chmod 0600"; do
   has_text "$bootstrap_src" "$needle"
  done
 }
@test "renewal only calls renew endpoint inside the thirty-day window and swaps atomically" {
  # Static check of the renewal script text; nothing is executed.
  script="$(cat "$RENEW")"
  [[ "$script" == *'-checkend $((30*24*3600))'* ]]
  [[ "$script" == *"/api/v1/nodes/\${NODE_ID}/renew"* ]]
  [[ "$script" == *"client.key.new"* ]]
  # Match the final swap regardless of how many blanks separate the two mv
  # operands, so cosmetic column alignment in the script cannot break the test.
  grep -Eq 'mv[[:space:]]+"\$CERT_DIR/client\.p12\.new"[[:space:]]+"\$CERT_DIR/client\.p12"' "$RENEW"
 }
--- a/apps/fc-signage-pi-player/tests/systemd_kiosk_wrapper.bats
+++ b/apps/fc-signage-pi-player/tests/systemd_kiosk_wrapper.bats
@@ -0,0 +1,68 @@
 #!/usr/bin/env bats
 # Static checks for the kiosk player's systemd unit, udev rule, helper scripts
 # and Chromium policy. Nothing is started: tests inspect file contents only.
 setup() {
  APP_ROOT="$(cd "$BATS_TEST_DIRNAME/.." && pwd)"
  PLAYER_UNIT="$APP_ROOT/systemd/flowercore-signage-player-pi.service"
 }
 # has_text HAYSTACK NEEDLE: succeeds when NEEDLE occurs literally in HAYSTACK.
 has_text() {
  [[ "$1" == *"$2"* ]]
 }
@test "player unit exists" {
  test -f "$PLAYER_UNIT"
 }
@test "player unit uses simple chromium service with restart backoff" {
  unit_text="$(cat "$PLAYER_UNIT")"
  for directive in \
   "Type=simple" \
   "Restart=always" \
   "RestartSec=10s" \
   "StartLimitBurst=5" \
   "StartLimitIntervalSec=300s"; do
   has_text "$unit_text" "$directive"
  done
 }
@test "player unit caps chromium memory at two gigabytes" {
  for line_re in '^MemoryMax=2G$' '^MemoryHigh=1500M$'; do
   grep -q "$line_re" "$PLAYER_UNIT"
  done
 }
@test "player unit condition-gates startup on identity and p12 certificate" {
  for line_re in \
   '^ConditionPathExists=/etc/flowercore/signage-node.json$' \
   '^ConditionPathExists=/etc/fc-signage-player/client.p12$'; do
   grep -q "$line_re" "$PLAYER_UNIT"
  done
 }
@test "player unit runs prelaunch checks before chromium" {
  for line_re in \
   '^ExecStartPre=/usr/local/bin/flowercore-signage-prelaunch.sh$' \
   '^ExecStart=/usr/local/bin/flowercore-signage-launch.sh$'; do
   grep -q "$line_re" "$PLAYER_UNIT"
  done
 }
@test "hdmi udev rule routes through the two-second settle service" {
  rule_text="$(cat "$APP_ROOT/systemd/99-flowercore-signage-hdmi.rules")"
  has_text "$rule_text" 'KERNEL=="card?-HDMI-A-?"'
  has_text "$rule_text" "systemctl start flowercore-signage-player-pi-hdmi.service"
  # The rule itself must not restart the player directly.
  [[ "$rule_text" != *"systemctl restart flowercore-signage-player-pi.service"* ]]
 }
@test "hdmi responder settles, declares display, then restarts chromium" {
  responder_text="$(cat "$APP_ROOT/scripts/flowercore-signage-hdmi-respond.sh")"
  has_text "$responder_text" "sleep 2"
  has_text "$responder_text" "systemctl start flowercore-signage-detect-display.service"
  has_text "$responder_text" "systemctl restart flowercore-signage-player-pi.service"
 }
@test "chromium policy json is valid and disables credential prompts" {
  if ! command -v jq >/dev/null; then
   skip "jq not installed"
  fi
  policy_file="$APP_ROOT/chromium-policies/flowercore-signage.json"
  jq -e '.AutofillAddressEnabled == false and .AutofillCreditCardEnabled == false and .PasswordManagerEnabled == false' \
    "$policy_file" >/dev/null
 }
@test "launch script tries embed URL and logs bare-player fallback" {
  launch_text="$(cat "$APP_ROOT/scripts/flowercore-signage-launch.sh")"
  has_text "$launch_text" '/player/${NODE_ID}/embed?token=${CERT_THUMB}'
  has_text "$launch_text" "url-divergence.log"
  has_text "$launch_text" '/player/${NODE_ID}?token=${CERT_THUMB}'
 }
@test "prelaunch script validates required node and cert files" {
  prelaunch_text="$(cat "$APP_ROOT/scripts/flowercore-signage-prelaunch.sh")"
  for needle in \
   "/etc/flowercore/signage-node.json" \
   "/etc/fc-signage-player/client.p12" \
   "/etc/fc-signage-player/client.p12.pass" \
   "exit 1"; do
   has_text "$prelaunch_text" "$needle"
  done
 }
--- a/apps/fc-ttsreader/fc-ttsreader.yaml
+++ b/apps/fc-ttsreader/fc-ttsreader.yaml
@@ -532,7 +532,7 @@ spec:
        fsGroupChangePolicy: OnRootMismatch
      containers:
        - name: web
-          image: localhost/fc-ttsreader-web:v20260506-phase6
+          image: localhost/fc-ttsreader-web:v20260518-sprint36-demo-finish-b132cbf
          imagePullPolicy: Never
          ports:
            - containerPort: 5217
@@ -555,9 +555,13 @@ spec:
            - name: TtsReader__Jobs__Root
              value: "/data/jobs"
            - name: TtsReader__Piper__Host
-              value: "ttsreader-piper.fc-ttsreader.svc.cluster.local."
+              value: "10.0.57.17"
            - name: TtsReader__Piper__Port
-              value: "10200"
+              value: "8500"
            - name: TtsReader__Piper__Transport
              value: "http"
            - name: TtsReader__Piper__HttpPath
              value: "/tts"
            - name: TtsReader__Kokoro__Enabled
              value: "true"
            - name: TtsReader__Kokoro__BaseUrl
--- a/apps/github-runner/.gitattributes
+++ b/apps/github-runner/.gitattributes
@@ -0,0 +1,2 @@
 # Check out shell scripts and the Dockerfile with LF line endings on every
 # platform. NOTE(review): presumably to keep CRLF out of files that are copied
 # into the Linux runner image - confirm.
 *.sh text eol=lf
 Dockerfile text eol=lf
--- a/apps/github-runner/Dockerfile
+++ b/apps/github-runner/Dockerfile
@@ -0,0 +1,44 @@
 # GitHub Actions runner image with a pinned Ruby baked into a toolcache
 # directory (/opt/runner-toolcache) outside /home/runner.
 FROM myoung34/github-runner:latest
 # Pinned Ruby patch release, its minor-version alias, and the ruby-build tag
 # used to compile it.
 ARG RUBY_VERSION=3.3.11
 ARG RUBY_MINOR=3.3
 ARG RUBY_BUILD_VERSION=v20260326
 # Owner applied to the baked toolcache by install-ruby-toolcache.sh.
 ARG RUNNER_UID=1001
 ARG RUNNER_GID=1001
 ENV RUNNER_TOOL_CACHE=/home/runner/_tool
 ENV RUNNER_RUBY_TOOLCACHE=/opt/runner-toolcache
 # The runtime toolcache path is searched first, then the baked copy.
 ENV PATH="/home/runner/_tool/Ruby/${RUBY_MINOR}/x64/bin:/opt/runner-toolcache/Ruby/${RUBY_MINOR}/x64/bin:${PATH}"
 USER root
 # Install Ruby build dependencies, then fetch and install the pinned
 # ruby-build release; temporary files and apt lists are removed in the same
 # layer.
 RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        autoconf \
        bison \
        build-essential \
        ca-certificates \
        curl \
        libdb-dev \
        libffi-dev \
        libgdbm-dev \
        libgmp-dev \
        libncurses-dev \
        libreadline-dev \
        libssl-dev \
        libyaml-dev \
        patch \
        pkg-config \
        uuid-dev \
        zlib1g-dev \
    && curl -fsSL "https://github.com/rbenv/ruby-build/archive/refs/tags/${RUBY_BUILD_VERSION}.tar.gz" -o /tmp/ruby-build.tar.gz \
    && mkdir -p /tmp/ruby-build \
    && tar -xzf /tmp/ruby-build.tar.gz --strip-components=1 -C /tmp/ruby-build \
    && /tmp/ruby-build/install.sh \
    && rm -rf /tmp/ruby-build /tmp/ruby-build.tar.gz /var/lib/apt/lists/*
 COPY install-ruby-toolcache.sh /usr/local/bin/install-ruby-toolcache.sh
 # Build Ruby into the baked toolcache, then run `ruby -v` as a build-time
 # check that the interpreter resolves through PATH.
 RUN chmod +x /usr/local/bin/install-ruby-toolcache.sh \
    && RUBY_VERSION="${RUBY_VERSION}" RUBY_MINOR="${RUBY_MINOR}" TOOLCACHE_ROOT="${RUNNER_RUBY_TOOLCACHE}" RUNNER_UID="${RUNNER_UID}" RUNNER_GID="${RUNNER_GID}" /usr/local/bin/install-ruby-toolcache.sh \
    && ruby -v
--- a/apps/github-runner/README.md
+++ b/apps/github-runner/README.md
@@ -0,0 +1,121 @@
 # GitHub Runner Fleet
 ArgoCD owns `apps/github-runner/github-runner.yaml`. Do not patch live runner
 Deployments with `kubectl`; update this manifest and let ArgoCD reconcile.
 ## Runner Shape
 All repo-scoped Linux runners use:
 - `localhost/fc-github-runner:v20260520-ruby3.3.11`, derived from
  `myoung34/github-runner:latest`
 - `ACCESS_TOKEN` from the `github-runner-token` Secret
 - `RUN_AS_ROOT=false`
 - `EPHEMERAL=true`
 - `LABELS=self-hosted,linux,fc-build-linux`
 - writable non-root paths under `/home/runner` for .NET, NuGet, XDG cache, and
  Actions tool cache
 - Ruby 3.3.11 seeded into `/home/runner/_tool/Ruby/3.3/x64` from the baked
  `/opt/runner-toolcache` copy so `ruby/setup-ruby@v1` can discover it on
  self-hosted `ubuntu-20.04-x64` runners
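 `github-runner` for `FlowerCore.Common` is single-replica because it retains the
 original Longhorn ReadWriteOnce NuGet PVC. Every other repo-scoped runner uses
 per-pod `emptyDir` caches, so its replica count can be sized independently from
 recent CI activity (currently one to three replicas; `github-runner.yaml` is
 the authoritative source). That is the safe backlog-drain strategy: no two
 pods share one RWO PVC.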
 The Sprint 32 final long-tail wave added 16 Deployments, each originally with
 two replicas (several have since been right-sized based on trailing CI
 activity):
 `FlowerCore.Knowledge`, `FlowerCore.LlmBridge`, `FlowerCore.Media`,
 `FlowerCore.Presentations`, `FlowerCore.RemoteDesktop`, `FlowerCore.DNS`,
 `FlowerCore.Distribution`, `FlowerCore.Scoreboard`,
 `FlowerCore.SegmentDisplay`, `FlowerCore.Signage.Contracts`,
 `FlowerCore.SignalControl`, `FlowerCore.Intranet.Web`,
 `FlowerCore.Provisioning`, `FlowerCore.Redis`, `FlowerCore.MessageBoard`, and
 `FlowerCore.MenuBoard`.
 ## Image Build
 Ruby is baked with a pinned `ruby-build` release and Ruby patch version. The pod
 still mounts an `emptyDir` over `/home/runner`, so the `setup-runner-home` init
 container copies the baked toolcache from `/opt/runner-toolcache/Ruby` into
 `/home/runner/_tool/Ruby` before the runner container starts.
 ```bash
 cd apps/github-runner
 podman build -t localhost/fc-github-runner:v20260520-ruby3.3.11 .
 podman run --rm localhost/fc-github-runner:v20260520-ruby3.3.11 ruby -v
 podman run --rm localhost/fc-github-runner:v20260520-ruby3.3.11 \
  test -f /opt/runner-toolcache/Ruby/3.3/x64.complete
 podman save localhost/fc-github-runner:v20260520-ruby3.3.11 \
  -o fc-github-runner-v20260520-ruby3.3.11.tar
 ```
 Import the saved image on every schedulable RKE2 node before ArgoCD rolls the
 Deployments:
 ```bash
 for node in rke2-server rke2-agent1 rke2-agent2; do
  scp fc-github-runner-v20260520-ruby3.3.11.tar "$node:/tmp/"
  ssh "$node" 'sudo ctr -a /run/k3s/containerd/containerd.sock -n k8s.io images rm localhost/fc-github-runner:v20260520-ruby3.3.11 || true'
  ssh "$node" 'sudo ctr -a /run/k3s/containerd/containerd.sock -n k8s.io images import /tmp/fc-github-runner-v20260520-ruby3.3.11.tar'
 done
 ```
 ## Post-Merge Proof
 After the PR is merged and ArgoCD syncs, verify the runner fleet:
 ```bash
 kubectl -n github-runner get deploy,pods,pvc
 ```
 Verify the Ruby toolcache in a fresh pod:
 ```bash
 kubectl -n github-runner exec deploy/github-runner-puppet -c runner -- ruby -v
 kubectl -n github-runner exec deploy/github-runner-puppet -c runner -- sh -c \
  'echo "$RUNNER_TOOL_CACHE" && test -f "$RUNNER_TOOL_CACHE/Ruby/3.3/x64.complete"'
 ```
 Verify GitHub registration for the repo-scoped runners:
 ```bash
 for repo in FlowerCore.Common FlowerCore.Shared.Pos FlowerCore.Puppet FlowerCore.Signage \
            FlowerCore.DMS FlowerCore.Telephony FlowerCore.Print.Web FlowerCore.Chat \
            FlowerCore.MySQL FlowerCore.Kiosk.Linux FlowerCore.Marquee FlowerCore.TtsReader \
            FlowerCore.Knowledge FlowerCore.LlmBridge FlowerCore.Media \
            FlowerCore.Presentations FlowerCore.RemoteDesktop FlowerCore.DNS \
            FlowerCore.Distribution FlowerCore.Scoreboard FlowerCore.SegmentDisplay \
            FlowerCore.Signage.Contracts FlowerCore.SignalControl FlowerCore.Intranet.Web \
            FlowerCore.Provisioning FlowerCore.Redis FlowerCore.MessageBoard \
            FlowerCore.MenuBoard; do
  echo "=== $repo ==="
  gh api "/repos/astoltz/$repo/actions/runners" \
    --jq '.runners[] | select(.labels[].name == "fc-build-linux") | {name,status,busy,labels:[.labels[].name]}'
 done
 ```
 Shared.Pos publish proof after the runner pod is online:
 ```bash
 gh run list --repo astoltz/FlowerCore.Shared.Pos \
  --workflow "Build, Test & Publish" --branch main --limit 5
 ```
 If the latest run is still queued after runner registration, rerun the workflow
 from GitHub Actions and verify it lands on an `rke2-linux-*` runner.
 ## Failure Notes
 - `actions/setup-dotnet` permission error at `/usr/share/dotnet`: check that
  `DOTNET_INSTALL_DIR=/home/runner/.dotnet` and related cache env vars are
  present on the runner pod.
 - `ruby/setup-ruby@v1` says self-hosted runners must install Ruby in
  `$RUNNER_TOOL_CACHE`: check that the init container copied
  `/opt/runner-toolcache/Ruby` into `/home/runner/_tool/Ruby` and that
  `/home/runner/_tool/Ruby/3.3/x64.complete` exists.
 - `404` during runner registration: the fine-grained PAT is valid but missing
  repository access for that repo. Add the repo to the PAT access list; the PAT
  value does not change.
 - `Multi-Attach` volume error: only the Common runner uses a RWO PVC and it must
  stay single-replica. New multi-replica runners use `emptyDir`.
--- a/apps/github-runner/github-runner.yaml
+++ b/apps/github-runner/github-runner.yaml
--- a/apps/github-runner/install-ruby-toolcache.sh
+++ b/apps/github-runner/install-ruby-toolcache.sh
@@ -0,0 +1,19 @@
 #!/usr/bin/env bash
 # Build one pinned Ruby into an Actions-style toolcache layout:
 #   <root>/Ruby/<version>/x64            install prefix
 #   <root>/Ruby/<version>/x64.complete   marker file
 #   <root>/Ruby/<minor> -> <version>     relative symlink
 # Inputs are environment variables; unset or empty ones fall back to the
 # defaults below. Requires ruby-build on PATH.
 set -euo pipefail
 : "${RUBY_VERSION:=3.3.11}"
 : "${RUBY_MINOR:=3.3}"
 : "${TOOLCACHE_ROOT:=/opt/runner-toolcache}"
 : "${RUNNER_UID:=1001}"
 : "${RUNNER_GID:=1001}"
 ruby_home="${TOOLCACHE_ROOT}/Ruby"
 RUBY_PREFIX="${ruby_home}/${RUBY_VERSION}/x64"
 mkdir -p "${ruby_home}"
 # Docs and YJIT are disabled unless the caller supplies its own configure
 # options; the variable is set for the ruby-build invocation only.
 env RUBY_CONFIGURE_OPTS="${RUBY_CONFIGURE_OPTS:---disable-install-doc --disable-yjit}" \
  ruby-build "${RUBY_VERSION}" "${RUBY_PREFIX}"
 touch "${RUBY_PREFIX}.complete"
 ln -sfn "${RUBY_VERSION}" "${ruby_home}/${RUBY_MINOR}"
 # Smoke-test the freshly built interpreter before handing ownership over.
 "${RUBY_PREFIX}/bin/ruby" -v
 chown -R "${RUNNER_UID}:${RUNNER_GID}" "${TOOLCACHE_ROOT}"
 chmod -R a+rX "${TOOLCACHE_ROOT}"
--- a/apps/guacamole/guacamole.yaml
+++ b/apps/guacamole/guacamole.yaml
@@ -466,11 +466,11 @@ spec:
  itemPath: vaults/IAmWorkin/items/Guacamole JSON Auth
 ---
 ---
-# 1Password-backed credentials for Mac mini VNC access (Phase 1 — 2026-04-28)
+# 1Password-backed credentials for Mac mini VNC access (Phase 1 - 2026-04-28)
 # The operator mints Secret 'macmini-vnc-creds' with keys: username, password, VNC Password
 # Note: '1Password' field label 'VNC Password' -> K8s Secret key 'VNC Password' (space retained)
 # Guacamole VNC connection password is sourced from the 'VNC Password' field.
-# Actual IP is 10.0.56.115 (INFRA VLAN) — the 1P item 'IP' field is kept as backup reference.
+# Actual IP is 10.0.56.115 (INFRA VLAN) - the 1P item 'IP' field is kept as backup reference.
 apiVersion: onepassword.com/v1
 kind: OnePasswordItem
 metadata:
@@ -481,6 +481,7 @@ metadata:
    app.kubernetes.io/part-of: flowercore
 spec:
  itemPath: vaults/IAmWorkin/items/Mac Mini
 ---
 # Blue Jay Branding Extension (CSS + translations)
 apiVersion: v1
 kind: ConfigMap
--- a/apps/kubevirt-vms/ci1.yaml
+++ b/apps/kubevirt-vms/ci1.yaml
@@ -1,51 +1,9 @@
 # =============================================================================
-# ci1 — Windows Server 2025 KubeVirt VM (GitHub Actions Self-Hosted Runner)
+# ci1 - Windows Server 2025 KubeVirt VM (GitHub Actions Self-Hosted Runner)
 # =============================================================================
-# Purpose: dedicated CI runner for FlowerCore.Updater Sandbox E2E nightly +
+# Boots from the sysprepped containerDisk template built by the Windows VM
-# future fleet WPF AAT lanes. Replaces the never-registered
+# sysprep pipeline. See docs/infrastructure/windows-vm-sysprep-pipeline.md.
-# `bluejay-ws-sandbox-1` runner placeholder. Andrew explicitly does NOT want
+# Path A/B/C install history is preserved in git log only.
 # BLUEJAY-WS registered as a runner (workstation has personal/operator state).
 #
 # Storage layout (2026-05-08):
 #   * ISO is now sourced from Synology NFS (Path B) — see
 #     win2025-iso-nfs-pv.yaml. The Longhorn Filesystem PVC
 #     `windows-server-2025-iso` below is RETAINED but UNUSED so the prior
 #     CDI upload state is preserved as a fallback (and so ArgoCD doesn't
 #     prune it on this commit). It can be deleted in a follow-up commit
 #     after the NFS path is proven on a successful Windows install.
 #
 # Status (2026-05-08): LIVE — Phase 1 prereqs satisfied:
 #   * Multus CNI v4.2.2 thick-plugin DaemonSet running on all 3 RKE2 nodes
 #     (apps/multus/multus.yaml; ApplicationSet `infra-multus` Synced/Healthy)
 #   * CDI v1.65.0 operator + CR Deployed (apps/cdi/; ApplicationSet
 #     `infra-cdi` Synced/Healthy; uploadproxy reachable via kubectl port-forward)
 #   * Windows Server 2025 ISO uploaded via CDI virtctl image-upload to
 #     PVC windows-server-2025-iso (7.7 GiB → 10Gi PVC, Bound, Upload Complete)
 #   * Local Administrator password generated, stored in 1Password vault
 #     IAmWorkin (qaphopopkryhbg353ukzhhuqoq) item id h3ix4mgfk65gmkcmvh6ly3d3hu
 #   * NetworkAttachmentDefinition prod-vlan57 registered (apps/kubevirt-vms/
 #     prod-vlan57-nad.yaml). VM still uses pod-network masquerade until Phase 1.5
 #     host bridge work lands (Puppet br-prod + enp86s0.57); switching is a
 #     one-line YAML edit + git push.
 #
 # See docs/infrastructure/windows-server-build-runner-plan.md "Phase 1 readiness gate".
 #
 # Network choice in this draft: **pod-network fallback** (Calico default).
 # Outbound-only is fine for the Updater Sandbox E2E runner workload (the runner
 # polls GitHub Actions over HTTPS; no inbound listener needed). Switch to a
 # Multus PROD VLAN NetworkAttachmentDefinition once Multus is installed and the
 # operator wants L2 access from `ci1` to other PROD VLAN services.
 #
 # Sizing: 8 vCPU / 16 GB RAM / 200 GB disk on Longhorn (default storageClass).
 # Capacity check 2026-05-08: each RKE2 node has 16 vCPU / ~64Gi allocatable;
 # 8 vCPU is ~17% of one node's allocatable, fits comfortably.
 #
 # Apply (after operator approval + ISO loaded):
 #   kubectl --kubeconfig $env:USERPROFILE\.kube\rke2.yaml apply -f apps/kubevirt-vms/ci1.yaml
 #
 # Connect to console for Windows install:
 #   virtctl --kubeconfig $env:USERPROFILE\.kube\rke2.yaml vnc ci1 -n kubevirt-vms
 #   (Or via Guacamole once a connection profile is added.)
 # =============================================================================
 apiVersion: v1
@@ -57,248 +15,6 @@ metadata:
    pod-security.kubernetes.io/enforce: privileged
 ---
 # ISO PVC — populated via CDI virtctl image-upload (CDI is now installed).
 #
 # **Volume mode (2026-05-08 status):** Filesystem-mode PVC. A migration to
 # `volumeMode: Block` via DataVolume was attempted to address an OVMF SATA
 # CDROM read timeout, but CDI v1.65.0's upload-target pod runs as uid 107
 # with `capabilities.drop: [ALL]` and cannot open the underlying block
 # device (`blockdev: cannot open /dev/cdi-block-volume: Permission denied`).
 # Reverted to Filesystem PVC pending one of:
 #   - CDI deployment override granting CAP_SYS_RAWIO to upload pod
 #   - Pre-populated PVC via privileged init pod that dd's the ISO directly
 #   - Migration to a different storage class that exposes block devices
 #     differently (e.g. iSCSI, where Longhorn's CSI mount path may behave
 #     differently)
 #
 # Population workflow (this PVC, Filesystem mode):
 #   1. virtctl --kubeconfig $env:USERPROFILE\.kube\rke2.yaml image-upload pvc \
 #        windows-server-2025-iso -n kubevirt-vms \
 #        --image-path "$env:USERPROFILE\Downloads\en-us_windows_server_2025_updated_march_2026_x64_dvd_8e06425a.iso" \
 #        --size 10Gi --storage-class longhorn --access-mode ReadWriteOnce \
 #        --uploadproxy-url https://localhost:8443 --insecure
 #   (--uploadproxy-url uses port-forward in practice: `kubectl port-forward
 #   -n cdi service/cdi-uploadproxy 8443:443 &` first.)
 #
 # **Open boot issue:** even with the ISO at bootOrder:1, OVMF console showed:
 #   BdsDxe: starting Boot0001 "UEFI QEMU DVD-ROM QM00001 " from ... Sata(...)
 #   BdsDxe: failed to start Boot0001 ... Time out
 # Diagnosis confirmed PVC content IS a valid bootable ISO9660 image — the
 # timeout is in OVMF reading from the SATA-CDROM-backed-by-filesystem-PVC.
 # Block mode would likely fix it; see CDI permission issue above.
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
  name: windows-server-2025-iso
  namespace: kubevirt-vms
  labels:
    app: ci-runner
    flowercore.io/managed-by: bluejay-infra
 spec:
  accessModes:
    - ReadWriteOnce          # Bump to ReadOnlyMany after population for multi-VM use
  resources:
    requests:
      storage: 10Gi          # Server 2025 ISO is 7.7GB; 10Gi for headroom
  storageClassName: longhorn
 ---
 # Root disk PVC — empty 200Gi volume that Windows installs into.
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
  name: ci1-rootdisk
  namespace: kubevirt-vms
 spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 200Gi
  storageClassName: longhorn
 ---
 # Sysprep ConfigMap — autounattend.xml for hands-off Windows install.
 # Sets local Administrator password (REPLACE the placeholder), enables RDP,
 # enables WinRM, sets hostname, and configures static-ish networking via DHCP.
 # The ISO + VirtIO drivers handle the rest.
 apiVersion: v1
 kind: ConfigMap
 metadata:
  name: ci1-autounattend
  namespace: kubevirt-vms
 data:
  autounattend.xml: |
    <?xml version="1.0" encoding="utf-8"?>
    <unattend xmlns="urn:schemas-microsoft-com:unattend">
      <!-- Pass 1: WindowsPE — Disk setup and VirtIO driver injection -->
      <settings pass="windowsPE">
        <component name="Microsoft-Windows-International-Core-WinPE"
                   processorArchitecture="amd64"
                   publicKeyToken="31bf3856ad364e35"
                   language="neutral" versionScope="nonSxS">
          <SetupUILanguage>
            <UILanguage>en-US</UILanguage>
          </SetupUILanguage>
          <InputLocale>en-US</InputLocale>
          <SystemLocale>en-US</SystemLocale>
          <UILanguage>en-US</UILanguage>
          <UserLocale>en-US</UserLocale>
        </component>
        <component name="Microsoft-Windows-PnpCustomizationsWinPE"
                   processorArchitecture="amd64"
                   publicKeyToken="31bf3856ad364e35"
                   language="neutral" versionScope="nonSxS">
          <DriverPaths>
            <PathAndCredentials wcm:action="add" wcm:keyValue="1">
              <Path>E:\amd64\2k25</Path>
            </PathAndCredentials>
          </DriverPaths>
        </component>
        <component name="Microsoft-Windows-Setup"
                   processorArchitecture="amd64"
                   publicKeyToken="31bf3856ad364e35"
                   language="neutral" versionScope="nonSxS">
          <DiskConfiguration>
            <Disk wcm:action="add">
              <DiskID>0</DiskID>
              <WillWipeDisk>true</WillWipeDisk>
              <CreatePartitions>
                <CreatePartition wcm:action="add">
                  <Order>1</Order>
                  <Size>260</Size>
                  <Type>EFI</Type>
                </CreatePartition>
                <CreatePartition wcm:action="add">
                  <Order>2</Order>
                  <Size>128</Size>
                  <Type>MSR</Type>
                </CreatePartition>
                <CreatePartition wcm:action="add">
                  <Order>3</Order>
                  <Extend>true</Extend>
                  <Type>Primary</Type>
                </CreatePartition>
              </CreatePartitions>
              <ModifyPartitions>
                <ModifyPartition wcm:action="add">
                  <Order>1</Order>
                  <PartitionID>1</PartitionID>
                  <Format>FAT32</Format>
                  <Label>EFI</Label>
                </ModifyPartition>
                <ModifyPartition wcm:action="add">
                  <Order>2</Order>
                  <PartitionID>2</PartitionID>
                </ModifyPartition>
                <ModifyPartition wcm:action="add">
                  <Order>3</Order>
                  <PartitionID>3</PartitionID>
                  <Format>NTFS</Format>
                  <Label>Windows</Label>
                </ModifyPartition>
              </ModifyPartitions>
            </Disk>
          </DiskConfiguration>
          <ImageInstall>
            <OSImage>
              <InstallTo>
                <DiskID>0</DiskID>
                <PartitionID>3</PartitionID>
              </InstallTo>
              <!-- Index 2 = Standard Desktop Experience. Use 4 for Datacenter Desktop. -->
              <InstallFrom>
                <MetaData wcm:action="add">
                  <Key>/IMAGE/INDEX</Key>
                  <Value>2</Value>
                </MetaData>
              </InstallFrom>
            </OSImage>
          </ImageInstall>
          <UserData>
            <AcceptEula>true</AcceptEula>
            <FullName>FlowerCore CI Runner</FullName>
            <Organization>FlowerCore</Organization>
            <!-- Eval install — no product key needed for 180-day evaluation -->
          </UserData>
        </component>
      </settings>
      <!-- Pass 4: Specialize — Hostname, RDP, WinRM -->
      <settings pass="specialize">
        <component name="Microsoft-Windows-Shell-Setup"
                   processorArchitecture="amd64"
                   publicKeyToken="31bf3856ad364e35"
                   language="neutral" versionScope="nonSxS">
          <ComputerName>CI1</ComputerName>
          <TimeZone>Central Standard Time</TimeZone>
        </component>
        <component name="Microsoft-Windows-TerminalServices-LocalSessionManager"
                   processorArchitecture="amd64"
                   publicKeyToken="31bf3856ad364e35"
                   language="neutral" versionScope="nonSxS">
          <fDenyTSConnections>false</fDenyTSConnections>
        </component>
      </settings>
      <!-- Pass 7: OOBE — Admin account, RDP firewall, WinRM -->
      <settings pass="oobeSystem">
        <component name="Microsoft-Windows-Shell-Setup"
                   processorArchitecture="amd64"
                   publicKeyToken="31bf3856ad364e35"
                   language="neutral" versionScope="nonSxS">
          <OOBE>
            <HideEULAPage>true</HideEULAPage>
            <HideLocalAccountScreen>true</HideLocalAccountScreen>
            <HideOEMRegistrationScreen>true</HideOEMRegistrationScreen>
            <HideOnlineAccountScreens>true</HideOnlineAccountScreens>
            <HideWirelessSetupInOOBE>true</HideWirelessSetupInOOBE>
            <ProtectYourPC>3</ProtectYourPC>
          </OOBE>
          <UserAccounts>
            <AdministratorPassword>
              <!-- Real password is in 1Password — vault qaphopopkryhbg353ukzhhuqoq,
                   item id h3ix4mgfk65gmkcmvh6ly3d3hu, title:
                   "ci1 Administrator (Windows Server 2025 KubeVirt VM)".
                   Field "autounattend AdministratorPassword Value (UTF-16-LE base64)"
                   matches the Value below.
                   To rotate: regenerate, recompute base64
                     $combined = $pw + "AdministratorPassword"
                     [Convert]::ToBase64String([Text.Encoding]::Unicode.GetBytes($combined))
                   then update both 1P item AND this Value field, recreate VM. -->
              <Value>bAA3AGsANABOAHcAcgBMAG4AeQBTAHUAYgBBAHQAaQBzAFUAcAB6AEMAWQAhADkAYQBCAEEAZABtAGkAbgBpAHMAdAByAGEAdABvAHIAUABhAHMAcwB3AG8AcgBkAA==</Value>
              <PlainText>false</PlainText>
            </AdministratorPassword>
          </UserAccounts>
          <FirstLogonCommands>
            <SynchronousCommand wcm:action="add">
              <Order>1</Order>
              <CommandLine>powershell.exe -ExecutionPolicy Bypass -Command "Set-NetFirewallRule -DisplayGroup 'Remote Desktop' -Enabled True"</CommandLine>
              <Description>Enable RDP firewall rule</Description>
            </SynchronousCommand>
            <SynchronousCommand wcm:action="add">
              <Order>2</Order>
              <CommandLine>powershell.exe -ExecutionPolicy Bypass -Command "Enable-PSRemoting -Force; Set-Item WSMan:\localhost\Service\Auth\Basic $true; Set-Item WSMan:\localhost\Service\AllowUnencrypted $true"</CommandLine>
              <Description>Enable WinRM (Phase 2 will pivot to HTTPS via step-ca cert)</Description>
            </SynchronousCommand>
            <SynchronousCommand wcm:action="add">
              <Order>3</Order>
              <CommandLine>cmd.exe /c reg add "HKLM\SOFTWARE\Microsoft\Windows\CurrentVersion\Policies\System" /v EnableLUA /t REG_DWORD /d 0 /f</CommandLine>
              <Description>Disable UAC (Phase 2 Puppet will re-evaluate)</Description>
            </SynchronousCommand>
          </FirstLogonCommands>
        </component>
      </settings>
    </unattend>
 ---
 # VirtualMachine — Windows Server 2025 CI runner.
 apiVersion: kubevirt.io/v1
 kind: VirtualMachine
 metadata:
@@ -309,33 +25,7 @@ metadata:
    role: github-actions-runner
    flowercore.io/managed-by: bluejay-infra
 spec:
-  # `running: true` is deprecated in favor of `runStrategy`. They are mutually
+  runStrategy: Always
  # exclusive — KubeVirt's validating webhook rejects any VM that sets both:
  #   admission webhook "virtualmachine-validator.kubevirt.io" denied the request:
  #   Running and RunStrategy are mutually exclusive.
  # `Always` keeps a VMI running and restarts it if it crashes/exits — same
  # semantics as the old `running: true`.
  #
  # **2026-05-08 status: VM cannot start due to a stale QEMU flock on the
  # rootdisk PVC** (qemu reports `Failed to get "write" lock` on
  # `/var/run/kubevirt-private/vmi-disks/rootdisk/disk.img`). The flock was
  # left by a previous QEMU process during a force-deleted launcher pod
  # cycle. Recovery requires either (a) a Longhorn engine restart on
  # rke2-agent2, (b) a Longhorn volume detach via the longhorn-manager API
  # (kubectl patch on `volume.longhorn.io/<pvc-name>` does not work — the
  # spec.nodeID is reconciled back), or (c) a node reboot of rke2-agent2.
  #
  # **Confirmed working:** the bootOrder swap (windows-iso=1, rootdisk=2)
  # and the runStrategy migration (above). The ISO PVC was successfully
  # repopulated via virtctl image-upload pvc on the Filesystem-mode PVC.
  #
  # **Open: SATA CDROM read timeout** — even with bootOrder=1, OVMF reported
  # `BdsDxe: failed to start Boot0001 ... Time out` reading the SATA CDROM
  # backed by the Filesystem-mode PVC. A switch to Block-mode DataVolume
  # was attempted but blocked by a CDI v1.65.0 upload-pod permission issue
  # (capability drop prevents writing to the underlying block device).
  # See header docstring on the ISO PVC.
  runStrategy: Always   # LIVE — ISO uploaded 2026-05-08, password in 1P
  template:
    metadata:
      labels:
@@ -377,73 +67,16 @@ spec:
        firmware:
          bootloader:
            efi:
              # 2026-05-08: SecureBoot=false during initial install. With SecureBoot
              # enabled, OVMF's BdsDxe times out reading Boot0001 from the SCSI
              # CDROM ("BdsDxe: failed to start Boot0001 ... Time out") before the
              # EFI bootloader signature can verify against the OVMF VARS trust DB.
              # KubeVirt's `/usr/share/OVMF/OVMF_VARS.secboot.fd` template doesn't
              # appear to include the Microsoft KEK/DB by default, so signed
              # Windows EFI bootloaders fail validation. Disabling SecureBoot lets
              # OVMF skip the chain check and boot directly. This is acceptable for
              # a CI runner — TPM 2.0 is still emulated (`tpm: {}` below) so
              # BitLocker / Hyper-V / WSL still work.
              # When the operator wants SecureBoot back, the path is:
              #   1. Custom-build OVMF_VARS.fd with Microsoft KEK/DB enrolled
              #   2. Mount it into the VM via firmware.bootloader.efi.persistent
              #   3. Set secureBoot: true again
              # Tracked separately from the install unblock.
              secureBoot: false
        devices:
-          tpm: {}             # Non-persistent vTPM — sufficient for runner; no BitLocker
+          tpm: {}
          disks:
            # bootOrder: ISO must be 1 for first-boot install (the rootdisk has no
            # EFI bootloader yet). After Windows installs, it writes its own UEFI
            # Boot#### entries pointing at the rootdisk's EFI partition; UEFI then
            # boots from rootdisk going forward and the ISO at bootOrder:2 acts as
            # a fallback for re-install scenarios.
            #
            # Original (broken) order had rootdisk=1, windows-iso=2 — UEFI tried
            # the empty virtio disk first, got nothing, fell back to the SATA
            # CDROM at Boot0001 with a short timeout, and timed out before the
            # CDROM enumerated. Console showed:
            #   BdsDxe: failed to start Boot0001 ... Time out
            #   BdsDxe: No bootable option or device was found.
            # Confirmed via debug pod: PVC content IS a real bootable ISO9660
            # (file: "ISO 9660 CD-ROM filesystem data ... (bootable)"), so the
            # only bug was boot priority.
            # 2026-05-08 PM: cdrom bus SCSI + containerDisk delivery. This
            # combination boots qemu cleanly and reaches OVMF, but OVMF
            # BdsDxe still hits "starting Boot0001 ... Time out" on the
            # cdrom — see HANDOFF.md / CODEX-STATUS.md "OPEN — ci1" for the
            # full diagnostic chain. virtio-blk disk swap was attempted as a
            # workaround but introduced a separate QEMU rootdisk flock issue
            # without fixing the underlying OVMF cdrom problem; reverted.
            # Operator decision needed for next architectural step (OVMF
            # custom build with extended timeout, KubeVirt version bump,
            # Hyper-V/VirtualBox-and-export, or BIOS legacy boot). The
            # containerDisk distribution pipeline (build/save/scp/ctr import)
            # is proven and ready to reuse for any of those.
            - name: windows-iso
              bootOrder: 1
              cdrom:
                bus: scsi
            - name: rootdisk
              bootOrder: 2
              disk:
                bus: virtio
            - name: virtio-drivers
              cdrom:
                bus: sata
            - name: sysprep
              cdrom:
                bus: sata
          interfaces:
-            # Pod-network fallback for Phase 1. To switch to PROD VLAN once Multus
+            # Pod-network fallback for CI runner outbound traffic. Switch to
-            # + the prod-vlan57 NAD exist, replace this block with:
+            # prod-vlan57 once the bridge/NAD lane is ready for L2 access.
            #   - name: prod-net
            #     bridge: {}
            #     model: virtio
            # and update the networks: stanza to use multus.networkName: kubevirt-vms/prod-vlan57
            - name: default
              masquerade: {}
              model: virtio
@@ -454,55 +87,7 @@ spec:
          pod: {}
      volumes:
        - name: rootdisk
          persistentVolumeClaim:
            claimName: ci1-rootdisk
        - name: windows-iso
          # 2026-05-08 PM (Path C, CONTAINERDISK): the ISO is now packaged as
          # a KubeVirt containerDisk OCI image baked from
          # `FROM scratch ; ADD --chown=107:107 disk.img /disk/disk.img`.
          # The qemu user (uid 107) reads the ISO directly from a tmpfs view
          # of the OCI layer, bypassing both:
          #   - Synology NFS export ACL (Path B failed: uid 107 denied at
          #     directory level even with mode 0777, see memory
          #     feedback_synology_iso_export_root_only_uid_107_denied)
          #   - OVMF cdrom read-window timeout (Path A and Path B's SCSI
          #     retry both hit `BdsDxe: failed to start Boot0001 ... Time out`
          #     when the cdrom was backed by a PVC the storage controller
          #     couldn't satisfy reads from fast enough).
          #
          # Image build (one-time, per ISO version):
          #   1. Copy ISO to disk.img, write Dockerfile
          #   2. podman build --tag localhost/win-server-2025:1.0 .  (on noc1)
          #   3. podman save -o win-server-2025-1.0.tar localhost/win-server-2025:1.0
          #   4. SCP tar to all 3 RKE2 nodes (rke2-server, rke2-agent1, rke2-agent2)
          #   5. sudo /var/lib/rancher/rke2/bin/ctr -a /run/k3s/containerd/containerd.sock \
          #        -n k8s.io images import /tmp/win-server-2025-1.0.tar
          # Standard FC pattern per `feedback_rke2_localhost_imagepullpolicy`.
          #
          # When a new Windows ISO version ships, bump the tag (1.1, 1.2, ...),
          # rebuild + redistribute, and update the image: line below in a new
          # commit. KubeVirt picks up the new image via a VM restart.
          #
          # The legacy NFS PVC + PV (apps/kubevirt-vms/win2025-iso-nfs-pv.yaml)
          # and CDI Longhorn PVC (`windows-server-2025-iso`) are RETAINED for
          # this commit so the prior states are recoverable. Once the
          # containerDisk path proves on a successful Windows install, both
          # legacy artifacts can be pruned in a follow-up commit.
          containerDisk:
-            image: localhost/win-server-2025:1.0
+            image: localhost/fc-win-server-2025:v1
            imagePullPolicy: Never
        - name: virtio-drivers
          containerDisk:
            # Pinned to v1.8.2 (latest stable as of 2026-05-08).
            # The :latest tag uses Docker manifest v1 schema which containerd
            # 2.1 (RKE2 v1.34.5) refuses to pull with:
            #   "media type application/vnd.docker.distribution.manifest.v1+prettyjws
            #    is no longer supported since containerd v2.1"
            # v1.8.2 is rebuilt with manifest v2/OCI and works on containerd 2.1.
            # Bump available: https://quay.io/repository/kubevirt/virtio-container-disk?tab=tags
            image: quay.io/kubevirt/virtio-container-disk:v1.8.2
        - name: sysprep
          sysprep:
            configMap:
              name: ci1-autounattend
      terminationGracePeriodSeconds: 3600
--- a/apps/kubevirt-vms/kustomization.yaml
+++ b/apps/kubevirt-vms/kustomization.yaml
@@ -0,0 +1,3 @@
 resources:
  - ci1.yaml
  - prod-vlan57-nad.yaml
--- a/apps/monitoring/noc-monitoring.yaml
+++ b/apps/monitoring/noc-monitoring.yaml
@@ -75,6 +75,20 @@ data:
              cluster: "rke2"
              role: "agent"
      # Mac mini macOS runner node (INFRA VLAN)
      - job_name: "macmini-node"
        scrape_timeout: 15s
        static_configs:
          - targets: ["10.0.56.115:9100"]
            labels:
              instance: "macmini"
              host: "macmini.iamworkin.lan"
              vlan: "infra"
              arch: "arm64"
              role: "macos-runner"
              puppet_managed: "true"
              puppet_server: "puppet.iamworkin.lan"
      # In-cluster node-exporter DaemonSet
      - job_name: "k8s-node-exporter"
        kubernetes_sd_configs:
@@ -266,13 +280,14 @@ data:
              printer_model: "NuPrint 210"
      # Print.Web health (Blazor app on edge2:5200)
      # Target `/health` (anonymous) — root path requires API key auth and returns 401.
      - job_name: "probe-printweb"
        metrics_path: /probe
        params:
          module: [http_2xx]
        scrape_interval: 30s
        static_configs:
-          - targets: ["http://10.0.57.16:5200/"]
+          - targets: ["http://10.0.57.16:5200/health"]
            labels:
              instance: "print-web"
              service: "print-web"
@@ -697,6 +712,36 @@ data:
              summary: "Print.Web Ollama runner held for >10m ({{ $labels.model }})"
              description: "Print.Web reports model {{ $labels.model }} with {{ $value | printf \"%.0f\" }}s of keep-alive remaining. Check concurrent requests before the Pi 5 Ollama lane thrashes."
      - name: macmini-runners
        rules:
          - alert: MacMiniRunnerOffline
            expr: (flowercore_github_runner_online{runner=~"macmini-.*"} == 0) or absent(flowercore_github_runner_online{runner=~"macmini-.*"})
            for: 10m
            labels:
              severity: warning
              service: github-runner
            annotations:
              summary: "Mac mini GitHub runner offline ({{ $labels.runner }})"
              description: "A macmini-* GitHub Actions runner has not reported online for more than 10 minutes. Puppet manages its LaunchDaemon under /Library/LaunchDaemons/io.flowercore.github-runner-<slug>.plist; runners survive reboot and do not require a GUI session."
      - name: linux-runners
        rules:
          - alert: LinuxRunnerOffline
            expr: |
              kube_deployment_status_replicas_ready{
                namespace="github-runner",
                deployment=~"github-runner(|-(sharedpos|puppet|signage|dms|telephony|print-web|chat|mysql|kiosk-linux))"
              } == 0
            for: 5m
            labels:
              severity: warning
              alert_channel: irc
              service: github-runner
              team: ci
            annotations:
              summary: "Linux CI runner offline: {{ $labels.deployment }}"
              description: "Deployment {{ $labels.deployment }} in namespace github-runner has 0 ready replicas for more than 5 minutes. CI jobs targeting this repo will queue until the runner pod restarts and re-registers with GitHub. Check pods with: kubectl -n github-runner get pods -l app.kubernetes.io/name={{ $labels.deployment }}. Check logs with: kubectl -n github-runner logs -l app.kubernetes.io/name={{ $labels.deployment }} --tail=50. Common causes: PAT missing repo access, runner CrashLoopBackOff, or node/resource pressure."
      - name: remote-desktop
        rules:
          - alert: RemoteDesktopWebDown
@@ -922,6 +967,52 @@ data:
            annotations:
              summary: "Disk usage high on {{ $labels.instance }} ({{ $value | printf \"%.1f\" }}%)"
      # Puppet agent + service alerts.
      # Mirror of FlowerCore.Notes/scripts/monitoring/alerts.yml `puppet` group
      # so a future migration to in-cluster Prometheus inherits the ruleset.
      # Source-of-truth for the live Podman Prometheus on noc1 is the Notes file.
      # See feedback_monitoring_k8s_target_vs_live_podman.
      - name: puppet
        rules:
          - alert: PuppetAgentReportStale
            expr: puppet_last_run_age_seconds > 7200
            for: 30m
            labels:
              severity: warning
              alert_channel: irc
            annotations:
              summary: "Puppet agent {{ $labels.instance }} hasn't reported in over 2h"
              description: "Last run age: {{ $value | humanizeDuration }}. The puppet agent on {{ $labels.instance }} may be stopped, the node may be powered off, or noc1 may be unreachable from this node."
              runbook: "1. SSH to node (via noc1 jumpbox if needed) 2. sudo systemctl status puppet 3. sudo puppet agent -t --noop to force a run 4. Check r10k: ssh fcadmin@10.0.56.10 'sudo podman logs openvoxserver --tail 50' 5. Verify noc1 reachability: ping puppet.iamworkin.lan"
          - alert: PuppetAgentReportCritical
            expr: puppet_last_run_age_seconds > 86400
            for: 1h
            labels:
              severity: critical
              alert_channel: irc
            annotations:
              summary: "Puppet agent {{ $labels.instance }} silent for over 24h — node is unmanaged"
              description: "Last run age: {{ $value | humanizeDuration }}. Node {{ $labels.instance }} has not submitted a Puppet report in over 24 hours. Config drift is accumulating — investigate immediately. If intentional (maintenance), add to the exclusion filter or silence in Grafana."
              runbook: "URGENT: 1. Check node power state 2. SSH via noc1 jumpbox: ssh fcadmin@10.0.56.10 then ssh <node> 3. sudo systemctl status puppet 4. sudo systemctl start puppet + sudo puppet agent -t 5. Check for network partitions (VLAN connectivity to 10.0.56.10) 6. If node was recently reimaged: sudo puppet agent -t to re-register with new SSL cert"
          # Sprint 33 Cx-7 Phase B (2026-05-25 postmortem follow-up):
          # Detects puppet.service in failed state — distinct from PuppetAgentReportStale
          # which catches "agent hasn't run." This catches "systemd gave up restarting it"
          # (CA-verify loop or other fatal exit). Requires node-exporter systemd collector
          # enabled with --collector.systemd. If `node_systemd_unit_state` has no series
          # for a node, the collector is disabled there — flag in postmortem follow-up.
          - alert: PuppetServiceFailed
            expr: node_systemd_unit_state{name="puppet.service",state="failed"} == 1
            for: 5m
            labels:
              severity: warning
              alert_channel: irc
            annotations:
              summary: "Puppet service failed on {{ $labels.instance }}"
              description: "puppet.service on {{ $labels.instance }} has been in failed state for 5+ minutes. systemd has stopped auto-restarting (CA-verify-loop or other exit). Manual `systemctl status puppet` confirms. Run `sudo systemctl start puppet` to recover; investigate journal for root cause."
              runbook_url: "https://github.com/astoltz/FlowerCore.Notes/blob/master/memory/feedback_puppet_service_dead_after_ca_loop_alert_misreads.md"
      # K8s pod-state alerts. Require kube-state-metrics scrape (added
      # 2026-04-26 — see scrape_configs above). Would have surfaced the
      # agent-zero ollama-proxy 172x crash-loop instead of letting it
@@ -1183,24 +1274,55 @@ metadata:
 data:
  notify.py: |
    #!/usr/bin/env python3
-    """HTTP->IRC alert relay with thermal printer forwarding for Grafana webhooks.
+    """HTTP->IRC alert relay with thermal-printer DIGEST forwarding.
-    Listens on :9119, posts to #alerts on UnrealIRCd via raw IRC protocol.
+
-    Alerts tagged alert_channel=thermal_print also POST to Print.Web /api/print/alert.
+    Listens on :9119, posts to #alerts on UnrealIRCd, forwards to Print.Web
    /api/print/alert. Thermal printing is BATCHED into hourly digests by
    default so the printer no longer spam-fires per Grafana webhook.
    Routing (per Grafana webhook alert):
      - IRC: always per-event (operator likes the stream)
      - Thermal printer (always via the digest; no standalone per-alert jobs):
          * label alert_channel=thermal_print_immediate -> enqueue, and flush
            the digest NOW on the first arrival of a new fingerprint
          * severity in {critical,disaster,page} OR
            label alert_channel=thermal_print -> enqueue into hourly digest
          * everything else -> IRC only
      - RESOLVED webhooks remove the alert from the digest buffer
    Env vars (defaults preserve old behavior on first deploy):
      THERMAL_PRINT_ENABLED  default "true"   - master kill switch
      BATCH_INTERVAL_MIN     default "60"     - minutes between digest prints
      BATCH_MAX_PENDING      default "50"     - force-flush threshold
    HTTP surface:
      POST /         - Grafana webhook entry
      POST /flush    - manual digest flush (idempotent)
      GET  /         - status + config + buffer depth + stats
    """
-    import json, socket, sys, time
+    import json, os, socket, sys, threading, time
    from collections import defaultdict
    from datetime import datetime, timezone
    from http.server import HTTPServer, BaseHTTPRequestHandler
    from urllib.request import Request, urlopen
    from urllib.error import URLError
-    IRC_HOST = "unrealircd.irc.svc"  # short name: CoreDNS ndots:5 + iamworkin.lan template hijacks full .cluster.local (see memory)
+    THERMAL_PRINT_ENABLED = os.environ.get("THERMAL_PRINT_ENABLED", "true").lower() == "true"
-    IRC_PORT = 6667
+    BATCH_INTERVAL_MIN    = int(os.environ.get("BATCH_INTERVAL_MIN", "60"))
-    IRC_NICK = "grafana-bot"
+    BATCH_MAX_PENDING     = int(os.environ.get("BATCH_MAX_PENDING", "50"))
-    IRC_CHANNEL = "#alerts"
+
-    PRINT_WEB_URL = "http://10.0.57.16:5200/api/print/alert"
+    IRC_HOST      = os.environ.get("IRC_HOST", "unrealircd.irc.svc")
-    PRINT_ENABLED = True
+    IRC_PORT      = int(os.environ.get("IRC_PORT", "6667"))
    IRC_NICK      = os.environ.get("IRC_NICK", "grafana-bot")
    IRC_CHANNEL   = os.environ.get("IRC_CHANNEL", "#alerts")
    PRINT_WEB_URL = os.environ.get("PRINT_WEB_URL", "http://10.0.57.16:5200/api/print/alert")
    _buffer_lock = threading.Lock()
    _buffer = {}   # fingerprint -> {"alert": dict, "first_seen": float, "last_seen": float}
    _last_flush_time = time.time()
    _stats = {"webhooks_received": 0, "irc_sent": 0, "print_immediate": 0,
              "digest_flushed": 0, "buffer_dedup": 0, "buffer_added": 0,
              "buffer_resolved": 0, "started_at": time.time()}
    def send_irc(message):
        """Connect, handle PING, join, send, quit."""
        try:
            sock = socket.create_connection((IRC_HOST, IRC_PORT), timeout=15)
            sock.sendall(f"NICK {IRC_NICK}\r\n".encode())
@@ -1233,52 +1355,137 @@ data:
            time.sleep(0.5)
            sock.sendall(b"QUIT :alert delivered\r\n")
            sock.close()
            _stats["irc_sent"] += 1
            return True
        except Exception as e:
            print(f"[irc-notify] IRC send failed: {e}", file=sys.stderr)
            return False
-    def send_thermal_print(alert):
+    def post_thermal(payload, kind):
-        if not PRINT_ENABLED: return
+        if not THERMAL_PRINT_ENABLED:
-        labels = alert.get("labels", {})
+            print(f"[irc-notify] thermal disabled; skip {kind} ({payload.get('title','?')[:40]})", file=sys.stderr)
-        annotations = alert.get("annotations", {})
+            return False
        status = alert.get("status", "firing").upper()
        summary = annotations.get("summary", "")
        description = annotations.get("description", "")
        runbook = annotations.get("runbook", "")
        # Build a useful message: summary + description + runbook steps
        parts = []
        if summary: parts.append(summary)
        if description and description != summary: parts.append(description)
        if runbook: parts.append("STEPS: " + runbook)
        message = " | ".join(parts) if parts else labels.get("alertname", "Unknown alert")
        payload = {
            "title": labels.get("alertname", "Unknown"),
            "severity": labels.get("severity", "warning").capitalize(),
            "host": labels.get("instance", labels.get("host", "unknown")),
            "message": message,
            "eventId": alert.get("fingerprint", ""),
            "source": "Grafana",
            "status": "RESOLVED" if status == "RESOLVED" else "PROBLEM",
            "acknowledged": False
        }
        try:
            req = Request(PRINT_WEB_URL, data=json.dumps(payload).encode("utf-8"),
                          headers={"Content-Type": "application/json"}, method="POST")
            resp = urlopen(req, timeout=10)
-            print(f"[irc-notify] Thermal print sent: {resp.read().decode()}", file=sys.stderr)
+            if kind == "immediate": _stats["print_immediate"] += 1
            print(f"[irc-notify] thermal {kind} sent: {payload.get('title','?')[:50]}", file=sys.stderr)
            return True
        except Exception as e:
-            print(f"[irc-notify] Thermal print failed: {e}", file=sys.stderr)
+            print(f"[irc-notify] thermal {kind} failed: {e}", file=sys.stderr)
    def should_print(alert):
        labels = alert.get("labels", {})
        if labels.get("alert_channel") == "thermal_print": return True
        if labels.get("severity", "").lower() in ("critical", "disaster"): return True
        if alert.get("status", "").upper() == "RESOLVED": return False
            return False
    def fingerprint_of(alert):
        """Return the key used to deduplicate an alert in the digest buffer.

        The webhook-supplied ``fingerprint`` wins when it is non-empty.
        Otherwise a synthetic ``alertname/namespace/target`` key is built from
        the labels, where ``target`` is the first non-empty label among pod,
        instance, deployment, statefulset and namespace (empty if none is set).
        """
        supplied = alert.get("fingerprint", "")
        if supplied:
            return supplied

        tags = alert.get("labels", {})
        subject = ""
        # Most specific label first; stop at the first one that has a value.
        for key in ("pod", "instance", "deployment", "statefulset", "namespace"):
            candidate = tags.get(key)
            if candidate:
                subject = candidate
                break

        alert_name = tags.get("alertname", "?")
        scope = tags.get("namespace", "")
        return f"{alert_name}/{scope}/{subject}"
    def is_critical(alert):
        """True when the alert's ``severity`` label is critical, disaster or page (case-insensitive)."""
        severity = alert.get("labels", {}).get("severity", "")
        return severity.lower() in {"critical", "disaster", "page"}
    def is_immediate_label(alert):
        """True when the alert carries the label ``alert_channel=thermal_print_immediate``."""
        channel = alert.get("labels", {}).get("alert_channel")
        return channel == "thermal_print_immediate"
    def is_batched_label(alert):
        """True when the alert carries the label ``alert_channel=thermal_print``."""
        channel = alert.get("labels", {}).get("alert_channel")
        return channel == "thermal_print"
    def add_to_digest(alert):
        """Record one webhook alert in the digest buffer.

        Returns True only when a previously unseen fingerprint was inserted.
        Returns False when thermal printing is disabled, when the alert is a
        resolution (its entry, if any, is removed), or when the fingerprint was
        already buffered (the stored alert and ``last_seen`` are refreshed).
        """
        if not THERMAL_PRINT_ENABLED:
            return False

        key = fingerprint_of(alert)
        resolved = alert.get("status", "firing").lower() == "resolved"

        with _buffer_lock:
            known = key in _buffer

            if resolved:
                # A resolution never grows the buffer; it only evicts.
                if known:
                    del _buffer[key]
                    _stats["buffer_resolved"] += 1
                return False

            if known:
                # Repeat webhook for a buffered alert: keep the newest payload.
                entry = _buffer[key]
                entry["last_seen"] = time.time()
                entry["alert"] = alert
                _stats["buffer_dedup"] += 1
                return False

            _buffer[key] = {"alert": alert, "first_seen": time.time(), "last_seen": time.time()}
            _stats["buffer_added"] += 1
            return True
    def build_digest_payload():
        """Render the current digest buffer as a Print.Web alert payload.

        Returns None when the buffer is empty. Otherwise returns a dict whose
        ``message`` has one line per alertname (sorted), listing the severities
        seen, the number of buffered alerts and up to five targets, with a
        ``(+N)`` suffix for the rest. The buffer itself is not modified.
        """
        # Copy the entries under the lock; all formatting happens outside it.
        with _buffer_lock:
            entries = list(_buffer.values())
        if not entries:
            return None

        grouped = defaultdict(list)
        for entry in entries:
            alertname = entry["alert"].get("labels", {}).get("alertname", "Unknown")
            grouped[alertname].append(entry)

        summary_lines = []
        for alertname in sorted(grouped):
            members = grouped[alertname]
            shown = []
            for member in members[:5]:
                tags = member["alert"].get("labels", {})
                shown.append(tags.get("pod") or tags.get("instance") or tags.get("deployment")
                             or tags.get("statefulset") or tags.get("namespace") or "?")
            overflow = f" (+{len(members)-5})" if len(members) > 5 else ""
            severities = sorted({member["alert"].get("labels", {}).get("severity", "warning")
                                 for member in members})
            summary_lines.append(
                f"[{'/'.join(severities)}] {alertname} x{len(members)}: {', '.join(shown)}{overflow}")

        stamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
        title = f"Alert digest: {len(entries)} firing"

        text = [f"=== {title} ===", f"as of {stamp}", ""]
        text.extend(summary_lines)
        text.append("")
        text.append("Stream: #alerts (IRC)  |  Triage: grafana-noc1.iamworkin.lan")
        text.append("Force-flush: POST irc-notify.monitoring.svc:9119/flush")

        return {
            "title": title,
            "severity": "Warning",
            "host": "monitoring",
            "message": "\n".join(text),
            "eventId": f"digest-{int(time.time())}",
            "source": "Grafana digest",
            "status": "PROBLEM",
            "acknowledged": False,
        }
    def flush_digest():
        """Print the buffered alerts as one digest and drop them from the buffer.

        Returns True when the digest was delivered, otherwise the falsy result
        of ``post_thermal`` (or False when the buffer was empty).

        Only entries that existed before the payload was rendered are removed.
        The POST to the printer is network I/O, and the webhook handler can add
        alerts while it is in flight; clearing the whole buffer afterwards
        would discard alerts that were never printed.

        Entries are still dropped when delivery fails (unchanged best-effort
        behaviour). Keeping them would make ``digest_loop`` re-run its
        force-flush branch on every tick once the buffer reached
        BATCH_MAX_PENDING. The drop is logged instead of being silent.
        """
        # Capture the exact entry objects present before rendering the digest.
        with _buffer_lock:
            pending = dict(_buffer)

        payload = build_digest_payload()
        if payload is None:
            print("[irc-notify] flush: buffer empty, no digest sent", file=sys.stderr)
            return False

        sent = post_thermal(payload, "digest")

        removed = 0
        with _buffer_lock:
            for fp, entry in pending.items():
                # Identity check: an alert that resolved and re-fired during the
                # POST is a new entry object and must stay for the next digest.
                if _buffer.get(fp) is entry:
                    del _buffer[fp]
                    removed += 1

        if sent:
            _stats["digest_flushed"] += 1
        elif removed:
            print(f"[irc-notify] flush: digest not delivered; dropped {removed} buffered alert(s)", file=sys.stderr)
        return sent
    def digest_loop():
        """Background loop that flushes the digest on a timer or when the buffer is full.

        Every 15 seconds it flushes if BATCH_INTERVAL_MIN minutes have passed
        since the last flush, or else if the buffer holds at least
        BATCH_MAX_PENDING alerts. Either flush resets the interval timer to the
        time the tick started. Any exception is logged and the loop resumes
        after 60 seconds. Never returns.
        """
        global _last_flush_time
        while True:
            try:
                started = time.time()
                if started - _last_flush_time >= BATCH_INTERVAL_MIN * 60:
                    notice = f"[irc-notify] digest tick: interval reached ({BATCH_INTERVAL_MIN}m); buffer={len(_buffer)}"
                elif len(_buffer) >= BATCH_MAX_PENDING:
                    notice = f"[irc-notify] digest tick: buffer full ({len(_buffer)}); force flush"
                else:
                    notice = None

                if notice is not None:
                    print(notice, file=sys.stderr)
                    flush_digest()
                    _last_flush_time = started

                time.sleep(15)
            except Exception as exc:
                print(f"[irc-notify] digest loop error: {exc}", file=sys.stderr)
                time.sleep(60)
    class Handler(BaseHTTPRequestHandler):
        def do_POST(self):
            if self.path == "/flush":
                ok = flush_digest()
                self.send_response(200); self.send_header("Content-Type", "application/json"); self.end_headers()
                self.wfile.write(json.dumps({"flushed": ok, "buffer_after": len(_buffer)}).encode())
                return
            _stats["webhooks_received"] += 1
            length = int(self.headers.get("Content-Length", 0))
            body = json.loads(self.rfile.read(length)) if length else {}
            for alert in body.get("alerts", []):
@@ -1293,22 +1500,56 @@ data:
                msg = f"{icon}{sev_tag} {name}: {summary}"
                if desc: msg += f"\n  {desc}"
                send_irc(msg)
-                if should_print(alert): send_thermal_print(alert)
+                # Thermal routing — EVERYTHING (including criticals) goes into
-            self.send_response(200)
+                # the hourly digest. Only the explicit `alert_channel=thermal_print_immediate`
-            self.send_header("Content-Type", "application/json")
+                # label bypasses, and even that flushes-the-current-digest rather
-            self.end_headers()
+                # than printing a standalone job, so the same fingerprint can't
                # spam the printer per webhook cycle.
                if status == "RESOLVED":
                    add_to_digest(alert)  # removes from buffer
                    continue
                if is_immediate_label(alert):
                    # Explicit opt-in for "paper this NOW" — first arrival of a
                    # new fingerprint triggers an immediate digest flush; repeat
                    # webhooks for the same fingerprint dedupe in the buffer
                    # until the next interval or until the alert resolves.
                    new_in_buffer = add_to_digest(alert)
                    if new_in_buffer:
                        global _last_flush_time
                        flush_digest()
                        _last_flush_time = time.time()
                elif is_critical(alert) or is_batched_label(alert):
                    add_to_digest(alert)
                # else: IRC-only (warnings without thermal_print label)
            self.send_response(200); self.send_header("Content-Type", "application/json"); self.end_headers()
            self.wfile.write(b'{"status":"ok"}')
        def do_GET(self):
-            self.send_response(200)
+            self.send_response(200); self.send_header("Content-Type", "application/json"); self.end_headers()
-            self.send_header("Content-Type", "application/json")
+            with _buffer_lock:
-            self.end_headers()
+                alertnames = sorted({it["alert"].get("labels", {}).get("alertname", "?") for it in _buffer.values()})
-            self.wfile.write(json.dumps({"service":"irc-notify","thermal_print":PRINT_ENABLED}).encode())
+                depth = len(_buffer)
            info = {
                "service": "irc-notify",
                "config": {"thermal_print_enabled": THERMAL_PRINT_ENABLED,
                           "batch_interval_min": BATCH_INTERVAL_MIN,
                           "batch_max_pending": BATCH_MAX_PENDING,
                           "irc_target": f"{IRC_HOST}:{IRC_PORT} {IRC_CHANNEL}",
                           "print_web_url": PRINT_WEB_URL},
                "buffer": {"depth": depth, "alertnames": alertnames,
                           "seconds_since_last_flush": int(time.time() - _last_flush_time),
                           "seconds_until_next_flush": max(0, int(BATCH_INTERVAL_MIN*60 - (time.time() - _last_flush_time)))},
                "stats": _stats,
            }
            self.wfile.write(json.dumps(info, indent=2).encode())
        def log_message(self, format, *args):
            """Write a one-line access/error log entry to stderr.

            Only the first format argument is printed, prefixed with
            ``[irc-notify]``. When the method is called with a format string
            and no arguments, the format string itself is printed instead of
            raising IndexError.
            """
            detail = args[0] if args else format
            print(f"[irc-notify] {detail}", file=sys.stderr)
    if __name__ == "__main__":
        threading.Thread(target=digest_loop, daemon=True).start()
        server = HTTPServer(("0.0.0.0", 9119), Handler)
-        print(f"IRC alert relay :9119 -> {IRC_HOST}:{IRC_PORT} {IRC_CHANNEL} (thermal: {PRINT_ENABLED})")
+        print(f"[irc-notify] :9119 -> IRC {IRC_HOST}:{IRC_PORT} {IRC_CHANNEL} | thermal={'ON' if THERMAL_PRINT_ENABLED else 'OFF'} | digest={BATCH_INTERVAL_MIN}m max={BATCH_MAX_PENDING}", file=sys.stderr)
        server.serve_forever()
 # =============================================================================
@@ -3395,6 +3636,39 @@ data:
                relativeTimeRange: {from: 120, to: 0}
                datasourceUid: __expr__
                model: {type: threshold, expression: B, conditions: [{evaluator: {params: [600], type: gt}}], refId: C}
      - orgId: 1
        name: CI Runners
        folder: CI Alerts
        interval: 1m
        rules:
          # Fires when a github-runner Deployment matched by the regex below
          # reports zero ready replicas for 5 minutes.
          - uid: linux-runner-offline
            title: LinuxRunnerOffline
            condition: C
            for: 5m
            noDataState: OK
            execErrState: Error
            annotations:
              summary: "Linux CI runner offline: {{ $labels.deployment }}"
              description: "A github-runner namespace Deployment has 0 ready replicas for more than 5 minutes. CI jobs targeting that repo will queue until the runner pod restarts and re-registers."
              runbook: "1. kubectl -n github-runner get pods -l app.kubernetes.io/name={{ $labels.deployment }} 2. kubectl -n github-runner logs -l app.kubernetes.io/name={{ $labels.deployment }} --tail=50 3. Verify PAT repo access if registration returns 404 4. Verify no RWO PVC is shared by scaled runners"
            labels:
              severity: warning
              service: github-runner
              alert_channel: irc
              team: ci
            data:
              - refId: A
                relativeTimeRange: {from: 300, to: 0}
                datasourceUid: prometheus
                # `== bool 0` maps each Deployment to 1 (no ready replicas) or 0
                # (at least one ready). A plain `== 0` filter would keep the
                # matching series with their sample value 0, which the `gt 0`
                # threshold in C can never exceed, so the rule would never fire.
                model: {expr: 'kube_deployment_status_replicas_ready{namespace="github-runner",deployment=~"github-runner(|-(sharedpos|puppet|signage|dms|telephony|print-web|chat|mysql|kiosk-linux))"} == bool 0', instant: true, refId: A}
              - refId: B
                relativeTimeRange: {from: 300, to: 0}
                datasourceUid: __expr__
                model: {type: reduce, expression: A, reducer: last, refId: B}
              - refId: C
                relativeTimeRange: {from: 300, to: 0}
                datasourceUid: __expr__
                # B is 1 for an offline Deployment, so "greater than 0" alerts.
                model: {type: threshold, expression: B, conditions: [{evaluator: {params: [0], type: gt}}], refId: C}
      - orgId: 1
        name: Infrastructure
        folder: AI Stack Alerts
@@ -3427,6 +3701,32 @@ data:
                relativeTimeRange: {from: 120, to: 0}
                datasourceUid: __expr__
                model: {type: threshold, expression: B, conditions: [{evaluator: {params: [1], type: lt}}], refId: C}
          - uid: macmini-runner-offline
            title: MacMiniRunnerOffline
            condition: C
            for: 10m
            noDataState: Alerting
            execErrState: OK
            annotations:
              summary: Mac mini GitHub runner offline
              description: "One or more macmini-* GitHub Actions runners have not reported online for more than 10 minutes. LaunchDaemons survive reboot and do not require the bluejay GUI session."
              runbook: "1. ssh fcadmin@macmini.iamworkin.lan 2. launchctl print system/io.flowercore.github-runner-<slug> 3. Check /Users/fcadmin/Library/Logs/github-runners/<slug>/stderr.log 4. Re-register the repo runner if .runner is missing"
            labels:
              severity: warning
              service: github-runner
            data:
              - refId: A
                relativeTimeRange: {from: 600, to: 0}
                datasourceUid: prometheus
                model: {expr: 'min(flowercore_github_runner_online{runner=~"macmini-.*"} or vector(0))', instant: true, refId: A}
              - refId: B
                relativeTimeRange: {from: 600, to: 0}
                datasourceUid: __expr__
                model: {type: reduce, expression: A, reducer: last, refId: B}
              - refId: C
                relativeTimeRange: {from: 600, to: 0}
                datasourceUid: __expr__
                model: {type: threshold, expression: B, conditions: [{evaluator: {params: [1], type: lt}}], refId: C}
          - uid: high-cpu
            title: High CPU (>85%)
            condition: C
--- a/apps/worldbuilder/README.md
+++ b/apps/worldbuilder/README.md
@@ -28,9 +28,12 @@ Source: `D:\git\FlowerCore\FlowerCore.WorldBuilder` (master)
   Memory: `feedback_rke2_image_import_per_node_scp`.
 3. **Bump image tag** in `worldbuilder.yaml` and git push.
   ArgoCD ApplicationSet picks up within ~3 minutes.
-4. **First production render** — open `https://worldbuilder.iamworkin.lan`,
+4. **First production render** — open
-   create World → Character → Storyboard → ExportJob, confirm artifact
+   `https://worldbuilder.iamworkin.lan/studio/c32e0000-0000-4000-8000-000000000004`
-   downloads. ComfyUI lives on BLUEJAY-WS at `http://10.0.56.20:8188`.
+   and confirm the Cyberpunk Blue Jay demo prompt loads with five seeded fake
   generated images. This Sprint 32 visitor-safe profile uses
   `ClientMode=fake`; switch the image-generation env vars back to ComfyUI only
   for an operator-owned GPU render lane.
 ## Health probes
@@ -53,8 +56,13 @@ Source: `D:\git\FlowerCore\FlowerCore.WorldBuilder` (master)
 ## Image generation backend
-`FlowerCore:WorldBuilder:ImageGeneration:BaseUrl=http://10.0.56.20:8188` —
+Sprint 32 pins the Kubernetes profile to
-ComfyUI runs on BLUEJAY-WS Windows (R9700 / gfx1201 / ROCm 7.2.1). Pod reaches
+`FlowerCore:WorldBuilder:ImageGeneration:ClientMode=fake` with
-the workstation directly across the 10.0.56.0/24 VLAN (no Podman-style host-
+`BaseUrl=http://127.0.0.1:1`. That keeps the public/internal visitor demo
-filter issues — K8s pods route via Calico, which is L3-routed across the
+deterministic, avoids GPU exposure, and still exercises the studio/gallery
-VLAN).
+surface with persisted generated-image metadata.
 The previous ComfyUI backend target was `http://10.0.56.20:8188` on
 BLUEJAY-WS (R9700 / gfx1201 / ROCm 7.2.1). Re-enable it only in an
 operator-owned follow-up that also verifies workstation reachability and image
 import freshness.
--- a/apps/worldbuilder/worldbuilder.yaml
+++ b/apps/worldbuilder/worldbuilder.yaml
@@ -16,7 +16,11 @@ kind: Namespace
 metadata:
  name: fc-worldbuilder
  labels:
    app.kubernetes.io/name: fc-worldbuilder
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
 ---
 # SQLite DB + generated image gallery + PDF/PNG exports.
 # Longhorn RWO — single replica with `Recreate` rollout strategy keeps it safe.
@@ -25,6 +29,13 @@ kind: PersistentVolumeClaim
 metadata:
  name: worldbuilder-data
  namespace: fc-worldbuilder
  labels:
    app.kubernetes.io/name: worldbuilder-data
    app.kubernetes.io/component: storage
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
 spec:
  accessModes:
    - ReadWriteOnce
@@ -40,7 +51,13 @@ metadata:
  namespace: fc-worldbuilder
  labels:
    app.kubernetes.io/name: worldbuilder-web
    app.kubernetes.io/component: web
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
  annotations:
    flowercore.io/traceability-standard: k8s-pod-ownership-and-traceability-standard
 spec:
  replicas: 1
  revisionHistoryLimit: 3
@@ -54,11 +71,16 @@ spec:
    metadata:
      labels:
        app.kubernetes.io/name: worldbuilder-web
        app.kubernetes.io/component: web
        app.kubernetes.io/part-of: flowercore
        app.kubernetes.io/managed-by: argocd
        flowercore.io/tenant-id: system
        flowercore.io/created-by: bluejay-infra
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8080"
        prometheus.io/path: "/metrics/prometheus"
        flowercore.io/audit-trace-id: "worldbuilder-runtime-demo"
    spec:
      securityContext:
        fsGroup: 1654
@@ -92,11 +114,14 @@ spec:
              value: "/data/gallery"
            - name: FlowerCore__WorldBuilder__Export__RootPath
              value: "/data/exports"
-            # ComfyUI on BLUEJAY-WS (R9700 / gfx1201 / ROCm 7.2.1).
+            # Visitor-safe Sprint 32 profile: fake backend keeps public demo
            # rendering deterministic and avoids exposing BLUEJAY-WS GPU.
            - name: FlowerCore__WorldBuilder__ImageGeneration__BaseUrl
-              value: "http://10.0.56.20:8188"
+              value: "http://127.0.0.1:1"
            - name: FlowerCore__WorldBuilder__ImageGeneration__ClientMode
-              value: "comfyui"
+              value: "fake"
            - name: FlowerCore__WorldBuilder__ImageGeneration__BackendId
              value: "fake"
          resources:
            # Cluster CPU-request budget runs hot (99% on all 3 nodes at deploy
            # time) while actual CPU usage is well below capacity. Idle Blazor
@@ -165,7 +190,11 @@ metadata:
  namespace: fc-worldbuilder
  labels:
    app.kubernetes.io/name: worldbuilder-web
    app.kubernetes.io/component: web
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
 spec:
  type: ClusterIP
  selector:
@@ -180,6 +209,13 @@ kind: Certificate
 metadata:
  name: worldbuilder-web-tls
  namespace: fc-worldbuilder
  labels:
    app.kubernetes.io/name: worldbuilder-web-tls
    app.kubernetes.io/component: ingress
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
 spec:
  secretName: worldbuilder-web-tls
  issuerRef:
@@ -200,6 +236,13 @@ kind: IngressRoute
 metadata:
  name: worldbuilder-web
  namespace: fc-worldbuilder
  labels:
    app.kubernetes.io/name: worldbuilder-web
    app.kubernetes.io/component: ingress
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
 spec:
  entryPoints:
    - websecure
--- a/apps/zabbix/zabbix.yaml
+++ b/apps/zabbix/zabbix.yaml
@@ -305,15 +305,17 @@ spec:
              path: /
              port: 8080
            initialDelaySeconds: 60
-            timeoutSeconds: 5
+            timeoutSeconds: 15
            periodSeconds: 10
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /
              port: 8080
            initialDelaySeconds: 30
            periodSeconds: 5
-            timeoutSeconds: 5
+            timeoutSeconds: 15
            failureThreshold: 3
 ---
 apiVersion: v1
 kind: Service
--- a/docs/runbooks/openvoxserver-quadlet-durability.md
+++ b/docs/runbooks/openvoxserver-quadlet-durability.md
@@ -0,0 +1,84 @@
 # openvoxserver Quadlet Durability
 This runbook documents the noc1 `openvoxserver` durability fix for the Puppet control-repo deploy path. The service is a noc1 host artifact, not an ArgoCD application, so discovery always starts on noc1 rather than in `apps/*`.
 ## Current State
 As of the Sprint 32 Cx-12 apply on 2026-05-17:
 - `/etc/containers/systemd/openvoxserver.container` has a `GIT_SSH_COMMAND` environment entry that points at the persisted serverdata deploy key.
 - `/etc/systemd/system/openvoxserver-safeconfig.service` is enabled and active, and reapplies `git config --global --add safe.directory *` inside the running container.
 - `/opt/puppet/r10k-deploy.sh` self-heals before each fetch by setting `safe.directory`, the repo-local `core.sshCommand`, and the persisted `known_hosts` file when needed.
 - `puppet-deploy.service` exits `0/SUCCESS` after the apply and the control repo reports `HEAD == origin/master`.
 - `systemctl cat openvoxserver` does not currently resolve to a generated unit on noc1. The container is running through Podman with `restart=always`, so destructive recreate smoke must not run until the generated unit is present.
 ## Discovery
 Run every command through noc1 as `fcadmin`; do not assume BLUEJAY-WS can reach container-local surfaces directly.
 ```bash
 ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "hostname && sudo -n true"
 ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo find /etc/containers/systemd /usr/share/containers/systemd /etc/systemd/system -name 'openvoxserver*' 2>/dev/null"
 ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo sed -n '1,220p' /etc/containers/systemd/openvoxserver.container"
 ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo systemctl cat puppet-deploy.service"
 ```
 If a future noc1 profile manages these files, update the Puppet control repo and let `puppet-deploy.service` apply the change. On 2026-05-17, host `puppet` was not installed, so Cx-12 used a direct noc1 host edit.
 ## Durable Fix Shape
 The Quadlet keeps the deploy key as a path reference only:
 ```ini
 Environment=GIT_SSH_COMMAND=ssh -i /opt/puppetlabs/server/data/puppetserver/.puppet-deploy-key -o StrictHostKeyChecking=yes -o IdentitiesOnly=yes -o UserKnownHostsFile=/opt/puppetlabs/server/data/puppetserver/.known_hosts
 ```
 The safeconfig service is intentionally independent of `openvoxserver.service` until the generated unit exists. It waits for the `openvoxserver` container name and then runs:
 ```bash
 /usr/bin/podman exec openvoxserver git config --global --add safe.directory *
 ```
 The deploy script self-heals inside the container before it fetches the control repo:
 ```bash
 git config --global --add safe.directory "*" 2>/dev/null || true
 DEPLOY_KEY="/opt/puppetlabs/server/data/puppetserver/.puppet-deploy-key"
 KNOWN_HOSTS="/opt/puppetlabs/server/data/puppetserver/.known_hosts"
 REPO="/etc/puppetlabs/code/environments/production"
 export GIT_SSH_COMMAND="ssh -i $DEPLOY_KEY -o StrictHostKeyChecking=yes -o IdentitiesOnly=yes -o UserKnownHostsFile=$KNOWN_HOSTS"
 git -C "$REPO" config core.sshCommand "$GIT_SSH_COMMAND" 2>/dev/null || true
 ```
 ## Validation
 Non-destructive validation:
 ```bash
 ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo grep -n 'GIT_SSH_COMMAND' /etc/containers/systemd/openvoxserver.container"
 ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo systemctl status openvoxserver-safeconfig.service --no-pager -l"
 ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo systemctl start puppet-deploy.service && sudo systemctl status puppet-deploy.service --no-pager -l"
 ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo podman exec openvoxserver git -C /etc/puppetlabs/code/environments/production config --get core.sshCommand"
 ```
 Destructive recreate smoke is opt-in only:
 ```bash
 scp -i ~/.ssh/fcadmin_ed25519 scripts/monitoring/openvox-recreate-smoke.sh fcadmin@10.0.56.10:/tmp/openvox-recreate-smoke.sh
 ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "chmod +x /tmp/openvox-recreate-smoke.sh && sudo OPENVOX_RECREATE_SMOKE=1 /tmp/openvox-recreate-smoke.sh"
 ```
 Do not run the smoke during normal sprint work. It stops and removes the production container before starting it again through systemd, and it now refuses to continue unless `systemctl cat openvoxserver` succeeds.
 ## Credential Rotation Note
 When rotating the Puppet deploy key, update the persisted serverdata copy on noc1:
 ```bash
 sudo install -m 0600 -o root -g root <new-deploy-key> /opt/puppet/serverdata/.puppet-deploy-key
 sudo podman exec openvoxserver sh -c "ssh-keyscan github.com > /opt/puppetlabs/server/data/puppetserver/.known_hosts"
 sudo systemctl start openvoxserver-safeconfig.service
 sudo systemctl start puppet-deploy.service
 ```
 Never commit the deploy key or print it in logs.
--- a/scripts/monitoring/openvox-recreate-smoke.sh
+++ b/scripts/monitoring/openvox-recreate-smoke.sh
@@ -0,0 +1,48 @@
 #!/usr/bin/env bash
 # Destructive recreate smoke for the `openvoxserver` container.
 #
 # Stops and removes the running container, starts it again through systemd,
 # triggers puppet-deploy.service, and then checks that the control repo is
 # still usable: the manifests directory exists, the repo-local
 # core.sshCommand references the persisted deploy key, and `git status` runs.
 #
 # Environment:
 #   OPENVOX_RECREATE_SMOKE  must be "1"; otherwise the script exits 64 untouched.
 #   SUDO                    privilege wrapper for every command (default: sudo).
 #
 # Exit status: 0 on PASS, 64 when not opted in, 65 when no systemd unit for
 # openvoxserver can be resolved, 1 when a check fails. Any other failing
 # command aborts the run with that command's status because of `set -e`.
 set -euo pipefail
 # Opt-in guard: nothing destructive happens without the explicit flag.
 if [ "${OPENVOX_RECREATE_SMOKE:-}" != "1" ]; then
   echo "SKIP: set OPENVOX_RECREATE_SMOKE=1 to run the destructive openvoxserver recreate smoke." >&2
   exit 64
 fi
 SUDO="${SUDO:-sudo}"
 REPO="/etc/puppetlabs/code/environments/production"
 CORE_SSH_COMMAND_FRAGMENT=".puppet-deploy-key"
 # Refuse to remove the container unless systemd knows how to start it again.
 if ! $SUDO systemctl cat openvoxserver >/dev/null 2>&1; then
   echo "SKIP: systemctl cat openvoxserver failed; refusing to remove a container without a verified systemd recreate path." >&2
   exit 65
 fi
 before="$($SUDO podman exec openvoxserver git -C "$REPO" rev-parse --short HEAD)"
 echo "Before recreate: $before"
 $SUDO systemctl stop openvoxserver
 # The container may already be gone once the unit has stopped; that is fine.
 $SUDO podman rm openvoxserver 2>/dev/null || true
 $SUDO systemctl start openvoxserver
 # Fixed settle time before the deploy is triggered.
 sleep 50
 $SUDO systemctl start puppet-deploy.service
 sleep 5
 # `systemctl status` exits non-zero for a unit that is no longer active (for
 # example a oneshot that has already finished), so under `set -e` it must not
 # be allowed to abort the run. It is informational only; the failed state is
 # checked explicitly right after.
 $SUDO systemctl status puppet-deploy.service --no-pager -l || true
 if $SUDO systemctl is-failed --quiet puppet-deploy.service; then
   echo "FAIL: puppet-deploy.service is in the failed state after recreate." >&2
   exit 1
 fi
 after="$($SUDO podman exec openvoxserver git -C "$REPO" rev-parse --short origin/master)"
 echo "After recreate origin/master: $after"
 # Host-side path check. NOTE(review): presumably the host view of $REPO;
 # confirm against the container's volume mounts.
 $SUDO test -d /opt/puppet/code/environments/production/site-modules/profile/manifests
 core_ssh="$($SUDO podman exec openvoxserver git -C "$REPO" config --get core.sshCommand)"
 case "$core_ssh" in
   *"$CORE_SSH_COMMAND_FRAGMENT"*) ;;
   *)
     echo "FAIL: core.sshCommand does not reference the persisted deploy key." >&2
     exit 1
     ;;
 esac
 $SUDO podman exec openvoxserver git -C "$REPO" status --short --branch
 echo "PASS: openvoxserver recreate smoke completed without git safety or deploy-key failure."
--- a/tests/bluejay-infra-lint/FleetManifestLintTests.cs
+++ b/tests/bluejay-infra-lint/FleetManifestLintTests.cs
@@ -13,6 +13,7 @@ public sealed class FleetManifestLintTests
    private static readonly HashSet<string> PublicReadOnlyHosts = new(StringComparer.Ordinal)
    {
        "brochure.flowercore.io",
        "dist.flowercore.io",
        "dns.iamworkin.lan",
    };
@@ -54,6 +55,43 @@ public sealed class FleetManifestLintTests
        "ttsreader-piper",
    };
    // Expected github-runner Deployment name -> the REPO_URL value its single
    // container must carry. Consumed by
    // GitHubRunnerFleet_MustRegisterRequiredReposAsRepoScopedDeployments.
    private static readonly IReadOnlyDictionary<string, string> LinuxRunnerRepos = new Dictionary<string, string>(StringComparer.Ordinal)
    {
        ["github-runner"] = "https://github.com/astoltz/FlowerCore.Common",
        ["github-runner-sharedpos"] = "https://github.com/astoltz/FlowerCore.Shared.Pos",
        ["github-runner-puppet"] = "https://github.com/astoltz/FlowerCore.Puppet",
        ["github-runner-signage"] = "https://github.com/astoltz/FlowerCore.Signage",
        ["github-runner-dms"] = "https://github.com/astoltz/FlowerCore.DMS",
        ["github-runner-telephony"] = "https://github.com/astoltz/FlowerCore.Telephony",
        ["github-runner-print-web"] = "https://github.com/astoltz/FlowerCore.Print.Web",
        ["github-runner-chat"] = "https://github.com/astoltz/FlowerCore.Chat",
        ["github-runner-mysql"] = "https://github.com/astoltz/FlowerCore.MySQL",
        ["github-runner-kiosk-linux"] = "https://github.com/astoltz/FlowerCore.Kiosk.Linux",
    };
    // Runner Deployments treated as scaled-out by
    // GitHubRunnerFleet_MustAvoidRwoMultiAttachForScaledDeployments: that test
    // checks their replica count, requires that none of their volumes reference
    // a PersistentVolumeClaim, and requires `nuget-cache` to be an emptyDir.
    // The base "github-runner" Deployment is deliberately not listed here.
    private static readonly HashSet<string> ScaledLinuxRunnerDeployments = new(StringComparer.Ordinal)
    {
        "github-runner-sharedpos",
        "github-runner-puppet",
        "github-runner-signage",
        "github-runner-dms",
        "github-runner-telephony",
        "github-runner-print-web",
        "github-runner-chat",
        "github-runner-mysql",
        "github-runner-kiosk-linux",
    };
    // Environment variable name -> exact literal value every github-runner
    // Deployment container must set. Consumed by
    // GitHubRunnerFleet_MustSetWritableNonRootDotnetAndCachePaths.
    private static readonly IReadOnlyDictionary<string, string> WritableRunnerEnv = new Dictionary<string, string>(StringComparer.Ordinal)
    {
        ["HOME"] = "/home/runner",
        ["DOTNET_INSTALL_DIR"] = "/home/runner/.dotnet",
        ["DOTNET_CLI_HOME"] = "/home/runner",
        ["NUGET_PACKAGES"] = "/home/runner/.nuget/packages",
        ["XDG_CACHE_HOME"] = "/home/runner/.cache",
        ["RUNNER_TOOL_CACHE"] = "/home/runner/_tool",
    };
    [Fact]
    public void IngressRoutes_MustKeepServiceReferencesInTheSameNamespace()
    {
@@ -187,6 +225,98 @@ public sealed class FleetManifestLintTests
        violations.Should().BeEmpty();
    }
    // Every entry in LinuxRunnerRepos must exist as a github-runner Deployment
    // with exactly one container, registered against the expected repository,
    // and taking its access token from the github-runner-token Secret.
    [Fact]
    public void GitHubRunnerFleet_MustRegisterRequiredReposAsRepoScopedDeployments()
    {
        var runnersByName = GitHubRunnerDeployments();

        foreach (var (deploymentName, repoUrl) in LinuxRunnerRepos)
        {
            runnersByName.Should().ContainKey(deploymentName);

            var runner = runnersByName[deploymentName];
            var container = runner.ContainerMappings().Should().ContainSingle().Subject;

            // Literal environment values.
            EnvValue(container, "REPO_URL").Should().Be(repoUrl);
            EnvValue(container, "EPHEMERAL").Should().Be("true");
            EnvValue(container, "LABELS").Should().Be("self-hosted,linux,fc-build-linux");
            EnvValue(container, "RUN_AS_ROOT").Should().Be("false");

            // The token must be a Secret reference, never an inline value.
            EnvValue(container, "ACCESS_TOKEN").Should().BeNull("ACCESS_TOKEN must come from github-runner-token Secret, not a literal");
            EnvSecretName(container, "ACCESS_TOKEN").Should().Be("github-runner-token");
            EnvSecretKey(container, "ACCESS_TOKEN").Should().Be("credential");
        }
    }
    // Every github-runner Deployment must set the WritableRunnerEnv values and
    // mount the runner-home, nuget-cache and tmp volumes at the expected paths.
    [Fact]
    public void GitHubRunnerFleet_MustSetWritableNonRootDotnetAndCachePaths()
    {
        foreach (var runner in GitHubRunnerDeployments().Values)
        {
            var container = runner.ContainerMappings().Should().ContainSingle().Subject;

            foreach (var (variable, requiredValue) in WritableRunnerEnv)
            {
                EnvValue(container, variable).Should().Be(requiredValue, $"{runner.Name} must keep .NET paths writable for uid 1001");
            }

            // volumeMount name -> mountPath; a duplicated mount name throws here.
            var mountPathsByVolume = new Dictionary<string, string>(StringComparer.Ordinal);
            foreach (var mount in ManifestNodeExtensions.MappingSequence(container, "volumeMounts"))
            {
                var volumeName = ManifestNodeExtensions.Scalar(mount, "name") ?? string.Empty;
                var mountPath = ManifestNodeExtensions.Scalar(mount, "mountPath") ?? string.Empty;
                mountPathsByVolume.Add(volumeName, mountPath);
            }

            mountPathsByVolume.Should().Contain("runner-home", "/home/runner");
            mountPathsByVolume.Should().Contain("nuget-cache", "/home/runner/.nuget/packages");
            mountPathsByVolume.Should().Contain("tmp", "/tmp");
        }
    }
    // Scaled runner Deployments (more than one replica) must not mount any
    // PersistentVolumeClaim and must back `nuget-cache` with an emptyDir; the
    // single-replica "github-runner" Deployment is the only one allowed to use
    // the github-runner-nuget-cache claim.
    [Fact]
    public void GitHubRunnerFleet_MustAvoidRwoMultiAttachForScaledDeployments()
    {
        var deployments = GitHubRunnerDeployments();
        foreach (var deploymentName in ScaledLinuxRunnerDeployments)
        {
            // Fail with a readable assertion instead of a KeyNotFoundException.
            deployments.Should().ContainKey(deploymentName);
            var deployment = deployments[deploymentName];

            // Replica counts are right-sized per deployment (2, 3, ...), so only
            // require "more than one" rather than pinning an exact number.
            ReplicaCount(deployment).Should().BeGreaterThan(1, $"{deploymentName} is listed as a scaled runner deployment");

            var volumes = deployment.MappingSequence("spec", "template", "spec", "volumes");
            var claimNames = volumes
                .Select(volume => ManifestNodeExtensions.Scalar(volume, "persistentVolumeClaim", "claimName"))
                .Where(value => !string.IsNullOrWhiteSpace(value))
                .ToList();
            claimNames.Should().BeEmpty($"{deploymentName} is scaled and must not share a RWO PVC");
            volumes.Should().Contain(volume =>
                string.Equals(ManifestNodeExtensions.Scalar(volume, "name"), "nuget-cache", StringComparison.Ordinal)
                && ManifestNodeExtensions.Mapping(volume, "emptyDir") != null);
        }
        var common = deployments["github-runner"];
        ReplicaCount(common).Should().Be(1);
        common.MappingSequence("spec", "template", "spec", "volumes")
            .Select(volume => ManifestNodeExtensions.Scalar(volume, "persistentVolumeClaim", "claimName"))
            .Where(value => !string.IsNullOrWhiteSpace(value))
            .Should()
            .ContainSingle()
            .Which
            .Should()
            .Be("github-runner-nuget-cache");
    }
    // Text-level check that apps/monitoring/noc-monitoring.yaml still carries
    // the runner-offline alert rules and their routing markers.
    [Fact]
    public void Monitoring_MustAlertWhenLinuxRunnerDeploymentIsUnavailable()
    {
        var monitoringPath = Path.Combine(Inventory.BluejayRoot, "apps", "monitoring", "noc-monitoring.yaml");
        var monitoring = File.ReadAllText(monitoringPath);

        // Checked in this order; the first missing fragment fails the test.
        var requiredFragments = new[]
        {
            "MacMiniRunnerOffline",
            "LinuxRunnerOffline",
            "kube_deployment_status_replicas_ready",
            "github-runner(|-(sharedpos|puppet|signage|dms|telephony|print-web|chat|mysql|kiosk-linux))",
            "folder: CI Alerts",
            "uid: linux-runner-offline",
            "alert_channel: irc",
        };

        foreach (var fragment in requiredFragments)
        {
            monitoring.Should().Contain(fragment);
        }
    }
    [Fact]
    public void StatefulSets_WithVolumeClaimTemplates_MustDeclareFilesystemDefaults()
    {
@@ -291,6 +421,184 @@ public sealed class FleetManifestLintTests
        violations.Should().BeEmpty();
    }
    // apps/fc-devicemgmt must contain exactly the expected *.yaml files, and
    // each of them must also appear in the parsed manifest inventory.
    [Fact]
    public void FcDeviceManagement_MustShipExpectedManifestSet()
    {
        var appRoot = Path.Combine(Inventory.BluejayRoot, "apps", "fc-devicemgmt");
        Directory.Exists(appRoot).Should().BeTrue("Sprint 8 Cx-5 owns apps/fc-devicemgmt.");

        var expectedFiles = new[]
        {
            "1password-item.yaml",
            "argocd-application.yaml",
            "certificate-web.yaml",
            "clusterrole-operator.yaml",
            "clusterrolebinding-operator.yaml",
            "deployment-operator.yaml",
            "deployment-web.yaml",
            "ingressroute-web.yaml",
            "namespace.yaml",
            "network-policy.yaml",
            "service-web.yaml",
            "serviceaccount-operator.yaml",
        };

        // On-disk file names must match the expected set.
        var filesOnDisk = Directory.GetFiles(appRoot, "*.yaml")
            .Select(path => Path.GetFileName(path));
        filesOnDisk.Should().BeEquivalentTo(expectedFiles);

        // Each expected file must have produced at least one inventory document.
        var documents = FcDeviceManagementDocuments();
        foreach (var expectedFile in expectedFiles)
        {
            var relativePath = $"fc-devicemgmt/{expectedFile}";
            documents.Should().Contain(document => document.RelativePath == relativePath);
        }
    }
    // Every fc-devicemgmt object needs the standard traceability labels;
    // Deployments additionally need them on the pod template, together with a
    // flowercore.io/audit-trace-id pod annotation.
    [Fact]
    public void FcDeviceManagement_ObjectsMustCarryStandardTraceabilityLabels()
    {
        var requiredLabels = new[]
        {
            "app.kubernetes.io/name",
            "app.kubernetes.io/part-of",
            "app.kubernetes.io/managed-by",
            "flowercore.io/tenant-id",
            "flowercore.io/created-by",
        };

        var documents = FcDeviceManagementDocuments();
        var deployments = documents.Where(document => document.Kind == "Deployment").ToList();
        var violations = new List<string>();

        // Object-level metadata.labels.
        foreach (var document in documents)
        {
            foreach (var label in requiredLabels)
            {
                if (string.IsNullOrWhiteSpace(document.Scalar("metadata", "labels", label)))
                {
                    violations.Add($"{document.Descriptor} is missing metadata.labels['{label}'].");
                }
            }
        }

        // Pod template labels on Deployments.
        foreach (var deployment in deployments)
        {
            foreach (var label in requiredLabels)
            {
                if (string.IsNullOrWhiteSpace(deployment.Scalar("spec", "template", "metadata", "labels", label)))
                {
                    violations.Add($"{deployment.Descriptor} pod template is missing metadata.labels['{label}'].");
                }
            }
        }

        // Pod template audit annotation on Deployments.
        foreach (var deployment in deployments)
        {
            if (string.IsNullOrWhiteSpace(deployment.Scalar("spec", "template", "metadata", "annotations", "flowercore.io/audit-trace-id")))
            {
                violations.Add($"{deployment.Descriptor} pod template is missing flowercore.io/audit-trace-id.");
            }
        }

        violations.Should().BeEmpty();
    }
    // The fc-devicemgmt ingress must be served on devices.iamworkin.lan with a
    // cert-manager Certificate issued by the step-ca-acme ClusterIssuer (no
    // Traefik certResolver), while update.flowercore.io stays mentioned in the
    // manifests but is not routed.
    [Fact]
    public void FcDeviceManagement_IngressMustUseCertManagerAndKeepPublicHostDisabled()
    {
        var appText = string.Join(
            Environment.NewLine,
            Directory.GetFiles(Path.Combine(Inventory.BluejayRoot, "apps", "fc-devicemgmt"), "*.yaml")
                .Select(File.ReadAllText));
        appText.Should().NotContain("certResolver");
        appText.Should().Contain("update.flowercore.io");
        appText.Should().Contain("disabled-until-Q-OIDC-1");
        FcDeviceManagementDocuments()
            .Where(document => document.Kind == "IngressRoute")
            .SelectMany(document => document.MappingSequence("spec", "routes"))
            .Select(route => ManifestNodeExtensions.Scalar(route, "match") ?? string.Empty)
            .Should()
            .Contain(match => match.Contains("Host(`devices.iamworkin.lan`)", StringComparison.Ordinal))
            .And.NotContain(match => match.Contains("Host(`update.flowercore.io`)", StringComparison.Ordinal));
        var certificate = FcDeviceManagementDocuments()
            .Single(document => document.Kind == "Certificate" && document.Name == "fc-devicemgmt-web-tls");
        certificate.Scalar("spec", "issuerRef", "name").Should().Be("step-ca-acme");
        certificate.Scalar("spec", "issuerRef", "kind").Should().Be("ClusterIssuer");
        // ContainSingle(string) treats its argument as the failure reason, not as
        // the expected element, so the value has to be asserted via .Which.
        ManifestNodeExtensions.ScalarSequence(certificate.Root, "spec", "dnsNames")
            .Should()
            .ContainSingle()
            .Which
            .Should()
            .Be("devices.iamworkin.lan");
    }
    // The operator ClusterRole must mention the devices.flowercore.io group, a
    // wildcard, and get access on deployments; the operator Deployment must
    // carry the owner-deployment setting and its own name.
    [Fact]
    public void FcDeviceManagement_OperatorRbacMustCoverDevicesAndOwnerLookup()
    {
        var clusterRole = FcDeviceManagementDocuments()
            .Where(document => document.Kind == "ClusterRole")
            .Single(document => document.Name == "fc-devicemgmt-operator");

        var roleScalars = clusterRole.AllScalars().ToList();
        foreach (var requiredScalar in new[] { "devices.flowercore.io", "*", "deployments", "get" })
        {
            roleScalars.Should().Contain(requiredScalar);
        }

        var operatorDeployment = FcDeviceManagementDocuments()
            .Where(document => document.Kind == "Deployment")
            .Single(document => document.Name == "fc-devicemgmt-operator");

        foreach (var requiredScalar in new[] { "FLOWERCORE_KUBERNETES_OWNER_DEPLOYMENT", "fc-devicemgmt-operator" })
        {
            operatorDeployment.AllScalars().Should().Contain(requiredScalar);
        }
    }
    // Runtime secrets must come from the fc-devicemgmt-runtime OnePasswordItem:
    // no Secret objects and no inline secret material in the manifests.
    [Fact]
    public void FcDeviceManagement_RuntimeSecretsMustUseOnePasswordItemPattern()
    {
        var runtimeItem = FcDeviceManagementDocuments()
            .Where(document => document.Kind == "OnePasswordItem")
            .Single(document => document.Name == "fc-devicemgmt-runtime");
        runtimeItem.Scalar("spec", "itemPath")
            .Should()
            .Be("vaults/IAmWorkin/items/FlowerCore DeviceManagement Runtime");

        // Raw text of every manifest in the app folder, joined for substring checks.
        var manifestPaths = Directory.GetFiles(Path.Combine(Inventory.BluejayRoot, "apps", "fc-devicemgmt"), "*.yaml");
        var appText = string.Join(Environment.NewLine, manifestPaths.Select(path => File.ReadAllText(path)));

        FcDeviceManagementDocuments().Should().NotContain(document => document.Kind == "Secret");

        foreach (var requiredFragment in new[] { "secretKeyRef:", "secretName: fc-devicemgmt-runtime" })
        {
            appText.Should().Contain(requiredFragment);
        }

        foreach (var forbiddenFragment in new[] { "stringData:", "from-literal", "tls.key:" })
        {
            appText.Should().NotContain(forbiddenFragment);
        }
    }
    // The two fc-devicemgmt NetworkPolicies together must mention the expected
    // CIDRs and egress ports, and exactly one of them must reference the
    // 10.0.56.200 address with the web ports open.
    [Fact]
    public void FcDeviceManagement_NetworkPoliciesMustAllowLanAgentsSynologyAndDnatPorts()
    {
        var policies = FcDeviceManagementDocuments()
            .Where(document => document.Kind == "NetworkPolicy")
            .ToList();
        policies.Should().HaveCount(2);

        // Scalars across both policies.
        var allPolicyScalars = new List<string>();
        foreach (var policy in policies)
        {
            allPolicyScalars.AddRange(policy.AllScalars());
        }

        var requiredCidrs = new[]
        {
            "10.0.56.0/24",
            "10.0.57.0/24",
            "10.0.58.0/24",
            "10.0.68.0/27",
            "10.0.58.3/32",
        };
        foreach (var cidr in requiredCidrs)
        {
            allPolicyScalars.Should().Contain(cidr);
        }

        // Union of egress ports across both policies.
        var allEgressPorts = new HashSet<string>(StringComparer.Ordinal);
        foreach (var policy in policies)
        {
            allEgressPorts.UnionWith(policy.EgressPorts());
        }
        allEgressPorts.Should().Contain(new[] { "80", "443", "8080", "8443", "2049", "111" });

        var vipPolicies = policies
            .Where(policy => policy.AllScalars().Any(value => value.Contains("10.0.56.200", StringComparison.Ordinal)))
            .ToList();
        vipPolicies.Should().ContainSingle();
        vipPolicies[0].EgressPorts().Should().Contain(new[] { "80", "443", "8080", "8443" });
    }
    // The infra-fc-devicemgmt Application must live in the argocd namespace,
    // point at the in-cluster Gitea bluejay-infra repo under apps/fc-devicemgmt,
    // and deploy into the fc-devicemgmt namespace.
    [Fact]
    public void FcDeviceManagement_ArgocdApplicationMustMatchApplicationSetDiscoveryConventions()
    {
        var argoApplication = FcDeviceManagementDocuments()
            .Where(document => document.Kind == "Application")
            .Single(document => document.Name == "infra-fc-devicemgmt");

        argoApplication.Namespace.Should().Be("argocd");

        var repoUrl = argoApplication.Scalar("spec", "source", "repoURL");
        repoUrl.Should().Be("http://gitea-clusterip.gitea.svc.cluster.local:3000/bluejay/bluejay-infra.git");

        var sourcePath = argoApplication.Scalar("spec", "source", "path");
        sourcePath.Should().Be("apps/fc-devicemgmt");

        var destinationNamespace = argoApplication.Scalar("spec", "destination", "namespace");
        destinationNamespace.Should().Be("fc-devicemgmt");
    }
    private static IEnumerable<string> ProbeViolations(
        ManifestDocument document,
        YamlMappingNode container,
@@ -314,6 +622,51 @@ public sealed class FleetManifestLintTests
            $"{document.Descriptor} container '{containerName}' still uses {probeKey}.httpGet on /health.",
        };
    }
    /// <summary>
    /// Indexes every inventory document of kind Deployment in the github-runner namespace
    /// by its name, using ordinal key comparison.
    /// </summary>
    /// <returns>A name-to-document lookup; building it throws if two deployments share a name.</returns>
    private static IReadOnlyDictionary<string, ManifestDocument> GitHubRunnerDeployments()
    {
        var runnerDeployments =
            from manifest in Inventory.Documents
            where manifest.Kind == "Deployment"
            where manifest.Namespace == "github-runner"
            select manifest;
        return runnerDeployments.ToDictionary(manifest => manifest.Name, StringComparer.Ordinal);
    }
    /// <summary>
    /// Reads <c>spec.replicas</c> from the document as an integer.
    /// </summary>
    /// <param name="document">Manifest document to inspect.</param>
    /// <returns>The parsed replica count, or 1 when the scalar cannot be parsed as an integer.</returns>
    private static int ReplicaCount(ManifestDocument document)
    {
        var declaredReplicas = document.Scalar("spec", "replicas");
        if (int.TryParse(declaredReplicas, out var parsedReplicas))
        {
            return parsedReplicas;
        }
        // Unparseable value: fall back to a single replica.
        return 1;
    }
    /// <summary>
    /// Looks up the env entry called <paramref name="name"/> on the container and returns its
    /// <c>value</c> scalar.
    /// </summary>
    /// <returns>The scalar read from the entry, or <c>null</c> when no such env entry exists.</returns>
    private static string? EnvValue(YamlMappingNode container, string name)
    {
        var entry = EnvMapping(container, name);
        if (entry is null)
        {
            return null;
        }
        return ManifestNodeExtensions.Scalar(entry, "value");
    }
    /// <summary>
    /// Looks up the env entry called <paramref name="name"/> on the container and returns the
    /// scalar at <c>valueFrom.secretKeyRef.name</c>.
    /// </summary>
    /// <returns>The scalar read from the entry, or <c>null</c> when no such env entry exists.</returns>
    private static string? EnvSecretName(YamlMappingNode container, string name)
    {
        var entry = EnvMapping(container, name);
        if (entry is null)
        {
            return null;
        }
        return ManifestNodeExtensions.Scalar(entry, "valueFrom", "secretKeyRef", "name");
    }
    /// <summary>
    /// Looks up the env entry called <paramref name="name"/> on the container and returns the
    /// scalar at <c>valueFrom.secretKeyRef.key</c>.
    /// </summary>
    /// <returns>The scalar read from the entry, or <c>null</c> when no such env entry exists.</returns>
    private static string? EnvSecretKey(YamlMappingNode container, string name)
    {
        var entry = EnvMapping(container, name);
        if (entry is null)
        {
            return null;
        }
        return ManifestNodeExtensions.Scalar(entry, "valueFrom", "secretKeyRef", "key");
    }
    /// <summary>
    /// Finds the mapping in the container's <c>env</c> sequence whose <c>name</c> scalar equals
    /// <paramref name="name"/> (ordinal comparison).
    /// </summary>
    /// <returns>The matching mapping, or <c>null</c> when none matches.</returns>
    /// <exception cref="InvalidOperationException">More than one env entry carries the same name.</exception>
    private static YamlMappingNode? EnvMapping(YamlMappingNode container, string name)
    {
        var envEntries = ManifestNodeExtensions.MappingSequence(container, "env");
        var matches = envEntries.Where(entry =>
        {
            var entryName = ManifestNodeExtensions.Scalar(entry, "name");
            return string.Equals(entryName, name, StringComparison.Ordinal);
        });
        return matches.SingleOrDefault();
    }
    /// <summary>
    /// Collects every inventory document whose relative path starts with <c>fc-devicemgmt/</c>
    /// (ordinal, case-sensitive prefix match), in inventory order.
    /// </summary>
    private static IReadOnlyList<ManifestDocument> FcDeviceManagementDocuments()
    {
        const string appPathPrefix = "fc-devicemgmt/";
        var scopedDocuments =
            from manifest in Inventory.Documents
            where manifest.RelativePath.StartsWith(appPathPrefix, StringComparison.Ordinal)
            select manifest;
        return scopedDocuments.ToList();
    }
 }
 internal sealed class ManifestInventory
--- a/tests/bluejay-infra-lint/OpenVoxServerDurabilityTests.cs
+++ b/tests/bluejay-infra-lint/OpenVoxServerDurabilityTests.cs
@@ -0,0 +1,99 @@
 using FluentAssertions;
 using Xunit;
 namespace BluejayInfraLint.Tests;
 /// <summary>
 /// Text-level guards for two repository artifacts: the runbook at
 /// docs/runbooks/openvoxserver-quadlet-durability.md and the smoke script at
 /// scripts/monitoring/openvox-recreate-smoke.sh. Each test reads the file from the located
 /// repository root and asserts on required, ordered, or forbidden substrings.
 /// </summary>
 [Trait("Category", "Unit")]
 public sealed class OpenVoxServerDurabilityTests
 {
    // Root must be initialised first: the two path fields below are derived from it.
    private static readonly string Root = FindRepoRoot();
    private static readonly string RunbookPath = Path.Combine(Root, "docs", "runbooks", "openvoxserver-quadlet-durability.md");
    private static readonly string SmokePath = Path.Combine(Root, "scripts", "monitoring", "openvox-recreate-smoke.sh");
    /// <summary>The runbook must contain the host-artifact and non-ArgoCD phrases plus the unit inspection command and quadlet path.</summary>
    [Fact]
    public void Runbook_DocumentsHostArtifactAndNonArgoPath()
    {
        var runbook = File.ReadAllText(RunbookPath);
        runbook.Should().Contain("noc1 host artifact");
        runbook.Should().Contain("not an ArgoCD application");
        runbook.Should().Contain("systemctl cat openvoxserver");
        runbook.Should().Contain("/etc/containers/systemd/openvoxserver.container");
    }
    /// <summary>The runbook must mention the Sprint 32 Cx-12 markers checked below.</summary>
    [Fact]
    public void Runbook_DocumentsCx12LiveApplyState()
    {
        var runbook = File.ReadAllText(RunbookPath);
        runbook.Should().Contain("Sprint 32 Cx-12");
        runbook.Should().Contain("openvoxserver-safeconfig.service");
        runbook.Should().Contain("/opt/puppet/r10k-deploy.sh");
        runbook.Should().Contain("HEAD == origin/master");
    }
    /// <summary>
    /// The smoke script must reference OPENVOX_RECREATE_SMOKE and "exit 64", and the first
    /// OPENVOX_RECREATE_SMOKE reference must precede the first "systemctl stop openvoxserver".
    /// </summary>
    [Fact]
    public void SmokeScript_IsExplicitlyOptIn()
    {
        var smoke = File.ReadAllText(SmokePath);
        smoke.Should().Contain("OPENVOX_RECREATE_SMOKE");
        smoke.Should().Contain("exit 64");
        AssertFirstOccurrencePrecedes(smoke, "OPENVOX_RECREATE_SMOKE", "systemctl stop openvoxserver");
    }
    /// <summary>
    /// The smoke script must contain the refusal message, and the first "systemctl cat openvoxserver"
    /// must precede the first "podman rm openvoxserver".
    /// </summary>
    [Fact]
    public void SmokeScript_RequiresGeneratedSystemdUnitBeforeRemovingContainer()
    {
        var smoke = File.ReadAllText(SmokePath);
        smoke.Should().Contain("systemctl cat openvoxserver");
        smoke.Should().Contain("refusing to remove a container without a verified systemd recreate path");
        AssertFirstOccurrencePrecedes(smoke, "systemctl cat openvoxserver", "podman rm openvoxserver");
    }
    /// <summary>
    /// Neither artifact may contain any of the forbidden tokens (private-key headers and the
    /// "*-latest" runner labels listed below); matching is case-insensitive.
    /// </summary>
    [Fact]
    public void Artifacts_DoNotStoreSecretsOrPaidRunnerLabels()
    {
        var forbidden = new[]
        {
            "BEGIN OPENSSH PRIVATE KEY",
            "BEGIN RSA PRIVATE KEY",
            "ubuntu-latest",
            "windows-latest",
            "macos-latest",
        };
        // One violation message per (file, token) hit so a failure lists every offender at once.
        var violations = new[] { RunbookPath, SmokePath }
            .SelectMany(path =>
            {
                var text = File.ReadAllText(path);
                return forbidden
                    .Where(token => text.Contains(token, StringComparison.OrdinalIgnoreCase))
                    .Select(token => $"{Path.GetRelativePath(Root, path)} contains forbidden token {token}");
            })
            .ToList();
        violations.Should().BeEmpty();
    }
    /// <summary>
    /// Asserts that both markers occur in <paramref name="text"/> and that the first occurrence of
    /// <paramref name="earlier"/> comes before the first occurrence of <paramref name="later"/>.
    /// </summary>
    /// <remarks>
    /// Presence is asserted explicitly because IndexOf returns -1 for a missing marker, which would
    /// otherwise surface as a confusing "expected N to be less than -1" failure.
    /// </remarks>
    private static void AssertFirstOccurrencePrecedes(string text, string earlier, string later)
    {
        text.Should().Contain(earlier);
        text.Should().Contain(later);
        var earlierIndex = text.IndexOf(earlier, StringComparison.Ordinal);
        var laterIndex = text.IndexOf(later, StringComparison.Ordinal);
        earlierIndex.Should().BeLessThan(laterIndex, "'{0}' must first appear before '{1}'", earlier, later);
    }
    /// <summary>
    /// Walks upward from the test binary's base directory until it finds a directory that has an
    /// "apps" folder, a "scripts" folder, and a README.md file.
    /// </summary>
    /// <exception cref="DirectoryNotFoundException">No ancestor directory matches.</exception>
    private static string FindRepoRoot()
    {
        var current = new DirectoryInfo(AppContext.BaseDirectory);
        while (current is not null)
        {
            if (Directory.Exists(Path.Combine(current.FullName, "apps"))
                && Directory.Exists(Path.Combine(current.FullName, "scripts"))
                && File.Exists(Path.Combine(current.FullName, "README.md")))
            {
                return current.FullName;
            }
            current = current.Parent;
        }
        throw new DirectoryNotFoundException("Could not find bluejay-infra root.");
    }
 }
--- a/tests/bluejay-infra-lint/PiSignagePlayerArtifactTests.cs
+++ b/tests/bluejay-infra-lint/PiSignagePlayerArtifactTests.cs
@@ -0,0 +1,269 @@
 using System.Text.Json;
 using FluentAssertions;
 using Xunit;
 namespace BluejayInfraLint.Tests;
 /// <summary>
 /// Text-level guards for the files under apps/fc-signage-pi-player: each test reads one or two
 /// artifacts relative to that directory and asserts that specific substrings (or JSON values)
 /// are present.
 /// </summary>
 [Trait("Category", "Unit")]
 public sealed class PiSignagePlayerArtifactTests
 {
    // Root must be initialised first: AppRoot is derived from it.
    private static readonly string Root = FindRepoRoot();
    private static readonly string AppRoot = Path.Combine(Root, "apps", "fc-signage-pi-player");
    /// <summary>Forward-slash paths, relative to the app directory, that must exist.</summary>
    public static TheoryData<string> RequiredArtifacts => new()
    {
        "README.md",
        "systemd/flowercore-signage-player-pi.service",
        "systemd/flowercore-signage-player-pi-hdmi.service",
        "systemd/flowercore-signage-bootstrap.service",
        "systemd/flowercore-signage-renew.service",
        "systemd/flowercore-signage-renew.timer",
        "systemd/flowercore-signage-detect-display.service",
        "systemd/flowercore-signage-detect-display.timer",
        "systemd/99-flowercore-signage-hdmi.rules",
        "chromium-policies/flowercore-signage.json",
        "scripts/flowercore-signage-launch.sh",
        "scripts/flowercore-signage-prelaunch.sh",
        "scripts/flowercore-signage-bootstrap.sh",
        "scripts/flowercore-signage-renew-cert.sh",
        "scripts/flowercore-signage-hdmi-respond.sh",
        "scripts/fc-signage-detect-display",
    };
    /// <summary>Every listed artifact must exist on disk.</summary>
    [Theory]
    [MemberData(nameof(RequiredArtifacts))]
    public void RequiredArtifacts_ArePresent(string relativePath)
    {
        // Resolve through the same helper Read uses so both agree on separator handling.
        File.Exists(ArtifactPath(relativePath))
            .Should().BeTrue("required artifact {0} must exist under apps/fc-signage-pi-player", relativePath);
    }
    /// <summary>The player unit must carry the restart, start-limit, and memory settings below.</summary>
    [Fact]
    public void PlayerService_UsesExpectedRestartAndMemoryGuards()
    {
        var unit = Read("systemd/flowercore-signage-player-pi.service");
        unit.Should().Contain("Restart=always");
        unit.Should().Contain("RestartSec=10s");
        unit.Should().Contain("StartLimitBurst=5");
        unit.Should().Contain("StartLimitIntervalSec=300s");
        unit.Should().Contain("MemoryMax=2G");
    }
    /// <summary>The player unit must be conditioned on the node file and client.p12 and run the prelaunch script.</summary>
    [Fact]
    public void PlayerService_IsGatedByNodeIdentityAndMtlsCertificate()
    {
        var unit = Read("systemd/flowercore-signage-player-pi.service");
        unit.Should().Contain("ConditionPathExists=/etc/flowercore/signage-node.json");
        unit.Should().Contain("ConditionPathExists=/etc/fc-signage-player/client.p12");
        unit.Should().Contain("ExecStartPre=/usr/local/bin/flowercore-signage-prelaunch.sh");
    }
    /// <summary>The launch script must reference both the embed and bare player URLs and the divergence log.</summary>
    [Fact]
    public void LaunchScript_TriesEmbedThenFallsBackToBarePlayerRoute()
    {
        var script = Read("scripts/flowercore-signage-launch.sh");
        script.Should().Contain("/player/${NODE_ID}/embed?token=${CERT_THUMB}");
        script.Should().Contain("url-divergence.log");
        script.Should().Contain("/player/${NODE_ID}?token=${CERT_THUMB}");
    }
    /// <summary>The launch script must pass the four Chromium flags below.</summary>
    [Fact]
    public void LaunchScript_DisablesChromiumPromptsAndRuntimeUpdates()
    {
        var script = Read("scripts/flowercore-signage-launch.sh");
        script.Should().Contain("--noerrdialogs");
        script.Should().Contain("--disable-infobars");
        script.Should().Contain("--password-store=basic");
        script.Should().Contain("--check-for-update-interval=2592000");
    }
    /// <summary>The prelaunch script must loop over the three required files, contain "exit 1", and use the 7*24*3600 checkend window.</summary>
    [Fact]
    public void PrelaunchScript_AbortsWhenRequiredFilesAreMissing()
    {
        var script = Read("scripts/flowercore-signage-prelaunch.sh");
        script.Should().Contain("for f in /etc/flowercore/signage-node.json /etc/fc-signage-player/client.p12 /etc/fc-signage-player/client.p12.pass");
        script.Should().Contain("exit 1");
        script.Should().Contain("-checkend $((7*24*3600))");
    }
    /// <summary>The bootstrap script must contain the already-enrolled message, "exit 0", and the .enrolledAt key.</summary>
    [Fact]
    public void BootstrapScript_IsIdempotentWhenAlreadyEnrolled()
    {
        var script = Read("scripts/flowercore-signage-bootstrap.sh");
        script.Should().Contain("already enrolled");
        script.Should().Contain("exit 0");
        script.Should().Contain(".enrolledAt");
    }
    /// <summary>The bootstrap script must reference uuidgen, a 16-character cut, and machineId.</summary>
    [Fact]
    public void BootstrapScript_GeneratesStableMachineIdFromUuid()
    {
        var script = Read("scripts/flowercore-signage-bootstrap.sh");
        script.Should().Contain("uuidgen");
        script.Should().Contain("cut -c1-16");
        script.Should().Contain("machineId");
    }
    /// <summary>The bootstrap script must contain the two-attempt loop, its log message, and a 5-second sleep.</summary>
    [Fact]
    public void BootstrapScript_RetriesRegisterOnceForFirstCallRace()
    {
        var script = Read("scripts/flowercore-signage-bootstrap.sh");
        script.Should().Contain("for attempt in 1 2");
        script.Should().Contain("register attempt $attempt returned");
        script.Should().Contain("sleep 5");
    }
    /// <summary>The bootstrap script must reference the setup-code markers, "+ 1800", and a 15-second sleep.</summary>
    [Fact]
    public void BootstrapScript_SupportsSetupCodeAndApprovalPollingBudget()
    {
        var script = Read("scripts/flowercore-signage-bootstrap.sh");
        script.Should().Contain("signage-setup-code");
        script.Should().Contain("approve-via-setup-code");
        script.Should().Contain("+ 1800");
        script.Should().Contain("sleep 15");
    }
    /// <summary>The bootstrap script must use the CSR subject string below.</summary>
    [Fact]
    public void BootstrapScript_CsrSubjectIdentifiesPiPlayer()
    {
        var script = Read("scripts/flowercore-signage-bootstrap.sh");
        script.Should().Contain("/CN=${NODE_ID}/O=FlowerCore/OU=SignagePlayer-Pi");
    }
    /// <summary>The bootstrap script must export a PKCS#12 bundle, reference client.p12.pass, and chmod to 0600 and 0640.</summary>
    [Fact]
    public void BootstrapScript_PersistsCertificateAsP12WithRestrictivePermissions()
    {
        var script = Read("scripts/flowercore-signage-bootstrap.sh");
        script.Should().Contain("openssl pkcs12 -export");
        script.Should().Contain("client.p12.pass");
        script.Should().Contain("chmod 0600");
        script.Should().Contain("chmod 0640");
    }
    /// <summary>The renew script must use the 30*24*3600 checkend window, contain "exit 0", and reference /renew.</summary>
    [Fact]
    public void RenewScript_OnlyRunsWhenCertHasLessThanThirtyDays()
    {
        var script = Read("scripts/flowercore-signage-renew-cert.sh");
        script.Should().Contain("-checkend $((30*24*3600))");
        script.Should().Contain("exit 0");
        script.Should().Contain("/renew");
    }
    /// <summary>The renew script must stage *.new files and move them over the live key and p12.</summary>
    [Fact]
    public void RenewScript_AtomicallySwapsNewCertificateFiles()
    {
        var script = Read("scripts/flowercore-signage-renew-cert.sh");
        script.Should().Contain("client.key.new");
        script.Should().Contain("mv \"$CERT_DIR/client.key.new\" \"$CERT_DIR/client.key\"");
        script.Should().Contain("mv \"$CERT_DIR/client.p12.new\" \"$CERT_DIR/client.p12\"");
    }
    /// <summary>
    /// The udev rule must match the HDMI kernel pattern and start the hdmi service; the responder
    /// script must sleep 2, start display detection, and restart the player service.
    /// </summary>
    [Fact]
    public void HdmiRule_RestartsPlayerAndRunsCapabilityDetection()
    {
        var rule = Read("systemd/99-flowercore-signage-hdmi.rules");
        var responder = Read("scripts/flowercore-signage-hdmi-respond.sh");
        rule.Should().Contain("KERNEL==\"card?-HDMI-A-?\"");
        rule.Should().Contain("start flowercore-signage-player-pi-hdmi.service");
        responder.Should().Contain("sleep 2");
        responder.Should().Contain("start flowercore-signage-detect-display.service");
        responder.Should().Contain("restart flowercore-signage-player-pi.service");
    }
    /// <summary>The detect-display service must exec the detect binary; its timer must carry the three settings below.</summary>
    [Fact]
    public void DetectDisplayServiceAndTimer_RunAtBootAndDaily()
    {
        var service = Read("systemd/flowercore-signage-detect-display.service");
        var timer = Read("systemd/flowercore-signage-detect-display.timer");
        service.Should().Contain("ExecStart=/usr/local/bin/fc-signage-detect-display");
        timer.Should().Contain("OnBootSec=30s");
        timer.Should().Contain("OnCalendar=daily");
        timer.Should().Contain("RandomizedDelaySec=1h");
    }
    /// <summary>The detect-display script must contain the disconnected-profile marker and message.</summary>
    [Fact]
    public void DetectDisplayScript_EmitsDisconnectedProfileWhenNoHdmiIsPresent()
    {
        var script = Read("scripts/fc-signage-detect-display");
        script.Should().Contain("displayConnected: false");
        script.Should().Contain("No HDMI display detected");
    }
    /// <summary>The detect-display script must reference edid-decode, the HDR block pattern, maxResolution, and hasAudioOutput.</summary>
    [Fact]
    public void DetectDisplayScript_ParsesEdidForHdrResolutionAndAudio()
    {
        var script = Read("scripts/fc-signage-detect-display");
        script.Should().Contain("edid-decode");
        script.Should().Contain("HDR (Static|Dynamic) Metadata Block");
        script.Should().Contain("maxResolution");
        script.Should().Contain("hasAudioOutput");
    }
    /// <summary>The detect-display script must reference both capability endpoints and the no-endpoint message.</summary>
    [Fact]
    public void DetectDisplayScript_TriesBothForwardCompatibleCapabilityEndpoints()
    {
        var script = Read("scripts/fc-signage-detect-display");
        script.Should().Contain("/api/v1/nodes/${NODE_ID}/capabilities");
        script.Should().Contain("/api/v1/displays/${NODE_ID}/capability-profile");
        script.Should().Contain("no endpoint accepted the profile");
    }
    /// <summary>
    /// The Chromium policy file must parse as JSON, set the three listed booleans to false, and
    /// have "*" as the first ExtensionInstallBlocklist entry.
    /// </summary>
    [Fact]
    public void ChromiumPolicy_IsValidJsonAndDisablesCredentialPrompts()
    {
        using var doc = JsonDocument.Parse(Read("chromium-policies/flowercore-signage.json"));
        var root = doc.RootElement;
        root.GetProperty("AutofillAddressEnabled").GetBoolean().Should().BeFalse();
        root.GetProperty("AutofillCreditCardEnabled").GetBoolean().Should().BeFalse();
        root.GetProperty("PasswordManagerEnabled").GetBoolean().Should().BeFalse();
        root.GetProperty("ExtensionInstallBlocklist")[0].GetString().Should().Be("*");
    }
    /// <summary>The renewal timer must carry the three settings below.</summary>
    [Fact]
    public void RenewalTimer_UsesDailyCadenceWithTwoHourJitter()
    {
        var timer = Read("systemd/flowercore-signage-renew.timer");
        timer.Should().Contain("OnCalendar=daily");
        timer.Should().Contain("RandomizedDelaySec=2h");
        timer.Should().Contain("Persistent=true");
    }
    /// <summary>
    /// Maps a forward-slash path relative to the app directory onto an absolute path that uses the
    /// platform's directory separator.
    /// </summary>
    private static string ArtifactPath(string relativePath)
        => Path.Combine(AppRoot, relativePath.Replace('/', Path.DirectorySeparatorChar));
    /// <summary>Reads the full text of an artifact addressed relative to the app directory.</summary>
    private static string Read(string relativePath)
        => File.ReadAllText(ArtifactPath(relativePath));
    /// <summary>
    /// Walks upward from the test binary's base directory until it finds a directory that has an
    /// "apps" folder and a README.md file.
    /// </summary>
    /// <exception cref="DirectoryNotFoundException">No ancestor directory matches.</exception>
    private static string FindRepoRoot()
    {
        var current = new DirectoryInfo(AppContext.BaseDirectory);
        while (current is not null)
        {
            if (Directory.Exists(Path.Combine(current.FullName, "apps"))
                && File.Exists(Path.Combine(current.FullName, "README.md")))
            {
                return current.FullName;
            }
            current = current.Parent;
        }
        throw new DirectoryNotFoundException("Could not find bluejay-infra root.");
    }
 }
--- a/tests/bluejay-infra-lint/conftest.dev/02_public_method_allowlist.rego
+++ b/tests/bluejay-infra-lint/conftest.dev/02_public_method_allowlist.rego
@@ -1,6 +1,6 @@
 package bluejayinfra.public_method_allowlist
-public_hosts := {"dist.flowercore.io", "dns.iamworkin.lan"}
+public_hosts := {"brochure.flowercore.io", "dist.flowercore.io", "dns.iamworkin.lan"}
 deny[msg] {
  input.kind == "IngressRoute"
		`@@ -0,0 +1,2 @@`
							`# Settle DRM for 2s before restarting Chromium, then redeclare capabilities.`
							`SUBSYSTEM=="drm", KERNEL=="card?-HDMI-A-?", ACTION=="change", RUN+="/usr/bin/systemctl start flowercore-signage-player-pi-hdmi.service"`