Compare commits
110 Commits
codex/sign
...
cb4ea13e7a
| Author | SHA1 | Date | |
|---|---|---|---|
| cb4ea13e7a | |||
|
|
a3cd67d6bb | ||
|
|
81a3ddac4c | ||
| 300f8ad546 | |||
| fe38c2641f | |||
|
|
3b40dfb185 | ||
| 103878671c | |||
|
|
36039c1335 | ||
| 2a66109f13 | |||
|
|
933fea89d1 | ||
|
|
13f9bb7710 | ||
|
|
9a58fd2af6 | ||
|
|
404d884863 | ||
| f4bd90f805 | |||
|
|
67d67ab73d | ||
|
|
f7d41cdc60 | ||
|
|
2c0afc28e4 | ||
|
|
ba5f5dd0fb | ||
|
|
dc699da7b3 | ||
|
|
1e8bf54c6e | ||
|
|
e2e93d482c | ||
| 4319cc2b51 | |||
|
|
2bf339ce51 | ||
|
|
5bdedfc5ae | ||
|
|
0307ae16ae | ||
|
|
6c18f69cf2 | ||
|
|
47e2256556 | ||
|
|
9d77f8ba0e | ||
|
|
2f4be19c85 | ||
|
|
2a62c40990 | ||
|
|
7be98e5efc | ||
|
|
a65b356c9d | ||
|
|
08c17ef1b4 | ||
|
|
06f2f002b7 | ||
|
|
7ac4a8b4b7 | ||
|
|
90f2a86819 | ||
|
|
cbdefb2b23 | ||
|
|
1c36fe3a0a | ||
|
|
2b420ce8a4 | ||
|
|
5cbc1a06b1 | ||
|
|
9e7ee39b3a | ||
|
|
ae030a5f33 | ||
| bc8c35896f | |||
|
|
2cc91b6df0 | ||
| 0d2090fe81 | |||
|
|
bc3548e715 | ||
| 74333cc26b | |||
|
|
7310fb88c2 | ||
| 148bc87b9a | |||
|
|
2a1e842100 | ||
| bc28430d24 | |||
|
|
cc92272217 | ||
| d6f4468a9c | |||
|
|
2f796a2ebd | ||
| 1f1f6823db | |||
|
|
b92f74b63a | ||
|
|
cb7f7dbc4d | ||
|
|
03126d5584 | ||
|
|
495e884c41 | ||
|
|
65aa1e6104 | ||
|
|
7f2a3b76b4 | ||
| ea73f00461 | |||
|
|
25ace30a03 | ||
|
|
ca574c2280 | ||
|
|
09387f90e1 | ||
|
|
e641ceab48 | ||
|
|
c263426ea5 | ||
|
|
bacac067cf | ||
| 914fed08d8 | |||
|
|
200aeab032 | ||
|
|
8182616d4c | ||
|
|
f0862ac03c | ||
|
|
46c392605e | ||
| 89b147bbdd | |||
| d7238a5e3b | |||
| fc444a02a1 | |||
| 83d4883d55 | |||
| f8fe3b2688 | |||
| f2ab892ebc | |||
| fef68a9560 | |||
|
|
6fe77225ae | ||
| 634b9c4169 | |||
| b8c7e59005 | |||
| 65ac8d6f01 | |||
| 35844e0dbd | |||
| b1e307151e | |||
| 12b07219c7 | |||
| 9fd32c4415 | |||
| ad670fb344 | |||
|
|
6f6ca50987 | ||
|
|
c7be58c1f7 | ||
|
|
a1f5a393cd | ||
|
|
710340d8be | ||
|
|
7d2daaa4f8 | ||
|
|
e50e103ba0 | ||
|
|
e8094eb0bd | ||
| 8d87d9172c | |||
|
|
cfd9743afa | ||
|
|
5029e209cd | ||
|
|
f298339152 | ||
|
|
6e7d88db49 | ||
|
|
5ae50bd491 | ||
|
|
653d4472f5 | ||
|
|
eb8693e1ce | ||
|
|
667777a653 | ||
|
|
84c9feb893 | ||
|
|
427dbfcef2 | ||
|
|
b651a4e2d0 | ||
|
|
b998f50f48 | ||
|
|
8fd9ae1cd3 |
4
.gitattributes
vendored
Normal file
4
.gitattributes
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
/.gitattributes text eol=lf
|
||||
*.yaml text eol=lf
|
||||
*.yml text eol=lf
|
||||
*.sh text eol=lf
|
||||
11
README.md
11
README.md
@@ -116,8 +116,19 @@ dotnet test tests/bluejay-infra-lint/BluejayInfraLint.Tests.csproj -c Release
|
||||
|
||||
That test project sweeps `bluejay-infra/apps/**` plus the canonical sibling `FlowerCore.*/k8s` manifests that share the same workspace. Matching `conftest.dev` policy files live under `tests/bluejay-infra-lint/conftest.dev/` for environments that also have `conftest` or `opa`.
|
||||
|
||||
## Non-K8s Pi Artifacts
|
||||
|
||||
Some `apps/*` directories are deployment artifact bundles consumed by Puppet
|
||||
instead of Kubernetes workloads. `apps/fc-signage-pi-player/` carries the
|
||||
Chromium signage Pi player, `apps/fc-divoom-dm-pi-device/` carries the additive
|
||||
edge2 Divoom-as-DeviceManagement-device profile/Hiera contract, and
|
||||
`apps/fc-divoom-tv-pi/` carries the Divoom TV Pi HDMI systemd/Puppet shape.
|
||||
These bundles intentionally avoid Deployment, IngressRoute, Certificate, and
|
||||
OnePasswordItem resources.
|
||||
|
||||
## References
|
||||
|
||||
- OpenVox noc1 durability runbook: `docs/runbooks/openvoxserver-quadlet-durability.md`
|
||||
- Cert-manager recovery playbook: `FlowerCore.Notes/memory/project_cert_manager_recovery_2026_04_22.md`
|
||||
- Why pfSense DNS is required: `FlowerCore.Notes/memory/feedback_pfsense_dns_required_for_acme.md`
|
||||
- Public DNS operator host: `https://dns.iamworkin.lan`
|
||||
|
||||
448
apps/authentik/authentik.yaml
Normal file
448
apps/authentik/authentik.yaml
Normal file
@@ -0,0 +1,448 @@
|
||||
# Authentik OIDC backend
|
||||
# ArgoCD-managed. BlueJay Lab.
|
||||
#
|
||||
# Stack:
|
||||
# - PostgreSQL 16 StatefulSet (single replica, Longhorn RWO 5Gi)
|
||||
# - Redis 7 Deployment (no persistence — session/cache only)
|
||||
# - Authentik server + worker Deployments (image ghcr.io/goauthentik/server:2024.12.3)
|
||||
# - Media PVC shared between server + worker (Longhorn RWO 2Gi)
|
||||
# - Certificate via step-ca-acme ClusterIssuer
|
||||
# - Traefik IngressRoute at id.iamworkin.lan
|
||||
#
|
||||
# Secrets come from 1Password item "authentik-credentials" (IAmWorkin vault, id y6i74ch22q5wvm7znquq4nhhcu)
|
||||
# via the OnePasswordItem CRD, materialized into k8s Secret authentik/authentik-credentials.
|
||||
#
|
||||
# Why the discovery URL is /application/o/pimanager/ : Authentik issues per-application OIDC providers.
|
||||
# The pimanager OIDC application/provider is created after the cluster pods are healthy (manual or
|
||||
# via API once the bootstrap token is available — see Notes substrate).
|
||||
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: authentik
|
||||
labels:
|
||||
app.kubernetes.io/part-of: bluejay-infra
|
||||
|
||||
---
|
||||
# 1Password operator pulls the authentik-credentials item into a k8s Secret of the same name.
|
||||
# Field labels in 1P become Secret keys: AUTHENTIK_SECRET_KEY, POSTGRES_PASSWORD, REDIS_PASSWORD,
|
||||
# BOOTSTRAP_ADMIN_PASSWORD, BOOTSTRAP_ADMIN_TOKEN, BOOTSTRAP_ADMIN_EMAIL.
|
||||
apiVersion: onepassword.com/v1
|
||||
kind: OnePasswordItem
|
||||
metadata:
|
||||
name: authentik-credentials
|
||||
namespace: authentik
|
||||
spec:
|
||||
itemPath: "vaults/IAmWorkin/items/authentik-credentials"
|
||||
|
||||
---
|
||||
# Shared media volume for server + worker pods.
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: authentik-media
|
||||
namespace: authentik
|
||||
spec:
|
||||
storageClassName: longhorn
|
||||
accessModes: [ReadWriteOnce]
|
||||
resources:
|
||||
requests:
|
||||
storage: 2Gi
|
||||
|
||||
---
|
||||
# PostgreSQL 16 StatefulSet — Authentik's primary store.
|
||||
apiVersion: apps/v1
|
||||
kind: StatefulSet
|
||||
metadata:
|
||||
name: authentik-postgres
|
||||
namespace: authentik
|
||||
labels:
|
||||
app: authentik-postgres
|
||||
argocd.argoproj.io/instance: infra-authentik
|
||||
spec:
|
||||
persistentVolumeClaimRetentionPolicy:
|
||||
whenDeleted: Retain
|
||||
whenScaled: Retain
|
||||
podManagementPolicy: OrderedReady
|
||||
serviceName: authentik-postgres
|
||||
replicas: 1
|
||||
revisionHistoryLimit: 10
|
||||
selector:
|
||||
matchLabels:
|
||||
app: authentik-postgres
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: authentik-postgres
|
||||
spec:
|
||||
containers:
|
||||
- name: postgres
|
||||
image: postgres:16-alpine
|
||||
ports:
|
||||
- containerPort: 5432
|
||||
name: postgres
|
||||
env:
|
||||
- name: POSTGRES_USER
|
||||
value: authentik
|
||||
- name: POSTGRES_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: authentik-credentials
|
||||
key: POSTGRES_PASSWORD
|
||||
- name: POSTGRES_DB
|
||||
value: authentik
|
||||
- name: POSTGRES_INITDB_ARGS
|
||||
value: "--encoding=UTF-8 --lc-collate=C --lc-ctype=C"
|
||||
- name: PGDATA
|
||||
value: /var/lib/postgresql/data/pgdata
|
||||
readinessProbe:
|
||||
exec:
|
||||
command: ["pg_isready", "-U", "authentik"]
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
livenessProbe:
|
||||
exec:
|
||||
command: ["pg_isready", "-U", "authentik"]
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 30
|
||||
resources:
|
||||
requests: { cpu: 100m, memory: 256Mi }
|
||||
limits: { cpu: 1000m, memory: 1Gi }
|
||||
volumeMounts:
|
||||
- name: pgdata
|
||||
mountPath: /var/lib/postgresql/data
|
||||
volumeClaimTemplates:
|
||||
- metadata:
|
||||
name: pgdata
|
||||
spec:
|
||||
storageClassName: longhorn
|
||||
accessModes: [ReadWriteOnce]
|
||||
volumeMode: Filesystem
|
||||
resources:
|
||||
requests:
|
||||
storage: 5Gi
|
||||
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: authentik-postgres
|
||||
namespace: authentik
|
||||
spec:
|
||||
clusterIP: None
|
||||
selector:
|
||||
app: authentik-postgres
|
||||
ports:
|
||||
- name: postgres
|
||||
port: 5432
|
||||
targetPort: 5432
|
||||
|
||||
---
|
||||
# Redis 7 — session storage + Celery broker. No persistence needed (cache).
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: authentik-redis
|
||||
namespace: authentik
|
||||
labels:
|
||||
app: authentik-redis
|
||||
argocd.argoproj.io/instance: infra-authentik
|
||||
spec:
|
||||
replicas: 1
|
||||
strategy:
|
||||
type: Recreate
|
||||
selector:
|
||||
matchLabels:
|
||||
app: authentik-redis
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: authentik-redis
|
||||
spec:
|
||||
containers:
|
||||
- name: redis
|
||||
image: redis:7-alpine
|
||||
args:
|
||||
- "--save"
|
||||
- ""
|
||||
- "--appendonly"
|
||||
- "no"
|
||||
- "--requirepass"
|
||||
- "$(REDIS_PASSWORD)"
|
||||
env:
|
||||
- name: REDIS_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: authentik-credentials
|
||||
key: REDIS_PASSWORD
|
||||
ports:
|
||||
- containerPort: 6379
|
||||
name: redis
|
||||
readinessProbe:
|
||||
tcpSocket: { port: 6379 }
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
livenessProbe:
|
||||
tcpSocket: { port: 6379 }
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 30
|
||||
resources:
|
||||
requests: { cpu: 50m, memory: 64Mi }
|
||||
limits: { cpu: 500m, memory: 256Mi }
|
||||
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: authentik-redis
|
||||
namespace: authentik
|
||||
spec:
|
||||
selector:
|
||||
app: authentik-redis
|
||||
ports:
|
||||
- name: redis
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
|
||||
---
|
||||
# Authentik server Deployment — HTTP frontend on :9000.
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: authentik-server
|
||||
namespace: authentik
|
||||
labels:
|
||||
app: authentik-server
|
||||
argocd.argoproj.io/instance: infra-authentik
|
||||
spec:
|
||||
replicas: 1
|
||||
strategy:
|
||||
type: Recreate # shares /media RWO PVC with worker
|
||||
selector:
|
||||
matchLabels:
|
||||
app: authentik-server
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: authentik-server
|
||||
spec:
|
||||
securityContext:
|
||||
# Authentik image runs as uid 1000 "authentik" but the Longhorn PVC mounts
|
||||
# root:root by default. fsGroup makes Kubernetes recursively chgrp + chmod g+rwx the volume so the
|
||||
# non-root container can mkdir /media/public during the tenant_files migration.
|
||||
fsGroup: 1000
|
||||
containers:
|
||||
- name: server
|
||||
image: ghcr.io/goauthentik/server:2024.12.3
|
||||
args: ["server"]
|
||||
ports:
|
||||
- containerPort: 9000
|
||||
name: http
|
||||
- containerPort: 9443
|
||||
name: https
|
||||
env:
|
||||
- name: AUTHENTIK_SECRET_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: authentik-credentials
|
||||
key: AUTHENTIK_SECRET_KEY
|
||||
- name: AUTHENTIK_REDIS__HOST
|
||||
value: authentik-redis
|
||||
- name: AUTHENTIK_REDIS__PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: authentik-credentials
|
||||
key: REDIS_PASSWORD
|
||||
- name: AUTHENTIK_POSTGRESQL__HOST
|
||||
value: authentik-postgres
|
||||
- name: AUTHENTIK_POSTGRESQL__NAME
|
||||
value: authentik
|
||||
- name: AUTHENTIK_POSTGRESQL__USER
|
||||
value: authentik
|
||||
- name: AUTHENTIK_POSTGRESQL__PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: authentik-credentials
|
||||
key: POSTGRES_PASSWORD
|
||||
- name: AUTHENTIK_BOOTSTRAP_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: authentik-credentials
|
||||
key: BOOTSTRAP_ADMIN_PASSWORD
|
||||
- name: AUTHENTIK_BOOTSTRAP_TOKEN
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: authentik-credentials
|
||||
key: BOOTSTRAP_ADMIN_TOKEN
|
||||
- name: AUTHENTIK_BOOTSTRAP_EMAIL
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: authentik-credentials
|
||||
key: BOOTSTRAP_ADMIN_EMAIL
|
||||
- name: AUTHENTIK_DISABLE_UPDATE_CHECK
|
||||
value: "true"
|
||||
- name: AUTHENTIK_ERROR_REPORTING__ENABLED
|
||||
value: "false"
|
||||
- name: AUTHENTIK_LOG_LEVEL
|
||||
value: info
|
||||
# First-boot Authentik can take 3+ min in the migration phase
|
||||
# (waiting on DB lock while worker also runs migrations). Initial
|
||||
# delays are generous so kubelet doesn't kill the pod mid-migration;
|
||||
# periodSeconds keeps post-startup probing responsive.
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /-/health/ready/
|
||||
port: 9000
|
||||
initialDelaySeconds: 60
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 5
|
||||
failureThreshold: 12
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /-/health/live/
|
||||
port: 9000
|
||||
initialDelaySeconds: 300
|
||||
periodSeconds: 30
|
||||
timeoutSeconds: 10
|
||||
failureThreshold: 3
|
||||
startupProbe:
|
||||
httpGet:
|
||||
path: /-/health/live/
|
||||
port: 9000
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 15
|
||||
timeoutSeconds: 10
|
||||
failureThreshold: 40 # 30s + 40*15s = 10.5 min budget
|
||||
resources:
|
||||
requests: { cpu: 150m, memory: 512Mi }
|
||||
limits: { cpu: 1500m, memory: 1Gi }
|
||||
volumeMounts:
|
||||
- name: media
|
||||
mountPath: /media
|
||||
volumes:
|
||||
- name: media
|
||||
persistentVolumeClaim:
|
||||
claimName: authentik-media
|
||||
|
||||
---
|
||||
# Authentik worker Deployment — runs Celery background tasks.
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: authentik-worker
|
||||
namespace: authentik
|
||||
labels:
|
||||
app: authentik-worker
|
||||
argocd.argoproj.io/instance: infra-authentik
|
||||
spec:
|
||||
replicas: 1
|
||||
strategy:
|
||||
type: Recreate # shares /media RWO PVC with server
|
||||
selector:
|
||||
matchLabels:
|
||||
app: authentik-worker
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: authentik-worker
|
||||
spec:
|
||||
securityContext:
|
||||
# Same as server pod — non-root uid 1000 needs PVC group write.
|
||||
fsGroup: 1000
|
||||
containers:
|
||||
- name: worker
|
||||
image: ghcr.io/goauthentik/server:2024.12.3
|
||||
args: ["worker"]
|
||||
env:
|
||||
- name: AUTHENTIK_SECRET_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: authentik-credentials
|
||||
key: AUTHENTIK_SECRET_KEY
|
||||
- name: AUTHENTIK_REDIS__HOST
|
||||
value: authentik-redis
|
||||
- name: AUTHENTIK_REDIS__PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: authentik-credentials
|
||||
key: REDIS_PASSWORD
|
||||
- name: AUTHENTIK_POSTGRESQL__HOST
|
||||
value: authentik-postgres
|
||||
- name: AUTHENTIK_POSTGRESQL__NAME
|
||||
value: authentik
|
||||
- name: AUTHENTIK_POSTGRESQL__USER
|
||||
value: authentik
|
||||
- name: AUTHENTIK_POSTGRESQL__PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: authentik-credentials
|
||||
key: POSTGRES_PASSWORD
|
||||
- name: AUTHENTIK_DISABLE_UPDATE_CHECK
|
||||
value: "true"
|
||||
- name: AUTHENTIK_ERROR_REPORTING__ENABLED
|
||||
value: "false"
|
||||
- name: AUTHENTIK_LOG_LEVEL
|
||||
value: info
|
||||
resources:
|
||||
requests: { cpu: 100m, memory: 256Mi }
|
||||
limits: { cpu: 1000m, memory: 768Mi }
|
||||
volumeMounts:
|
||||
- name: media
|
||||
mountPath: /media
|
||||
volumes:
|
||||
- name: media
|
||||
persistentVolumeClaim:
|
||||
claimName: authentik-media
|
||||
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: authentik-server
|
||||
namespace: authentik
|
||||
spec:
|
||||
selector:
|
||||
app: authentik-server
|
||||
ports:
|
||||
- name: http
|
||||
port: 9000
|
||||
targetPort: 9000
|
||||
- name: https
|
||||
port: 9443
|
||||
targetPort: 9443
|
||||
|
||||
---
|
||||
# step-ca leaf certificate for id.iamworkin.lan.
|
||||
# step-ca container resolver uses pfSense Unbound, so the public A record for id.iamworkin.lan
|
||||
# MUST exist before this Certificate is applied (cert-manager HTTP-01 will silently back off for 2h
|
||||
# otherwise). Added 2026-05-25 via scripts/pfsense-add-id-host.py.
|
||||
apiVersion: cert-manager.io/v1
|
||||
kind: Certificate
|
||||
metadata:
|
||||
name: authentik-tls
|
||||
namespace: authentik
|
||||
spec:
|
||||
secretName: authentik-tls
|
||||
dnsNames:
|
||||
- id.iamworkin.lan
|
||||
issuerRef:
|
||||
name: step-ca-acme
|
||||
kind: ClusterIssuer
|
||||
|
||||
---
|
||||
apiVersion: traefik.io/v1alpha1
|
||||
kind: IngressRoute
|
||||
metadata:
|
||||
name: authentik
|
||||
namespace: authentik
|
||||
spec:
|
||||
entryPoints: [websecure]
|
||||
routes:
|
||||
- match: Host(`id.iamworkin.lan`)
|
||||
kind: Rule
|
||||
services:
|
||||
- name: authentik-server
|
||||
port: 9000
|
||||
tls:
|
||||
secretName: authentik-tls
|
||||
169
apps/fc-aistation/fc-aistation.yaml
Normal file
169
apps/fc-aistation/fc-aistation.yaml
Normal file
@@ -0,0 +1,169 @@
|
||||
# FlowerCore.AiStation.Web GitOps adoption manifest.
|
||||
#
|
||||
# Authored from the already-live fc-aistation resources on 2026-06-04.
|
||||
# Keep the live image tag, Service ClusterIP, and PVC volumeName unchanged so
|
||||
# ArgoCD adopts in place instead of replacing the workload or data volume.
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: aistation-web-data
|
||||
namespace: fc-aistation
|
||||
labels:
|
||||
app.kubernetes.io/name: aistation-web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
argocd.argoproj.io/instance: infra-fc-aistation
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
resources:
|
||||
requests:
|
||||
storage: 1Gi
|
||||
storageClassName: longhorn
|
||||
volumeMode: Filesystem
|
||||
volumeName: pvc-27448d6f-6e66-42a7-a293-73dd8bbd6b3e
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: aistation-web
|
||||
namespace: fc-aistation
|
||||
labels:
|
||||
app.kubernetes.io/name: aistation-web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
argocd.argoproj.io/instance: infra-fc-aistation
|
||||
spec:
|
||||
progressDeadlineSeconds: 600
|
||||
replicas: 1
|
||||
revisionHistoryLimit: 3
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: aistation-web
|
||||
strategy:
|
||||
type: Recreate
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
prometheus.io/path: /metrics/prometheus
|
||||
prometheus.io/port: "5000"
|
||||
prometheus.io/scrape: "true"
|
||||
labels:
|
||||
app.kubernetes.io/name: aistation-web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: aistation-web-config
|
||||
image: localhost/fc-aistation-web:v20260602-aistation-owned-deploy-fix2
|
||||
imagePullPolicy: Never
|
||||
livenessProbe:
|
||||
failureThreshold: 3
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: 5000
|
||||
scheme: HTTP
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 30
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 5
|
||||
name: aistation-web
|
||||
ports:
|
||||
- containerPort: 5000
|
||||
name: http
|
||||
protocol: TCP
|
||||
readinessProbe:
|
||||
failureThreshold: 6
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: 5000
|
||||
scheme: HTTP
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 5
|
||||
resources: {}
|
||||
terminationMessagePath: /dev/termination-log
|
||||
terminationMessagePolicy: File
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: data
|
||||
dnsPolicy: ClusterFirst
|
||||
restartPolicy: Always
|
||||
schedulerName: default-scheduler
|
||||
securityContext: {}
|
||||
terminationGracePeriodSeconds: 30
|
||||
volumes:
|
||||
- name: data
|
||||
persistentVolumeClaim:
|
||||
claimName: aistation-web-data
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: aistation-web
|
||||
namespace: fc-aistation
|
||||
labels:
|
||||
app.kubernetes.io/name: aistation-web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
argocd.argoproj.io/instance: infra-fc-aistation
|
||||
spec:
|
||||
clusterIP: 10.43.211.127
|
||||
clusterIPs:
|
||||
- 10.43.211.127
|
||||
internalTrafficPolicy: Cluster
|
||||
ipFamilies:
|
||||
- IPv4
|
||||
ipFamilyPolicy: SingleStack
|
||||
ports:
|
||||
- name: http
|
||||
port: 80
|
||||
protocol: TCP
|
||||
targetPort: 5000
|
||||
selector:
|
||||
app.kubernetes.io/name: aistation-web
|
||||
sessionAffinity: None
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: cert-manager.io/v1
|
||||
kind: Certificate
|
||||
metadata:
|
||||
name: aistation-web-tls
|
||||
namespace: fc-aistation
|
||||
labels:
|
||||
app.kubernetes.io/name: aistation-web-tls
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
argocd.argoproj.io/instance: infra-fc-aistation
|
||||
spec:
|
||||
dnsNames:
|
||||
- aistation.iamworkin.lan
|
||||
issuerRef:
|
||||
kind: ClusterIssuer
|
||||
name: step-ca-acme
|
||||
secretName: aistation-web-tls
|
||||
---
|
||||
apiVersion: traefik.io/v1alpha1
|
||||
kind: IngressRoute
|
||||
metadata:
|
||||
name: aistation-web
|
||||
namespace: fc-aistation
|
||||
labels:
|
||||
app.kubernetes.io/name: aistation-web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
argocd.argoproj.io/instance: infra-fc-aistation
|
||||
spec:
|
||||
entryPoints:
|
||||
- websecure
|
||||
routes:
|
||||
- kind: Rule
|
||||
match: Host(`aistation.iamworkin.lan`)
|
||||
services:
|
||||
- name: aistation-web
|
||||
port: 80
|
||||
tls:
|
||||
secretName: aistation-web-tls
|
||||
@@ -1,5 +1,206 @@
|
||||
# FlowerCore Chat — TLS + Ingress
|
||||
# Deployment and Service managed by deploy script (not ArgoCD)
|
||||
# FlowerCore Chat
|
||||
#
|
||||
# ArgoCD-managed workload plus TLS/Ingress. The chat-web-secret remains an
|
||||
# out-of-band Secret until the values are moved into a 1Password-backed item;
|
||||
# the Deployment references it as optional so GitOps can own the workload
|
||||
# without storing secret material in this repo.
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: fc-chat
|
||||
labels:
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: chat-web-config
|
||||
namespace: fc-chat
|
||||
labels:
|
||||
app.kubernetes.io/name: chat-web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
data:
|
||||
ASPNETCORE_ENVIRONMENT: Production
|
||||
ASPNETCORE_URLS: "http://+:8080"
|
||||
ASPNETCORE_FORWARDEDHEADERS_ENABLED: "true"
|
||||
FlowerCore__Auth__Enabled: "false"
|
||||
FlowerCore__Auth__Oidc__Enabled: "true"
|
||||
FlowerCore__Auth__Oidc__Authority: "https://id.iamworkin.lan/application/o/chat/"
|
||||
FlowerCore__Auth__Oidc__Audience: "chat"
|
||||
FlowerCore__Auth__Oidc__ClientId: "chat"
|
||||
FlowerCore__Database__ConnectionStrings__Sqlite: "Data Source=/data/chat.db"
|
||||
# Ollama target. Switched 2026-04-25 from edge1 Pi5 (10.0.57.17) to BLUEJAY-WS
|
||||
# workstation (10.0.56.20, RX 9070 XT 16GB, OLLAMA_HOST=0.0.0.0:11434, Vulkan
|
||||
# backend per feedback_rdna4_vulkan_broken). The Pi5 was timing out every team-
|
||||
# round speaker at the 300s per-turn cap (live-proven 2026-04-25 03:53 UTC,
|
||||
# see feedback_chat_team_round_edge1_too_slow). Workstation has gemma3:4b for
|
||||
# the Cheap tier, plus gemma3:27b/phi4:14b/qwen3:14b for Default/Balanced/Deep.
|
||||
# Piper TTS stays on edge1 below (different service, Pi handles TTS fine).
|
||||
FlowerCore__AI__OllamaBaseUrl: "http://10.0.56.20:11434"
|
||||
FlowerCore__AI__DefaultModelName: "phi4:14b"
|
||||
ChatOptions__BehaviorRuleEngine__OllamaBaseUrl: "http://10.0.56.20:11434"
|
||||
ChatOptions__BehaviorRuleEngine__FallbackOllamaBaseUrl: "http://10.0.57.17:11434"
|
||||
ChatOptions__BehaviorRuleEngine__ModelName: "gemma3:12b"
|
||||
FlowerCore__AI__Memory__UseSharedIndexingAdapter: "true"
|
||||
FlowerCore__AI__Memory__UseOllamaEmbeddings: "true"
|
||||
FlowerCore__AI__Memory__EmbeddingModel: "nomic-embed-text"
|
||||
FlowerCore__AI__Memory__EnableSharedIndexingBackfill: "true"
|
||||
FlowerCore__AI__Memory__SharedIndexingDatabasePath: "/data/chat-memory-index.db"
|
||||
FlowerCore__AI__Skills__Library__LibraryApiUrl: "http://library-web.fc-library.svc.cluster.local"
|
||||
FlowerCore__AI__Skills__Retail__RetailApiUrl: "http://retail-web.fc-retail.svc.cluster.local"
|
||||
FlowerCore__AI__Skills__Intranet__IntranetBaseUrl: "http://intranet-web.intranet.svc.cluster.local"
|
||||
FlowerCore__AI__Skills__Print__PrintMcpBaseUrl: "http://10.0.57.16:5200"
|
||||
FlowerCore__AI__IrcBridge__Enabled: "true"
|
||||
FlowerCore__AI__IrcBridge__DefaultProfileSlug: "it-helpdesk"
|
||||
FlowerCore__AI__IrcBridge__MentionProfileSlug: "it-helpdesk"
|
||||
FlowerCore__AI__IrcBridge__MentionReactiveMode: "mentions-only"
|
||||
FlowerCore__AI__IrcBridge__AllowActionExecution: "false"
|
||||
FlowerCore__AI__Voice__Piper__Host: "10.0.57.17"
|
||||
FlowerCore__AI__Voice__Piper__Port: "10400"
|
||||
FlowerCore__AI__Voice__OutputRoot: "/data/audio"
|
||||
FlowerCore__AI__Voice__RetentionDays: "30"
|
||||
# LLM provider abstraction (ADR-088). Anthropic stays disabled here -- when
|
||||
# an operator wants to enable Claude, they flip Enabled=true and mount
|
||||
# FlowerCore__Anthropic__ApiKey from the onepassword-synced Secret (see
|
||||
# docs/ai-agents/anthropic-integration.md).
|
||||
FlowerCore__Anthropic__Enabled: "false"
|
||||
FlowerCore__Anthropic__BaseUrl: "https://api.anthropic.com"
|
||||
FlowerCore__Anthropic__DefaultModel: "claude-sonnet-4-6"
|
||||
FlowerCore__Anthropic__CheapModel: "claude-haiku-4-5-20251001"
|
||||
FlowerCore__Anthropic__DeepModel: "claude-opus-4-7"
|
||||
FlowerCore__Budget__ResponseCacheEnabled: "true"
|
||||
OTEL_SERVICE_NAME: FlowerCore.Chat
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT: "http://otel-collector.monitoring.svc.cluster.local:4317"
|
||||
OTEL_EXPORTER_OTLP_PROTOCOL: grpc
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: chat-web-data
|
||||
namespace: fc-chat
|
||||
labels:
|
||||
app.kubernetes.io/name: chat-web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
storageClassName: longhorn
|
||||
volumeMode: Filesystem
|
||||
resources:
|
||||
requests:
|
||||
storage: 1Gi
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: chat-web
|
||||
namespace: fc-chat
|
||||
labels:
|
||||
app.kubernetes.io/name: chat-web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
spec:
|
||||
replicas: 1
|
||||
strategy:
|
||||
type: Recreate
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: chat-web
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: chat-web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
annotations:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "8080"
|
||||
prometheus.io/path: "/metrics/prometheus"
|
||||
spec:
|
||||
nodeSelector:
|
||||
kubernetes.io/hostname: rke2-server
|
||||
securityContext:
|
||||
fsGroup: 1654
|
||||
fsGroupChangePolicy: OnRootMismatch
|
||||
containers:
|
||||
- name: chat-web
|
||||
image: localhost/fc-chat-web:v20260603-oidc-authentik
|
||||
imagePullPolicy: Never
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 8080
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: chat-web-config
|
||||
- secretRef:
|
||||
name: chat-web-secret
|
||||
optional: true
|
||||
env:
|
||||
- name: FlowerCore__Auth__Oidc__Authority
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: chat-oidc-client
|
||||
key: issuer_url
|
||||
optional: true
|
||||
- name: FlowerCore__Auth__Oidc__ClientId
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: chat-oidc-client
|
||||
key: client_id
|
||||
optional: true
|
||||
- name: FlowerCore__Auth__Oidc__ClientSecret
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: chat-oidc-client
|
||||
key: client_secret
|
||||
optional: true
|
||||
volumeMounts:
|
||||
- name: data
|
||||
mountPath: /data
|
||||
resources:
|
||||
requests:
|
||||
memory: "128Mi"
|
||||
cpu: "100m"
|
||||
limits:
|
||||
memory: "512Mi"
|
||||
cpu: "500m"
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: 8080
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 5
|
||||
failureThreshold: 6
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: 8080
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 30
|
||||
timeoutSeconds: 5
|
||||
failureThreshold: 3
|
||||
volumes:
|
||||
- name: data
|
||||
persistentVolumeClaim:
|
||||
claimName: chat-web-data
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: chat-web
|
||||
namespace: fc-chat
|
||||
labels:
|
||||
app.kubernetes.io/name: chat-web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app.kubernetes.io/name: chat-web
|
||||
ports:
|
||||
- name: http
|
||||
port: 80
|
||||
targetPort: 8080
|
||||
protocol: TCP
|
||||
---
|
||||
apiVersion: cert-manager.io/v1
|
||||
kind: Certificate
|
||||
@@ -30,3 +231,41 @@ spec:
|
||||
port: 80
|
||||
tls:
|
||||
secretName: chat-web-tls
|
||||
---
|
||||
# Public host profile marker. The app treats this header as authoritative for
|
||||
# the public twin, while the internal chat.iamworkin.lan route does not attach
|
||||
# it and keeps the operator-oriented UI.
|
||||
apiVersion: traefik.io/v1alpha1
|
||||
kind: Middleware
|
||||
metadata:
|
||||
name: chat-public-profile-header
|
||||
namespace: fc-chat
|
||||
spec:
|
||||
headers:
|
||||
customRequestHeaders:
|
||||
X-FC-Chat-Host-Profile: "public"
|
||||
---
|
||||
# Public Cloudflare-fronted twin for the anonymous chat surface. Operator
|
||||
# paths are intentionally absent from the allowlist below, so /admin,
|
||||
# /operator, /console, /ops, /api/operator, and /operatorhub miss this route
|
||||
# and return Traefik 404 before reaching the pod. Operator action still needed:
|
||||
# create/verify Cloudflare DNS chat.flowercore.io -> public Traefik endpoint
|
||||
# and mirror the cf-origin-flowercore-io TLS secret into namespace fc-chat.
|
||||
apiVersion: traefik.io/v1alpha1
|
||||
kind: IngressRoute
|
||||
metadata:
|
||||
name: chat-web-public
|
||||
namespace: fc-chat
|
||||
spec:
|
||||
entryPoints:
|
||||
- websecure
|
||||
routes:
|
||||
- match: Host(`chat.flowercore.io`) && (Path(`/`) || Path(`/chat`) || PathPrefix(`/_blazor`) || PathPrefix(`/_framework`) || PathPrefix(`/_content`) || PathPrefix(`/avatars`) || PathPrefix(`/css`) || PathPrefix(`/js`) || PathPrefix(`/favicon`) || PathPrefix(`/chathub`)) && (Method(`GET`) || Method(`HEAD`) || Method(`POST`) || Method(`OPTIONS`))
|
||||
kind: Rule
|
||||
middlewares:
|
||||
- name: chat-public-profile-header
|
||||
services:
|
||||
- name: chat-web
|
||||
port: 80
|
||||
tls:
|
||||
secretName: cf-origin-flowercore-io
|
||||
|
||||
26
apps/fc-devicemgmt/1password-item.yaml
Normal file
26
apps/fc-devicemgmt/1password-item.yaml
Normal file
@@ -0,0 +1,26 @@
|
||||
# Runtime secrets for FlowerCore.DeviceManagement.
|
||||
#
|
||||
# OnePasswordItem operator syncs this item into a Kubernetes Secret with the
|
||||
# same name. Expected fields:
|
||||
# DB-Password
|
||||
# mtls-ca.pem
|
||||
# mtls-client.crt
|
||||
# mtls-client.key
|
||||
# mtls-chain.pem
|
||||
#
|
||||
# Do not add literal secret values to this repo. Runtime pods consume the
|
||||
# synced Secret through env vars and read-only mounts.
|
||||
apiVersion: onepassword.com/v1
|
||||
kind: OnePasswordItem
|
||||
metadata:
|
||||
name: fc-devicemgmt-runtime
|
||||
namespace: fc-devicemgmt
|
||||
labels:
|
||||
app.kubernetes.io/name: fc-devicemgmt
|
||||
app.kubernetes.io/component: secrets
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
flowercore.io/tenant-id: system
|
||||
flowercore.io/created-by: bluejay-infra
|
||||
spec:
|
||||
itemPath: "vaults/IAmWorkin/items/FlowerCore DeviceManagement Runtime"
|
||||
30
apps/fc-devicemgmt/certificate-web.yaml
Normal file
30
apps/fc-devicemgmt/certificate-web.yaml
Normal file
@@ -0,0 +1,30 @@
|
||||
# Certificate for devices.iamworkin.lan.
|
||||
#
|
||||
# Preflight gate: FlowerCore.DNS / pfSense must contain an explicit A record:
|
||||
# devices.iamworkin.lan -> 10.0.56.200
|
||||
# before this Certificate is synced. step-ca ACME cannot see the CoreDNS
|
||||
# wildcard, so missing pfSense DNS produces cert-manager HTTP-01 backoff
|
||||
# (feedback_pfsense_dns_required_for_acme).
|
||||
apiVersion: cert-manager.io/v1
|
||||
kind: Certificate
|
||||
metadata:
|
||||
name: fc-devicemgmt-web-tls
|
||||
namespace: fc-devicemgmt
|
||||
labels:
|
||||
app.kubernetes.io/name: fc-devicemgmt-web
|
||||
app.kubernetes.io/component: web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
flowercore.io/tenant-id: system
|
||||
flowercore.io/created-by: bluejay-infra
|
||||
annotations:
|
||||
flowercore.io/dns-preflight: "devices.iamworkin.lan must resolve to 10.0.56.200 before ACME sync"
|
||||
spec:
|
||||
secretName: fc-devicemgmt-web-tls
|
||||
issuerRef:
|
||||
name: step-ca-acme
|
||||
kind: ClusterIssuer
|
||||
dnsNames:
|
||||
- devices.iamworkin.lan
|
||||
duration: 720h
|
||||
renewBefore: 240h
|
||||
81
apps/fc-devicemgmt/clusterrole-operator.yaml
Normal file
81
apps/fc-devicemgmt/clusterrole-operator.yaml
Normal file
@@ -0,0 +1,81 @@
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: fc-devicemgmt-operator
|
||||
labels:
|
||||
app.kubernetes.io/name: fc-devicemgmt-operator
|
||||
app.kubernetes.io/component: operator
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
flowercore.io/tenant-id: system
|
||||
flowercore.io/created-by: bluejay-infra
|
||||
rules:
|
||||
- apiGroups:
|
||||
- devices.flowercore.io
|
||||
resources:
|
||||
- '*'
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- create
|
||||
- update
|
||||
- patch
|
||||
- delete
|
||||
- apiGroups:
|
||||
- devices.flowercore.io
|
||||
resources:
|
||||
- devices/status
|
||||
- devices/finalizers
|
||||
- devicegroups/status
|
||||
- devicegroups/finalizers
|
||||
- devicepolicies/status
|
||||
- devicepolicies/finalizers
|
||||
- remotecommands/status
|
||||
- remotecommands/finalizers
|
||||
verbs:
|
||||
- get
|
||||
- update
|
||||
- patch
|
||||
- apiGroups:
|
||||
- apps
|
||||
resources:
|
||||
- deployments
|
||||
verbs:
|
||||
- get
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- pods
|
||||
- services
|
||||
- configmaps
|
||||
- secrets
|
||||
- events
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- create
|
||||
- update
|
||||
- patch
|
||||
- delete
|
||||
- apiGroups:
|
||||
- batch
|
||||
resources:
|
||||
- jobs
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- create
|
||||
- update
|
||||
- patch
|
||||
- delete
|
||||
- apiGroups:
|
||||
- networking.k8s.io
|
||||
resources:
|
||||
- networkpolicies
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
19
apps/fc-devicemgmt/clusterrolebinding-operator.yaml
Normal file
19
apps/fc-devicemgmt/clusterrolebinding-operator.yaml
Normal file
@@ -0,0 +1,19 @@
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: fc-devicemgmt-operator
|
||||
labels:
|
||||
app.kubernetes.io/name: fc-devicemgmt-operator
|
||||
app.kubernetes.io/component: operator
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
flowercore.io/tenant-id: system
|
||||
flowercore.io/created-by: bluejay-infra
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: fc-devicemgmt-operator
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: fc-devicemgmt-operator
|
||||
namespace: fc-devicemgmt
|
||||
109
apps/fc-devicemgmt/deployment-operator.yaml
Normal file
109
apps/fc-devicemgmt/deployment-operator.yaml
Normal file
@@ -0,0 +1,109 @@
|
||||
# FlowerCore.DeviceManagement Operator.
|
||||
#
|
||||
# KubeOps controller for devices.flowercore.io resources. Operator-created
|
||||
# children must set OwnerReferences + traceability labels/annotations per
|
||||
# k8s-pod-ownership-and-traceability-standard.md. clusterrole-operator.yaml grants
|
||||
# apps/deployments/get so the process can resolve its own Deployment UID.
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: fc-devicemgmt-operator
|
||||
namespace: fc-devicemgmt
|
||||
labels:
|
||||
app: fc-devicemgmt-operator
|
||||
app.kubernetes.io/name: fc-devicemgmt-operator
|
||||
app.kubernetes.io/component: operator
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
flowercore.io/tenant-id: system
|
||||
flowercore.io/created-by: bluejay-infra
|
||||
annotations:
|
||||
flowercore.io/traceability-standard: k8s-pod-ownership-and-traceability-standard
|
||||
spec:
|
||||
replicas: 1
|
||||
revisionHistoryLimit: 3
|
||||
selector:
|
||||
matchLabels:
|
||||
app: fc-devicemgmt-operator
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: fc-devicemgmt-operator
|
||||
app.kubernetes.io/name: fc-devicemgmt-operator
|
||||
app.kubernetes.io/component: operator
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
flowercore.io/tenant-id: system
|
||||
flowercore.io/created-by: bluejay-infra
|
||||
annotations:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "8080"
|
||||
prometheus.io/path: "/metrics"
|
||||
flowercore.io/audit-trace-id: "runtime-activity-trace"
|
||||
spec:
|
||||
serviceAccountName: fc-devicemgmt-operator
|
||||
securityContext:
|
||||
fsGroup: 1654
|
||||
fsGroupChangePolicy: OnRootMismatch
|
||||
containers:
|
||||
- name: operator
|
||||
image: localhost/fc-devicemgmt-operator:v20260519-sp34cl3-fix
|
||||
imagePullPolicy: Never
|
||||
ports:
|
||||
- name: metrics
|
||||
containerPort: 8080
|
||||
env:
|
||||
- name: ASPNETCORE_ENVIRONMENT
|
||||
value: "Production"
|
||||
- name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
|
||||
value: "false"
|
||||
- name: POD_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.name
|
||||
- name: POD_NAMESPACE
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.namespace
|
||||
- name: FLOWERCORE_KUBERNETES_OWNER_DEPLOYMENT
|
||||
value: "fc-devicemgmt-operator"
|
||||
- name: FlowerCore__Service__Name
|
||||
value: "FlowerCore.DeviceManagement.Operator"
|
||||
- name: FlowerCore__DeviceManagement__DefaultTenantId
|
||||
value: "system"
|
||||
resources:
|
||||
requests:
|
||||
cpu: 50m
|
||||
memory: 128Mi
|
||||
limits:
|
||||
cpu: 500m
|
||||
memory: 512Mi
|
||||
readinessProbe:
|
||||
tcpSocket:
|
||||
port: 8080
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 10
|
||||
livenessProbe:
|
||||
tcpSocket:
|
||||
port: 8080
|
||||
initialDelaySeconds: 20
|
||||
periodSeconds: 30
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1654
|
||||
runAsGroup: 1654
|
||||
allowPrivilegeEscalation: false
|
||||
readOnlyRootFilesystem: true
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
volumeMounts:
|
||||
- name: tmp
|
||||
mountPath: /tmp
|
||||
- name: logs
|
||||
mountPath: /app/logs
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
- name: logs
|
||||
emptyDir: {}
|
||||
151
apps/fc-devicemgmt/deployment-web.yaml
Normal file
151
apps/fc-devicemgmt/deployment-web.yaml
Normal file
@@ -0,0 +1,151 @@
|
||||
# FlowerCore.DeviceManagement Web.
|
||||
#
|
||||
# Source repo is expected to ship FlowerCore.DeviceManagement.Web in a later
|
||||
# Sprint 9+ lane. This manifest is static-valid without requiring the image to
|
||||
# exist yet; import localhost/fc-devicemgmt-web:<tag> to all schedulable RKE2
|
||||
# nodes before letting ArgoCD sync a live rollout.
|
||||
#
|
||||
# SCALED TO 0 — 2026-05-19 morning-routine cleanup.
|
||||
# The Web pod cannot start until TWO upstream gaps close:
|
||||
# 1. MySQL DB instance `flowercore_devicemgmt` (user `fc_devicemgmt`) is
|
||||
# provisioned via fc-mysql Manager. The cluster currently has ZERO
|
||||
# MySqlInstanceCrds and no `mysql.fc-mysql.svc:3306` Service, so the
|
||||
# deployment-web container env `FlowerCore__Database__Host=mysql.fc-mysql.svc`
|
||||
# points at nothing. Provision via the fc-mysql Manager UI/REST/MCP.
|
||||
# 2. 1Password vault item `IAmWorkin/FlowerCore DeviceManagement Runtime`
|
||||
# with 5 fields (DB-Password, mtls-ca.pem, mtls-client.crt, mtls-client.key,
|
||||
# mtls-chain.pem) — see apps/fc-devicemgmt/1password-item.yaml. Mint mTLS
|
||||
# from step-ca-agent ClusterIssuer per ADR-126; DB-Password must match the
|
||||
# password configured for the MySQL user.
|
||||
# Re-enable: change replicas back to 2 after both gaps close. The image tag
|
||||
# in this file (v20260512-cx5) MAY also need a refresh — it predates the
|
||||
# Sprint 34 Cl-3 operator fix; Web may have an analogous bug.
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: fc-devicemgmt-web
|
||||
namespace: fc-devicemgmt
|
||||
labels:
|
||||
app: fc-devicemgmt-web
|
||||
app.kubernetes.io/name: fc-devicemgmt-web
|
||||
app.kubernetes.io/component: web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
flowercore.io/tenant-id: system
|
||||
flowercore.io/created-by: bluejay-infra
|
||||
annotations:
|
||||
flowercore.io/traceability-standard: k8s-pod-ownership-and-traceability-standard
|
||||
spec:
|
||||
replicas: 0
|
||||
revisionHistoryLimit: 3
|
||||
selector:
|
||||
matchLabels:
|
||||
app: fc-devicemgmt-web
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: fc-devicemgmt-web
|
||||
app.kubernetes.io/name: fc-devicemgmt-web
|
||||
app.kubernetes.io/component: web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
flowercore.io/tenant-id: system
|
||||
flowercore.io/created-by: bluejay-infra
|
||||
annotations:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "8080"
|
||||
prometheus.io/path: "/metrics"
|
||||
flowercore.io/audit-trace-id: "runtime-activity-trace"
|
||||
spec:
|
||||
securityContext:
|
||||
fsGroup: 1654
|
||||
fsGroupChangePolicy: OnRootMismatch
|
||||
containers:
|
||||
- name: web
|
||||
image: localhost/fc-devicemgmt-web:v20260512-cx5
|
||||
imagePullPolicy: Never
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 8080
|
||||
env:
|
||||
- name: ASPNETCORE_URLS
|
||||
value: "http://+:8080"
|
||||
- name: ASPNETCORE_ENVIRONMENT
|
||||
value: "Production"
|
||||
- name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
|
||||
value: "false"
|
||||
- name: FlowerCore__Service__Name
|
||||
value: "FlowerCore.DeviceManagement.Web"
|
||||
- name: FlowerCore__DeviceManagement__DefaultTenantId
|
||||
value: "system"
|
||||
- name: FlowerCore__Database__Provider
|
||||
value: "MySql"
|
||||
- name: FlowerCore__Database__Host
|
||||
value: "mysql.fc-mysql.svc"
|
||||
- name: FlowerCore__Database__Database
|
||||
value: "flowercore_devicemgmt"
|
||||
- name: FlowerCore__Database__User
|
||||
value: "fc_devicemgmt"
|
||||
- name: FlowerCore__Database__Password
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: fc-devicemgmt-runtime
|
||||
key: DB-Password
|
||||
- name: FlowerCore__DeviceManagement__AgentMtls__CaPath
|
||||
value: "/secrets/devicemgmt-mtls/mtls-ca.pem"
|
||||
- name: FlowerCore__DeviceManagement__AgentMtls__ClientCertificatePath
|
||||
value: "/secrets/devicemgmt-mtls/mtls-client.crt"
|
||||
- name: FlowerCore__DeviceManagement__AgentMtls__ClientKeyPath
|
||||
value: "/secrets/devicemgmt-mtls/mtls-client.key"
|
||||
- name: FlowerCore__EventBus__Redis__Configuration
|
||||
value: "redis.fc-redis.svc:6379"
|
||||
resources:
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 256Mi
|
||||
limits:
|
||||
cpu: 1000m
|
||||
memory: 768Mi
|
||||
startupProbe:
|
||||
tcpSocket:
|
||||
port: 8080
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
failureThreshold: 30
|
||||
readinessProbe:
|
||||
tcpSocket:
|
||||
port: 8080
|
||||
periodSeconds: 10
|
||||
failureThreshold: 3
|
||||
livenessProbe:
|
||||
tcpSocket:
|
||||
port: 8080
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 30
|
||||
failureThreshold: 3
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1654
|
||||
runAsGroup: 1654
|
||||
allowPrivilegeEscalation: false
|
||||
readOnlyRootFilesystem: true
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
volumeMounts:
|
||||
- name: tmp
|
||||
mountPath: /tmp
|
||||
- name: logs
|
||||
mountPath: /app/logs
|
||||
- name: devicemgmt-mtls
|
||||
mountPath: /secrets/devicemgmt-mtls
|
||||
readOnly: true
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
- name: logs
|
||||
emptyDir: {}
|
||||
- name: devicemgmt-mtls
|
||||
secret:
|
||||
secretName: fc-devicemgmt-runtime
|
||||
defaultMode: 0400
|
||||
55
apps/fc-devicemgmt/ingressroute-web.yaml
Normal file
55
apps/fc-devicemgmt/ingressroute-web.yaml
Normal file
@@ -0,0 +1,55 @@
|
||||
# LAN ingress for FlowerCore.DeviceManagement Web.
|
||||
#
|
||||
# RKE2 Traefik has no built-in ACME resolver configured. Keep TLS certificate
|
||||
# ownership in cert-manager Certificate/fc-devicemgmt-web-tls.
|
||||
apiVersion: traefik.io/v1alpha1
|
||||
kind: IngressRoute
|
||||
metadata:
|
||||
name: fc-devicemgmt-web
|
||||
namespace: fc-devicemgmt
|
||||
labels:
|
||||
app.kubernetes.io/name: fc-devicemgmt-web
|
||||
app.kubernetes.io/component: web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
flowercore.io/tenant-id: system
|
||||
flowercore.io/created-by: bluejay-infra
|
||||
spec:
|
||||
entryPoints:
|
||||
- websecure
|
||||
routes:
|
||||
- match: Host(`devices.iamworkin.lan`)
|
||||
kind: Rule
|
||||
services:
|
||||
- name: fc-devicemgmt-web
|
||||
port: 80
|
||||
tls:
|
||||
secretName: fc-devicemgmt-web-tls
|
||||
|
||||
# Future public agent/update host gate (OFF by default):
|
||||
#
|
||||
# Do not enable `update.flowercore.io` here until Authentik OIDC Q-OIDC-1
|
||||
# resolves the public-device-management auth model and route ownership with
|
||||
# UpdateCenter. When enabled, use a separate public IngressRoute with an
|
||||
# explicit Method allowlist, public-host auth middleware, and public TLS
|
||||
# certificate strategy. Leaving this as comments keeps ArgoCD from stealing
|
||||
# live UpdateCenter traffic.
|
||||
#
|
||||
# apiVersion: traefik.io/v1alpha1
|
||||
# kind: IngressRoute
|
||||
# metadata:
|
||||
# name: fc-devicemgmt-web-public
|
||||
# namespace: fc-devicemgmt
|
||||
# annotations:
|
||||
# flowercore.io/public-host-gate: "disabled-until-Q-OIDC-1"
|
||||
# spec:
|
||||
# entryPoints:
|
||||
# - websecure
|
||||
# routes:
|
||||
# - match: Host(`update.flowercore.io`) && (Method(`GET`) || Method(`HEAD`) || Method(`POST`) || Method(`OPTIONS`))
|
||||
# kind: Rule
|
||||
# services:
|
||||
# - name: fc-devicemgmt-web
|
||||
# port: 80
|
||||
# tls:
|
||||
# secretName: fc-devicemgmt-public-tls
|
||||
13
apps/fc-devicemgmt/namespace.yaml
Normal file
13
apps/fc-devicemgmt/namespace.yaml
Normal file
@@ -0,0 +1,13 @@
|
||||
# FlowerCore.DeviceManagement namespace.
|
||||
#
|
||||
# ArgoCD discovers this directory as Application `infra-fc-devicemgmt`.
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: fc-devicemgmt
|
||||
labels:
|
||||
app.kubernetes.io/name: fc-devicemgmt
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
flowercore.io/tenant-id: system
|
||||
flowercore.io/created-by: bluejay-infra
|
||||
224
apps/fc-devicemgmt/network-policy.yaml
Normal file
224
apps/fc-devicemgmt/network-policy.yaml
Normal file
@@ -0,0 +1,224 @@
|
||||
# FlowerCore.DeviceManagement NetworkPolicies.
|
||||
#
|
||||
# NetworkPolicies belong in bluejay-infra so ArgoCD owns rebuild state.
|
||||
# Rules include Traefik post-DNAT backend ports per
|
||||
# feedback_netpol_dnat_backend_port and Synology NFS egress for the requested
|
||||
# cold-tier / future artifact path.
|
||||
---
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: NetworkPolicy
|
||||
metadata:
|
||||
name: fc-devicemgmt-web-isolation
|
||||
namespace: fc-devicemgmt
|
||||
labels:
|
||||
app.kubernetes.io/name: fc-devicemgmt-web
|
||||
app.kubernetes.io/component: web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
flowercore.io/tenant-id: system
|
||||
flowercore.io/created-by: bluejay-infra
|
||||
spec:
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app: fc-devicemgmt-web
|
||||
policyTypes:
|
||||
- Ingress
|
||||
- Egress
|
||||
ingress:
|
||||
# LAN edge: only cluster Traefik should reach the Web pod for
|
||||
# devices.iamworkin.lan.
|
||||
- from:
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: traefik-system
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: traefik
|
||||
ports:
|
||||
- port: 8080
|
||||
protocol: TCP
|
||||
# Direct LAN diagnostics are allowed only from FlowerCore LAN/VPN ranges.
|
||||
- from:
|
||||
- ipBlock:
|
||||
cidr: 10.0.56.0/24
|
||||
- ipBlock:
|
||||
cidr: 10.0.57.0/24
|
||||
- ipBlock:
|
||||
cidr: 10.0.58.0/24
|
||||
- ipBlock:
|
||||
cidr: 10.0.68.0/27
|
||||
ports:
|
||||
- port: 8080
|
||||
protocol: TCP
|
||||
egress:
|
||||
# CoreDNS.
|
||||
- to:
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: kube-system
|
||||
podSelector:
|
||||
matchLabels:
|
||||
k8s-app: kube-dns
|
||||
ports:
|
||||
- port: 53
|
||||
protocol: UDP
|
||||
- port: 53
|
||||
protocol: TCP
|
||||
# Database namespace.
|
||||
- to:
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: fc-mysql
|
||||
ports:
|
||||
- port: 3306
|
||||
protocol: TCP
|
||||
# Redis backplane for multi-replica SignalR / live-status fan-out.
|
||||
- to:
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: fc-redis
|
||||
ports:
|
||||
- port: 6379
|
||||
protocol: TCP
|
||||
# Traefik VIP / in-cluster Traefik for self-callbacks and public URL
|
||||
# generation tests. Include post-DNAT backend ports 8443 + 8080.
|
||||
- to:
|
||||
- ipBlock:
|
||||
cidr: 10.0.56.200/32
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: traefik-system
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: traefik
|
||||
ports:
|
||||
- port: 80
|
||||
protocol: TCP
|
||||
- port: 443
|
||||
protocol: TCP
|
||||
- port: 8080
|
||||
protocol: TCP
|
||||
- port: 8443
|
||||
protocol: TCP
|
||||
# Agent egress: LAN/VPN devices may run DM Agent in Generic, Kiosk, Pi,
|
||||
# ThinClient, or Server mode. Keep this private-range only.
|
||||
- to:
|
||||
- ipBlock:
|
||||
cidr: 10.0.56.0/24
|
||||
- ipBlock:
|
||||
cidr: 10.0.57.0/24
|
||||
- ipBlock:
|
||||
cidr: 10.0.58.0/24
|
||||
- ipBlock:
|
||||
cidr: 10.0.68.0/27
|
||||
ports:
|
||||
- port: 80
|
||||
protocol: TCP
|
||||
- port: 443
|
||||
protocol: TCP
|
||||
- port: 8080
|
||||
protocol: TCP
|
||||
- port: 8443
|
||||
protocol: TCP
|
||||
- port: 5000
|
||||
protocol: TCP
|
||||
- port: 5001
|
||||
protocol: TCP
|
||||
# Synology NFS cold-tier / artifact mount allowance.
|
||||
- to:
|
||||
- ipBlock:
|
||||
cidr: 10.0.58.3/32
|
||||
ports:
|
||||
- port: 2049
|
||||
protocol: TCP
|
||||
- port: 2049
|
||||
protocol: UDP
|
||||
- port: 111
|
||||
protocol: TCP
|
||||
- port: 111
|
||||
protocol: UDP
|
||||
---
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: NetworkPolicy
|
||||
metadata:
|
||||
name: fc-devicemgmt-operator-isolation
|
||||
namespace: fc-devicemgmt
|
||||
labels:
|
||||
app.kubernetes.io/name: fc-devicemgmt-operator
|
||||
app.kubernetes.io/component: operator
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
flowercore.io/tenant-id: system
|
||||
flowercore.io/created-by: bluejay-infra
|
||||
spec:
|
||||
podSelector:
|
||||
matchLabels:
|
||||
app: fc-devicemgmt-operator
|
||||
policyTypes:
|
||||
- Ingress
|
||||
- Egress
|
||||
ingress:
|
||||
- from:
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: monitoring
|
||||
ports:
|
||||
- port: 8080
|
||||
protocol: TCP
|
||||
egress:
|
||||
# CoreDNS.
|
||||
- to:
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: kube-system
|
||||
podSelector:
|
||||
matchLabels:
|
||||
k8s-app: kube-dns
|
||||
ports:
|
||||
- port: 53
|
||||
protocol: UDP
|
||||
- port: 53
|
||||
protocol: TCP
|
||||
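# Kubernetes API for KubeOps reconciliation and Deployment UID lookup. NOTE: the empty `to: []` below matches all destinations, so TCP 443/6443 egress is open to any address, not only the API server.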
|
||||
- to: []
|
||||
ports:
|
||||
- port: 443
|
||||
protocol: TCP
|
||||
- port: 6443
|
||||
protocol: TCP
|
||||
# Agent egress for operator-initiated probes / fallback command dispatch.
|
||||
- to:
|
||||
- ipBlock:
|
||||
cidr: 10.0.56.0/24
|
||||
- ipBlock:
|
||||
cidr: 10.0.57.0/24
|
||||
- ipBlock:
|
||||
cidr: 10.0.58.0/24
|
||||
- ipBlock:
|
||||
cidr: 10.0.68.0/27
|
||||
ports:
|
||||
- port: 80
|
||||
protocol: TCP
|
||||
- port: 443
|
||||
protocol: TCP
|
||||
- port: 8080
|
||||
protocol: TCP
|
||||
- port: 8443
|
||||
protocol: TCP
|
||||
- port: 5000
|
||||
protocol: TCP
|
||||
- port: 5001
|
||||
protocol: TCP
|
||||
# Synology NFS allowance for future cold-tier/audit archival jobs.
|
||||
- to:
|
||||
- ipBlock:
|
||||
cidr: 10.0.58.3/32
|
||||
ports:
|
||||
- port: 2049
|
||||
protocol: TCP
|
||||
- port: 2049
|
||||
protocol: UDP
|
||||
- port: 111
|
||||
protocol: TCP
|
||||
- port: 111
|
||||
protocol: UDP
|
||||
22
apps/fc-devicemgmt/service-web.yaml
Normal file
22
apps/fc-devicemgmt/service-web.yaml
Normal file
@@ -0,0 +1,22 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: fc-devicemgmt-web
|
||||
namespace: fc-devicemgmt
|
||||
labels:
|
||||
app: fc-devicemgmt-web
|
||||
app.kubernetes.io/name: fc-devicemgmt-web
|
||||
app.kubernetes.io/component: web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
flowercore.io/tenant-id: system
|
||||
flowercore.io/created-by: bluejay-infra
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: fc-devicemgmt-web
|
||||
ports:
|
||||
- name: http
|
||||
port: 80
|
||||
targetPort: 8080
|
||||
protocol: TCP
|
||||
12
apps/fc-devicemgmt/serviceaccount-operator.yaml
Normal file
12
apps/fc-devicemgmt/serviceaccount-operator.yaml
Normal file
@@ -0,0 +1,12 @@
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: fc-devicemgmt-operator
|
||||
namespace: fc-devicemgmt
|
||||
labels:
|
||||
app.kubernetes.io/name: fc-devicemgmt-operator
|
||||
app.kubernetes.io/component: operator
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
flowercore.io/tenant-id: system
|
||||
flowercore.io/created-by: bluejay-infra
|
||||
@@ -74,6 +74,14 @@ metadata:
|
||||
spec:
|
||||
itemPath: "vaults/IAmWorkin/items/FlowerCore Edition Signing Key - edition:aistation-field"
|
||||
---
|
||||
apiVersion: onepassword.com/v1
|
||||
kind: OnePasswordItem
|
||||
metadata:
|
||||
name: distribution-oidc-client
|
||||
namespace: fc-distribution
|
||||
spec:
|
||||
itemPath: "vaults/IAmWorkin/items/distribution-oidc-client"
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
@@ -101,6 +109,7 @@ spec:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "8080"
|
||||
prometheus.io/path: "/metrics"
|
||||
flowercore.io/healthz-auth-policy: "allow-anonymous"
|
||||
spec:
|
||||
# Synology NFS export `/volume1/kubernetes` ACL only allows rke2-server
|
||||
# (10.0.56.11) right now. Until the ACL is widened in DSM (admin only),
|
||||
@@ -118,7 +127,7 @@ spec:
|
||||
# dotnet.exe publish -c Release -o deploy/app \
|
||||
# src/FlowerCore.Distribution.Web/FlowerCore.Distribution.Web.csproj
|
||||
# podman build -t localhost/fc-distribution:v<tag> -f deploy/Dockerfile.deploy deploy
|
||||
image: localhost/fc-distribution:v202605061948
|
||||
image: localhost/fc-distribution:v20260604-oidc-root-anon
|
||||
imagePullPolicy: Never
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
@@ -130,6 +139,25 @@ spec:
|
||||
value: "Production"
|
||||
- name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
|
||||
value: "false"
|
||||
# Authentik/OIDC enforcement. Public read/entitlement + the
|
||||
# dist.flowercore.io Method() allowlist stay open; OIDC gates the
|
||||
# operator/admin surface while /healthz remains anonymous.
|
||||
- name: FlowerCore__Auth__Enabled
|
||||
value: "true"
|
||||
- name: FlowerCore__Auth__Oidc__Enabled
|
||||
value: "true"
|
||||
- name: FlowerCore__Auth__Oidc__Authority
|
||||
value: "https://id.iamworkin.lan/application/o/distribution/"
|
||||
- name: FlowerCore__Auth__Oidc__Audience
|
||||
value: "distribution"
|
||||
- name: FlowerCore__Auth__Oidc__ClientId
|
||||
value: "distribution"
|
||||
- name: FlowerCore__Auth__Oidc__ClientSecret
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: distribution-oidc-client
|
||||
key: client_secret
|
||||
optional: true
|
||||
# SQLite connection (catalog + data-protection keys via FlowerCoreDbContext).
|
||||
# Read by Data/DatabaseProviderExtensions.cs in precedence order; Sqlite key wins.
|
||||
- name: FlowerCore__Database__Provider
|
||||
|
||||
45
apps/fc-divoom-dm-pi-device/README.md
Normal file
45
apps/fc-divoom-dm-pi-device/README.md
Normal file
@@ -0,0 +1,45 @@
|
||||
# FlowerCore Divoom DM Pi Device
|
||||
|
||||
Source-controlled Puppet/Hiera deployment contract for registering the edge2
|
||||
Divoom MiniToo panel as a FlowerCore DeviceManagement-managed Pi device.
|
||||
|
||||
This is not a Kubernetes application. The live panel remains the existing
|
||||
edge2 `flowercore-divoom.service` managed by `FlowerCore.Puppet`
|
||||
`profile::pi::service::divoom`, with the .NET payload deployed out of band
|
||||
and `/opt/flowercore/divoom/data` plus the Bluetooth shell wrappers preserved.
|
||||
Because edge2 is already Hiera-driven through `profile::pi::service::apps`,
|
||||
these artifacts deploy as additive `profile::pi::service` Hiera data and profile source, not
|
||||
`profile::edge::service::apps` and not an ArgoCD/K8s app.
|
||||
|
||||
## Scope
|
||||
|
||||
- Stage DeviceManagement registration metadata for the edge2 Divoom MiniToo.
|
||||
- Stage a separate, disabled-by-default DM Agent executor unit for privileged
|
||||
Bluetooth operations once the DM-RPC lane lands.
|
||||
- Keep `flowercore-divoom.service` and `flowercore-divoom-bt.service`
|
||||
untouched: no service replacement, no restart subscription, no K8s surface.
|
||||
- Preserve the current wrapper contract:
|
||||
`/opt/flowercore/divoom/bt-link.sh`,
|
||||
`/opt/flowercore/divoom/bt-reset.sh`, and
|
||||
`/opt/flowercore/divoom/audio-link.sh`.
|
||||
- Keep FM radio disabled and require visible render proof; device-info echo is
|
||||
not render proof.
|
||||
|
||||
## Artifact Map
|
||||
|
||||
| Path | Use |
|
||||
| --- | --- |
|
||||
| `hiera/edge2-divoom-dm-device.overlay.yaml` | Additive Hiera overlay for edge2. Merge into the existing node YAML without removing `fc-pimanager` or `fc-divoom`. |
|
||||
| `puppet/profile/pi/service/divoom_dm_device.pp` | Puppet profile shape to vendor into `FlowerCore.Puppet` after the DM-RPC executor binary exists. |
|
||||
| `puppet/templates/divoom-device-registration.json.epp` | DM device registration metadata rendered on edge2. |
|
||||
| `puppet/templates/flowercore-divoom-dm-agent.service.epp` | Separate DM Agent systemd unit. Defaults are stopped and disabled until a later cutover. |
|
||||
|
||||
## Rollout Notes
|
||||
|
||||
1. Land these artifacts in bluejay-infra as the deploy contract.
|
||||
2. Vendor the Puppet profile and EPP templates into `FlowerCore.Puppet`.
|
||||
3. Merge the Hiera overlay into `data/nodes/edge2.iamworkin.lan.yaml`.
|
||||
4. Run Puppet in noop first, preferably with a node-local validation directory
|
||||
under `~/.fcv` rather than `/tmp`.
|
||||
5. Only enable the DM Agent service after the DeviceManagement BT executor has
|
||||
landed and passed operator-eyeball render proof.
|
||||
@@ -0,0 +1,32 @@
|
||||
---
|
||||
# Merge into FlowerCore.Puppet data/nodes/edge2.iamworkin.lan.yaml.
|
||||
# Additive overlay only: keep the existing fc-pimanager version/tarball entry,
|
||||
# keep fc-divoom enabled, and do not move Divoom into Kubernetes.
|
||||
|
||||
profile::pi::service::apps:
|
||||
fc-pimanager:
|
||||
binary: 'FlowerCore.PiManager.Web'
|
||||
install_dir: '/opt/fc-pimanager'
|
||||
port: 5000
|
||||
environment: 'edge2'
|
||||
version: '2026.05.28.1646'
|
||||
tarball_source: 'puppet:///modules/profile/pi/builds/fc-pimanager.tar.gz'
|
||||
fc-divoom:
|
||||
enabled: true
|
||||
|
||||
profile::pi::service::divoom_dm_device::ensure: 'present'
|
||||
profile::pi::service::divoom_dm_device::service_enabled: false
|
||||
profile::pi::service::divoom_dm_device::service_ensure: 'stopped'
|
||||
profile::pi::service::divoom_dm_device::device_id: 'edge2-divoom-minitoo'
|
||||
profile::pi::service::divoom_dm_device::display_name: 'edge2 Divoom MiniToo'
|
||||
profile::pi::service::divoom_dm_device::host_fqdn: 'edge2.iamworkin.lan'
|
||||
profile::pi::service::divoom_dm_device::dm_web_url: 'https://devicemgmt.iamworkin.lan'
|
||||
profile::pi::service::divoom_dm_device::divoom_install_dir: '/opt/flowercore/divoom'
|
||||
profile::pi::service::divoom_dm_device::agent_install_dir: '/opt/flowercore/devicemanagement-agent'
|
||||
profile::pi::service::divoom_dm_device::bt_candidate_channels:
|
||||
- '1'
|
||||
- '10'
|
||||
profile::pi::service::divoom_dm_device::default_bt_channel: '1'
|
||||
profile::pi::service::divoom_dm_device::a2dp_default_state: 'off'
|
||||
profile::pi::service::divoom_dm_device::fm_radio_enabled: false
|
||||
profile::pi::service::divoom_dm_device::visible_render_proof_required: true
|
||||
@@ -0,0 +1,140 @@
|
||||
# @summary Registers a Divoom MiniToo with FlowerCore Device Management and
#   installs the systemd unit for its Bluetooth executor agent.
#
# Drop into FlowerCore.Puppet site-modules/profile/manifests/pi/service/divoom_dm_device.pp.
# This profile is additive to profile::pi::service::divoom. It must not manage,
# restart, replace, or subscribe the existing flowercore-divoom.service.
#
# The class has three mutually exclusive modes:
#   * safe-account host: only a notify (plus an audit marker file on
#     non-Windows hosts) is declared; nothing else is managed;
#   * ensure => 'absent': the agent service is stopped and disabled, then its
#     unit file and the device registration are removed and systemd reloaded;
#   * ensure => 'present': the registration JSON and unit file are rendered
#     from EPP templates and the agent service is managed.
#
# @param ensure Install ('present') or remove ('absent') the registration, unit and service.
# @param service_enabled Whether the agent service is enabled at boot.
# @param service_ensure Desired run state of the agent service.
# @param service_name systemd unit name without the .service suffix.
# @param device_id Device identifier; also used as the registration file name.
# @param display_name Display name written into the registration.
# @param host_fqdn Host FQDN written into the registration.
# @param dm_web_url Device Management URL passed on the agent command line.
# @param divoom_install_dir Directory of the existing Divoom install referenced by both templates.
# @param agent_install_dir Directory that contains the agent binary.
# @param agent_binary File name of the agent binary inside agent_install_dir.
# @param bt_candidate_channels Bluetooth channels rendered as a JSON array of strings.
# @param default_bt_channel Default Bluetooth channel written into the registration.
# @param a2dp_default_state A2DP default state written into the registration.
# @param fm_radio_enabled Value of fmRadioEnabled in the registration.
# @param visible_render_proof_required Value of visibleRenderProofRequired in the registration.
class profile::pi::service::divoom_dm_device (
  Enum['present', 'absent']  $ensure                        = 'present',
  Boolean                    $service_enabled               = false,
  Enum['running', 'stopped'] $service_ensure                = 'stopped',
  String                     $service_name                  = 'flowercore-divoom-dm-agent',
  String                     $device_id                     = 'edge2-divoom-minitoo',
  String                     $display_name                  = 'edge2 Divoom MiniToo',
  String                     $host_fqdn                     = 'edge2.iamworkin.lan',
  String                     $dm_web_url                    = 'https://devicemgmt.iamworkin.lan',
  String                     $divoom_install_dir            = '/opt/flowercore/divoom',
  String                     $agent_install_dir             = '/opt/flowercore/devicemanagement-agent',
  String                     $agent_binary                  = 'FlowerCore.DeviceManagement.Agent',
  Array[String]              $bt_candidate_channels         = ['1', '10'],
  String                     $default_bt_channel            = '1',
  Enum['on', 'off']          $a2dp_default_state            = 'off',
  Boolean                    $fm_radio_enabled              = false,
  Boolean                    $visible_render_proof_required = true,
) {
  include profile::workstation::safe_account_exclusion

  $safe_account      = $profile::workstation::safe_account_exclusion::safe_account
  $config_dir        = '/etc/flowercore/device-management/devices'
  $state_dir         = '/var/lib/flowercore/divoom-dm-agent'
  $log_dir           = '/var/log/flowercore/divoom-dm-agent'
  $registration_path = "${config_dir}/${device_id}.json"
  $agent_binary_path = "${agent_install_dir}/${agent_binary}"
  $unit_path         = "/etc/systemd/system/${service_name}.service"

  # Pre-rendered JSON array so the registration template can emit it verbatim.
  $bt_channels_json = inline_template('[<%= @bt_candidate_channels.map { |c| "\"#{c}\"" }.join(", ") %>]')

  if $safe_account {
    notify { 'fc-divoom-dm-device safe-account exclusion':
      message => 'SAFE-ACCOUNT-EXCLUSION: Divoom DM Pi device profile refused to apply on operator workstation',
    }

    if $facts['os']['family'] != 'windows' {
      # ensure_resource tolerates the audit directory being declared identically elsewhere.
      ensure_resource('file', '/var/log/flowercore-audit', {
        'ensure' => 'directory',
        'owner'  => 'root',
        'group'  => 'root',
        'mode'   => '0755',
      })

      file { '/var/log/flowercore-audit/safe-account-noop-fc-divoom-dm-device.log':
        ensure  => file,
        owner   => 'root',
        group   => 'root',
        mode    => '0644',
        content => "noop: divoom dm pi device profile refused to apply on safe-account host\n",
        require => File['/var/log/flowercore-audit'],
      }
    }
  } elsif $ensure == 'absent' {
    service { $service_name:
      ensure => stopped,
      enable => false,
    }

    # Stop the service first, then drop the unit and tell systemd about it.
    # The reload exec is refreshonly, so without this notify it never ran and
    # systemd kept the removed unit loaded.
    file { $unit_path:
      ensure  => absent,
      require => Service[$service_name],
      notify  => Exec['fc-divoom-dm-agent-systemd-reload'],
    }

    file { $registration_path:
      ensure  => absent,
      require => Service[$service_name],
    }

    exec { 'fc-divoom-dm-agent-systemd-reload':
      command     => '/usr/bin/systemctl daemon-reload',
      refreshonly => true,
      path        => ['/usr/bin', '/bin'],
    }
  } else {
    case $facts['os']['family'] {
      'Debian': {}
      default: { fail("profile::pi::service::divoom_dm_device only supports Debian-family OS, got ${facts['os']['family']}") }
    }

    # NOTE(review): Puppet does not create parent directories, so
    # /etc/flowercore/device-management, /var/lib/flowercore and
    # /var/log/flowercore must already be managed elsewhere - confirm.
    # NOTE(review): the unit template runs the agent as a non-root user while
    # these directories are root-owned 0755 - confirm the agent can write its
    # state and logs.
    file { [$config_dir, $state_dir, $log_dir]:
      ensure => directory,
      owner  => 'root',
      group  => 'root',
      mode   => '0755',
    }

    file { $registration_path:
      ensure  => file,
      owner   => 'root',
      group   => 'root',
      mode    => '0644',
      content => epp('profile/pi/fc_divoom_dm/divoom-device-registration.json.epp', {
        'device_id'                     => $device_id,
        'display_name'                  => $display_name,
        'host_fqdn'                     => $host_fqdn,
        'divoom_install_dir'            => $divoom_install_dir,
        'bt_channels_json'              => $bt_channels_json,
        'default_bt_channel'            => $default_bt_channel,
        'a2dp_default_state'            => $a2dp_default_state,
        'fm_radio_enabled'              => $fm_radio_enabled,
        'visible_render_proof_required' => $visible_render_proof_required,
      }),
      require => File[$config_dir],
    }

    file { $unit_path:
      ensure  => file,
      owner   => 'root',
      group   => 'root',
      mode    => '0644',
      content => epp('profile/pi/fc_divoom_dm/flowercore-divoom-dm-agent.service.epp', {
        'service_name'       => $service_name,
        'device_id'          => $device_id,
        'dm_web_url'         => $dm_web_url,
        'registration_path'  => $registration_path,
        'divoom_install_dir' => $divoom_install_dir,
        'agent_install_dir'  => $agent_install_dir,
        'agent_binary_path'  => $agent_binary_path,
        'state_dir'          => $state_dir,
        'log_dir'            => $log_dir,
      }),
      notify  => Exec['fc-divoom-dm-agent-systemd-reload'],
      require => File[$registration_path],
    }

    exec { 'fc-divoom-dm-agent-systemd-reload':
      command     => '/usr/bin/systemctl daemon-reload',
      refreshonly => true,
      path        => ['/usr/bin', '/bin'],
    }

    # The service is only ordered after the unit/registration; it does not
    # subscribe to them, so file changes never restart the agent.
    service { $service_name:
      ensure  => $service_ensure,
      enable  => $service_enabled,
      require => [
        File[$unit_path],
        File[$registration_path],
        Exec['fc-divoom-dm-agent-systemd-reload'],
      ],
    }
  }
}
|
||||
@@ -0,0 +1,34 @@
|
||||
<%# Device registration rendered by profile::pi::service::divoom_dm_device; every $variable comes from the hash passed to epp(). -%>
{
  "deviceId": "<%= $device_id %>",
  "displayName": "<%= $display_name %>",
  "hostFqdn": "<%= $host_fqdn %>",
  "kind": "DivoomMiniToo",
  "managedBy": "FlowerCore.DeviceManagement",
  "executionMode": "Pi",
  "transport": {
    "kind": "BluetoothSerial",
    "candidateChannels": <%= $bt_channels_json %>,
    "defaultChannel": "<%= $default_bt_channel %>",
    "deviceInfoIsRenderProof": false,
    "visibleRenderProofRequired": <%= $visible_render_proof_required %>
  },
  "paths": {
    "divoomInstallDir": "<%= $divoom_install_dir %>",
    "btLink": "<%= $divoom_install_dir %>/bt-link.sh",
    "btReset": "<%= $divoom_install_dir %>/bt-reset.sh",
    "audioLink": "<%= $divoom_install_dir %>/audio-link.sh"
  },
  "capabilities": {
    "supportsBluetoothSerial": true,
    "supportsBtChannelRedetect": true,
    "supportsBtHardReset": true,
    "supportsBtAudioProfileSwitch": true,
    "a2dpDefaultState": "<%= $a2dp_default_state %>",
    "fmRadioEnabled": <%= $fm_radio_enabled %>
  },
  "safety": {
    "preserveExistingService": "flowercore-divoom.service",
    "preserveDataDirectory": "<%= $divoom_install_dir %>/data",
    "doNotEnableFmRadio": true
  }
}
|
||||
@@ -0,0 +1,36 @@
|
||||
# systemd unit for the FlowerCore Divoom DM agent, rendered from EPP by
# profile::pi::service::divoom_dm_device.
[Unit]
Description=FlowerCore Divoom DM Agent Bluetooth executor
Documentation=https://github.com/astoltz/FlowerCore.Notes/blob/master/docs/standards/divoom-tv-hdmi-multitarget-render-substrate.md
Wants=network-online.target
After=network-online.target bluetooth.service
Requires=bluetooth.service
# The unit is skipped (not failed) until the agent binary, the registration
# and the existing Divoom helper scripts are all present.
ConditionPathExists=<%= $agent_binary_path %>
ConditionPathExists=<%= $registration_path %>
ConditionPathExists=<%= $divoom_install_dir %>/bt-link.sh
ConditionPathExists=<%= $divoom_install_dir %>/bt-reset.sh
ConditionPathExists=<%= $divoom_install_dir %>/audio-link.sh
# Start rate limiting is a [Unit] setting. In [Service] systemd ignores
# StartLimitIntervalSec= as an unknown key, so the 300s window never applied.
StartLimitIntervalSec=300s
StartLimitBurst=3

[Service]
Type=simple
User=stoltz
Group=stoltz
WorkingDirectory=<%= $agent_install_dir %>
Environment=DOTNET_CLI_TELEMETRY_OPTOUT=1
Environment=FLOWERCORE_DM_DEVICE_REGISTRATION=<%= $registration_path %>
Environment=Divoom__Bluetooth__DeviceInfoIsRenderProof=false
Environment=Divoom__Bluetooth__VisibleRenderProofRequired=true
Environment=Divoom__Bluetooth__A2dpDefaultState=off
ExecStart=<%= $agent_binary_path %> --mode=Pi --device-id=<%= $device_id %> --dm-web-url=<%= $dm_web_url %> --registration=<%= $registration_path %>
Restart=on-failure
RestartSec=10s
SupplementaryGroups=bluetooth audio dialout
NoNewPrivileges=true
PrivateTmp=true
ProtectSystem=strict
ProtectHome=true
# With ProtectSystem=strict only these two directories are writable.
ReadWritePaths=<%= $state_dir %> <%= $log_dir %>

[Install]
WantedBy=multi-user.target
|
||||
44
apps/fc-divoom-tv-pi/README.md
Normal file
44
apps/fc-divoom-tv-pi/README.md
Normal file
@@ -0,0 +1,44 @@
|
||||
# FlowerCore Divoom TV Pi HDMI
|
||||
|
||||
Source-controlled deploy shape for the native `FlowerCore.Divoom.Tv`
|
||||
Avalonia HDMI renderer on a Raspberry Pi connected to a TV.
|
||||
|
||||
This is a Puppet/systemd appliance bundle, not a Kubernetes application. It
|
||||
mirrors the existing `fc-signage-pi-player` pattern: bluejay-infra carries the
|
||||
systemd units, scripts, Hiera shape, and Puppet profile source that
|
||||
`FlowerCore.Puppet` vendors and installs.
|
||||
|
||||
## Scope
|
||||
|
||||
- Launch the future `FlowerCore.Divoom.Tv` linux-arm64 self-contained payload
|
||||
from `/opt/flowercore/divoom-tv/FlowerCore.Divoom.Tv`.
|
||||
- Prefer `cage` as the Wayland fullscreen compositor, with direct app launch as
|
||||
a fallback for development images.
|
||||
- Restart the app after HDMI hotplug with a 2 second DRM settle delay.
|
||||
- Keep all runtime state local: `/var/lib/fc-divoom-tv` and
|
||||
`/var/log/fc-divoom-tv`.
|
||||
- Avoid CDN/runtime fetches; the app renders the in-house Divoom scene catalog
|
||||
locally.
|
||||
|
||||
## Artifact Map
|
||||
|
||||
| Path | Use |
|
||||
| --- | --- |
|
||||
| `systemd/flowercore-divoom-tv.service` | Fullscreen Avalonia HDMI app service. |
|
||||
| `systemd/flowercore-divoom-tv-hdmi.service` | HDMI hotplug responder service. |
|
||||
| `systemd/99-flowercore-divoom-tv-hdmi.rules` | DRM udev hotplug rule. |
|
||||
| `scripts/flowercore-divoom-tv-prelaunch.sh` | Preflight checks and local directory creation. |
|
||||
| `scripts/flowercore-divoom-tv-launch.sh` | Cage-first fullscreen launcher. |
|
||||
| `scripts/flowercore-divoom-tv-hdmi-respond.sh` | Hotplug settle and restart script. |
|
||||
| `puppet/profile/pi/service/divoom_tv.pp` | Puppet profile shape to vendor into `FlowerCore.Puppet`. |
|
||||
| `hiera/example-divoom-tv-pi.iamworkin.lan.yaml` | Example node Hiera for a Divoom TV Pi. |
|
||||
|
||||
## Rollout Notes
|
||||
|
||||
1. Build `FlowerCore.Divoom.Tv` with `dotnet.exe publish -c Release -r linux-arm64 --self-contained`.
|
||||
2. Stage the payload to `/opt/flowercore/divoom-tv/` through the standard noc1
|
||||
jump path and avoid `/tmp` for unprivileged Pi scratch.
|
||||
3. Vendor the profile and static files into `FlowerCore.Puppet`.
|
||||
4. Run Puppet noop, then apply on the target Pi.
|
||||
5. Prove deployment with `systemctl is-active flowercore-divoom-tv.service`,
|
||||
journal lines showing frames presented, and a visible HDMI display check.
|
||||
@@ -0,0 +1,19 @@
|
||||
---
# Example node data for a dedicated Pi -> HDMI -> TV Divoom renderer.
# Copy into FlowerCore.Puppet data/nodes/<hostname>.iamworkin.lan.yaml only
# after the Pi has a static DHCP/DNS entry and the linux-arm64 payload exists.

facts:
  role: pi_prototype

profile::motd::role: "Divoom TV HDMI Renderer"

# Service lifecycle.
profile::pi::service::divoom_tv::ensure: "present"
profile::pi::service::divoom_tv::service_enabled: true
profile::pi::service::divoom_tv::service_ensure: "running"

# Filesystem layout.
profile::pi::service::divoom_tv::install_dir: "/opt/flowercore/divoom-tv"
profile::pi::service::divoom_tv::state_dir: "/var/lib/fc-divoom-tv"
profile::pi::service::divoom_tv::log_dir: "/var/log/fc-divoom-tv"

# Renderer settings written to /etc/flowercore/divoom-tv.env by the profile.
profile::pi::service::divoom_tv::presentation_mode: "PillarboxSquare"
profile::pi::service::divoom_tv::startup_scene: "bluejay-clock"
profile::pi::service::divoom_tv::reduced_motion: false
|
||||
149
apps/fc-divoom-tv-pi/puppet/profile/pi/service/divoom_tv.pp
Normal file
149
apps/fc-divoom-tv-pi/puppet/profile/pi/service/divoom_tv.pp
Normal file
@@ -0,0 +1,149 @@
|
||||
# @summary Installs the FlowerCore.Divoom.Tv HDMI renderer units, scripts and
#   udev rule on a Pi and manages the renderer service.
#
# Drop into FlowerCore.Puppet site-modules/profile/manifests/pi/service/divoom_tv.pp.
# Static files come from profile/pi/fc_divoom_tv/ after this bluejay-infra
# bundle is vendored into the Puppet control repo.
#
# Modes: on a safe-account host only a notify is declared; 'absent' stops the
# service and removes every installed file; 'present' installs packages, the
# service account, directories, env file, scripts, units and the udev rule.
#
# NOTE(review): the vendored unit and scripts hard-code the service account,
# the /opt, /var/lib and /var/log paths and the unit name. Overriding $user,
# $group, $install_dir, $state_dir, $log_dir or $service_name here does not
# change those static files - confirm before overriding.
#
# @param ensure Install ('present') or remove ('absent') the appliance files.
# @param service_enabled Whether the renderer service is enabled at boot.
# @param service_ensure Desired run state of the renderer service.
# @param service_name systemd unit name without the .service suffix.
# @param user System account that owns the install/state/log directories.
# @param group Primary group of that account.
# @param install_dir Directory holding the renderer payload.
# @param state_dir Runtime state directory; also the account's home.
# @param log_dir Log directory.
# @param presentation_mode Written as FC_DIVOOM_TV_PRESENTATION_MODE.
# @param startup_scene Written as FC_DIVOOM_TV_START_SCENE.
# @param reduced_motion Written as FC_DIVOOM_TV_REDUCED_MOTION.
class profile::pi::service::divoom_tv (
  Enum['present', 'absent']  $ensure            = 'present',
  Boolean                    $service_enabled   = false,
  Enum['running', 'stopped'] $service_ensure    = 'stopped',
  String                     $service_name      = 'flowercore-divoom-tv',
  String                     $user              = 'fc-divoom-tv',
  String                     $group             = 'fc-divoom-tv',
  String                     $install_dir       = '/opt/flowercore/divoom-tv',
  String                     $state_dir         = '/var/lib/fc-divoom-tv',
  String                     $log_dir           = '/var/log/fc-divoom-tv',
  String                     $presentation_mode = 'PillarboxSquare',
  String                     $startup_scene     = 'bluejay-clock',
  Boolean                    $reduced_motion    = false,
) {
  include profile::workstation::safe_account_exclusion

  $safe_account = $profile::workstation::safe_account_exclusion::safe_account

  $main_unit = "/etc/systemd/system/${service_name}.service"
  $hdmi_unit = "/etc/systemd/system/${service_name}-hdmi.service"
  $udev_rule = '/etc/udev/rules.d/99-flowercore-divoom-tv-hdmi.rules'
  $env_file  = '/etc/flowercore/divoom-tv.env'

  if $safe_account {
    notify { 'fc-divoom-tv safe-account exclusion':
      message => 'SAFE-ACCOUNT-EXCLUSION: Divoom TV Pi profile refused to apply on operator workstation',
    }
  } elsif $ensure == 'absent' {
    service { $service_name:
      ensure => stopped,
      enable => false,
    }

    # Stop first, then remove the units and reload systemd so the deleted
    # units do not stay loaded.
    file { [$main_unit, $hdmi_unit]:
      ensure  => absent,
      require => Service[$service_name],
      notify  => Exec['fc-divoom-tv-systemd-reload'],
    }

    # Dropping the rule must also reload udev, otherwise the hotplug rule
    # stays active until the next reload or reboot.
    file { $udev_rule:
      ensure => absent,
      notify => Exec['fc-divoom-tv-udev-reload'],
    }

    file { [
        '/usr/local/bin/flowercore-divoom-tv-prelaunch.sh',
        '/usr/local/bin/flowercore-divoom-tv-launch.sh',
        '/usr/local/bin/flowercore-divoom-tv-hdmi-respond.sh',
        $env_file,
      ]:
        ensure  => absent,
        require => Service[$service_name],
    }

    exec { 'fc-divoom-tv-systemd-reload':
      command     => '/usr/bin/systemctl daemon-reload',
      refreshonly => true,
      path        => ['/usr/bin', '/bin'],
    }

    exec { 'fc-divoom-tv-udev-reload':
      command     => '/usr/bin/udevadm control --reload-rules',
      refreshonly => true,
      path        => ['/usr/bin', '/bin'],
    }
  } else {
    case $facts['os']['family'] {
      'Debian': {}
      default: { fail("profile::pi::service::divoom_tv only supports Debian-family OS, got ${facts['os']['family']}") }
    }

    package { ['cage', 'libgbm1', 'libdrm2', 'libxkbcommon0', 'fonts-dejavu-core']:
      ensure => installed,
    }

    group { $group:
      ensure => present,
      system => true,
    }

    user { $user:
      ensure     => present,
      system     => true,
      gid        => $group,
      home       => $state_dir,
      managehome => false,
      shell      => '/usr/sbin/nologin',
      require    => Group[$group],
    }

    file { [$install_dir, $state_dir, $log_dir]:
      ensure => directory,
      owner  => $user,
      group  => $group,
      mode   => '0755',
    }

    # Shared configuration directory: keep it root-owned so the renderer
    # account cannot replace or rename files that live next to its env file.
    file { '/etc/flowercore':
      ensure => directory,
      owner  => 'root',
      group  => 'root',
      mode   => '0755',
    }

    file { $env_file:
      ensure  => file,
      owner   => 'root',
      group   => 'root',
      mode    => '0644',
      content => "FC_DIVOOM_TV_PRESENTATION_MODE=${presentation_mode}\nFC_DIVOOM_TV_START_SCENE=${startup_scene}\nFC_DIVOOM_TV_REDUCED_MOTION=${reduced_motion}\n",
      require => File['/etc/flowercore'],
    }

    # destination path => path below puppet:///modules/
    $script_map = {
      '/usr/local/bin/flowercore-divoom-tv-prelaunch.sh'    => 'profile/pi/fc_divoom_tv/flowercore-divoom-tv-prelaunch.sh',
      '/usr/local/bin/flowercore-divoom-tv-launch.sh'       => 'profile/pi/fc_divoom_tv/flowercore-divoom-tv-launch.sh',
      '/usr/local/bin/flowercore-divoom-tv-hdmi-respond.sh' => 'profile/pi/fc_divoom_tv/flowercore-divoom-tv-hdmi-respond.sh',
    }

    $script_map.each |$dest, $src| {
      file { $dest:
        ensure => file,
        owner  => 'root',
        group  => 'root',
        mode   => '0755',
        source => "puppet:///modules/${src}",
      }
    }

    $unit_map = {
      $main_unit => 'profile/pi/fc_divoom_tv/flowercore-divoom-tv.service',
      $hdmi_unit => 'profile/pi/fc_divoom_tv/flowercore-divoom-tv-hdmi.service',
    }

    $unit_map.each |$dest, $src| {
      file { $dest:
        ensure => file,
        owner  => 'root',
        group  => 'root',
        mode   => '0644',
        source => "puppet:///modules/${src}",
        notify => Exec['fc-divoom-tv-systemd-reload'],
      }
    }

    file { $udev_rule:
      ensure => file,
      owner  => 'root',
      group  => 'root',
      mode   => '0644',
      source => 'puppet:///modules/profile/pi/fc_divoom_tv/99-flowercore-divoom-tv-hdmi.rules',
      notify => Exec['fc-divoom-tv-udev-reload'],
    }

    exec { 'fc-divoom-tv-systemd-reload':
      command     => '/usr/bin/systemctl daemon-reload',
      refreshonly => true,
      path        => ['/usr/bin', '/bin'],
    }

    exec { 'fc-divoom-tv-udev-reload':
      command     => '/usr/bin/udevadm control --reload-rules',
      refreshonly => true,
      path        => ['/usr/bin', '/bin'],
    }

    # The unit runs as $user and lists the state/log directories in
    # ReadWritePaths, so both must exist before the first start.
    service { $service_name:
      ensure  => $service_ensure,
      enable  => $service_enabled,
      require => [
        User[$user],
        File[$state_dir],
        File[$log_dir],
        File[$main_unit],
        File[$env_file],
        File['/usr/local/bin/flowercore-divoom-tv-prelaunch.sh'],
        File['/usr/local/bin/flowercore-divoom-tv-launch.sh'],
        Exec['fc-divoom-tv-systemd-reload'],
      ],
    }
  }
}
|
||||
@@ -0,0 +1,5 @@
|
||||
#!/usr/bin/env bash
# HDMI hotplug responder: wait for the display to settle, then restart the
# fullscreen renderer service.
set -euo pipefail

readonly settle_seconds=2
readonly renderer_unit='flowercore-divoom-tv.service'

sleep "${settle_seconds}"
systemctl restart "${renderer_unit}"
|
||||
25
apps/fc-divoom-tv-pi/scripts/flowercore-divoom-tv-launch.sh
Normal file
25
apps/fc-divoom-tv-pi/scripts/flowercore-divoom-tv-launch.sh
Normal file
@@ -0,0 +1,25 @@
|
||||
#!/usr/bin/env bash
# Starts FlowerCore.Divoom.Tv inside the cage compositor when cage is on PATH,
# otherwise launches the renderer directly. Extra arguments are passed through.
set -euo pipefail

# Each setting can be overridden through its FC_DIVOOM_TV_* environment variable.
renderer_bin="${FC_DIVOOM_TV_BIN:-/opt/flowercore/divoom-tv/FlowerCore.Divoom.Tv}"
state_dir="${FC_DIVOOM_TV_STATE_DIR:-/var/lib/fc-divoom-tv}"
log_dir="${FC_DIVOOM_TV_LOG_DIR:-/var/log/fc-divoom-tv}"
presentation_mode="${FC_DIVOOM_TV_PRESENTATION_MODE:-PillarboxSquare}"
startup_scene="${FC_DIVOOM_TV_START_SCENE:-bluejay-clock}"
reduced_motion="${FC_DIVOOM_TV_REDUCED_MOTION:-false}"

# Full renderer command line: binary, fixed options, then caller arguments.
renderer_cmd=(
  "${renderer_bin}"
  "--target=hdmi"
  "--presentation-mode=${presentation_mode}"
  "--startup-scene=${startup_scene}"
  "--reduced-motion=${reduced_motion}"
  "--state-dir=${state_dir}"
  "--log-dir=${log_dir}"
  "$@"
)

if ! command -v cage >/dev/null 2>&1; then
  echo "[$(date -Is)] cage not found; launching FlowerCore.Divoom.Tv directly" >&2
  exec "${renderer_cmd[@]}"
fi

exec cage -- "${renderer_cmd[@]}"
|
||||
@@ -0,0 +1,23 @@
|
||||
#!/usr/bin/env bash
# Preflight for the Divoom TV renderer: creates the state/log directories,
# fails when the renderer binary is missing, and logs display/compositor hints.
set -euo pipefail

renderer_bin="${FC_DIVOOM_TV_BIN:-/opt/flowercore/divoom-tv/FlowerCore.Divoom.Tv}"
state_dir="${FC_DIVOOM_TV_STATE_DIR:-/var/lib/fc-divoom-tv}"
log_dir="${FC_DIVOOM_TV_LOG_DIR:-/var/log/fc-divoom-tv}"

# Succeeds when at least one HDMI connector entry exists under /sys/class/drm.
hdmi_connector_present() {
  find /sys/class/drm -maxdepth 1 -name 'card*-HDMI-A-*' -print -quit | grep -q .
}

mkdir -p "${state_dir}" "${log_dir}"

# The only hard failure: without the binary there is nothing to launch.
if [[ ! -x "${renderer_bin}" ]]; then
  echo "[$(date -Is)] missing executable ${renderer_bin}" >&2
  exit 1
fi

# A missing connector is only a warning.
if [[ -d /sys/class/drm ]] && ! hdmi_connector_present; then
  echo "[$(date -Is)] no HDMI connector visible yet; continuing so the app can wait for display" >&2
fi

if ! command -v cage >/dev/null 2>&1; then
  echo "[$(date -Is)] cage not installed; direct launch fallback will be used" >&2
else
  echo "[$(date -Is)] cage available for fullscreen Wayland launch"
fi
|
||||
@@ -0,0 +1,2 @@
|
||||
# Settle DRM for 2s before restarting the fullscreen Avalonia renderer.
# The kernel sends DRM hotplug "change" uevents on the card device (KERNEL is
# e.g. "card0") with HOTPLUG=1, not on the per-connector sysfs entries, so the
# rule matches on that property instead of a "card?-HDMI-A-?" kernel name.
# --no-block keeps the udev worker from waiting on the responder's settle delay.
SUBSYSTEM=="drm", ACTION=="change", ENV{HOTPLUG}=="1", RUN+="/usr/bin/systemctl --no-block start flowercore-divoom-tv-hdmi.service"
|
||||
@@ -0,0 +1,7 @@
|
||||
# One-shot unit started by the DRM hotplug udev rule; the script it runs
# waits briefly and then restarts flowercore-divoom-tv.service.
[Unit]
Description=FlowerCore Divoom TV HDMI hotplug responder
DefaultDependencies=no

[Service]
Type=oneshot
ExecStart=/usr/local/bin/flowercore-divoom-tv-hdmi-respond.sh
|
||||
40
apps/fc-divoom-tv-pi/systemd/flowercore-divoom-tv.service
Normal file
40
apps/fc-divoom-tv-pi/systemd/flowercore-divoom-tv.service
Normal file
@@ -0,0 +1,40 @@
|
||||
# Fullscreen Avalonia HDMI renderer. Runs as the fc-divoom-tv account on tty1
# with a private runtime directory used as XDG_RUNTIME_DIR.
[Unit]
Description=FlowerCore Divoom TV HDMI Renderer (Avalonia fullscreen)
Documentation=https://github.com/astoltz/FlowerCore.Notes/blob/master/docs/standards/divoom-tv-hdmi-multitarget-render-substrate.md
Wants=network-online.target
After=network-online.target systemd-user-sessions.service
ConditionPathExists=/opt/flowercore/divoom-tv/FlowerCore.Divoom.Tv
# Start rate limiting is a [Unit] setting. In [Service] systemd ignores
# StartLimitIntervalSec= as an unknown key, so the 300s window never applied.
StartLimitIntervalSec=300s
StartLimitBurst=5

[Service]
Type=simple
User=fc-divoom-tv
Group=fc-divoom-tv
WorkingDirectory=/opt/flowercore/divoom-tv
# Leading "-": a missing env file is not an error; the launch script has defaults.
EnvironmentFile=-/etc/flowercore/divoom-tv.env
Environment=DOTNET_CLI_TELEMETRY_OPTOUT=1
Environment=XDG_RUNTIME_DIR=/run/fc-divoom-tv
RuntimeDirectory=fc-divoom-tv
RuntimeDirectoryMode=0700
ExecStartPre=/usr/local/bin/flowercore-divoom-tv-prelaunch.sh
ExecStart=/usr/local/bin/flowercore-divoom-tv-launch.sh
Restart=always
RestartSec=10s
MemoryMax=2G
MemoryHigh=1500M
PrivateTmp=true
NoNewPrivileges=true
ProtectSystem=strict
ProtectHome=true
ReadWritePaths=/var/lib/fc-divoom-tv /var/log/fc-divoom-tv /run/fc-divoom-tv
TTYPath=/dev/tty1
StandardInput=tty
StandardOutput=journal
StandardError=journal
TTYReset=yes
TTYVHangup=yes
TTYVTDisallocate=yes

[Install]
WantedBy=graphical.target
|
||||
481
apps/fc-dns/fc-dns.yaml
Normal file
481
apps/fc-dns/fc-dns.yaml
Normal file
@@ -0,0 +1,481 @@
|
||||
---
|
||||
# Namespace for the fc-dns workloads defined in this file.
apiVersion: v1
kind: Namespace
metadata:
  name: fc-dns
  labels: {app.kubernetes.io/part-of: flowercore}
|
||||
---
|
||||
# 1Password-backed Secret for the pfSense admin password.
# The operator watches this CRD, resolves the vault item, and produces a
# K8s Secret of the same name with each 1P field as a key. The `password`
# field of the "pfSense Admin" item becomes Secret key `password`.
apiVersion: onepassword.com/v1
kind: OnePasswordItem
metadata: {name: pfsense-admin, namespace: fc-dns}
spec:
  itemPath: 'vaults/IAmWorkin/items/pfSense Admin'
---
# OIDC client settings; dns-web reads issuer_url, client_id and client_secret
# from the Secret of the same name.
apiVersion: onepassword.com/v1
kind: OnePasswordItem
metadata: {name: dns-oidc-client, namespace: fc-dns}
spec:
  itemPath: 'vaults/IAmWorkin/items/dns-oidc-client'
|
||||
---
|
||||
# Volume mounted at /data by dns-web (holds the SQLite database file).
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: dns-web-data
  namespace: fc-dns
spec:
  storageClassName: longhorn
  accessModes:
    - ReadWriteOnce
  resources:
    requests: {storage: 1Gi}
|
||||
---
|
||||
# Production appsettings for dns-web, mounted over
# /app/appsettings.Production.json by the Deployment.
apiVersion: v1
kind: ConfigMap
metadata: {name: dns-web-config, namespace: fc-dns}
data:
  appsettings.Production.json: |
    {
      "FlowerCore": {
        "Auth": {
          "Enabled": true,
          "Oidc": {
            "Enabled": true,
            "Audience": "dns",
            "RequireHttpsMetadata": true
          }
        },
        "Database": {
          "Provider": "Sqlite",
          "ConnectionStrings": {
            "Sqlite": "Data Source=/data/dns.db"
          }
        },
        "Tenant": {
          "DefaultTenantId": "default",
          "JwtClaimsEnabled": false,
          "DefaultTenantHosts": [
            "dns.iamworkin.lan"
          ]
        },
        "Audit": {
          "HashChain": {
            "BridgeSensitivity": {
              "Distribution": "Warn"
            }
          }
        }
      }
    }
|
||||
---
|
||||
# dns-web: single replica with a read-only root filesystem; writable paths
# are the data PVC plus emptyDir volumes for /tmp and /app/logs.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: dns-web
  namespace: fc-dns
  labels:
    app.kubernetes.io/name: dns-web
    app.kubernetes.io/managed-by: flowercore
spec:
  replicas: 1
  # NOTE(review): presumably Recreate because the data volume is ReadWriteOnce - confirm.
  strategy: {type: Recreate}
  selector:
    matchLabels: {app.kubernetes.io/name: dns-web}
  template:
    metadata:
      labels: {app.kubernetes.io/name: dns-web}
      annotations:
        prometheus.io/scrape: 'true'
        prometheus.io/port: '5320'
        prometheus.io/path: '/metrics/prometheus'
        flowercore.io/healthz-auth-policy: 'allow-anonymous'
    spec:
      serviceAccountName: dns-web
      securityContext:
        runAsNonRoot: true
        runAsUser: 1654
        runAsGroup: 1654
        fsGroup: 1654
      containers:
        - name: dns-web
          image: localhost/fc-dns-web:v20260604-oidc-proper
          imagePullPolicy: Never
          securityContext:
            readOnlyRootFilesystem: true
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
          ports:
            - containerPort: 5320
          env:
            # pfSense admin password resolved by the 1Password operator.
            # `FallbackPassword` is the Slice A seam exposed by
            # OptionsFallbackPasswordResolver; Slice B will replace it with
            # a pull-at-runtime 1P Connect resolver once Shared.Vault ships.
            - name: FlowerCore__Dns__Providers__PfSenseUnbound__FallbackPassword
              valueFrom:
                secretKeyRef: {name: pfsense-admin, key: password}
            # OIDC values come from the dns-oidc-client Secret; each key is
            # optional, so a missing key does not block pod start.
            - name: FlowerCore__Auth__Oidc__Authority
              valueFrom:
                secretKeyRef: {name: dns-oidc-client, key: issuer_url, optional: true}
            - name: FlowerCore__Auth__Oidc__ClientId
              valueFrom:
                secretKeyRef: {name: dns-oidc-client, key: client_id, optional: true}
            - name: FlowerCore__Auth__Oidc__ClientSecret
              valueFrom:
                secretKeyRef: {name: dns-oidc-client, key: client_secret, optional: true}
            - name: FlowerCore__Auth__Enabled
              value: 'true'
            - name: FlowerCore__Auth__Oidc__Enabled
              value: 'true'
            - name: FlowerCore__Auth__Oidc__Audience
              value: 'dns'
          volumeMounts:
            - {name: data, mountPath: /data}
            - {name: tmp, mountPath: /tmp}
            - {name: logs, mountPath: /app/logs}
            - name: config
              mountPath: /app/appsettings.Production.json
              subPath: appsettings.Production.json
              readOnly: true
          resources:
            requests: {cpu: 50m, memory: 96Mi}
            limits: {cpu: 300m, memory: 384Mi}
          readinessProbe:
            httpGet: {path: /healthz, port: 5320}
            initialDelaySeconds: 10
            periodSeconds: 10
          livenessProbe:
            httpGet: {path: /healthz, port: 5320}
            initialDelaySeconds: 20
            periodSeconds: 30
      volumes:
        - name: data
          persistentVolumeClaim: {claimName: dns-web-data}
        - name: tmp
          emptyDir: {}
        - name: logs
          emptyDir: {}
        - name: config
          configMap: {name: dns-web-config}
|
||||
---
|
||||
# In-cluster endpoint for dns-web on port 5320.
apiVersion: v1
kind: Service
metadata: {name: dns-web, namespace: fc-dns}
spec:
  type: ClusterIP
  selector: {app.kubernetes.io/name: dns-web}
  ports:
    - {port: 5320, targetPort: 5320}
---
# Identity used by the dns-web pods.
apiVersion: v1
kind: ServiceAccount
metadata: {name: dns-web, namespace: fc-dns}
|
||||
---
|
||||
# Cluster-wide read access for dns-web.
# NOTE(review): this includes get/list/watch on Secrets in every namespace -
# confirm that breadth is required.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: dns-web
rules:
  - apiGroups:
      - ""
    resources:
      - namespaces
      - pods
      - services
      - secrets
      - configmaps
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - cert-manager.io
    resources:
      - certificates
    verbs:
      - get
      - list
      - watch
---
# Grants the role above to the dns-web ServiceAccount in fc-dns.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: dns-web
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: dns-web
subjects:
  - {kind: ServiceAccount, name: dns-web, namespace: fc-dns}
|
||||
---
|
||||
# TLS certificate for dns.iamworkin.lan, issued by the step-ca-dns01
# ClusterIssuer and stored in the dns-web-tls Secret used by the IngressRoute.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata: {name: dns-web-cert, namespace: fc-dns}
spec:
  secretName: dns-web-tls
  dnsNames:
    - dns.iamworkin.lan
  duration: 720h
  renewBefore: 240h
  issuerRef: {kind: ClusterIssuer, name: step-ca-dns01}
|
||||
---
|
||||
# Traefik route: dns.iamworkin.lan on the websecure entry point -> dns-web:5320.
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata: {name: dns-web, namespace: fc-dns}
spec:
  entryPoints:
    - websecure
  routes:
    - kind: Rule
      match: Host(`dns.iamworkin.lan`)
      services:
        - {name: dns-web, port: 5320}
  tls: {secretName: dns-web-tls}
|
||||
---
|
||||
# Identity used by the dns-acme-webhook pods.
apiVersion: v1
kind: ServiceAccount
metadata: {name: dns-acme-webhook, namespace: fc-dns}
|
||||
---
|
||||
# ACME DNS-01 webhook. Serves HTTPS on 9443 with the certificate from the
# dns-acme-webhook-tls Secret and calls dns-web at http://dns-web:5320.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: dns-acme-webhook
  namespace: fc-dns
  labels:
    app.kubernetes.io/name: dns-acme-webhook
    app.kubernetes.io/managed-by: flowercore
spec:
  replicas: 1
  selector:
    matchLabels: {app.kubernetes.io/name: dns-acme-webhook}
  template:
    metadata:
      labels: {app.kubernetes.io/name: dns-acme-webhook}
    spec:
      serviceAccountName: dns-acme-webhook
      securityContext:
        runAsNonRoot: true
        runAsUser: 1654
        runAsGroup: 1654
        fsGroup: 1654
      containers:
        - name: dns-acme-webhook
          image: localhost/fc-dns-acme-webhook:v202604290845
          imagePullPolicy: Never
          securityContext:
            readOnlyRootFilesystem: true
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
          ports:
            - {name: https, containerPort: 9443}
          env:
            # Kestrel listener and serving certificate.
            - {name: ASPNETCORE_URLS, value: 'https://+:9443'}
            - {name: Kestrel__Certificates__Default__Path, value: /tls/tls.crt}
            - {name: Kestrel__Certificates__Default__KeyPath, value: /tls/tls.key}
            # Webhook identity; group/solver/version match the APIService and
            # the ClusterIssuer solver in this file.
            - {name: FlowerCore__Dns__AcmeWebhook__ServiceBaseUrl, value: 'http://dns-web:5320'}
            - {name: FlowerCore__Dns__AcmeWebhook__GroupName, value: acme.flowercore.io}
            - {name: FlowerCore__Dns__AcmeWebhook__SolverName, value: flowercore-dns}
            - {name: FlowerCore__Dns__AcmeWebhook__Version, value: v1alpha1}
          volumeMounts:
            - {name: tls, mountPath: /tls, readOnly: true}
            - {name: tmp, mountPath: /tmp}
            - {name: logs, mountPath: /app/logs}
          resources:
            requests: {cpu: 25m, memory: 64Mi}
            limits: {cpu: 200m, memory: 256Mi}
          readinessProbe:
            httpGet: {scheme: HTTPS, path: /readyz, port: https}
            initialDelaySeconds: 5
            periodSeconds: 10
            timeoutSeconds: 5
          livenessProbe:
            httpGet: {scheme: HTTPS, path: /healthz, port: https}
            initialDelaySeconds: 10
            periodSeconds: 20
            timeoutSeconds: 5
      volumes:
        - name: tls
          secret: {secretName: dns-acme-webhook-tls}
        - name: tmp
          emptyDir: {}
        - name: logs
          emptyDir: {}
|
||||
---
|
||||
# Exposes the webhook's named "https" container port on service port 443.
apiVersion: v1
kind: Service
metadata: {name: dns-acme-webhook, namespace: fc-dns}
spec:
  type: ClusterIP
  selector: {app.kubernetes.io/name: dns-acme-webhook}
  ports:
    - {name: https, port: 443, targetPort: https}
|
||||
---
|
||||
# Webhook PKI chain: self-signed Issuer -> CA Certificate -> CA Issuer ->
# serving Certificate stored in dns-acme-webhook-tls.
apiVersion: cert-manager.io/v1
kind: Issuer
metadata: {name: dns-acme-webhook-selfsigned, namespace: fc-dns}
spec:
  selfSigned: {}
---
# Private CA for the webhook.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata: {name: dns-acme-webhook-ca, namespace: fc-dns}
spec:
  isCA: true
  commonName: ca.dns-acme-webhook.fc-dns
  secretName: dns-acme-webhook-ca
  duration: 43800h
  issuerRef: {name: dns-acme-webhook-selfsigned}
---
# Issuer backed by the CA Secret above.
apiVersion: cert-manager.io/v1
kind: Issuer
metadata: {name: dns-acme-webhook-ca-issuer, namespace: fc-dns}
spec:
  ca: {secretName: dns-acme-webhook-ca}
---
# Serving certificate mounted at /tls by the webhook Deployment.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata: {name: dns-acme-webhook-serving-cert, namespace: fc-dns}
spec:
  secretName: dns-acme-webhook-tls
  duration: 8760h
  issuerRef: {name: dns-acme-webhook-ca-issuer}
  dnsNames:
    - dns-acme-webhook
    - dns-acme-webhook.fc-dns
    - dns-acme-webhook.fc-dns.svc
|
||||
---
|
||||
# Registers acme.flowercore.io/v1alpha1 with the API aggregator and points it
# at the webhook Service; the annotation asks cert-manager to inject the CA
# of the serving certificate.
apiVersion: apiregistration.k8s.io/v1
kind: APIService
metadata:
  name: v1alpha1.acme.flowercore.io
  annotations:
    cert-manager.io/inject-ca-from: fc-dns/dns-acme-webhook-serving-cert
spec:
  group: acme.flowercore.io
  version: v1alpha1
  groupPriorityMinimum: 1000
  versionPriority: 15
  service: {name: dns-acme-webhook, namespace: fc-dns}
|
||||
---
|
||||
# Allows creating flowercore-dns resources in the acme.flowercore.io group.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: dns-acme-webhook-solver
rules:
  - apiGroups:
      - acme.flowercore.io
    resources:
      - flowercore-dns
    verbs:
      - create
---
# Grants the solver role to the cert-manager ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: dns-acme-webhook-solver
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: dns-acme-webhook-solver
subjects:
  - {kind: ServiceAccount, name: cert-manager, namespace: cert-manager}
|
||||
---
|
||||
# ACME ClusterIssuer against the step-ca directory at 10.0.56.10:9443, using
# the flowercore-dns webhook solver for DNS-01. caBundle is the base64-encoded
# PEM certificate used to trust the ACME server.
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: step-ca-dns01
spec:
  acme:
    server: https://10.0.56.10:9443/acme/acme/directory
    caBundle: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJ4RENDQVdxZ0F3SUJBZ0lSQVBZMzU3RzZvdzZ6TUFMNSs0YlMya2t3Q2dZSUtvWkl6ajBFQXdJd1FERWEKTUJnR0ExVUVDaE1SU1VGdFYyOXlhMmx1SUVGRFRVVWdRMEV4SWpBZ0JnTlZCQU1UR1VsQmJWZHZjbXRwYmlCQgpRMDFGSUVOQklGSnZiM1FnUTBFd0hoY05Nall3TXpBNE1UZ3dOekV4V2hjTk16WXdNekExTVRnd056RXhXakJBCk1Sb3dHQVlEVlFRS0V4RkpRVzFYYjNKcmFXNGdRVU5OUlNCRFFURWlNQ0FHQTFVRUF4TVpTVUZ0VjI5eWEybHUKSUVGRFRVVWdRMEVnVW05dmRDQkRRVEJaTUJNR0J5cUdTTTQ5QWdFR0NDcUdTTTQ5QXdFSEEwSUFCSjJuMDRYMQpKWm81WmRxL2kxSWR2OCtmcXdaeUF6Qmg3d2hicWowU1dzSkw4VVdSYWJDTXFZQ3M3K2RYTzB4UlN6cWt3RkRMCngrdm9vT2FpOFJnUk5oYWpSVEJETUE0R0ExVWREd0VCL3dRRUF3SUJCakFTQmdOVkhSTUJBZjhFQ0RBR0FRSC8KQWdFQk1CMEdBMVVkRGdRV0JCUm51UFBRUjZpTS9INnZPbHVpVTNTeWdheXo4akFLQmdncWhrak9QUVFEQWdOSQpBREJGQWlFQXJRSzlkWVBHbUFac2RZbmp6aXVGVlZFNU5LWlVjY2VZdkdmR0MrdExYVXNDSUF1ZEYyekpyQ1JxCjNtSzUwWlpFVC9md1RrSndpRUY0ODI0bWpQOHAxQ0tNCi0tLS0tRU5EIENFUlRJRklDQVRFLS0tLS0K
    privateKeySecretRef: {name: step-ca-dns01-account-key}
    solvers:
      - dns01:
          webhook: {groupName: acme.flowercore.io, solverName: flowercore-dns}
|
||||
6
apps/fc-dns/kustomization.yaml
Normal file
6
apps/fc-dns/kustomization.yaml
Normal file
@@ -0,0 +1,6 @@
|
||||
# ArgoCD's bluejay-infra ApplicationSet discovers apps/* directories on main.
|
||||
# The kustomization is included for local previews and single-app validation.
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
resources:
|
||||
- fc-dns.yaml
|
||||
169
apps/fc-library/fc-library.yaml
Normal file
169
apps/fc-library/fc-library.yaml
Normal file
@@ -0,0 +1,169 @@
|
||||
# FlowerCore.Library.Web GitOps adoption manifest.
|
||||
#
|
||||
# Authored from the already-live fc-library resources on 2026-06-04.
|
||||
# Keep the live image tag, Service ClusterIP, and PVC volumeName unchanged so
|
||||
# ArgoCD adopts in place instead of replacing the workload or data volume.
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: library-web-data
|
||||
namespace: fc-library
|
||||
labels:
|
||||
app.kubernetes.io/name: library-web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
argocd.argoproj.io/instance: infra-fc-library
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
resources:
|
||||
requests:
|
||||
storage: 1Gi
|
||||
storageClassName: longhorn
|
||||
volumeMode: Filesystem
|
||||
volumeName: pvc-2690bae2-4ee0-417a-b95f-50ec5c632b63
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: library-web
|
||||
namespace: fc-library
|
||||
labels:
|
||||
app.kubernetes.io/name: library-web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
argocd.argoproj.io/instance: infra-fc-library
|
||||
spec:
|
||||
progressDeadlineSeconds: 600
|
||||
replicas: 1
|
||||
revisionHistoryLimit: 3
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: library-web
|
||||
strategy:
|
||||
type: Recreate
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
prometheus.io/path: /metrics/prometheus
|
||||
prometheus.io/port: "5000"
|
||||
prometheus.io/scrape: "true"
|
||||
labels:
|
||||
app.kubernetes.io/name: library-web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: library-web-config
|
||||
image: localhost/fc-library-web:v20260602-library-owned-deploy-fix1
|
||||
imagePullPolicy: Never
|
||||
livenessProbe:
|
||||
failureThreshold: 3
|
||||
httpGet:
|
||||
path: /health
|
||||
port: 5000
|
||||
scheme: HTTP
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 30
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 5
|
||||
name: library-web
|
||||
ports:
|
||||
- containerPort: 5000
|
||||
name: http
|
||||
protocol: TCP
|
||||
readinessProbe:
|
||||
failureThreshold: 6
|
||||
httpGet:
|
||||
path: /health
|
||||
port: 5000
|
||||
scheme: HTTP
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 5
|
||||
resources: {}
|
||||
terminationMessagePath: /dev/termination-log
|
||||
terminationMessagePolicy: File
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: data
|
||||
dnsPolicy: ClusterFirst
|
||||
restartPolicy: Always
|
||||
schedulerName: default-scheduler
|
||||
securityContext: {}
|
||||
terminationGracePeriodSeconds: 30
|
||||
volumes:
|
||||
- name: data
|
||||
persistentVolumeClaim:
|
||||
claimName: library-web-data
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: library-web
|
||||
namespace: fc-library
|
||||
labels:
|
||||
app.kubernetes.io/name: library-web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
argocd.argoproj.io/instance: infra-fc-library
|
||||
spec:
|
||||
clusterIP: 10.43.179.63
|
||||
clusterIPs:
|
||||
- 10.43.179.63
|
||||
internalTrafficPolicy: Cluster
|
||||
ipFamilies:
|
||||
- IPv4
|
||||
ipFamilyPolicy: SingleStack
|
||||
ports:
|
||||
- name: http
|
||||
port: 80
|
||||
protocol: TCP
|
||||
targetPort: 5000
|
||||
selector:
|
||||
app.kubernetes.io/name: library-web
|
||||
sessionAffinity: None
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: cert-manager.io/v1
|
||||
kind: Certificate
|
||||
metadata:
|
||||
name: library-web-tls
|
||||
namespace: fc-library
|
||||
labels:
|
||||
app.kubernetes.io/name: library-web-tls
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
argocd.argoproj.io/instance: infra-fc-library
|
||||
spec:
|
||||
dnsNames:
|
||||
- library.iamworkin.lan
|
||||
issuerRef:
|
||||
kind: ClusterIssuer
|
||||
name: step-ca-acme
|
||||
secretName: library-web-tls
|
||||
---
|
||||
apiVersion: traefik.io/v1alpha1
|
||||
kind: IngressRoute
|
||||
metadata:
|
||||
name: library-web
|
||||
namespace: fc-library
|
||||
labels:
|
||||
app.kubernetes.io/name: library-web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
argocd.argoproj.io/instance: infra-fc-library
|
||||
spec:
|
||||
entryPoints:
|
||||
- websecure
|
||||
routes:
|
||||
- kind: Rule
|
||||
match: Host(`library.iamworkin.lan`)
|
||||
services:
|
||||
- name: library-web
|
||||
port: 80
|
||||
tls:
|
||||
secretName: library-web-tls
|
||||
296
apps/fc-media/fc-media.yaml
Normal file
296
apps/fc-media/fc-media.yaml
Normal file
@@ -0,0 +1,296 @@
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: fc-media
|
||||
labels:
|
||||
app.kubernetes.io/name: fc-media
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
---
|
||||
apiVersion: onepassword.com/v1
|
||||
kind: OnePasswordItem
|
||||
metadata:
|
||||
name: media-oidc-client
|
||||
namespace: fc-media
|
||||
labels:
|
||||
app.kubernetes.io/name: fc-media-web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
spec:
|
||||
itemPath: "vaults/IAmWorkin/items/media-oidc-client"
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: fc-media-config
|
||||
namespace: fc-media
|
||||
labels:
|
||||
app.kubernetes.io/name: fc-media-web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
data:
|
||||
appsettings.Production.json: |
|
||||
{
|
||||
"DatabaseProvider": "Sqlite",
|
||||
"ConnectionStrings": {
|
||||
"Sqlite": "Data Source=/data/media.db"
|
||||
},
|
||||
"FlowerCore": {
|
||||
"Auth": {
|
||||
"Enabled": true,
|
||||
"Oidc": {
|
||||
"Authority": "https://id.iamworkin.lan/application/o/media/",
|
||||
"ClientId": "media",
|
||||
"ClientSecret": "",
|
||||
"Audience": "media",
|
||||
"RequireHttpsMetadata": true
|
||||
}
|
||||
},
|
||||
"Tenant": {
|
||||
"JwtClaimsEnabled": false,
|
||||
"DefaultTenantHosts": [ "media.iamworkin.lan" ]
|
||||
}
|
||||
},
|
||||
"Media": {
|
||||
"LibraryRoot": "/media/library",
|
||||
"Sources": [
|
||||
{
|
||||
"Name": "BlueJayNAS Video",
|
||||
"Driver": "Nfs",
|
||||
"MountedPath": "/media/library",
|
||||
"RemotePath": "nfs://10.0.58.3/volume1/video",
|
||||
"IsEnabled": true,
|
||||
"IsDefault": true,
|
||||
"Notes": "Synology NFS media share mounted read-only inside the cluster."
|
||||
}
|
||||
],
|
||||
"GeneratedRoot": "/data/generated",
|
||||
"TranscodeRoot": "/data/transcodes",
|
||||
"InboxPath": "/media/inbox",
|
||||
"InboxScanIntervalMinutes": 5,
|
||||
"ScanOnStartup": false,
|
||||
"ComputeChecksums": false,
|
||||
"FfmpegCommand": "ffmpeg",
|
||||
"FfprobeCommand": "ffprobe",
|
||||
"Hls": {
|
||||
"MaxConcurrentJobs": 1
|
||||
},
|
||||
"DefaultViewerName": "BlueJay",
|
||||
"Dlna": {
|
||||
"IsEnabled": true,
|
||||
"MulticastAddress": "239.255.255.250",
|
||||
"Port": 1900,
|
||||
"DiscoveryTimeoutSeconds": 2,
|
||||
"DescriptionFetchTimeoutSeconds": 2,
|
||||
"MaxResponsesPerSearchTarget": 32,
|
||||
"SearchTargets": [
|
||||
"urn:schemas-upnp-org:device:MediaRenderer:1",
|
||||
"urn:schemas-upnp-org:device:MediaServer:1"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: fc-media-data
|
||||
namespace: fc-media
|
||||
labels:
|
||||
app.kubernetes.io/name: fc-media-web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
resources:
|
||||
requests:
|
||||
storage: 20Gi
|
||||
storageClassName: longhorn
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: fc-media-web
|
||||
namespace: fc-media
|
||||
labels:
|
||||
app: fc-media-web
|
||||
app.kubernetes.io/name: fc-media-web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
spec:
|
||||
replicas: 1
|
||||
strategy:
|
||||
type: Recreate
|
||||
selector:
|
||||
matchLabels:
|
||||
app: fc-media-web
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: fc-media-web
|
||||
app.kubernetes.io/name: fc-media-web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
annotations:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "5200"
|
||||
prometheus.io/path: "/metrics"
|
||||
flowercore.io/healthz-auth-policy: "allow-anonymous"
|
||||
spec:
|
||||
nodeSelector:
|
||||
kubernetes.io/hostname: rke2-server
|
||||
containers:
|
||||
- name: fc-media-web
|
||||
image: localhost/fc-media-web:v20260604-oidc-proper
|
||||
imagePullPolicy: Never
|
||||
ports:
|
||||
- containerPort: 5200
|
||||
name: http
|
||||
env:
|
||||
- name: ASPNETCORE_ENVIRONMENT
|
||||
value: Production
|
||||
- name: ASPNETCORE_URLS
|
||||
value: http://+:5200
|
||||
- name: FlowerCore__Auth__Enabled
|
||||
value: "true"
|
||||
- name: FlowerCore__Auth__Oidc__Enabled
|
||||
value: "true"
|
||||
- name: FlowerCore__Auth__Oidc__Audience
|
||||
value: "media"
|
||||
- name: FlowerCore__Auth__Oidc__ClientId
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: media-oidc-client
|
||||
key: client_id
|
||||
optional: true
|
||||
- name: FlowerCore__Auth__Oidc__ClientSecret
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: media-oidc-client
|
||||
key: client_secret
|
||||
optional: true
|
||||
- name: FlowerCore__Auth__Oidc__Authority
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: media-oidc-client
|
||||
key: issuer_url
|
||||
optional: true
|
||||
resources:
|
||||
requests:
|
||||
cpu: 500m
|
||||
memory: 1Gi
|
||||
limits:
|
||||
cpu: "4"
|
||||
memory: 4Gi
|
||||
volumeMounts:
|
||||
- name: config
|
||||
mountPath: /app/appsettings.Production.json
|
||||
subPath: appsettings.Production.json
|
||||
readOnly: true
|
||||
- name: data
|
||||
mountPath: /data
|
||||
- name: transcodes
|
||||
mountPath: /data/transcodes
|
||||
- name: media-library
|
||||
mountPath: /media/library
|
||||
readOnly: true
|
||||
- name: media-inbox
|
||||
mountPath: /media/inbox
|
||||
startupProbe:
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: 5200
|
||||
httpHeaders:
|
||||
- name: X-Forwarded-Proto
|
||||
value: https
|
||||
failureThreshold: 18
|
||||
periodSeconds: 10
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: 5200
|
||||
httpHeaders:
|
||||
- name: X-Forwarded-Proto
|
||||
value: https
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 10
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: 5200
|
||||
httpHeaders:
|
||||
- name: X-Forwarded-Proto
|
||||
value: https
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 30
|
||||
volumes:
|
||||
- name: config
|
||||
configMap:
|
||||
name: fc-media-config
|
||||
- name: data
|
||||
persistentVolumeClaim:
|
||||
claimName: fc-media-data
|
||||
- name: transcodes
|
||||
nfs:
|
||||
server: 10.0.58.3
|
||||
path: /volume1/kubernetes/fc-media-transcodes
|
||||
- name: media-inbox
|
||||
nfs:
|
||||
server: 10.0.58.3
|
||||
path: /volume1/kubernetes/fc-media-inbox
|
||||
- name: media-library
|
||||
nfs:
|
||||
server: 10.0.58.3
|
||||
path: /volume1/video
|
||||
readOnly: true
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: fc-media-web
|
||||
namespace: fc-media
|
||||
labels:
|
||||
app: fc-media-web
|
||||
app.kubernetes.io/name: fc-media-web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: fc-media-web
|
||||
ports:
|
||||
- port: 5200
|
||||
targetPort: 5200
|
||||
protocol: TCP
|
||||
name: http
|
||||
---
|
||||
apiVersion: cert-manager.io/v1
|
||||
kind: Certificate
|
||||
metadata:
|
||||
name: fc-media-tls
|
||||
namespace: fc-media
|
||||
labels:
|
||||
app.kubernetes.io/name: fc-media-web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
spec:
|
||||
secretName: fc-media-tls
|
||||
issuerRef:
|
||||
name: step-ca-acme
|
||||
kind: ClusterIssuer
|
||||
dnsNames:
|
||||
- media.iamworkin.lan
|
||||
---
|
||||
apiVersion: traefik.io/v1alpha1
|
||||
kind: IngressRoute
|
||||
metadata:
|
||||
name: fc-media-web
|
||||
namespace: fc-media
|
||||
labels:
|
||||
app.kubernetes.io/name: fc-media-web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
spec:
|
||||
entryPoints:
|
||||
- websecure
|
||||
routes:
|
||||
- match: Host(`media.iamworkin.lan`)
|
||||
kind: Rule
|
||||
services:
|
||||
- name: fc-media-web
|
||||
port: 5200
|
||||
tls:
|
||||
secretName: fc-media-tls
|
||||
6
apps/fc-media/kustomization.yaml
Normal file
6
apps/fc-media/kustomization.yaml
Normal file
@@ -0,0 +1,6 @@
|
||||
# ArgoCD's bluejay-infra ApplicationSet discovers apps/* directories on main.
|
||||
# The kustomization is included for local previews and single-app validation.
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
resources:
|
||||
- fc-media.yaml
|
||||
171
apps/fc-redis/fc-redis.yaml
Normal file
171
apps/fc-redis/fc-redis.yaml
Normal file
@@ -0,0 +1,171 @@
|
||||
# fc-redis — SignalR backplane for cross-product event bus
|
||||
#
|
||||
# Lands per Q-SO-1 resolution (2026-05-11 PM): SignalR backplane in Phase A,
|
||||
# not Phase C as originally drafted. Operator directive: "Redis can be
|
||||
# deployed just fine as it's another FlowerCore technology we'll want to
|
||||
# manage."
|
||||
#
|
||||
# Phase A scope (this file):
|
||||
# - Single Redis 7.x Alpine pod
|
||||
# - 1Gi Longhorn RWO PVC for AOF persistence
|
||||
# - ClusterIP Service at `redis.fc-redis.svc.cluster.local:6379`
|
||||
# - No AUTH (in-cluster only; not exposed externally)
|
||||
# - No IngressRoute (backplane is server-to-server only)
|
||||
#
|
||||
# Consumers (Phase A IMPL across FC services):
|
||||
# - FlowerCore.Signage.Web (OpsConsoleHub)
|
||||
# - FlowerCore.Scoreboard.Web (ScoreboardHub)
|
||||
# - FlowerCore.SignalControl.Web
|
||||
# - FlowerCore.DMS.Web
|
||||
# - Any other product joining the cross-product event bus
|
||||
#
|
||||
# Each consumer adds:
|
||||
# services.AddSignalR()
|
||||
# .AddStackExchangeRedis(
|
||||
# "redis.fc-redis.svc.cluster.local:6379",
|
||||
# opts => opts.Configuration.ChannelPrefix =
|
||||
# StackExchange.Redis.RedisChannel.Literal("fc-opsconsole"));
|
||||
#
|
||||
# Phase B / C follow-ons (out of scope here):
|
||||
# - Redis Sentinel for HA (3-node)
|
||||
# - AUTH password from 1Password Connect (rotate via /rotate-password)
|
||||
# - redis_exporter sidecar for Prometheus scrape
|
||||
# - Network policies restricting which namespaces can dial 6379
|
||||
#
|
||||
# Design: docs/signage/operations-console-phase-2-design.md §3.5
|
||||
# Decision: Q-SO-1 (RESOLVED 2026-05-11 PM)
|
||||
# Memory: feedback_blooming_ui_pattern_no_iframes
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: fc-redis
|
||||
labels:
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: fc-redis-data
|
||||
namespace: fc-redis
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
storageClassName: longhorn
|
||||
resources:
|
||||
requests:
|
||||
storage: 1Gi
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: fc-redis-config
|
||||
namespace: fc-redis
|
||||
data:
|
||||
redis.conf: |
|
||||
# Phase A — minimal config; no AUTH, no replication.
|
||||
bind 0.0.0.0
|
||||
protected-mode no
|
||||
port 6379
|
||||
tcp-backlog 511
|
||||
timeout 0
|
||||
tcp-keepalive 300
|
||||
|
||||
# Persistence: AOF (fsync every second is the standard SignalR-backplane
|
||||
# durability sweet spot — the backplane only needs to survive Redis
|
||||
# restarts, not absolute zero loss).
|
||||
appendonly yes
|
||||
appendfsync everysec
|
||||
auto-aof-rewrite-percentage 100
|
||||
auto-aof-rewrite-min-size 64mb
|
||||
|
||||
# Reasonable defaults — let Redis pick most things.
|
||||
maxmemory-policy allkeys-lru
|
||||
maxmemory 256mb
|
||||
|
||||
# Logging
|
||||
loglevel notice
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: fc-redis
|
||||
namespace: fc-redis
|
||||
labels:
|
||||
app: fc-redis
|
||||
spec:
|
||||
replicas: 1
|
||||
strategy:
|
||||
type: Recreate # RWO PVC; do not do rolling update
|
||||
selector:
|
||||
matchLabels:
|
||||
app: fc-redis
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: fc-redis
|
||||
spec:
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 999 # redis:7-alpine default uid
|
||||
runAsGroup: 999
|
||||
fsGroup: 999
|
||||
containers:
|
||||
- name: redis
|
||||
image: redis:7-alpine
|
||||
imagePullPolicy: IfNotPresent
|
||||
command: ["redis-server", "/etc/redis/redis.conf"]
|
||||
ports:
|
||||
- name: redis
|
||||
containerPort: 6379
|
||||
resources:
|
||||
requests:
|
||||
cpu: "50m"
|
||||
memory: "128Mi"
|
||||
limits:
|
||||
cpu: "500m"
|
||||
memory: "384Mi"
|
||||
volumeMounts:
|
||||
- name: data
|
||||
mountPath: /data
|
||||
- name: config
|
||||
mountPath: /etc/redis
|
||||
readOnly: true
|
||||
livenessProbe:
|
||||
tcpSocket:
|
||||
port: 6379
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 10
|
||||
readinessProbe:
|
||||
exec:
|
||||
command: ["redis-cli", "ping"]
|
||||
initialDelaySeconds: 2
|
||||
periodSeconds: 5
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
readOnlyRootFilesystem: true
|
||||
capabilities:
|
||||
drop: [ALL]
|
||||
volumes:
|
||||
- name: data
|
||||
persistentVolumeClaim:
|
||||
claimName: fc-redis-data
|
||||
- name: config
|
||||
configMap:
|
||||
name: fc-redis-config
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: redis
|
||||
namespace: fc-redis
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: fc-redis
|
||||
ports:
|
||||
- name: redis
|
||||
port: 6379
|
||||
targetPort: 6379
|
||||
protocol: TCP
|
||||
170
apps/fc-retail/fc-retail.yaml
Normal file
170
apps/fc-retail/fc-retail.yaml
Normal file
@@ -0,0 +1,170 @@
|
||||
# FlowerCore.Retail.Web GitOps adoption manifest.
|
||||
#
|
||||
# Authored from the already-live fc-retail resources on 2026-06-04.
|
||||
# Keep the live image tag, Service ClusterIP, and PVC volumeName unchanged so
|
||||
# ArgoCD adopts in place instead of replacing the workload or data volume.
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: retail-web-data
|
||||
namespace: fc-retail
|
||||
labels:
|
||||
app.kubernetes.io/name: retail-web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
argocd.argoproj.io/instance: infra-fc-retail
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
resources:
|
||||
requests:
|
||||
storage: 1Gi
|
||||
storageClassName: longhorn
|
||||
volumeMode: Filesystem
|
||||
volumeName: pvc-3d40b336-eab4-41b3-812c-d5e9413ce0ab
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: retail-web
|
||||
namespace: fc-retail
|
||||
labels:
|
||||
app.kubernetes.io/name: retail-web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
argocd.argoproj.io/instance: infra-fc-retail
|
||||
spec:
|
||||
progressDeadlineSeconds: 600
|
||||
replicas: 1
|
||||
revisionHistoryLimit: 3
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: retail-web
|
||||
strategy:
|
||||
type: Recreate
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
kubectl.kubernetes.io/restartedAt: "2026-06-02T01:34:08-05:00"
|
||||
prometheus.io/path: /metrics/prometheus
|
||||
prometheus.io/port: "5000"
|
||||
prometheus.io/scrape: "true"
|
||||
labels:
|
||||
app.kubernetes.io/name: retail-web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
spec:
|
||||
containers:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: retail-web-config
|
||||
image: localhost/fc-retail-web:v20260602-retail-owned-deploy-fix5
|
||||
imagePullPolicy: Never
|
||||
livenessProbe:
|
||||
failureThreshold: 3
|
||||
httpGet:
|
||||
path: /health
|
||||
port: 5000
|
||||
scheme: HTTP
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 30
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 5
|
||||
name: retail-web
|
||||
ports:
|
||||
- containerPort: 5000
|
||||
name: http
|
||||
protocol: TCP
|
||||
readinessProbe:
|
||||
failureThreshold: 6
|
||||
httpGet:
|
||||
path: /health
|
||||
port: 5000
|
||||
scheme: HTTP
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 5
|
||||
resources: {}
|
||||
terminationMessagePath: /dev/termination-log
|
||||
terminationMessagePolicy: File
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
name: data
|
||||
dnsPolicy: ClusterFirst
|
||||
restartPolicy: Always
|
||||
schedulerName: default-scheduler
|
||||
securityContext: {}
|
||||
terminationGracePeriodSeconds: 30
|
||||
volumes:
|
||||
- name: data
|
||||
persistentVolumeClaim:
|
||||
claimName: retail-web-data
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: retail-web
|
||||
namespace: fc-retail
|
||||
labels:
|
||||
app.kubernetes.io/name: retail-web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
argocd.argoproj.io/instance: infra-fc-retail
|
||||
spec:
|
||||
clusterIP: 10.43.239.8
|
||||
clusterIPs:
|
||||
- 10.43.239.8
|
||||
internalTrafficPolicy: Cluster
|
||||
ipFamilies:
|
||||
- IPv4
|
||||
ipFamilyPolicy: SingleStack
|
||||
ports:
|
||||
- name: http
|
||||
port: 80
|
||||
protocol: TCP
|
||||
targetPort: 5000
|
||||
selector:
|
||||
app.kubernetes.io/name: retail-web
|
||||
sessionAffinity: None
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: cert-manager.io/v1
|
||||
kind: Certificate
|
||||
metadata:
|
||||
name: retail-web-tls
|
||||
namespace: fc-retail
|
||||
labels:
|
||||
app.kubernetes.io/name: retail-web-tls
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
argocd.argoproj.io/instance: infra-fc-retail
|
||||
spec:
|
||||
dnsNames:
|
||||
- retail.iamworkin.lan
|
||||
issuerRef:
|
||||
kind: ClusterIssuer
|
||||
name: step-ca-acme
|
||||
secretName: retail-web-tls
|
||||
---
|
||||
apiVersion: traefik.io/v1alpha1
|
||||
kind: IngressRoute
|
||||
metadata:
|
||||
name: retail-web
|
||||
namespace: fc-retail
|
||||
labels:
|
||||
app.kubernetes.io/name: retail-web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
argocd.argoproj.io/instance: infra-fc-retail
|
||||
spec:
|
||||
entryPoints:
|
||||
- websecure
|
||||
routes:
|
||||
- kind: Rule
|
||||
match: Host(`retail.iamworkin.lan`)
|
||||
services:
|
||||
- name: retail-web
|
||||
port: 80
|
||||
tls:
|
||||
secretName: retail-web-tls
|
||||
14
apps/fc-signage-appletv/README.md
Normal file
14
apps/fc-signage-appletv/README.md
Normal file
@@ -0,0 +1,14 @@
|
||||
# fc-signage-appletv
|
||||
|
||||
Apple TV signage is a sealed appliance running the `FlowerCore.Signage.Agent.AppleTv` tvOS app per ADR-134.
|
||||
|
||||
This ApplicationSet entry is documentation and inventory metadata only. It intentionally creates no `Deployment`, `Service`, or `Pod`.
|
||||
|
||||
The Apple TV app connects outbound to existing FC.Signage.Web surfaces:
|
||||
|
||||
- `https://signage.iamworkin.lan/hub/signage` for SignalR live status.
|
||||
- `GET /api/v1/nodes/{nodeId}/state` for the 30-second polling fallback.
|
||||
- `POST /api/v1/nodes/register` and `POST /api/v1/nodes/{nodeId}/enroll` for pairing and mTLS enrollment.
|
||||
- `POST /api/v1/nodes/{nodeId}/heartbeat` for metrics, current content identity, and local audit excerpts.
|
||||
|
||||
Distribution is via Apple Developer Enterprise Program or TestFlight plus FC.Distribution / UpdateCenter publishing once Apple credentials are available.
|
||||
5
apps/fc-signage-appletv/kustomization.yaml
Normal file
5
apps/fc-signage-appletv/kustomization.yaml
Normal file
@@ -0,0 +1,5 @@
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
|
||||
resources:
|
||||
- manifest.yaml
|
||||
26
apps/fc-signage-appletv/manifest.yaml
Normal file
26
apps/fc-signage-appletv/manifest.yaml
Normal file
@@ -0,0 +1,26 @@
|
||||
# Apple TV signage is a sealed tvOS appliance. This ArgoCD app intentionally
|
||||
# carries documentation metadata only; no Deployment, Service, or Pod resources
|
||||
# are created for the player.
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: fc-signage-appletv-docs
|
||||
namespace: fc-signage
|
||||
labels:
|
||||
app.kubernetes.io/name: fc-signage-appletv
|
||||
app.kubernetes.io/part-of: flowercore-signage
|
||||
flowercore.io/manifest-kind: docs-only
|
||||
data:
|
||||
README: |
|
||||
FlowerCore.Signage.Agent.AppleTv is distributed through Apple Developer
|
||||
Enterprise Program or TestFlight, not Kubernetes.
|
||||
|
||||
The app connects outbound to FC.Signage.Web:
|
||||
- SignalR: https://signage.iamworkin.lan/hub/signage
|
||||
- Polling fallback: GET /api/v1/nodes/{nodeId}/state
|
||||
- Enrollment: POST /api/v1/nodes/{nodeId}/enroll
|
||||
- Heartbeat: POST /api/v1/nodes/{nodeId}/heartbeat
|
||||
|
||||
This placeholder gives ArgoCD and inventory dashboards a first-class
|
||||
Apple TV signage app entry without creating runtime pods.
|
||||
17
apps/fc-signage-pi-player/README.md
Normal file
17
apps/fc-signage-pi-player/README.md
Normal file
@@ -0,0 +1,17 @@
|
||||
# FlowerCore Signage Pi Player
|
||||
|
||||
Phase 1 Raspberry Pi signage player packaging for Chromium kiosk deployments.
|
||||
This bundle is intentionally air-gap friendly: systemd units, shell scripts,
|
||||
udev rules, and Chromium managed policy are all checked into the repo and are
|
||||
installed by `FlowerCore.Puppet`.
|
||||
|
||||
## Scope
|
||||
|
||||
- Bootstrap a stable node identity and mTLS client certificate.
|
||||
- Launch Chromium in kiosk mode against `FC.Signage.Web` player routes.
|
||||
- Restart the kiosk on HDMI hotplug.
|
||||
- Renew mTLS certificates daily when fewer than 30 days remain.
|
||||
- Detect display capabilities at boot, daily, and on HDMI hotplug.
|
||||
|
||||
Phase 2 native Avalonia rendering is documented separately in Notes and remains
|
||||
deferred.
|
||||
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"AutofillAddressEnabled": false,
|
||||
"AutofillCreditCardEnabled": false,
|
||||
"PasswordManagerEnabled": false,
|
||||
"BrowserSignin": 0,
|
||||
"MetricsReportingEnabled": false,
|
||||
"SafeBrowsingProtectionLevel": 0,
|
||||
"DefaultNotificationsSetting": 2,
|
||||
"DefaultPopupsSetting": 2,
|
||||
"BackgroundModeEnabled": false,
|
||||
"DefaultBrowserSettingEnabled": false,
|
||||
"PromotionalTabsEnabled": false,
|
||||
"CommandLineFlagSecurityWarningsEnabled": false,
|
||||
"ExtensionInstallBlocklist": ["*"]
|
||||
}
|
||||
132
apps/fc-signage-pi-player/scripts/fc-signage-detect-display
Normal file
132
apps/fc-signage-pi-player/scripts/fc-signage-detect-display
Normal file
@@ -0,0 +1,132 @@
|
||||
#!/usr/bin/env bash
#
# fc-signage-detect-display
#
# Probes the first connected HDMI connector under /sys/class/drm, builds a
# JSON capability profile (resolution, refresh rate, HDR, audio, physical
# size, manufacturer/model) and POSTs it to the signage server using the
# node's mTLS client certificate. The profile is always echoed to stdout; if
# no endpoint accepts it, it is also appended to the local capabilities log.
#
# Environment:
#   FC_SIGNAGE_URL  Base URL of the signage server
#                   (default: https://signage.iamworkin.lan).
#
# Exit status: non-zero if the node identity file cannot be read or holds no
# nodeId; a rejected/unreachable server is NOT an error (logged locally).
set -euo pipefail

NODE_JSON="/etc/flowercore/signage-node.json"
CERT_DIR="/etc/fc-signage-player"
SIGNAGE_URL="${FC_SIGNAGE_URL:-https://signage.iamworkin.lan}"
LOG_DIR="/var/log/fc-signage-player"
LOG_FILE="$LOG_DIR/capabilities.log"

# `// empty` maps a missing or null nodeId to "" instead of the literal string
# "null", which would otherwise be interpolated into the endpoint URLs below.
NODE_ID=$(jq -r '.nodeId // empty' "$NODE_JSON")
if [[ -z "$NODE_ID" ]]; then
    echo "fc-signage-detect-display: no nodeId in $NODE_JSON; cannot declare capabilities" >&2
    exit 1
fi

# Collect every HDMI connector that sysfs reports as "connected".
CONNECTORS=()
for dir in /sys/class/drm/card*-HDMI-A-*; do
    # The glob stays literal when nothing matches; skip entries without a status file.
    [[ -e "$dir/status" ]] || continue
    if [[ "$(cat "$dir/status")" == "connected" ]]; then
        CONNECTORS+=("$(basename "$dir")")
    fi
done

if [[ ${#CONNECTORS[@]} -eq 0 ]]; then
    CAPABILITIES_JSON=$(jq -n --arg id "$NODE_ID" '{
        nodeId: $id,
        platform: "linux-arm64-pi",
        displayConnected: false,
        detectedAt: (now | todate),
        note: "No HDMI display detected"
    }')
else
    # Only the first connected connector is profiled.
    PRIMARY="${CONNECTORS[0]}"
    EDID_PATH="/sys/class/drm/${PRIMARY}/edid"

    # Defaults used when EDID/kmsprint yield nothing.
    WIDTH=0
    HEIGHT=0
    REFRESH=60
    HDR=false
    AUDIO_HDMI=false
    MFG=""
    MODEL=""
    PHYSICAL_SIZE=null   # JSON null; passed to jq via --argjson

    if [[ -s "$EDID_PATH" ]] && command -v edid-decode >/dev/null 2>&1; then
        # Every extraction is best-effort: `|| true` keeps `set -e`/pipefail
        # from aborting when a field is absent from the decoded EDID.
        EDID_INFO=$(edid-decode < "$EDID_PATH" 2>/dev/null || true)
        MFG=$(echo "$EDID_INFO" | grep -m1 -oP 'Manufacturer:\s*\K\S+' || true)
        MODEL=$(echo "$EDID_INFO" | grep -m1 -oP 'Model:\s*\K\S+' || true)

        # First "<W>x<H> @ <rate> Hz" style mode line in the decoded output.
        # NOTE(review): this assumes the first such line is the preferred mode
        # and that the installed edid-decode prints an "@" — confirm.
        PREF=$(echo "$EDID_INFO" | grep -m1 -oP '\d+x\d+\s*@\s*\d+(?:\.\d+)?\s*Hz' || true)
        if [[ -n "$PREF" ]]; then
            WIDTH=$(echo "$PREF" | grep -oP '^\d+')
            HEIGHT=$(echo "$PREF" | grep -oP 'x\K\d+')
            # Truncate a fractional rate (e.g. 59.94 -> 59) so it is a JSON integer.
            REFRESH=$(echo "$PREF" | grep -oP '@\s*\K[\d.]+' | cut -d. -f1)
        fi

        if echo "$EDID_INFO" | grep -qiE 'HDR (Static|Dynamic) Metadata Block'; then HDR=true; fi
        if echo "$EDID_INFO" | grep -qiE 'CEA Audio Block|Audio Format Descriptor'; then AUDIO_HDMI=true; fi

        # "Maximum image size: <W> cm x <H> cm" -> diagonal in inches.
        PH_W=$(echo "$EDID_INFO" | grep -m1 -oP 'Maximum image size:\s*\K\d+\s*cm\s*x\s*\d+' || true)
        if [[ -n "$PH_W" ]]; then
            PH_CM_W=$(echo "$PH_W" | grep -oP '^\d+')
            PH_CM_H=$(echo "$PH_W" | grep -oP 'x\s*\K\d+')
            if (( PH_CM_W > 0 && PH_CM_H > 0 )); then
                PHYSICAL_SIZE=$(awk -v w="$PH_CM_W" -v h="$PH_CM_H" 'BEGIN { printf "%.1f", sqrt(w*w + h*h)/2.54 }')
            fi
        fi
    fi

    # Fallback when EDID gave no resolution: take the first WxH that kmsprint
    # lists for this connector.
    if [[ "$WIDTH" == "0" ]] && command -v kmsprint >/dev/null 2>&1; then
        # sysfs names connectors "cardN-HDMI-A-1" while kmsprint prints just
        # "HDMI-A-1"; strip the "cardN-" prefix or the grep can never match.
        KMS_CONNECTOR="${PRIMARY#card*-}"
        KMS=$(kmsprint 2>/dev/null | grep -F -A2 -- "$KMS_CONNECTOR" | grep -oP '\d+x\d+' | head -1 || true)
        if [[ -n "$KMS" ]]; then
            WIDTH=$(echo "$KMS" | grep -oP '^\d+')
            HEIGHT=$(echo "$KMS" | grep -oP 'x\K\d+')
        fi
    fi

    # Audio is reported only when BOTH the EDID advertises audio and ALSA
    # exposes an HDMI playback card.
    AUDIO_ALSA=false
    if aplay -l 2>/dev/null | grep -qi 'card.*HDMI'; then AUDIO_ALSA=true; fi
    HAS_AUDIO=false
    if [[ "$AUDIO_HDMI" == "true" && "$AUDIO_ALSA" == "true" ]]; then HAS_AUDIO=true; fi

    CAPABILITIES_JSON=$(jq -n \
        --arg id "$NODE_ID" \
        --argjson w "$WIDTH" \
        --argjson h "$HEIGHT" \
        --argjson r "$REFRESH" \
        --argjson hdr "$HDR" \
        --argjson audio "$HAS_AUDIO" \
        --arg connector "$PRIMARY" \
        --arg mfg "$MFG" \
        --arg model "$MODEL" \
        --argjson size "$PHYSICAL_SIZE" \
        '{
            nodeId: $id,
            platform: "linux-arm64-pi",
            displayConnected: true,
            detectedAt: (now | todate),
            hardware: {
                maxResolution: { width: $w, height: $h },
                nativeResolution: { width: $w, height: $h },
                refreshRateHz: $r,
                colorDepth: ($hdr | if . then "Color30Hdr" else "Color24" end),
                hasAudioOutput: $audio,
                audioChannelCount: ($audio | if . then 2 else 0 end),
                physicalSizeInches: $size,
                connector: $connector,
                manufacturer: $mfg,
                modelName: $model
            },
            render: { codecs: ["h264", "vp9", "mp4"] }
        }')
fi

# Endpoints are tried in order; the first 200/201/204 wins.
ENDPOINT_CANDIDATES=(
    "${SIGNAGE_URL}/api/v1/nodes/${NODE_ID}/capabilities"
    "${SIGNAGE_URL}/api/v1/displays/${NODE_ID}/capability-profile"
)

# The response body is not used; write it to a private temp file instead of a
# fixed, predictable path under /tmp, and remove it on exit.
RESPONSE_FILE=$(mktemp)
trap 'rm -f "$RESPONSE_FILE"' EXIT

SUCCESS=false
for url in "${ENDPOINT_CANDIDATES[@]}"; do
    # NOTE(review): -k disables server certificate verification while the
    # client certificate is being presented; prefer --cacert with the internal
    # CA bundle once its on-disk path is confirmed.
    # curl already prints "000" via -w on a transport failure, so the fallback
    # assigns rather than appends (the old `|| echo "000"` produced "000000").
    HTTP_STATUS=$(curl -sk -o "$RESPONSE_FILE" -w "%{http_code}" \
        --max-time 10 \
        --cert "$CERT_DIR/client.crt" --key "$CERT_DIR/client.key" \
        -X POST "$url" \
        -H "Content-Type: application/json" \
        -d "$CAPABILITIES_JSON") || HTTP_STATUS="000"
    if [[ "$HTTP_STATUS" == "200" || "$HTTP_STATUS" == "201" || "$HTTP_STATUS" == "204" ]]; then
        SUCCESS=true
        break
    fi
done

mkdir -p "$LOG_DIR"
if [[ "$SUCCESS" != "true" ]]; then
    # Keep the profile locally so it is not lost when the server is unreachable.
    echo "[$(date -Is)] capability declare: no endpoint accepted the profile; logging locally" \
        | tee -a "$LOG_FILE"
    echo "$CAPABILITIES_JSON" | tee -a "$LOG_FILE"
else
    # $url still holds the endpoint that accepted the profile (loop exited via break).
    echo "[$(date -Is)] capability declare: ok ($url)" | tee -a "$LOG_FILE"
fi

echo "$CAPABILITIES_JSON"
|
||||
@@ -0,0 +1,144 @@
|
||||
#!/usr/bin/env bash
# First-boot identity bootstrap for the FlowerCore signage Pi player.
#
# Flow: create (or reuse) the local node identity, register it with the
# signage server, wait up to 30 minutes for approval, enroll an mTLS client
# certificate, then start the display-detection and kiosk services.
#
# Exit codes: 0 = enrolled (or already enrolled), 2 = registration failed,
#             3 = approval not granted in time, 4 = certificate enrollment failed.
#
# NOTE(review): every curl call uses -k (no server certificate verification),
# presumably because the CA chain only arrives with the enroll response — confirm.
set -euo pipefail

NODE_JSON="/etc/flowercore/signage-node.json"
CERT_DIR="/etc/fc-signage-player"
SIGNAGE_URL="${FC_SIGNAGE_URL:-https://signage.iamworkin.lan}"
SETUP_CODE_FILE="/etc/flowercore/signage-setup-code"

# Server responses are kept in a private scratch directory rather than under
# fixed, predictable names in /tmp.
WORK_DIR=$(mktemp -d)
trap 'rm -rf "$WORK_DIR"' EXIT

# Rewrite $NODE_JSON through a jq filter while preserving its owner and mode.
# Usage: update_node_json <jq options and filter>
update_node_json() {
  jq "$@" "$NODE_JSON" > "${NODE_JSON}.tmp"
  chmod 0640 "${NODE_JSON}.tmp"
  chown fc-signage:fc-signage "${NODE_JSON}.tmp"
  mv "${NODE_JSON}.tmp" "$NODE_JSON"
}

mkdir -p /etc/flowercore "$CERT_DIR" /var/log/fc-signage-player
chown fc-signage:fc-signage /etc/flowercore "$CERT_DIR" /var/log/fc-signage-player
chmod 0750 "$CERT_DIR"

# Idempotency: nothing to do once an enrollment timestamp and a P12 bundle exist.
if [[ -s "$NODE_JSON" && -s "$CERT_DIR/client.p12" ]]; then
  ENROLLED=$(jq -r '.enrolledAt // empty' "$NODE_JSON")
  if [[ -n "$ENROLLED" ]]; then
    echo "[$(date -Is)] bootstrap: already enrolled at $ENROLLED; skipping"
    exit 0
  fi
fi

# Reuse the stored identity when present so retries register the same machine.
if [[ -s "$NODE_JSON" ]]; then
  NODE_UUID=$(jq -r '.nodeUuid // empty' "$NODE_JSON")
  MACHINE_ID=$(jq -r '.machineId // empty' "$NODE_JSON")
else
  NODE_UUID=$(uuidgen)
  MACHINE_ID=$(echo "$NODE_UUID" | tr -d '-' | cut -c1-16)
  jq -n --arg uuid "$NODE_UUID" --arg machine "$MACHINE_ID" --arg host "$(hostname -f)" --arg ts "$(date -Is)" \
    '{nodeUuid: $uuid, machineId: $machine, hostname: $host, platform: "linux-arm64-pi", createdAt: $ts}' \
    > "$NODE_JSON"
  chmod 0640 "$NODE_JSON"
  chown fc-signage:fc-signage "$NODE_JSON"
fi

# Optional pre-provisioned setup code; all whitespace is stripped.
SETUP_CODE=""
if [[ -s "$SETUP_CODE_FILE" ]]; then
  SETUP_CODE=$(tr -d '\r\n\t ' < "$SETUP_CODE_FILE")
fi

MODEL=$(tr -d '\0' < /sys/firmware/devicetree/base/model 2>/dev/null || echo Unknown)
REG_PAYLOAD=$(jq -n \
  --arg machine "$MACHINE_ID" \
  --arg name "$(hostname -f)" \
  --arg setup "$SETUP_CODE" \
  --arg resolution "1920x1080" \
  --arg model "$MODEL" \
  '{
    machineId: $machine,
    name: $name,
    setupCode: ($setup | if . == "" then null else . end),
    resolution: $resolution,
    hardwareModel: $model,
    platform: "linux-arm64-pi"
  }')

# --- Register (one retry) ---------------------------------------------------
REGISTER_RESPONSE="$WORK_DIR/register-response.json"
HTTP_STATUS="000"
for attempt in 1 2; do
  # curl itself prints 000 for %{http_code} on a transport failure; "|| true"
  # only stops set -e from aborting before the retry / error message.
  HTTP_STATUS=$(curl -sk -o "$REGISTER_RESPONSE" -w "%{http_code}" \
    --max-time 15 \
    -X POST "${SIGNAGE_URL}/api/v1/nodes/register" \
    -H "Content-Type: application/json" \
    -d "$REG_PAYLOAD" || true)
  HTTP_STATUS="${HTTP_STATUS:-000}"
  if [[ "$HTTP_STATUS" == "200" || "$HTTP_STATUS" == "201" ]]; then
    break
  fi
  echo "[$(date -Is)] bootstrap: register attempt $attempt returned $HTTP_STATUS" >&2
  sleep 5
done

if [[ "$HTTP_STATUS" != "200" && "$HTTP_STATUS" != "201" ]]; then
  echo "[$(date -Is)] bootstrap: register failed after 2 attempts" >&2
  exit 2
fi

# "|| true": an unparsable body must reach the explicit error below.
NODE_ID=$(jq -r '.nodeId // empty' "$REGISTER_RESPONSE" || true)
if [[ -z "$NODE_ID" ]]; then
  echo "[$(date -Is)] bootstrap: register response did not include nodeId" >&2
  exit 2
fi
update_node_json --arg id "$NODE_ID" '.nodeId = $id'

# --- Approval ---------------------------------------------------------------
# Best-effort self-approval; the body is built with jq so the code is always
# valid, correctly escaped JSON.
if [[ -n "$SETUP_CODE" ]]; then
  curl -sk --max-time 15 -X POST "${SIGNAGE_URL}/api/v1/nodes/${NODE_ID}/approve-via-setup-code" \
    -H "Content-Type: application/json" \
    -d "$(jq -n --arg setup "$SETUP_CODE" '{setupCode: $setup}')" \
    -o /dev/null || true
fi

# Poll for up to 30 minutes. A failed poll (timeout, bad body) must not end the
# wait, hence "|| true" inside the substitution (pipefail + set -e otherwise
# abort the script on the first transient error).
STATUS=""
DEADLINE=$(( $(date +%s) + 1800 ))
while (( $(date +%s) < DEADLINE )); do
  STATUS=$(curl -sk --max-time 5 "${SIGNAGE_URL}/api/v1/nodes/${NODE_ID}/status" | jq -r '.status // empty' || true)
  if [[ "$STATUS" == "Approved" || "$STATUS" == "Enrolled" || "$STATUS" == "Online" ]]; then
    break
  fi
  sleep 15
done

if [[ "$STATUS" != "Approved" && "$STATUS" != "Enrolled" && "$STATUS" != "Online" ]]; then
  echo "[$(date -Is)] bootstrap: approval not granted within 30min budget" >&2
  exit 3
fi

# --- Key + CSR ----------------------------------------------------------------
KEY_PATH="${CERT_DIR}/client.key"
CSR_PATH="${CERT_DIR}/client.csr"
# Create the key under a restrictive umask so it is never world-readable, not
# even briefly before the chmod further down. A stale key from an earlier
# failed run is removed first because -out would keep its old mode.
rm -f "$KEY_PATH"
(
  umask 077
  openssl ecparam -genkey -name prime256v1 -out "$KEY_PATH"
)
openssl req -new -key "$KEY_PATH" -out "$CSR_PATH" \
  -subj "/CN=${NODE_ID}/O=FlowerCore/OU=SignagePlayer-Pi"

# --- Enroll -------------------------------------------------------------------
ENROLL_RESPONSE="$WORK_DIR/enroll-response.json"
ENROLL_PAYLOAD=$(jq -n --arg csr "$(cat "$CSR_PATH")" '{certificateSigningRequest: $csr}')
HTTP_STATUS=$(curl -sk -o "$ENROLL_RESPONSE" -w "%{http_code}" \
  --max-time 15 \
  -X POST "${SIGNAGE_URL}/api/v1/nodes/${NODE_ID}/enroll" \
  -H "Content-Type: application/json" \
  -d "$ENROLL_PAYLOAD" || true)
HTTP_STATUS="${HTTP_STATUS:-000}"

if [[ "$HTTP_STATUS" != "200" && "$HTTP_STATUS" != "201" ]]; then
  echo "[$(date -Is)] bootstrap: enroll failed with HTTP $HTTP_STATUS" >&2
  exit 4
fi

# Validate before writing: a missing field would otherwise put the literal
# string "null" into the certificate files.
CLIENT_CERT_PEM=$(jq -r '.clientCertificatePem // .signedCertificatePem // empty' "$ENROLL_RESPONSE" || true)
CA_CHAIN_PEM=$(jq -r '.caCertificatePem // empty' "$ENROLL_RESPONSE" || true)
if [[ -z "$CLIENT_CERT_PEM" || -z "$CA_CHAIN_PEM" ]]; then
  echo "[$(date -Is)] bootstrap: enroll response is missing the client or CA certificate" >&2
  exit 4
fi
printf '%s\n' "$CLIENT_CERT_PEM" > "${CERT_DIR}/client.crt"
printf '%s\n' "$CA_CHAIN_PEM" > "${CERT_DIR}/ca-chain.pem"

# --- PKCS#12 bundle -------------------------------------------------------------
P12_PASS=$(openssl rand -hex 24)
(
  umask 077
  printf '%s' "$P12_PASS" > "${CERT_DIR}/client.p12.pass"
  openssl pkcs12 -export \
    -inkey "$KEY_PATH" \
    -in "${CERT_DIR}/client.crt" \
    -certfile "${CERT_DIR}/ca-chain.pem" \
    -out "${CERT_DIR}/client.p12" \
    -password "pass:${P12_PASS}"
)

chown fc-signage:fc-signage "${CERT_DIR}"/* "$NODE_JSON"
chmod 0640 "${CERT_DIR}/client.p12" "${CERT_DIR}/client.crt" "${CERT_DIR}/ca-chain.pem" "$KEY_PATH"
chmod 0600 "${CERT_DIR}/client.p12.pass"

# Recording enrolledAt is what makes the next run a no-op.
EXPIRY=$(openssl x509 -in "${CERT_DIR}/client.crt" -enddate -noout | sed 's/notAfter=//')
update_node_json --arg ts "$(date -Is)" --arg exp "$EXPIRY" '.enrolledAt = $ts | .certExpiry = $exp'

systemctl start flowercore-signage-detect-display.service || true
systemctl start flowercore-signage-player-pi.service || true
echo "[$(date -Is)] bootstrap: enrolled and kiosk started (NodeId=${NODE_ID})"
|
||||
@@ -0,0 +1,6 @@
|
||||
#!/usr/bin/env bash
# HDMI hotplug responder: wait briefly, redeclare the display capabilities,
# then restart the kiosk player.
set -euo pipefail

# Pause two seconds before touching the display services.
sleep 2

# Best effort: a failed capability redeclaration must not block the restart.
systemctl start flowercore-signage-detect-display.service || :

# A failure here is the script's exit status.
systemctl restart flowercore-signage-player-pi.service
|
||||
@@ -0,0 +1,44 @@
|
||||
#!/usr/bin/env bash
# Kiosk launcher: derives the player URL from the node id and the SHA-256
# fingerprint of the client certificate, probes the /embed variant, falls back
# to the bare player URL when /embed is not served, then execs Chromium.
set -euo pipefail

NODE_JSON="/etc/flowercore/signage-node.json"
SIGNAGE_URL="${FC_SIGNAGE_URL:-https://signage.iamworkin.lan}"
CERT_DIR="/etc/fc-signage-player"

# "// empty" keeps a missing nodeId from turning into the literal string
# "null" inside the player URL.
NODE_ID=$(jq -r '.nodeId // empty' "$NODE_JSON")
if [[ -z "$NODE_ID" ]]; then
  echo "[$(date -Is)] launch: $NODE_JSON has no nodeId; refusing to start the kiosk" >&2
  exit 1
fi

# Colon-free SHA-256 fingerprint of the certificate inside the P12 bundle.
CERT_THUMB=$(openssl pkcs12 -in "$CERT_DIR/client.p12" -passin file:"$CERT_DIR/client.p12.pass" -nodes -nokeys 2>/dev/null \
  | openssl x509 -fingerprint -sha256 -noout \
  | sed 's/.*=//' \
  | tr -d ':')

PLAYER_URL="${SIGNAGE_URL}/player/${NODE_ID}/embed?token=${CERT_THUMB}"

# The P12 password is handed to curl through a config read from stdin so it
# never shows up in the process list. printf is a shell builtin, so no argv
# exposure there either. curl prints 000 for %{http_code} on transport errors;
# "|| true" only keeps set -e from aborting the launch.
# NOTE(review): assumes the password contains no '"' or '\' (bootstrap writes
# a hex string) — confirm if the pass file can come from elsewhere.
HTTP_STATUS=$(printf 'cert = "%s:%s"\n' "$CERT_DIR/client.p12" "$(cat "$CERT_DIR/client.p12.pass")" \
  | curl -sk -o /dev/null -w "%{http_code}" --max-time 5 \
      --cert-type P12 --config - \
      "$PLAYER_URL" || true)
HTTP_STATUS="${HTTP_STATUS:-000}"

mkdir -p /var/log/fc-signage-player
if [[ "$HTTP_STATUS" != "200" && "$HTTP_STATUS" != "301" && "$HTTP_STATUS" != "302" ]]; then
  echo "[$(date -Is)] /embed returned $HTTP_STATUS; falling back to /player/${NODE_ID}" \
    >> /var/log/fc-signage-player/url-divergence.log
  PLAYER_URL="${SIGNAGE_URL}/player/${NODE_ID}?token=${CERT_THUMB}"
fi

# exec replaces this shell with Chromium.
exec chromium-browser \
  --kiosk \
  --noerrdialogs \
  --disable-infobars \
  --disable-translate \
  --disable-features=TranslateUI,InfiniteSessionRestore \
  --autoplay-policy=no-user-gesture-required \
  --password-store=basic \
  --user-data-dir=/var/lib/fc-signage-player/profile \
  --disk-cache-dir=/var/lib/fc-signage-player/cache \
  --disk-cache-size=104857600 \
  --no-first-run \
  --no-default-browser-check \
  --check-for-update-interval=2592000 \
  --enable-features=OverlayScrollbar \
  --start-fullscreen \
  --window-position=0,0 \
  --window-size=1920,1080 \
  "$PLAYER_URL"
|
||||
@@ -0,0 +1,20 @@
|
||||
#!/usr/bin/env bash
# Pre-flight check run before the kiosk starts: the node identity and the
# client certificate bundle must be readable; a certificate that is close to
# expiry only produces a warning.
set -euo pipefail

mkdir -p /var/log/fc-signage-player

required_files=(
  /etc/flowercore/signage-node.json
  /etc/fc-signage-player/client.p12
  /etc/fc-signage-player/client.p12.pass
)

# Hard failure: without these files the player cannot authenticate.
for f in "${required_files[@]}"; do
  if [[ -r "$f" ]]; then
    continue
  fi
  echo "[$(date -Is)] prelaunch: missing or unreadable $f" >&2
  exit 1
done

# Soft check: warn (but still succeed) when the pipeline below fails, i.e. the
# certificate ends within seven days or could not be read from the bundle.
if ! openssl pkcs12 -in /etc/fc-signage-player/client.p12 -passin file:/etc/fc-signage-player/client.p12.pass -nokeys -clcerts 2>/dev/null \
    | openssl x509 -checkend $((7*24*3600)) -noout; then
  echo "[$(date -Is)] prelaunch: client cert expires within 7 days" >&2
fi

echo "[$(date -Is)] prelaunch: ok" | tee -a /var/log/fc-signage-player/prelaunch.log
|
||||
@@ -0,0 +1,46 @@
|
||||
#!/usr/bin/env bash
# Certificate renewal worker: when the client certificate expires within 30
# days, generate a fresh key + CSR, have the server sign it (authenticating
# with the current certificate), rebuild the P12 bundle and swap the new files
# into place before restarting the player.
#
# Exit codes: 0 = nothing to do or renewed, 5 = renewal failed (old cert kept).
set -euo pipefail

CERT_DIR="/etc/fc-signage-player"
NODE_JSON="/etc/flowercore/signage-node.json"
SIGNAGE_URL="${FC_SIGNAGE_URL:-https://signage.iamworkin.lan}"

[[ -s "$CERT_DIR/client.crt" ]] || { echo "no cert to renew"; exit 0; }

# -checkend succeeds while the certificate is still valid 30 days from now.
if openssl x509 -in "$CERT_DIR/client.crt" -checkend $((30*24*3600)) -noout; then
  exit 0
fi

NODE_ID=$(jq -r '.nodeId // empty' "$NODE_JSON")
if [[ -z "$NODE_ID" ]]; then
  echo "[$(date -Is)] renew: $NODE_JSON has no nodeId; leaving old cert in place" >&2
  exit 5
fi

NEW_KEY="$CERT_DIR/client.key.new"
NEW_CSR="$CERT_DIR/client.csr.new"
NEW_CRT="$CERT_DIR/client.crt.new"
NEW_CA="$CERT_DIR/ca-chain.pem.new"
NEW_P12="$CERT_DIR/client.p12.new"

# Everything created from here on is key material or a private scratch file.
umask 077
RESPONSE=$(mktemp)

# Remove scratch files on every exit path so a failed renewal leaves nothing
# half-written next to the live certificate. After a successful swap the
# *.new files no longer exist and rm -f is a no-op for them.
cleanup() {
  rm -f "$RESPONSE" "$NEW_KEY" "$NEW_CSR" "$NEW_CRT" "$NEW_CA" "$NEW_P12"
}
trap cleanup EXIT

openssl ecparam -genkey -name prime256v1 -out "$NEW_KEY"
openssl req -new -key "$NEW_KEY" -out "$NEW_CSR" \
  -subj "/CN=${NODE_ID}/O=FlowerCore/OU=SignagePlayer-Pi"

# curl prints 000 for %{http_code} on transport errors; "|| true" keeps set -e
# from exiting before the failure is logged below.
HTTP_STATUS=$(curl -sk -o "$RESPONSE" -w "%{http_code}" \
  --max-time 15 \
  --cert "$CERT_DIR/client.crt" --key "$CERT_DIR/client.key" \
  -X POST "${SIGNAGE_URL}/api/v1/nodes/${NODE_ID}/renew" \
  -H "Content-Type: application/json" \
  -d "$(jq -n --arg csr "$(cat "$NEW_CSR")" '{certificateSigningRequest: $csr}')" || true)
HTTP_STATUS="${HTTP_STATUS:-000}"

if [[ "$HTTP_STATUS" != "200" && "$HTTP_STATUS" != "201" ]]; then
  echo "[$(date -Is)] renew: failed HTTP $HTTP_STATUS; leaving old cert in place" >&2
  exit 5
fi

# Validate before writing: a missing field would otherwise swap the literal
# string "null" in as the certificate.
CLIENT_CERT_PEM=$(jq -r '.clientCertificatePem // .signedCertificatePem // empty' "$RESPONSE" || true)
CA_CHAIN_PEM=$(jq -r '.caCertificatePem // empty' "$RESPONSE" || true)
if [[ -z "$CLIENT_CERT_PEM" || -z "$CA_CHAIN_PEM" ]]; then
  echo "[$(date -Is)] renew: response is missing the client or CA certificate; leaving old cert in place" >&2
  exit 5
fi
printf '%s\n' "$CLIENT_CERT_PEM" > "$NEW_CRT"
printf '%s\n' "$CA_CHAIN_PEM" > "$NEW_CA"

# The existing bundle password is reused for the new bundle.
P12_PASS=$(cat "$CERT_DIR/client.p12.pass")
openssl pkcs12 -export -inkey "$NEW_KEY" -in "$NEW_CRT" \
  -certfile "$NEW_CA" \
  -out "$NEW_P12" -password "pass:${P12_PASS}"

# Give the new files the same owner and mode the bootstrap script applies,
# *before* they become live (this includes ca-chain.pem).
chown fc-signage:fc-signage "$NEW_KEY" "$NEW_CRT" "$NEW_CA" "$NEW_P12"
chmod 0640 "$NEW_KEY" "$NEW_CRT" "$NEW_CA" "$NEW_P12"

mv "$CERT_DIR/client.key.new" "$CERT_DIR/client.key"
mv "$CERT_DIR/client.crt.new" "$CERT_DIR/client.crt"
mv "$CERT_DIR/ca-chain.pem.new" "$CERT_DIR/ca-chain.pem"
mv "$CERT_DIR/client.p12.new" "$CERT_DIR/client.p12"

systemctl restart flowercore-signage-player-pi.service
|
||||
@@ -0,0 +1,2 @@
|
||||
# On an HDMI hotplug event, start the responder service: it waits 2s for DRM to settle, redeclares capabilities, then restarts Chromium.
|
||||
SUBSYSTEM=="drm", KERNEL=="card?-HDMI-A-?", ACTION=="change", RUN+="/usr/bin/systemctl start flowercore-signage-player-pi-hdmi.service"
|
||||
@@ -0,0 +1,16 @@
|
||||
[Unit]
|
||||
Description=FlowerCore Signage Pi: first-boot identity + mTLS enrollment
|
||||
Wants=network-online.target
|
||||
After=network-online.target
|
||||
Before=flowercore-signage-player-pi.service
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/usr/local/bin/flowercore-signage-bootstrap.sh
|
||||
RemainAfterExit=yes
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
TimeoutStartSec=2100
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
@@ -0,0 +1,8 @@
|
||||
[Unit]
|
||||
Description=FlowerCore Signage Pi: detect connected display + declare capabilities
|
||||
After=flowercore-signage-bootstrap.service
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
User=fc-signage
|
||||
ExecStart=/usr/local/bin/fc-signage-detect-display
|
||||
@@ -0,0 +1,11 @@
|
||||
[Unit]
|
||||
Description=Daily FlowerCore Signage Pi display capability redeclaration
|
||||
|
||||
[Timer]
|
||||
OnCalendar=daily
|
||||
RandomizedDelaySec=1h
|
||||
Persistent=true
|
||||
OnBootSec=30s
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
@@ -0,0 +1,7 @@
|
||||
[Unit]
|
||||
Description=FlowerCore Signage Pi Player HDMI hotplug responder
|
||||
DefaultDependencies=no
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/usr/local/bin/flowercore-signage-hdmi-respond.sh
|
||||
@@ -0,0 +1,30 @@
|
||||
# Chromium kiosk unit for the signage player. It is skipped (condition not
# met) until both the node identity file and the client P12 bundle exist.
[Unit]
Description=FlowerCore Digital Signage Pi Player (Chromium kiosk)
Documentation=https://github.com/astoltz/FlowerCore.Notes/blob/master/docs/standards/appletv-pi-signage-agents-design.md
Wants=network-online.target
After=network-online.target graphical.target
ConditionPathExists=/etc/flowercore/signage-node.json
ConditionPathExists=/etc/fc-signage-player/client.p12
# Start rate limiting is a [Unit] setting. StartLimitIntervalSec= is not
# recognised in [Service] and would be ignored there, silently leaving the
# default interval in effect.
StartLimitBurst=5
StartLimitIntervalSec=300s

[Service]
Type=simple
User=fc-signage
Group=fc-signage
WorkingDirectory=/var/lib/fc-signage-player
# Optional overrides; the leading "-" makes a missing file non-fatal.
EnvironmentFile=-/etc/flowercore/signage-player.env
ExecStartPre=/usr/local/bin/flowercore-signage-prelaunch.sh
ExecStart=/usr/local/bin/flowercore-signage-launch.sh
Restart=always
RestartSec=10s
MemoryMax=2G
MemoryHigh=1500M
# Sandbox: read-only system, with only the player's state and log directories writable.
ProtectSystem=strict
ProtectHome=true
ReadWritePaths=/var/lib/fc-signage-player /var/log/fc-signage-player
PrivateTmp=true
NoNewPrivileges=true

[Install]
WantedBy=graphical.target
|
||||
@@ -0,0 +1,6 @@
|
||||
[Unit]
|
||||
Description=FlowerCore Signage Pi: cert renewal worker
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/usr/local/bin/flowercore-signage-renew-cert.sh
|
||||
@@ -0,0 +1,10 @@
|
||||
[Unit]
|
||||
Description=Daily check for FlowerCore Signage Pi cert renewal
|
||||
|
||||
[Timer]
|
||||
OnCalendar=daily
|
||||
RandomizedDelaySec=2h
|
||||
Persistent=true
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
22
apps/fc-signage-pi-player/tests/display_capability.bats
Normal file
22
apps/fc-signage-pi-player/tests/display_capability.bats
Normal file
@@ -0,0 +1,22 @@
|
||||
#!/usr/bin/env bats
|
||||
|
||||
setup() {
|
||||
APP_ROOT="$(cd "$BATS_TEST_DIRNAME/.." && pwd)"
|
||||
DETECT="$APP_ROOT/scripts/fc-signage-detect-display"
|
||||
}
|
||||
|
||||
@test "display detection emits graceful disconnected profile when no hdmi connector is present" {
|
||||
script="$(cat "$DETECT")"
|
||||
[[ "$script" == *"displayConnected: false"* ]]
|
||||
[[ "$script" == *"No HDMI display detected"* ]]
|
||||
}
|
||||
|
||||
@test "display detection parses edid, falls back to kmsprint, and logs endpoint failures locally" {
|
||||
script="$(cat "$DETECT")"
|
||||
[[ "$script" == *"edid-decode"* ]]
|
||||
[[ "$script" == *"HDR (Static|Dynamic) Metadata Block"* ]]
|
||||
[[ "$script" == *"kmsprint"* ]]
|
||||
[[ "$script" == *"/api/v1/nodes/\${NODE_ID}/capabilities"* ]]
|
||||
[[ "$script" == *"/api/v1/displays/\${NODE_ID}/capability-profile"* ]]
|
||||
[[ "$script" == *"capabilities.log"* ]]
|
||||
}
|
||||
64
apps/fc-signage-pi-player/tests/identity_bootstrap.bats
Normal file
64
apps/fc-signage-pi-player/tests/identity_bootstrap.bats
Normal file
@@ -0,0 +1,64 @@
|
||||
#!/usr/bin/env bats
|
||||
|
||||
setup() {
|
||||
APP_ROOT="$(cd "$BATS_TEST_DIRNAME/.." && pwd)"
|
||||
BOOTSTRAP="$APP_ROOT/scripts/flowercore-signage-bootstrap.sh"
|
||||
RENEW="$APP_ROOT/scripts/flowercore-signage-renew-cert.sh"
|
||||
}
|
||||
|
||||
@test "bootstrap is idempotent when node is already enrolled" {
|
||||
script="$(cat "$BOOTSTRAP")"
|
||||
[[ "$script" == *'[[ -s "$NODE_JSON" && -s "$CERT_DIR/client.p12" ]]'* ]]
|
||||
[[ "$script" == *"already enrolled"* ]]
|
||||
[[ "$script" == *"exit 0"* ]]
|
||||
}
|
||||
|
||||
@test "bootstrap generates a stable node uuid and machine id" {
|
||||
script="$(cat "$BOOTSTRAP")"
|
||||
[[ "$script" == *"uuidgen"* ]]
|
||||
[[ "$script" == *"nodeUuid"* ]]
|
||||
[[ "$script" == *"machineId"* ]]
|
||||
[[ "$script" == *"cut -c1-16"* ]]
|
||||
}
|
||||
|
||||
@test "bootstrap posts to the canonical register endpoint" {
|
||||
grep -q '/api/v1/nodes/register' "$BOOTSTRAP"
|
||||
grep -q '"linux-arm64-pi"' "$BOOTSTRAP"
|
||||
}
|
||||
|
||||
@test "bootstrap retries registration once for first-call races" {
|
||||
script="$(cat "$BOOTSTRAP")"
|
||||
[[ "$script" == *"for attempt in 1 2"* ]]
|
||||
[[ "$script" == *"register attempt \$attempt returned"* ]]
|
||||
[[ "$script" == *"sleep 5"* ]]
|
||||
}
|
||||
|
||||
@test "bootstrap supports setup-code approval with manual polling fallback" {
|
||||
script="$(cat "$BOOTSTRAP")"
|
||||
[[ "$script" == *"signage-setup-code"* ]]
|
||||
[[ "$script" == *"approve-via-setup-code"* ]]
|
||||
[[ "$script" == *"+ 1800"* ]]
|
||||
[[ "$script" == *"sleep 15"* ]]
|
||||
}
|
||||
|
||||
@test "bootstrap generates an ecdsa p256 csr for the signage pi subject" {
|
||||
script="$(cat "$BOOTSTRAP")"
|
||||
[[ "$script" == *"ecparam -genkey -name prime256v1"* ]]
|
||||
[[ "$script" == *'/CN=${NODE_ID}/O=FlowerCore/OU=SignagePlayer-Pi'* ]]
|
||||
}
|
||||
|
||||
@test "bootstrap writes pkcs12 bundle with restrictive permissions" {
|
||||
script="$(cat "$BOOTSTRAP")"
|
||||
[[ "$script" == *"openssl pkcs12 -export"* ]]
|
||||
[[ "$script" == *"client.p12.pass"* ]]
|
||||
[[ "$script" == *"chmod 0640"* ]]
|
||||
[[ "$script" == *"chmod 0600"* ]]
|
||||
}
|
||||
|
||||
@test "renewal only calls renew endpoint inside the thirty-day window and swaps atomically" {
|
||||
script="$(cat "$RENEW")"
|
||||
[[ "$script" == *'-checkend $((30*24*3600))'* ]]
|
||||
[[ "$script" == *"/api/v1/nodes/\${NODE_ID}/renew"* ]]
|
||||
[[ "$script" == *"client.key.new"* ]]
|
||||
[[ "$script" == *'mv "$CERT_DIR/client.p12.new" "$CERT_DIR/client.p12"'* ]]
|
||||
}
|
||||
68
apps/fc-signage-pi-player/tests/systemd_kiosk_wrapper.bats
Normal file
68
apps/fc-signage-pi-player/tests/systemd_kiosk_wrapper.bats
Normal file
@@ -0,0 +1,68 @@
|
||||
#!/usr/bin/env bats
|
||||
|
||||
setup() {
|
||||
APP_ROOT="$(cd "$BATS_TEST_DIRNAME/.." && pwd)"
|
||||
}
|
||||
|
||||
@test "player unit exists" {
|
||||
[ -f "$APP_ROOT/systemd/flowercore-signage-player-pi.service" ]
|
||||
}
|
||||
|
||||
@test "player unit uses simple chromium service with restart backoff" {
|
||||
unit="$(cat "$APP_ROOT/systemd/flowercore-signage-player-pi.service")"
|
||||
[[ "$unit" == *"Type=simple"* ]]
|
||||
[[ "$unit" == *"Restart=always"* ]]
|
||||
[[ "$unit" == *"RestartSec=10s"* ]]
|
||||
[[ "$unit" == *"StartLimitBurst=5"* ]]
|
||||
[[ "$unit" == *"StartLimitIntervalSec=300s"* ]]
|
||||
}
|
||||
|
||||
@test "player unit caps chromium memory at two gigabytes" {
|
||||
grep -q '^MemoryMax=2G$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
|
||||
grep -q '^MemoryHigh=1500M$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
|
||||
}
|
||||
|
||||
@test "player unit condition-gates startup on identity and p12 certificate" {
|
||||
grep -q '^ConditionPathExists=/etc/flowercore/signage-node.json$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
|
||||
grep -q '^ConditionPathExists=/etc/fc-signage-player/client.p12$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
|
||||
}
|
||||
|
||||
@test "player unit runs prelaunch checks before chromium" {
|
||||
grep -q '^ExecStartPre=/usr/local/bin/flowercore-signage-prelaunch.sh$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
|
||||
grep -q '^ExecStart=/usr/local/bin/flowercore-signage-launch.sh$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
|
||||
}
|
||||
|
||||
@test "hdmi udev rule routes through the two-second settle service" {
|
||||
rule="$(cat "$APP_ROOT/systemd/99-flowercore-signage-hdmi.rules")"
|
||||
[[ "$rule" == *'KERNEL=="card?-HDMI-A-?"'* ]]
|
||||
[[ "$rule" == *"systemctl start flowercore-signage-player-pi-hdmi.service"* ]]
|
||||
[[ "$rule" != *"systemctl restart flowercore-signage-player-pi.service"* ]]
|
||||
}
|
||||
|
||||
@test "hdmi responder settles, declares display, then restarts chromium" {
|
||||
responder="$(cat "$APP_ROOT/scripts/flowercore-signage-hdmi-respond.sh")"
|
||||
[[ "$responder" == *"sleep 2"* ]]
|
||||
[[ "$responder" == *"systemctl start flowercore-signage-detect-display.service"* ]]
|
||||
[[ "$responder" == *"systemctl restart flowercore-signage-player-pi.service"* ]]
|
||||
}
|
||||
|
||||
@test "chromium policy json is valid and disables credential prompts" {
|
||||
command -v jq >/dev/null || skip "jq not installed"
|
||||
jq -e '.AutofillAddressEnabled == false and .AutofillCreditCardEnabled == false and .PasswordManagerEnabled == false' \
|
||||
"$APP_ROOT/chromium-policies/flowercore-signage.json" >/dev/null
|
||||
}
|
||||
|
||||
@test "launch script tries embed URL and logs bare-player fallback" {
|
||||
launch="$(cat "$APP_ROOT/scripts/flowercore-signage-launch.sh")"
|
||||
[[ "$launch" == *'/player/${NODE_ID}/embed?token=${CERT_THUMB}'* ]]
|
||||
[[ "$launch" == *"url-divergence.log"* ]]
|
||||
[[ "$launch" == *'/player/${NODE_ID}?token=${CERT_THUMB}'* ]]
|
||||
}
|
||||
|
||||
@test "prelaunch script validates required node and cert files" {
|
||||
prelaunch="$(cat "$APP_ROOT/scripts/flowercore-signage-prelaunch.sh")"
|
||||
[[ "$prelaunch" == *"/etc/flowercore/signage-node.json"* ]]
|
||||
[[ "$prelaunch" == *"/etc/fc-signage-player/client.p12"* ]]
|
||||
[[ "$prelaunch" == *"/etc/fc-signage-player/client.p12.pass"* ]]
|
||||
[[ "$prelaunch" == *"exit 1"* ]]
|
||||
}
|
||||
@@ -532,7 +532,7 @@ spec:
|
||||
fsGroupChangePolicy: OnRootMismatch
|
||||
containers:
|
||||
- name: web
|
||||
image: localhost/fc-ttsreader-web:v20260506-phase6
|
||||
image: localhost/fc-ttsreader-web:v20260603-s54cx14-pr29-schema
|
||||
imagePullPolicy: Never
|
||||
ports:
|
||||
- containerPort: 5217
|
||||
@@ -554,10 +554,16 @@ spec:
|
||||
value: "/data/chapter-context.db"
|
||||
- name: TtsReader__Jobs__Root
|
||||
value: "/data/jobs"
|
||||
- name: TtsReader__Export__LocalCasRoot
|
||||
value: "/data/bundles/cas"
|
||||
- name: TtsReader__Piper__Host
|
||||
value: "ttsreader-piper.fc-ttsreader.svc.cluster.local."
|
||||
value: "10.0.57.17"
|
||||
- name: TtsReader__Piper__Port
|
||||
value: "10200"
|
||||
value: "8500"
|
||||
- name: TtsReader__Piper__Transport
|
||||
value: "http"
|
||||
- name: TtsReader__Piper__HttpPath
|
||||
value: "/tts"
|
||||
- name: TtsReader__Kokoro__Enabled
|
||||
value: "true"
|
||||
- name: TtsReader__Kokoro__BaseUrl
|
||||
|
||||
@@ -58,7 +58,7 @@ spec:
|
||||
nodeName: rke2-server
|
||||
containers:
|
||||
- name: web
|
||||
image: localhost/fc-updater-web:v20260508-pub3-deepening-2bdf108
|
||||
image: localhost/fc-updater-web:v202605310029-7974fc4
|
||||
imagePullPolicy: Never
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
@@ -88,6 +88,8 @@ spec:
|
||||
value: Faith AI Mike Edition
|
||||
- name: FlowerCore__Updater__PublicShares__Links__0__Description
|
||||
value: Private release link for Mike's Faith AI bundle.
|
||||
- name: FlowerCore__Audit__Sinks__Loki__Enabled
|
||||
value: "false"
|
||||
- name: FlowerCore__Updater__Auth__Bootstrap__Enabled
|
||||
value: "true"
|
||||
- name: FlowerCore__Updater__Auth__Bootstrap__Username
|
||||
|
||||
2
apps/github-runner/.gitattributes
vendored
Normal file
2
apps/github-runner/.gitattributes
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
*.sh text eol=lf
|
||||
Dockerfile text eol=lf
|
||||
54
apps/github-runner/Dockerfile
Normal file
54
apps/github-runner/Dockerfile
Normal file
@@ -0,0 +1,54 @@
|
||||
FROM myoung34/github-runner:latest
|
||||
|
||||
ARG RUBY_VERSION=3.3.11
|
||||
ARG RUBY_MINOR=3.3
|
||||
ARG RUBY_BUILD_VERSION=v20260326
|
||||
ARG RUNNER_UID=1001
|
||||
ARG RUNNER_GID=1001
|
||||
|
||||
ENV RUNNER_TOOL_CACHE=/home/runner/_tool
|
||||
ENV RUNNER_RUBY_TOOLCACHE=/opt/runner-toolcache
|
||||
ENV PATH="/home/runner/_tool/Ruby/${RUBY_MINOR}/x64/bin:/opt/runner-toolcache/Ruby/${RUBY_MINOR}/x64/bin:${PATH}"
|
||||
|
||||
USER root
|
||||
|
||||
# Bake the IAmWorkin step-ca root CA into the system trust store. Without
|
||||
# this, .NET HttpClient calls from CI tests against *.iamworkin.lan
|
||||
# (e.g. https://selenium.iamworkin.lan/session) fail with `PartialChain`
|
||||
# because the runner image's default Ubuntu trust bundle doesn't include
|
||||
# our internal Root CA. update-ca-certificates regenerates
|
||||
# /etc/ssl/certs/ca-certificates.crt, which OpenSSL + .NET on Linux read
|
||||
# automatically — no SSL_CERT_FILE env var needed.
|
||||
COPY step-ca-root.crt /usr/local/share/ca-certificates/iamworkin-step-ca-root.crt
|
||||
|
||||
RUN apt-get update \
|
||||
&& DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
||||
autoconf \
|
||||
bison \
|
||||
build-essential \
|
||||
ca-certificates \
|
||||
curl \
|
||||
libdb-dev \
|
||||
libffi-dev \
|
||||
libgdbm-dev \
|
||||
libgmp-dev \
|
||||
libncurses-dev \
|
||||
libreadline-dev \
|
||||
libssl-dev \
|
||||
libyaml-dev \
|
||||
patch \
|
||||
pkg-config \
|
||||
uuid-dev \
|
||||
zlib1g-dev \
|
||||
&& update-ca-certificates \
|
||||
&& curl -fsSL "https://github.com/rbenv/ruby-build/archive/refs/tags/${RUBY_BUILD_VERSION}.tar.gz" -o /tmp/ruby-build.tar.gz \
|
||||
&& mkdir -p /tmp/ruby-build \
|
||||
&& tar -xzf /tmp/ruby-build.tar.gz --strip-components=1 -C /tmp/ruby-build \
|
||||
&& /tmp/ruby-build/install.sh \
|
||||
&& rm -rf /tmp/ruby-build /tmp/ruby-build.tar.gz /var/lib/apt/lists/*
|
||||
|
||||
COPY install-ruby-toolcache.sh /usr/local/bin/install-ruby-toolcache.sh
|
||||
|
||||
RUN chmod +x /usr/local/bin/install-ruby-toolcache.sh \
|
||||
&& RUBY_VERSION="${RUBY_VERSION}" RUBY_MINOR="${RUBY_MINOR}" TOOLCACHE_ROOT="${RUNNER_RUBY_TOOLCACHE}" RUNNER_UID="${RUNNER_UID}" RUNNER_GID="${RUNNER_GID}" /usr/local/bin/install-ruby-toolcache.sh \
|
||||
&& ruby -v
|
||||
133
apps/github-runner/README.md
Normal file
133
apps/github-runner/README.md
Normal file
@@ -0,0 +1,133 @@
|
||||
# GitHub Runner Fleet
|
||||
|
||||
ArgoCD owns `apps/github-runner/github-runner.yaml`. Do not patch live runner
|
||||
Deployments with `kubectl`; update this manifest and let ArgoCD reconcile.
|
||||
|
||||
## Runner Shape
|
||||
|
||||
All repo-scoped Linux runners use:
|
||||
|
||||
- `localhost/fc-github-runner:v20260525-ruby3.3.11-stepca`, derived from
|
||||
`myoung34/github-runner:latest`
|
||||
- `ACCESS_TOKEN` from the `github-runner-token` Secret
|
||||
- `RUN_AS_ROOT=false`
|
||||
- `EPHEMERAL=true`
|
||||
- `LABELS=self-hosted,linux,fc-build-linux`
|
||||
- writable non-root paths under `/home/runner` for .NET, NuGet, XDG cache, and
|
||||
Actions tool cache
|
||||
- Ruby 3.3.11 seeded into `/home/runner/_tool/Ruby/3.3/x64` from the baked
|
||||
`/opt/runner-toolcache` copy so `ruby/setup-ruby@v1` can discover it on
|
||||
self-hosted `ubuntu-20.04-x64` runners
|
||||
|
||||
`github-runner` for `FlowerCore.Common` is single-replica because it retains the
|
||||
original Longhorn ReadWriteOnce NuGet PVC. Every other repo-scoped runner uses
|
||||
two replicas with per-pod `emptyDir` caches. That is the safe backlog-drain
|
||||
strategy: no two pods share one RWO PVC.
|
||||
|
||||
Sprint 32 final long-tail wave adds 16 two-replica Deployments:
|
||||
`FlowerCore.Knowledge`, `FlowerCore.LlmBridge`, `FlowerCore.Media`,
|
||||
`FlowerCore.Presentations`, `FlowerCore.RemoteDesktop`, `FlowerCore.DNS`,
|
||||
`FlowerCore.Distribution`, `FlowerCore.Scoreboard`,
|
||||
`FlowerCore.SegmentDisplay`, `FlowerCore.Signage.Contracts`,
|
||||
`FlowerCore.SignalControl`, `FlowerCore.Intranet.Web`,
|
||||
`FlowerCore.Provisioning`, `FlowerCore.Redis`, `FlowerCore.MessageBoard`, and
|
||||
`FlowerCore.MenuBoard`.
|
||||
|
||||
## Image Build
|
||||
|
||||
Ruby is baked with a pinned `ruby-build` release and Ruby patch version. The pod
|
||||
still mounts an `emptyDir` over `/home/runner`, so the `setup-runner-home` init
|
||||
container copies the baked toolcache from `/opt/runner-toolcache/Ruby` into
|
||||
`/home/runner/_tool/Ruby` before the runner container starts.
|
||||
|
||||
The IAmWorkin step-ca root CA is also baked into the system trust store
|
||||
(`/usr/local/share/ca-certificates/iamworkin-step-ca-root.crt`, registered by
|
||||
`update-ca-certificates`). Without it, .NET HttpClient calls from CI tests
|
||||
against `*.iamworkin.lan` (e.g. `https://selenium.iamworkin.lan/session`)
|
||||
fail with `PartialChain`. To refresh the bundled cert when the root rotates,
|
||||
re-extract from the cluster and overwrite `step-ca-root.crt`:
|
||||
|
||||
```bash
|
||||
kubectl get secret -n cert-manager step-ca-root \
|
||||
-o jsonpath='{.data.ca\.crt}' | base64 -d > step-ca-root.crt
|
||||
```
|
||||
|
||||
```bash
|
||||
cd apps/github-runner
|
||||
podman build -t localhost/fc-github-runner:v20260525-ruby3.3.11-stepca .
|
||||
podman run --rm localhost/fc-github-runner:v20260525-ruby3.3.11-stepca ruby -v
|
||||
podman run --rm localhost/fc-github-runner:v20260525-ruby3.3.11-stepca \
|
||||
test -f /opt/runner-toolcache/Ruby/3.3/x64.complete
|
||||
podman save localhost/fc-github-runner:v20260525-ruby3.3.11-stepca \
|
||||
-o fc-github-runner-v20260525-ruby3.3.11-stepca.tar
|
||||
```
|
||||
|
||||
Import the saved image on every schedulable RKE2 node before ArgoCD rolls the
|
||||
Deployments:
|
||||
|
||||
```bash
|
||||
for node in rke2-server rke2-agent1 rke2-agent2; do
|
||||
scp fc-github-runner-v20260525-ruby3.3.11-stepca.tar "$node:/tmp/"
|
||||
ssh "$node" 'sudo ctr -a /run/k3s/containerd/containerd.sock -n k8s.io images rm localhost/fc-github-runner:v20260525-ruby3.3.11-stepca || true'
|
||||
ssh "$node" 'sudo ctr -a /run/k3s/containerd/containerd.sock -n k8s.io images import /tmp/fc-github-runner-v20260525-ruby3.3.11-stepca.tar'
|
||||
done
|
||||
```
|
||||
|
||||
## Post-Merge Proof
|
||||
|
||||
After the PR is merged and ArgoCD syncs, verify the runner fleet:
|
||||
|
||||
```bash
|
||||
kubectl -n github-runner get deploy,pods,pvc
|
||||
```
|
||||
|
||||
Verify the Ruby toolcache in a fresh pod:
|
||||
|
||||
```bash
|
||||
kubectl -n github-runner exec deploy/github-runner-puppet -c runner -- ruby -v
|
||||
kubectl -n github-runner exec deploy/github-runner-puppet -c runner -- sh -c \
|
||||
'echo "$RUNNER_TOOL_CACHE" && test -f "$RUNNER_TOOL_CACHE/Ruby/3.3/x64.complete"'
|
||||
```
|
||||
|
||||
Verify GitHub registration for the repo-scoped runners:
|
||||
|
||||
```bash
|
||||
for repo in FlowerCore.Common FlowerCore.Shared.Pos FlowerCore.Puppet FlowerCore.Signage \
|
||||
FlowerCore.DMS FlowerCore.Telephony FlowerCore.Print.Web FlowerCore.Chat \
|
||||
FlowerCore.MySQL FlowerCore.Kiosk.Linux FlowerCore.Marquee FlowerCore.TtsReader \
|
||||
FlowerCore.Knowledge FlowerCore.LlmBridge FlowerCore.Media \
|
||||
FlowerCore.Presentations FlowerCore.RemoteDesktop FlowerCore.DNS \
|
||||
FlowerCore.Distribution FlowerCore.Scoreboard FlowerCore.SegmentDisplay \
|
||||
FlowerCore.Signage.Contracts FlowerCore.SignalControl FlowerCore.Intranet.Web \
|
||||
FlowerCore.Provisioning FlowerCore.Redis FlowerCore.MessageBoard \
|
||||
FlowerCore.MenuBoard; do
|
||||
echo "=== $repo ==="
|
||||
gh api "/repos/astoltz/$repo/actions/runners" \
|
||||
--jq '.runners[] | select(.labels[].name == "fc-build-linux") | {name,status,busy,labels:[.labels[].name]}'
|
||||
done
|
||||
```
|
||||
|
||||
Shared.Pos publish proof after the runner pod is online:
|
||||
|
||||
```bash
|
||||
gh run list --repo astoltz/FlowerCore.Shared.Pos \
|
||||
--workflow "Build, Test & Publish" --branch main --limit 5
|
||||
```
|
||||
|
||||
If the latest run is still queued after runner registration, rerun the workflow
|
||||
from GitHub Actions and verify it lands on an `rke2-linux-*` runner.
|
||||
|
||||
## Failure Notes
|
||||
|
||||
- `actions/setup-dotnet` permission error at `/usr/share/dotnet`: check that
|
||||
`DOTNET_INSTALL_DIR=/home/runner/.dotnet` and related cache env vars are
|
||||
present on the runner pod.
|
||||
- `ruby/setup-ruby@v1` says self-hosted runners must install Ruby in
|
||||
`$RUNNER_TOOL_CACHE`: check that the init container copied
|
||||
`/opt/runner-toolcache/Ruby` into `/home/runner/_tool/Ruby` and that
|
||||
`/home/runner/_tool/Ruby/3.3/x64.complete` exists.
|
||||
- `404` during runner registration: the fine-grained PAT is valid but missing
|
||||
repository access for that repo. Add the repo to the PAT access list; the PAT
|
||||
value does not change.
|
||||
- `Multi-Attach` volume error: only the Common runner uses a RWO PVC and it must
|
||||
stay single-replica. New multi-replica runners use `emptyDir`.
|
||||
4592
apps/github-runner/github-runner.yaml
Normal file
4592
apps/github-runner/github-runner.yaml
Normal file
File diff suppressed because it is too large
Load Diff
19
apps/github-runner/install-ruby-toolcache.sh
Normal file
19
apps/github-runner/install-ruby-toolcache.sh
Normal file
@@ -0,0 +1,19 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
RUBY_VERSION="${RUBY_VERSION:-3.3.11}"
|
||||
RUBY_MINOR="${RUBY_MINOR:-3.3}"
|
||||
TOOLCACHE_ROOT="${TOOLCACHE_ROOT:-/opt/runner-toolcache}"
|
||||
RUNNER_UID="${RUNNER_UID:-1001}"
|
||||
RUNNER_GID="${RUNNER_GID:-1001}"
|
||||
RUBY_PREFIX="${TOOLCACHE_ROOT}/Ruby/${RUBY_VERSION}/x64"
|
||||
|
||||
mkdir -p "${TOOLCACHE_ROOT}/Ruby"
|
||||
RUBY_CONFIGURE_OPTS="${RUBY_CONFIGURE_OPTS:---disable-install-doc --disable-yjit}" ruby-build "${RUBY_VERSION}" "${RUBY_PREFIX}"
|
||||
|
||||
touch "${TOOLCACHE_ROOT}/Ruby/${RUBY_VERSION}/x64.complete"
|
||||
ln -sfn "${RUBY_VERSION}" "${TOOLCACHE_ROOT}/Ruby/${RUBY_MINOR}"
|
||||
|
||||
"${RUBY_PREFIX}/bin/ruby" -v
|
||||
chown -R "${RUNNER_UID}:${RUNNER_GID}" "${TOOLCACHE_ROOT}"
|
||||
chmod -R a+rX "${TOOLCACHE_ROOT}"
|
||||
12
apps/github-runner/step-ca-root.crt
Normal file
12
apps/github-runner/step-ca-root.crt
Normal file
@@ -0,0 +1,12 @@
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIIBxDCCAWqgAwIBAgIRAPY357G6ow6zMAL5+4bS2kkwCgYIKoZIzj0EAwIwQDEa
|
||||
MBgGA1UEChMRSUFtV29ya2luIEFDTUUgQ0ExIjAgBgNVBAMTGUlBbVdvcmtpbiBB
|
||||
Q01FIENBIFJvb3QgQ0EwHhcNMjYwMzA4MTgwNzExWhcNMzYwMzA1MTgwNzExWjBA
|
||||
MRowGAYDVQQKExFJQW1Xb3JraW4gQUNNRSBDQTEiMCAGA1UEAxMZSUFtV29ya2lu
|
||||
IEFDTUUgQ0EgUm9vdCBDQTBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABJ2n04X1
|
||||
JZo5Zdq/i1Idv8+fqwZyAzBh7whbqj0SWsJL8UWRabCMqYCs7+dXO0xRSzqkwFDL
|
||||
x+vooOai8RgRNhajRTBDMA4GA1UdDwEB/wQEAwIBBjASBgNVHRMBAf8ECDAGAQH/
|
||||
AgEBMB0GA1UdDgQWBBRnuPPQR6iM/H6vOluiU3Sygayz8jAKBggqhkjOPQQDAgNI
|
||||
ADBFAiEArQK9dYPGmAZsdYnjziuFVVE5NKZUcceYvGfGC+tLXUsCIAudF2zJrCRq
|
||||
3mK50ZZET/fwTkJwiEF4824mjP8p1CKM
|
||||
-----END CERTIFICATE-----
|
||||
@@ -466,11 +466,11 @@ spec:
|
||||
itemPath: vaults/IAmWorkin/items/Guacamole JSON Auth
|
||||
---
|
||||
---
|
||||
# 1Password-backed credentials for Mac mini VNC access (Phase 1 — 2026-04-28)
|
||||
# 1Password-backed credentials for Mac mini VNC access (Phase 1 — 2026-04-28)
|
||||
# The operator mints Secret 'macmini-vnc-creds' with keys: username, password, VNC Password
|
||||
# Note: '1Password' field label 'VNC Password' -> K8s Secret key 'VNC Password' (space retained)
|
||||
# Guacamole VNC connection password is sourced from the 'VNC Password' field.
|
||||
# Actual IP is 10.0.56.115 (INFRA VLAN) — the 1P item 'IP' field is kept as backup reference.
|
||||
# Actual IP is 10.0.56.115 (INFRA VLAN) — the 1P item 'IP' field is kept as backup reference.
|
||||
apiVersion: onepassword.com/v1
|
||||
kind: OnePasswordItem
|
||||
metadata:
|
||||
@@ -481,6 +481,7 @@ metadata:
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
spec:
|
||||
itemPath: vaults/IAmWorkin/items/Mac Mini
|
||||
---
|
||||
# Blue Jay Branding Extension (CSS + translations)
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
|
||||
@@ -46,7 +46,7 @@ spec:
|
||||
spec:
|
||||
containers:
|
||||
- name: intranet-web
|
||||
image: localhost/fc-intranet-web:v20260508-brochure-w1
|
||||
image: localhost/fc-intranet-web:v20260531-ttsreader-bridge
|
||||
imagePullPolicy: Never
|
||||
ports:
|
||||
- containerPort: 5300
|
||||
|
||||
@@ -93,6 +93,7 @@ spec:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "8080"
|
||||
prometheus.io/path: "/metrics"
|
||||
flowercore.io/healthz-auth-policy: "allow-anonymous"
|
||||
spec:
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
@@ -102,7 +103,7 @@ spec:
|
||||
- name: web
|
||||
# Placeholder tag — bump to the image you built + imported to ALL
|
||||
# RKE2 nodes via scripts/deploy-knowledge.sh before applying.
|
||||
image: localhost/fc-knowledge-web:v20260429232635
|
||||
image: localhost/fc-knowledge-web:v20260603-oidc-authentik-auditfix
|
||||
imagePullPolicy: Never
|
||||
command:
|
||||
- /bin/sh
|
||||
@@ -123,6 +124,25 @@ spec:
|
||||
value: "Production"
|
||||
- name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
|
||||
value: "false"
|
||||
# AuthentiK/OIDC is enforced. /healthz stays anonymous by contract;
|
||||
# see flowercore.io/healthz-auth-policy above and the Sprint 58
|
||||
# OIDC readiness probe audit.
|
||||
- name: FlowerCore__Auth__Enabled
|
||||
value: "true"
|
||||
- name: FlowerCore__Auth__Oidc__Enabled
|
||||
value: "true"
|
||||
- name: FlowerCore__Auth__Oidc__Authority
|
||||
value: "https://id.iamworkin.lan/application/o/knowledge/"
|
||||
- name: FlowerCore__Auth__Oidc__Audience
|
||||
value: "knowledge"
|
||||
- name: FlowerCore__Auth__Oidc__ClientId
|
||||
value: "knowledge"
|
||||
- name: FlowerCore__Auth__Oidc__ClientSecret
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: knowledge-oidc-client
|
||||
key: client_secret
|
||||
optional: true
|
||||
# Vector-store directory + embedding model + edition profile dir.
|
||||
# Profile JSON is baked into the image at /home/app/editions via the
|
||||
# csproj Content-link from FlowerCore.Common/editions/.
|
||||
@@ -134,6 +154,8 @@ spec:
|
||||
value: "5"
|
||||
- name: Knowledge__MaxLimit
|
||||
value: "50"
|
||||
- name: Knowledge__Federation__DatabasePath
|
||||
value: "/data/vector-stores/knowledge-federation.db"
|
||||
- name: FlowerCore__Editions__ProfileDirectory
|
||||
value: "/home/app/editions"
|
||||
# Embed via edge1 Pi 5 + AI HAT+ (10.0.57.17:11434). Cluster
|
||||
|
||||
@@ -1,51 +1,9 @@
|
||||
# =============================================================================
|
||||
# ci1 — Windows Server 2025 KubeVirt VM (GitHub Actions Self-Hosted Runner)
|
||||
# ci1 - Windows Server 2025 KubeVirt VM (GitHub Actions Self-Hosted Runner)
|
||||
# =============================================================================
|
||||
# Purpose: dedicated CI runner for FlowerCore.Updater Sandbox E2E nightly +
|
||||
# future fleet WPF AAT lanes. Replaces the never-registered
|
||||
# `bluejay-ws-sandbox-1` runner placeholder. Andrew explicitly does NOT want
|
||||
# BLUEJAY-WS registered as a runner (workstation has personal/operator state).
|
||||
#
|
||||
# Storage layout (2026-05-08):
|
||||
# * ISO is now sourced from Synology NFS (Path B) — see
|
||||
# win2025-iso-nfs-pv.yaml. The Longhorn Filesystem PVC
|
||||
# `windows-server-2025-iso` below is RETAINED but UNUSED so the prior
|
||||
# CDI upload state is preserved as a fallback (and so ArgoCD doesn't
|
||||
# prune it on this commit). It can be deleted in a follow-up commit
|
||||
# after the NFS path is proven on a successful Windows install.
|
||||
#
|
||||
# Status (2026-05-08): LIVE — Phase 1 prereqs satisfied:
|
||||
# * Multus CNI v4.2.2 thick-plugin DaemonSet running on all 3 RKE2 nodes
|
||||
# (apps/multus/multus.yaml; ApplicationSet `infra-multus` Synced/Healthy)
|
||||
# * CDI v1.65.0 operator + CR Deployed (apps/cdi/; ApplicationSet
|
||||
# `infra-cdi` Synced/Healthy; uploadproxy reachable via kubectl port-forward)
|
||||
# * Windows Server 2025 ISO uploaded via CDI virtctl image-upload to
|
||||
# PVC windows-server-2025-iso (7.7 GiB → 10Gi PVC, Bound, Upload Complete)
|
||||
# * Local Administrator password generated, stored in 1Password vault
|
||||
# IAmWorkin (qaphopopkryhbg353ukzhhuqoq) item id h3ix4mgfk65gmkcmvh6ly3d3hu
|
||||
# * NetworkAttachmentDefinition prod-vlan57 registered (apps/kubevirt-vms/
|
||||
# prod-vlan57-nad.yaml). VM still uses pod-network masquerade until Phase 1.5
|
||||
# host bridge work lands (Puppet br-prod + enp86s0.57); switching is a
|
||||
# one-line YAML edit + git push.
|
||||
#
|
||||
# See docs/infrastructure/windows-server-build-runner-plan.md "Phase 1 readiness gate".
|
||||
#
|
||||
# Network choice in this draft: **pod-network fallback** (Calico default).
|
||||
# Outbound-only is fine for the Updater Sandbox E2E runner workload (the runner
|
||||
# polls GitHub Actions over HTTPS; no inbound listener needed). Switch to a
|
||||
# Multus PROD VLAN NetworkAttachmentDefinition once Multus is installed and the
|
||||
# operator wants L2 access from `ci1` to other PROD VLAN services.
|
||||
#
|
||||
# Sizing: 8 vCPU / 16 GB RAM / 200 GB disk on Longhorn (default storageClass).
|
||||
# Capacity check 2026-05-08: each RKE2 node has 16 vCPU / ~64Gi allocatable;
|
||||
# 8 vCPU is ~17% of one node's allocatable, fits comfortably.
|
||||
#
|
||||
# Apply (after operator approval + ISO loaded):
|
||||
# kubectl --kubeconfig $env:USERPROFILE\.kube\rke2.yaml apply -f apps/kubevirt-vms/ci1.yaml
|
||||
#
|
||||
# Connect to console for Windows install:
|
||||
# virtctl --kubeconfig $env:USERPROFILE\.kube\rke2.yaml vnc ci1 -n kubevirt-vms
|
||||
# (Or via Guacamole once a connection profile is added.)
|
||||
# Boots from the sysprepped containerDisk template built by the Windows VM
|
||||
# sysprep pipeline. See docs/infrastructure/windows-vm-sysprep-pipeline.md.
|
||||
# Path A/B/C install history is preserved in git log only.
|
||||
# =============================================================================
|
||||
|
||||
apiVersion: v1
|
||||
@@ -57,248 +15,6 @@ metadata:
|
||||
pod-security.kubernetes.io/enforce: privileged
|
||||
|
||||
---
|
||||
# ISO PVC — populated via CDI virtctl image-upload (CDI is now installed).
|
||||
#
|
||||
# **Volume mode (2026-05-08 status):** Filesystem-mode PVC. A migration to
|
||||
# `volumeMode: Block` via DataVolume was attempted to address an OVMF SATA
|
||||
# CDROM read timeout, but CDI v1.65.0's upload-target pod runs as uid 107
|
||||
# with `capabilities.drop: [ALL]` and cannot open the underlying block
|
||||
# device (`blockdev: cannot open /dev/cdi-block-volume: Permission denied`).
|
||||
# Reverted to Filesystem PVC pending one of:
|
||||
# - CDI deployment override granting CAP_SYS_RAWIO to upload pod
|
||||
# - Pre-populated PVC via privileged init pod that dd's the ISO directly
|
||||
# - Migration to a different storage class that exposes block devices
|
||||
# differently (e.g. iSCSI, where Longhorn's CSI mount path may behave
|
||||
# differently)
|
||||
#
|
||||
# Population workflow (this PVC, Filesystem mode):
|
||||
# 1. virtctl --kubeconfig $env:USERPROFILE\.kube\rke2.yaml image-upload pvc \
|
||||
# windows-server-2025-iso -n kubevirt-vms \
|
||||
# --image-path "$env:USERPROFILE\Downloads\en-us_windows_server_2025_updated_march_2026_x64_dvd_8e06425a.iso" \
|
||||
# --size 10Gi --storage-class longhorn --access-mode ReadWriteOnce \
|
||||
# --uploadproxy-url https://localhost:8443 --insecure
|
||||
# (--uploadproxy-url uses port-forward in practice: `kubectl port-forward
|
||||
# -n cdi service/cdi-uploadproxy 8443:443 &` first.)
|
||||
#
|
||||
# **Open boot issue:** even with the ISO at bootOrder:1, OVMF console showed:
|
||||
# BdsDxe: starting Boot0001 "UEFI QEMU DVD-ROM QM00001 " from ... Sata(...)
|
||||
# BdsDxe: failed to start Boot0001 ... Time out
|
||||
# Diagnosis confirmed PVC content IS a valid bootable ISO9660 image — the
|
||||
# timeout is in OVMF reading from the SATA-CDROM-backed-by-filesystem-PVC.
|
||||
# Block mode would likely fix it; see CDI permission issue above.
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: windows-server-2025-iso
|
||||
namespace: kubevirt-vms
|
||||
labels:
|
||||
app: ci-runner
|
||||
flowercore.io/managed-by: bluejay-infra
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce # Bump to ReadOnlyMany after population for multi-VM use
|
||||
resources:
|
||||
requests:
|
||||
storage: 10Gi # Server 2025 ISO is 7.7GB; 10Gi for headroom
|
||||
storageClassName: longhorn
|
||||
|
||||
---
|
||||
# Root disk PVC — empty 200Gi volume that Windows installs into.
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: ci1-rootdisk
|
||||
namespace: kubevirt-vms
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
resources:
|
||||
requests:
|
||||
storage: 200Gi
|
||||
storageClassName: longhorn
|
||||
|
||||
---
|
||||
# Sysprep ConfigMap — autounattend.xml for hands-off Windows install.
|
||||
# Sets local Administrator password (REPLACE the placeholder), enables RDP,
|
||||
# enables WinRM, sets hostname, and configures static-ish networking via DHCP.
|
||||
# The ISO + VirtIO drivers handle the rest.
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: ci1-autounattend
|
||||
namespace: kubevirt-vms
|
||||
data:
|
||||
autounattend.xml: |
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<unattend xmlns="urn:schemas-microsoft-com:unattend">
|
||||
|
||||
<!-- Pass 1: WindowsPE — Disk setup and VirtIO driver injection -->
|
||||
<settings pass="windowsPE">
|
||||
<component name="Microsoft-Windows-International-Core-WinPE"
|
||||
processorArchitecture="amd64"
|
||||
publicKeyToken="31bf3856ad364e35"
|
||||
language="neutral" versionScope="nonSxS">
|
||||
<SetupUILanguage>
|
||||
<UILanguage>en-US</UILanguage>
|
||||
</SetupUILanguage>
|
||||
<InputLocale>en-US</InputLocale>
|
||||
<SystemLocale>en-US</SystemLocale>
|
||||
<UILanguage>en-US</UILanguage>
|
||||
<UserLocale>en-US</UserLocale>
|
||||
</component>
|
||||
|
||||
<component name="Microsoft-Windows-PnpCustomizationsWinPE"
|
||||
processorArchitecture="amd64"
|
||||
publicKeyToken="31bf3856ad364e35"
|
||||
language="neutral" versionScope="nonSxS">
|
||||
<DriverPaths>
|
||||
<PathAndCredentials wcm:action="add" wcm:keyValue="1">
|
||||
<Path>E:\amd64\2k25</Path>
|
||||
</PathAndCredentials>
|
||||
</DriverPaths>
|
||||
</component>
|
||||
|
||||
<component name="Microsoft-Windows-Setup"
|
||||
processorArchitecture="amd64"
|
||||
publicKeyToken="31bf3856ad364e35"
|
||||
language="neutral" versionScope="nonSxS">
|
||||
<DiskConfiguration>
|
||||
<Disk wcm:action="add">
|
||||
<DiskID>0</DiskID>
|
||||
<WillWipeDisk>true</WillWipeDisk>
|
||||
<CreatePartitions>
|
||||
<CreatePartition wcm:action="add">
|
||||
<Order>1</Order>
|
||||
<Size>260</Size>
|
||||
<Type>EFI</Type>
|
||||
</CreatePartition>
|
||||
<CreatePartition wcm:action="add">
|
||||
<Order>2</Order>
|
||||
<Size>128</Size>
|
||||
<Type>MSR</Type>
|
||||
</CreatePartition>
|
||||
<CreatePartition wcm:action="add">
|
||||
<Order>3</Order>
|
||||
<Extend>true</Extend>
|
||||
<Type>Primary</Type>
|
||||
</CreatePartition>
|
||||
</CreatePartitions>
|
||||
<ModifyPartitions>
|
||||
<ModifyPartition wcm:action="add">
|
||||
<Order>1</Order>
|
||||
<PartitionID>1</PartitionID>
|
||||
<Format>FAT32</Format>
|
||||
<Label>EFI</Label>
|
||||
</ModifyPartition>
|
||||
<ModifyPartition wcm:action="add">
|
||||
<Order>2</Order>
|
||||
<PartitionID>2</PartitionID>
|
||||
</ModifyPartition>
|
||||
<ModifyPartition wcm:action="add">
|
||||
<Order>3</Order>
|
||||
<PartitionID>3</PartitionID>
|
||||
<Format>NTFS</Format>
|
||||
<Label>Windows</Label>
|
||||
</ModifyPartition>
|
||||
</ModifyPartitions>
|
||||
</Disk>
|
||||
</DiskConfiguration>
|
||||
|
||||
<ImageInstall>
|
||||
<OSImage>
|
||||
<InstallTo>
|
||||
<DiskID>0</DiskID>
|
||||
<PartitionID>3</PartitionID>
|
||||
</InstallTo>
|
||||
<!-- Index 2 = Standard Desktop Experience. Use 4 for Datacenter Desktop. -->
|
||||
<InstallFrom>
|
||||
<MetaData wcm:action="add">
|
||||
<Key>/IMAGE/INDEX</Key>
|
||||
<Value>2</Value>
|
||||
</MetaData>
|
||||
</InstallFrom>
|
||||
</OSImage>
|
||||
</ImageInstall>
|
||||
|
||||
<UserData>
|
||||
<AcceptEula>true</AcceptEula>
|
||||
<FullName>FlowerCore CI Runner</FullName>
|
||||
<Organization>FlowerCore</Organization>
|
||||
<!-- Eval install — no product key needed for 180-day evaluation -->
|
||||
</UserData>
|
||||
</component>
|
||||
</settings>
|
||||
|
||||
<!-- Pass 4: Specialize — Hostname, RDP, WinRM -->
|
||||
<settings pass="specialize">
|
||||
<component name="Microsoft-Windows-Shell-Setup"
|
||||
processorArchitecture="amd64"
|
||||
publicKeyToken="31bf3856ad364e35"
|
||||
language="neutral" versionScope="nonSxS">
|
||||
<ComputerName>CI1</ComputerName>
|
||||
<TimeZone>Central Standard Time</TimeZone>
|
||||
</component>
|
||||
|
||||
<component name="Microsoft-Windows-TerminalServices-LocalSessionManager"
|
||||
processorArchitecture="amd64"
|
||||
publicKeyToken="31bf3856ad364e35"
|
||||
language="neutral" versionScope="nonSxS">
|
||||
<fDenyTSConnections>false</fDenyTSConnections>
|
||||
</component>
|
||||
</settings>
|
||||
|
||||
<!-- Pass 7: OOBE — Admin account, RDP firewall, WinRM -->
|
||||
<settings pass="oobeSystem">
|
||||
<component name="Microsoft-Windows-Shell-Setup"
|
||||
processorArchitecture="amd64"
|
||||
publicKeyToken="31bf3856ad364e35"
|
||||
language="neutral" versionScope="nonSxS">
|
||||
<OOBE>
|
||||
<HideEULAPage>true</HideEULAPage>
|
||||
<HideLocalAccountScreen>true</HideLocalAccountScreen>
|
||||
<HideOEMRegistrationScreen>true</HideOEMRegistrationScreen>
|
||||
<HideOnlineAccountScreens>true</HideOnlineAccountScreens>
|
||||
<HideWirelessSetupInOOBE>true</HideWirelessSetupInOOBE>
|
||||
<ProtectYourPC>3</ProtectYourPC>
|
||||
</OOBE>
|
||||
<UserAccounts>
|
||||
<AdministratorPassword>
|
||||
<!-- Real password is in 1Password — vault qaphopopkryhbg353ukzhhuqoq,
|
||||
item id h3ix4mgfk65gmkcmvh6ly3d3hu, title:
|
||||
"ci1 Administrator (Windows Server 2025 KubeVirt VM)".
|
||||
Field "autounattend AdministratorPassword Value (UTF-16-LE base64)"
|
||||
matches the Value below.
|
||||
To rotate: regenerate, recompute base64
|
||||
$combined = $pw + "AdministratorPassword"
|
||||
[Convert]::ToBase64String([Text.Encoding]::Unicode.GetBytes($combined))
|
||||
then update both 1P item AND this Value field, recreate VM. -->
|
||||
<Value>bAA3AGsANABOAHcAcgBMAG4AeQBTAHUAYgBBAHQAaQBzAFUAcAB6AEMAWQAhADkAYQBCAEEAZABtAGkAbgBpAHMAdAByAGEAdABvAHIAUABhAHMAcwB3AG8AcgBkAA==</Value>
|
||||
<PlainText>false</PlainText>
|
||||
</AdministratorPassword>
|
||||
</UserAccounts>
|
||||
<FirstLogonCommands>
|
||||
<SynchronousCommand wcm:action="add">
|
||||
<Order>1</Order>
|
||||
<CommandLine>powershell.exe -ExecutionPolicy Bypass -Command "Set-NetFirewallRule -DisplayGroup 'Remote Desktop' -Enabled True"</CommandLine>
|
||||
<Description>Enable RDP firewall rule</Description>
|
||||
</SynchronousCommand>
|
||||
<SynchronousCommand wcm:action="add">
|
||||
<Order>2</Order>
|
||||
<CommandLine>powershell.exe -ExecutionPolicy Bypass -Command "Enable-PSRemoting -Force; Set-Item WSMan:\localhost\Service\Auth\Basic $true; Set-Item WSMan:\localhost\Service\AllowUnencrypted $true"</CommandLine>
|
||||
<Description>Enable WinRM (Phase 2 will pivot to HTTPS via step-ca cert)</Description>
|
||||
</SynchronousCommand>
|
||||
<SynchronousCommand wcm:action="add">
|
||||
<Order>3</Order>
|
||||
<CommandLine>cmd.exe /c reg add "HKLM\SOFTWARE\Microsoft\Windows\CurrentVersion\Policies\System" /v EnableLUA /t REG_DWORD /d 0 /f</CommandLine>
|
||||
<Description>Disable UAC (Phase 2 Puppet will re-evaluate)</Description>
|
||||
</SynchronousCommand>
|
||||
</FirstLogonCommands>
|
||||
</component>
|
||||
</settings>
|
||||
</unattend>
|
||||
|
||||
---
|
||||
# VirtualMachine — Windows Server 2025 CI runner.
|
||||
apiVersion: kubevirt.io/v1
|
||||
kind: VirtualMachine
|
||||
metadata:
|
||||
@@ -309,33 +25,7 @@ metadata:
|
||||
role: github-actions-runner
|
||||
flowercore.io/managed-by: bluejay-infra
|
||||
spec:
|
||||
# `running: true` is deprecated in favor of `runStrategy`. They are mutually
|
||||
# exclusive — KubeVirt's validating webhook rejects any VM that sets both:
|
||||
# admission webhook "virtualmachine-validator.kubevirt.io" denied the request:
|
||||
# Running and RunStrategy are mutually exclusive.
|
||||
# `Always` keeps a VMI running and restarts it if it crashes/exits — same
|
||||
# semantics as the old `running: true`.
|
||||
#
|
||||
# **2026-05-08 status: VM cannot start due to a stale QEMU flock on the
|
||||
# rootdisk PVC** (qemu reports `Failed to get "write" lock` on
|
||||
# `/var/run/kubevirt-private/vmi-disks/rootdisk/disk.img`). The flock was
|
||||
# left by a previous QEMU process during a force-deleted launcher pod
|
||||
# cycle. Recovery requires either (a) a Longhorn engine restart on
|
||||
# rke2-agent2, (b) a Longhorn volume detach via the longhorn-manager API
|
||||
# (kubectl patch on `volume.longhorn.io/<pvc-name>` does not work — the
|
||||
# spec.nodeID is reconciled back), or (c) a node reboot of rke2-agent2.
|
||||
#
|
||||
# **Confirmed working:** the bootOrder swap (windows-iso=1, rootdisk=2)
|
||||
# and the runStrategy migration (above). The ISO PVC was successfully
|
||||
# repopulated via virtctl image-upload pvc on the Filesystem-mode PVC.
|
||||
#
|
||||
# **Open: SATA CDROM read timeout** — even with bootOrder=1, OVMF reported
|
||||
# `BdsDxe: failed to start Boot0001 ... Time out` reading the SATA CDROM
|
||||
# backed by the Filesystem-mode PVC. A switch to Block-mode DataVolume
|
||||
# was attempted but blocked by a CDI v1.65.0 upload-pod permission issue
|
||||
# (capability drop prevents writing to the underlying block device).
|
||||
# See header docstring on the ISO PVC.
|
||||
runStrategy: Always # LIVE — ISO uploaded 2026-05-08, password in 1P
|
||||
runStrategy: Halted
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
@@ -377,46 +67,16 @@ spec:
|
||||
firmware:
|
||||
bootloader:
|
||||
efi:
|
||||
secureBoot: true
|
||||
secureBoot: false
|
||||
devices:
|
||||
tpm: {} # Non-persistent vTPM — sufficient for runner; no BitLocker
|
||||
tpm: {}
|
||||
disks:
|
||||
# bootOrder: ISO must be 1 for first-boot install (the rootdisk has no
|
||||
# EFI bootloader yet). After Windows installs, it writes its own UEFI
|
||||
# Boot#### entries pointing at the rootdisk's EFI partition; UEFI then
|
||||
# boots from rootdisk going forward and the ISO at bootOrder:2 acts as
|
||||
# a fallback for re-install scenarios.
|
||||
#
|
||||
# Original (broken) order had rootdisk=1, windows-iso=2 — UEFI tried
|
||||
# the empty virtio disk first, got nothing, fell back to the SATA
|
||||
# CDROM at Boot0001 with a short timeout, and timed out before the
|
||||
# CDROM enumerated. Console showed:
|
||||
# BdsDxe: failed to start Boot0001 ... Time out
|
||||
# BdsDxe: No bootable option or device was found.
|
||||
# Confirmed via debug pod: PVC content IS a real bootable ISO9660
|
||||
# (file: "ISO 9660 CD-ROM filesystem data ... (bootable)"), so the
|
||||
# only bug was boot priority.
|
||||
- name: windows-iso
|
||||
bootOrder: 1
|
||||
cdrom:
|
||||
bus: sata
|
||||
- name: rootdisk
|
||||
bootOrder: 2
|
||||
disk:
|
||||
bus: virtio
|
||||
- name: virtio-drivers
|
||||
cdrom:
|
||||
bus: sata
|
||||
- name: sysprep
|
||||
cdrom:
|
||||
bus: sata
|
||||
interfaces:
|
||||
# Pod-network fallback for Phase 1. To switch to PROD VLAN once Multus
|
||||
# + the prod-vlan57 NAD exist, replace this block with:
|
||||
# - name: prod-net
|
||||
# bridge: {}
|
||||
# model: virtio
|
||||
# and update the networks: stanza to use multus.networkName: kubevirt-vms/prod-vlan57
|
||||
# Pod-network fallback for CI runner outbound traffic. Switch to
|
||||
# prod-vlan57 once the bridge/NAD lane is ready for L2 access.
|
||||
- name: default
|
||||
masquerade: {}
|
||||
model: virtio
|
||||
@@ -427,32 +87,7 @@ spec:
|
||||
pod: {}
|
||||
volumes:
|
||||
- name: rootdisk
|
||||
persistentVolumeClaim:
|
||||
claimName: ci1-rootdisk
|
||||
- name: windows-iso
|
||||
# Path B (2026-05-08): mount ISO from Synology NFS instead of
|
||||
# Longhorn Filesystem PVC. The Filesystem-PVC path was confirmed to
|
||||
# contain a valid bootable ISO9660 image but caused OVMF's
|
||||
# SATA-CDROM read window to time out:
|
||||
# BdsDxe: failed to start Boot0001 ... Time out
|
||||
# Block-mode DataVolume was attempted as Path A but blocked by CDI
|
||||
# v1.65.0's upload pod capability drop. NFS-mounted ISO bypasses
|
||||
# both issues. See win2025-iso-nfs-pv.yaml header for full rationale
|
||||
# and Synology layout.
|
||||
persistentVolumeClaim:
|
||||
claimName: windows-server-2025-iso-nfs
|
||||
- name: virtio-drivers
|
||||
containerDisk:
|
||||
# Pinned to v1.8.2 (latest stable as of 2026-05-08).
|
||||
# The :latest tag uses Docker manifest v1 schema which containerd
|
||||
# 2.1 (RKE2 v1.34.5) refuses to pull with:
|
||||
# "media type application/vnd.docker.distribution.manifest.v1+prettyjws
|
||||
# is no longer supported since containerd v2.1"
|
||||
# v1.8.2 is rebuilt with manifest v2/OCI and works on containerd 2.1.
|
||||
# Bump available: https://quay.io/repository/kubevirt/virtio-container-disk?tab=tags
|
||||
image: quay.io/kubevirt/virtio-container-disk:v1.8.2
|
||||
- name: sysprep
|
||||
sysprep:
|
||||
configMap:
|
||||
name: ci1-autounattend
|
||||
image: localhost/fc-win-server-2025:v1
|
||||
imagePullPolicy: Never
|
||||
terminationGracePeriodSeconds: 3600
|
||||
|
||||
3
apps/kubevirt-vms/kustomization.yaml
Normal file
3
apps/kubevirt-vms/kustomization.yaml
Normal file
@@ -0,0 +1,3 @@
|
||||
resources:
|
||||
- ci1.yaml
|
||||
- prod-vlan57-nad.yaml
|
||||
@@ -207,20 +207,13 @@ spec:
|
||||
- port: 993
|
||||
targetPort: 993
|
||||
name: imaps
|
||||
---
|
||||
# TLS Certificate via cert-manager
|
||||
apiVersion: cert-manager.io/v1
|
||||
kind: Certificate
|
||||
metadata:
|
||||
name: mail-tls
|
||||
namespace: mail
|
||||
spec:
|
||||
secretName: mail-tls
|
||||
issuerRef:
|
||||
name: step-ca-acme
|
||||
kind: ClusterIssuer
|
||||
dnsNames:
|
||||
- mail.iamworkin.lan
|
||||
# --- mail-tls Certificate REMOVED 2026-06-01 ---
|
||||
# mail-tls is now managed OUTSIDE cert-manager: issued from step-ca's JWK 'admin'
|
||||
# provisioner and auto-renewed by a systemd timer on noc1 (step ca renew), which
|
||||
# writes the mail-tls secret directly. step-ca-acme only has an HTTP-01 (Traefik)
|
||||
# solver, but mail.iamworkin.lan must resolve to the dedicated MetalLB IP 10.0.56.202
|
||||
# (SMTP/IMAP), so HTTP-01 cannot validate. Do NOT re-add a cert-manager Certificate
|
||||
# here unless a DNS-01 solver is deployed for step-ca-acme.
|
||||
---
|
||||
# Traefik IngressRoute - Webmail placeholder
|
||||
apiVersion: traefik.io/v1alpha1
|
||||
|
||||
@@ -75,6 +75,20 @@ data:
|
||||
cluster: "rke2"
|
||||
role: "agent"
|
||||
|
||||
# Mac mini macOS runner node (INFRA VLAN)
|
||||
- job_name: "macmini-node"
|
||||
scrape_timeout: 15s
|
||||
static_configs:
|
||||
- targets: ["10.0.56.115:9100"]
|
||||
labels:
|
||||
instance: "macmini"
|
||||
host: "macmini.iamworkin.lan"
|
||||
vlan: "infra"
|
||||
arch: "arm64"
|
||||
role: "macos-runner"
|
||||
puppet_managed: "true"
|
||||
puppet_server: "puppet.iamworkin.lan"
|
||||
|
||||
# In-cluster node-exporter DaemonSet
|
||||
- job_name: "k8s-node-exporter"
|
||||
kubernetes_sd_configs:
|
||||
@@ -202,19 +216,24 @@ data:
|
||||
- job_name: "pimanager-app"
|
||||
scrape_interval: 15s
|
||||
metrics_path: /metrics
|
||||
scheme: https
|
||||
tls_config:
|
||||
insecure_skip_verify: true
|
||||
static_configs:
|
||||
- targets: ["10.0.58.25:5000"]
|
||||
- targets: ["piez.iamworkin.lan"]
|
||||
labels:
|
||||
instance: "piez"
|
||||
service: "pimanager"
|
||||
service: "signalcontrol"
|
||||
vlan: "home"
|
||||
device: "pi4-ezconnect"
|
||||
- targets: ["10.0.58.113:5100"]
|
||||
rig: "signal-b"
|
||||
- targets: ["pirelay.iamworkin.lan"]
|
||||
labels:
|
||||
instance: "pirelay"
|
||||
service: "pimanager"
|
||||
service: "signalcontrol"
|
||||
vlan: "home"
|
||||
device: "pi3-ks0212"
|
||||
rig: "signal-a"
|
||||
|
||||
# Epson ET-3750 EcoTank Printer SNMP
|
||||
- job_name: "snmp-printer"
|
||||
@@ -266,13 +285,14 @@ data:
|
||||
printer_model: "NuPrint 210"
|
||||
|
||||
# Print.Web health (Blazor app on edge2:5200)
|
||||
# Target `/health` (anonymous) — root path requires API key auth and returns 401.
|
||||
- job_name: "probe-printweb"
|
||||
metrics_path: /probe
|
||||
params:
|
||||
module: [http_2xx]
|
||||
scrape_interval: 30s
|
||||
static_configs:
|
||||
- targets: ["http://10.0.57.16:5200/"]
|
||||
- targets: ["http://10.0.57.16:5200/health"]
|
||||
labels:
|
||||
instance: "print-web"
|
||||
service: "print-web"
|
||||
@@ -464,24 +484,33 @@ data:
|
||||
- "https://gitea.iamworkin.lan/"
|
||||
- "https://argocd.iamworkin.lan/"
|
||||
- "https://intranet.iamworkin.lan/"
|
||||
- "https://signage.iamworkin.lan/"
|
||||
- "https://signage.iamworkin.lan/healthz" # root 401 auth-gated 2026-06-01; /healthz anon 200
|
||||
- "https://kiosk.iamworkin.lan/"
|
||||
- "https://media.iamworkin.lan/"
|
||||
- "https://mysql.iamworkin.lan/"
|
||||
- "https://php.iamworkin.lan/"
|
||||
- "https://media.iamworkin.lan/healthz" # root auth-gated by OIDC; /healthz anonymous 200
|
||||
- "https://mysql.iamworkin.lan/healthz" # root 401 auth-gated 2026-06-01; /healthz anon 200
|
||||
- "https://php.iamworkin.lan/healthz" # root 401 auth-gated 2026-06-01; /healthz anon 200
|
||||
- "https://zabbix.iamworkin.lan/"
|
||||
- "https://desktop.iamworkin.lan/"
|
||||
- "https://print.iamworkin.lan/"
|
||||
- "https://dns.iamworkin.lan/"
|
||||
- "https://chat.iamworkin.lan/"
|
||||
- "https://dist.iamworkin.lan/"
|
||||
- "https://dms.iamworkin.lan/"
|
||||
- "https://print.iamworkin.lan/healthz" # root 401 behind API key auth; /healthz anonymous 200
|
||||
- "https://dns.iamworkin.lan/healthz" # root auth-gated by OIDC; /healthz anonymous 200
|
||||
- "https://signalcontrol.iamworkin.lan/health" # FlowerCore.SignalControl Pi control plane
|
||||
- "https://flowercore.iamworkin.lan/healthz" # FlowerCore landing
|
||||
- "https://replay.iamworkin.lan/healthz" # FlowerCore.Signage replay surface
|
||||
- "https://worldbuilder.iamworkin.lan/healthz" # FlowerCore.WorldBuilder
|
||||
- "https://updates.iamworkin.lan/api/v1/manifests/_schema" # UpdateCenter plural LAN alias
|
||||
- "https://updatecenter-internal.iamworkin.lan/api/v1/manifests/_schema" # internal UC schema route
|
||||
- "https://chat.iamworkin.lan/healthz" # OIDC staged; keep blackbox off root before enforcement flips
|
||||
- "https://dist.iamworkin.lan/healthz" # root/admin auth-gated by OIDC; /healthz anonymous 200
|
||||
- "https://dms.iamworkin.lan/healthz" # future OIDC posture; health route is already anonymous/live
|
||||
- "https://menuboard.iamworkin.lan/"
|
||||
- "https://messageboard.iamworkin.lan/"
|
||||
- "https://presentations.iamworkin.lan/"
|
||||
- "https://retail.iamworkin.lan/"
|
||||
- "https://ttsreader.iamworkin.lan/"
|
||||
# Explicit healthcheck paths
|
||||
- "https://library.iamworkin.lan/health"
|
||||
- "https://aistation.iamworkin.lan/healthz"
|
||||
- "https://knowledge.iamworkin.lan/healthz"
|
||||
- "https://fc-llm-bridge.iamworkin.lan/healthz"
|
||||
- "https://acme.iamworkin.lan/health"
|
||||
# NOTE: services intentionally NOT in this probe surface
|
||||
@@ -697,6 +726,36 @@ data:
|
||||
summary: "Print.Web Ollama runner held for >10m ({{ $labels.model }})"
|
||||
description: "Print.Web reports model {{ $labels.model }} with {{ $value | printf \"%.0f\" }}s of keep-alive remaining. Check concurrent requests before the Pi 5 Ollama lane thrashes."
|
||||
|
||||
- name: macmini-runners
|
||||
rules:
|
||||
- alert: MacMiniRunnerOffline
|
||||
expr: (flowercore_github_runner_online{runner=~"macmini-.*"} == 0) or absent(flowercore_github_runner_online{runner=~"macmini-.*"})
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
service: github-runner
|
||||
annotations:
|
||||
summary: "Mac mini GitHub runner offline ({{ $labels.runner }})"
|
||||
description: "A macmini-* GitHub Actions runner has not reported online for more than 10 minutes. Puppet manages its LaunchDaemon under /Library/LaunchDaemons/io.flowercore.github-runner-<slug>.plist; runners survive reboot and do not require a GUI session."
|
||||
|
||||
- name: linux-runners
|
||||
rules:
|
||||
- alert: LinuxRunnerOffline
|
||||
expr: |
|
||||
kube_deployment_status_replicas_ready{
|
||||
namespace="github-runner",
|
||||
deployment=~"github-runner(|-(sharedpos|puppet|signage|dms|telephony|print-web|chat|mysql|kiosk-linux))"
|
||||
} == 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
alert_channel: irc
|
||||
service: github-runner
|
||||
team: ci
|
||||
annotations:
|
||||
summary: "Linux CI runner offline: {{ $labels.deployment }}"
|
||||
description: "Deployment {{ $labels.deployment }} in namespace github-runner has 0 ready replicas for more than 5 minutes. CI jobs targeting this repo will queue until the runner pod restarts and re-registers with GitHub. Check pods with: kubectl -n github-runner get pods -l app.kubernetes.io/name={{ $labels.deployment }}. Check logs with: kubectl -n github-runner logs -l app.kubernetes.io/name={{ $labels.deployment }} --tail=50. Common causes: PAT missing repo access, runner CrashLoopBackOff, or node/resource pressure."
|
||||
|
||||
- name: remote-desktop
|
||||
rules:
|
||||
- alert: RemoteDesktopWebDown
|
||||
@@ -863,12 +922,13 @@ data:
|
||||
# of idle and SNMP times out, so 5m for: would page nightly. A
|
||||
# genuine printer outage (jam, disconnected) lasts well over 30m.
|
||||
- alert: EpsonPrinterDown
|
||||
expr: up{job="snmp-printer"} == 0
|
||||
# hour() yields a single sample with NO labels, so the time-window gate must be
# joined with `and on()`; a bare `and` requires identical label sets and would
# never match the job/instance-labelled printer series (alert could not fire).
expr: (max_over_time(up{job="snmp-printer"}[35m]) == bool 0) == 1 and on() (hour() >= 13 or hour() < 1)
|
||||
for: 30m
|
||||
labels:
|
||||
severity: warning
|
||||
severity: info
|
||||
alert_channel: irc
|
||||
annotations:
|
||||
summary: "Epson ET-3750 SNMP unreachable for >30m (likely actual fault, not sleep)"
|
||||
summary: "Epson ET-3750 SNMP unreachable during waking hours (30m)"
|
||||
|
||||
- alert: SynologyDiskLow
|
||||
expr: hrStorageUsed{job="snmp-nas"} / hrStorageSize{job="snmp-nas"} * 100 > 85
|
||||
@@ -922,6 +982,52 @@ data:
|
||||
annotations:
|
||||
summary: "Disk usage high on {{ $labels.instance }} ({{ $value | printf \"%.1f\" }}%)"
|
||||
|
||||
# Puppet agent + service alerts.
|
||||
# Mirror of FlowerCore.Notes/scripts/monitoring/alerts.yml `puppet` group
|
||||
# so a future migration to in-cluster Prometheus inherits the ruleset.
|
||||
# Source-of-truth for the live Podman Prometheus on noc1 is the Notes file.
|
||||
# See feedback_monitoring_k8s_target_vs_live_podman.
|
||||
- name: puppet
|
||||
rules:
|
||||
- alert: PuppetAgentReportStale
|
||||
expr: puppet_last_run_age_seconds > 7200
|
||||
for: 30m
|
||||
labels:
|
||||
severity: warning
|
||||
alert_channel: irc
|
||||
annotations:
|
||||
summary: "Puppet agent {{ $labels.instance }} hasn't reported in over 2h"
|
||||
description: "Last run age: {{ $value | humanizeDuration }}. The puppet agent on {{ $labels.instance }} may be stopped, the node may be powered off, or noc1 may be unreachable from this node."
|
||||
runbook: "1. SSH to node (via noc1 jumpbox if needed) 2. sudo systemctl status puppet 3. sudo puppet agent -t --noop to force a run 4. Check r10k: ssh fcadmin@10.0.56.10 'sudo podman logs openvoxserver --tail 50' 5. Verify noc1 reachability: ping puppet.iamworkin.lan"
|
||||
|
||||
- alert: PuppetAgentReportCritical
|
||||
expr: puppet_last_run_age_seconds > 86400
|
||||
for: 1h
|
||||
labels:
|
||||
severity: critical
|
||||
alert_channel: irc
|
||||
annotations:
|
||||
summary: "Puppet agent {{ $labels.instance }} silent for over 24h — node is unmanaged"
|
||||
description: "Last run age: {{ $value | humanizeDuration }}. Node {{ $labels.instance }} has not submitted a Puppet report in over 24 hours. Config drift is accumulating — investigate immediately. If intentional (maintenance), add to the exclusion filter or silence in Grafana."
|
||||
runbook: "URGENT: 1. Check node power state 2. SSH via noc1 jumpbox: ssh fcadmin@10.0.56.10 then ssh <node> 3. sudo systemctl status puppet 4. sudo systemctl start puppet + sudo puppet agent -t 5. Check for network partitions (VLAN connectivity to 10.0.56.10) 6. If node was recently reimaged: sudo puppet agent -t to re-register with new SSL cert"
|
||||
|
||||
# Sprint 33 Cx-7 Phase B (2026-05-25 postmortem follow-up):
|
||||
# Detects puppet.service in failed state — distinct from PuppetAgentReportStale
|
||||
# which catches "agent hasn't run." This catches "systemd gave up restarting it"
|
||||
# (CA-verify loop or other fatal exit). Requires node-exporter systemd collector
|
||||
# enabled with --collector.systemd. If `node_systemd_unit_state` has no series
|
||||
# for a node, the collector is disabled there — flag in postmortem follow-up.
|
||||
- alert: PuppetServiceFailed
|
||||
expr: node_systemd_unit_state{name="puppet.service",state="failed"} == 1
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
alert_channel: irc
|
||||
annotations:
|
||||
summary: "Puppet service failed on {{ $labels.instance }}"
|
||||
description: "puppet.service on {{ $labels.instance }} has been in failed state for 5+ minutes. systemd has stopped auto-restarting (CA-verify-loop or other exit). Manual `systemctl status puppet` confirms. Run `sudo systemctl start puppet` to recover; investigate journal for root cause."
|
||||
runbook_url: "https://github.com/astoltz/FlowerCore.Notes/blob/master/memory/feedback_puppet_service_dead_after_ca_loop_alert_misreads.md"
|
||||
|
||||
# K8s pod-state alerts. Require kube-state-metrics scrape (added
|
||||
# 2026-04-26 — see scrape_configs above). Would have surfaced the
|
||||
# agent-zero ollama-proxy 172x crash-loop instead of letting it
|
||||
@@ -929,7 +1035,12 @@ data:
|
||||
- name: kubernetes-state
|
||||
rules:
|
||||
- alert: KubeContainerRestartingFrequently
|
||||
expr: increase(kube_pod_container_status_restarts_total[1h]) > 5
|
||||
# Exclude github-runner: ephemeral runners register, run one job,
|
||||
# exit cleanly, and restart by design. Also require kube_pod_info so
|
||||
# deleted rollout pods do not keep firing from retained restart series.
|
||||
expr: |
|
||||
increase(kube_pod_container_status_restarts_total{namespace!="github-runner"}[1h]) > 5
|
||||
and on(namespace, pod) kube_pod_info
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
@@ -938,7 +1049,12 @@ data:
|
||||
description: "Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} has restarted {{ $value | printf \"%.0f\" }} times in the last hour. Check 'kubectl describe pod' + last-state termination reason."
|
||||
|
||||
- alert: KubeContainerCrashLooping
|
||||
expr: increase(kube_pod_container_status_restarts_total[15m]) > 3
|
||||
# Same github-runner/delete-retention exclusions as the hourly
|
||||
# restart rule above; real runner failures are covered by the
|
||||
# dedicated LinuxRunnerOffline/MacMiniRunnerOffline alerts.
|
||||
expr: |
|
||||
increase(kube_pod_container_status_restarts_total{namespace!="github-runner"}[15m]) > 3
|
||||
and on(namespace, pod) kube_pod_info
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
@@ -966,7 +1082,10 @@ data:
|
||||
description: "Pod can't pull image. Check the image ref (often a stale tag or unreachable registry) and clean up if it's an orphan."
|
||||
|
||||
- alert: KubeDeploymentReplicasMismatch
|
||||
expr: kube_deployment_spec_replicas != kube_deployment_status_replicas_available
|
||||
# github-runner has explicit runner-offline alerts; the generic
|
||||
# replica-mismatch rule should not page on intentionally ephemeral
|
||||
# 0/1 runner churn between CI jobs.
|
||||
expr: kube_deployment_spec_replicas{namespace!="github-runner"} != kube_deployment_status_replicas_available{namespace!="github-runner"}
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
@@ -974,6 +1093,39 @@ data:
|
||||
summary: "Deployment {{ $labels.namespace }}/{{ $labels.deployment }} replica mismatch"
|
||||
# `spec != available` keeps the LEFT-hand sample, so $value is the desired (spec)
# replica count; kube-state-metrics exposes no `spec_replicas` label to template.
description: "Spec wants {{ $value }} replicas but the available replica count does not match. Likely a rollout stuck on probe failure, scheduling, or PVC."
|
||||
|
||||
# Q-MR-3 (2026-05-11): multus memory pressure — catches the next OOM
|
||||
# cascade BEFORE multus is OOM-killed cluster-wide. The 2026-05-10
|
||||
# outage (21h) hit because no alert fired on the rising multus working
|
||||
# set — only downstream blackbox / Traefik / service alerts. With
|
||||
# 1Gi limit (bluejay-infra@eb8693e), 80% = ~800MiB; steady-state
|
||||
# runs ~150-250MiB so this only fires when an avalanche starts.
|
||||
- alert: MultusMemoryPressure
|
||||
expr: |
|
||||
container_memory_working_set_bytes{container="kube-multus"}
|
||||
/ container_spec_memory_limit_bytes{container="kube-multus"} > 0.8
|
||||
for: 5m
|
||||
labels:
|
||||
severity: critical
|
||||
alert_channel: thermal_print
|
||||
annotations:
|
||||
summary: "kube-multus memory >80% of limit on {{ $labels.node }} for 5m"
|
||||
description: "kube-multus working set is {{ $value | humanizePercentage }} of its memory limit on node {{ $labels.node }}. If this keeps climbing, multus will OOM and all new pod networking will halt cluster-wide (precedent: 2026-05-10 outage)."
|
||||
|
||||
# Q-MR-3 (2026-05-11): namespace pending-pod backlog — catches the
|
||||
# operator-leak avalanche pattern BEFORE it cascades into a multus
|
||||
# CNI OOM. Any FC operator (RemoteDesktop / Distribution / WorldBuilder)
|
||||
# emitting pods without ownerReferences will accumulate them when
|
||||
# the operator crashes. >25 pending pods in any namespace for 30m
|
||||
# is the signal to investigate the reconciler.
|
||||
- alert: NamespacePendingPodBacklog
|
||||
expr: sum by (namespace) (kube_pod_status_phase{phase="Pending"}) > 25
|
||||
for: 30m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Namespace {{ $labels.namespace }} has {{ $value }} Pending pods for 30m"
|
||||
description: "Pending pod count in {{ $labels.namespace }} exceeds 25 sustained for 30m. Likely operator-leak avalanche pattern — children emitted without ownerReferences. Risk of multus CNI OOM cascade."
|
||||
|
||||
# Longhorn storage health alerts. Required: longhorn scrape job
|
||||
# (added 2026-04-26 — see scrape_configs above). The K8s events
|
||||
# for "snapshot becomes not ready to use" are transient lifecycle
|
||||
@@ -1150,24 +1302,55 @@ metadata:
|
||||
data:
|
||||
notify.py: |
|
||||
#!/usr/bin/env python3
|
||||
"""HTTP->IRC alert relay with thermal printer forwarding for Grafana webhooks.
|
||||
Listens on :9119, posts to #alerts on UnrealIRCd via raw IRC protocol.
|
||||
Alerts tagged alert_channel=thermal_print also POST to Print.Web /api/print/alert.
|
||||
"""HTTP->IRC alert relay with thermal-printer DIGEST forwarding.
|
||||
|
||||
Listens on :9119, posts to #alerts on UnrealIRCd, forwards to Print.Web
|
||||
/api/print/alert. Thermal printing is BATCHED into hourly digests by
|
||||
default so the printer no longer spam-fires per Grafana webhook.
|
||||
|
||||
Routing (per Grafana webhook alert):
|
||||
- IRC: always per-event (operator likes the stream)
|
||||
- Thermal printer:
|
||||
* label alert_channel=thermal_print_immediate -> flush the digest NOW
|
||||
(first webhook for a new fingerprint only; repeats dedupe in the buffer)
|
||||
* severity in {critical,disaster,page} OR label alert_channel=thermal_print -> enqueue into hourly digest
|
||||
* everything else -> IRC only
|
||||
- RESOLVED webhooks remove the alert from the digest buffer
|
||||
|
||||
Env vars (defaults preserve old behavior on first deploy):
|
||||
THERMAL_PRINT_ENABLED default "true" - master kill switch
|
||||
BATCH_INTERVAL_MIN default "60" - minutes between digest prints
|
||||
BATCH_MAX_PENDING default "50" - force-flush threshold
|
||||
|
||||
HTTP surface:
|
||||
POST / - Grafana webhook entry
|
||||
POST /flush - manual digest flush (idempotent)
|
||||
GET / - status + config + buffer depth + stats
|
||||
"""
|
||||
import json, socket, sys, time
|
||||
import json, os, socket, sys, threading, time
|
||||
from collections import defaultdict
|
||||
from datetime import datetime, timezone
|
||||
from http.server import HTTPServer, BaseHTTPRequestHandler
|
||||
from urllib.request import Request, urlopen
|
||||
from urllib.error import URLError
|
||||
|
||||
IRC_HOST = "unrealircd.irc.svc" # short name: CoreDNS ndots:5 + iamworkin.lan template hijacks full .cluster.local (see memory)
|
||||
IRC_PORT = 6667
|
||||
IRC_NICK = "grafana-bot"
|
||||
IRC_CHANNEL = "#alerts"
|
||||
PRINT_WEB_URL = "http://10.0.57.16:5200/api/print/alert"
|
||||
PRINT_ENABLED = True
|
||||
THERMAL_PRINT_ENABLED = os.environ.get("THERMAL_PRINT_ENABLED", "true").lower() == "true"
|
||||
BATCH_INTERVAL_MIN = int(os.environ.get("BATCH_INTERVAL_MIN", "60"))
|
||||
BATCH_MAX_PENDING = int(os.environ.get("BATCH_MAX_PENDING", "50"))
|
||||
|
||||
IRC_HOST = os.environ.get("IRC_HOST", "unrealircd.irc.svc")
|
||||
IRC_PORT = int(os.environ.get("IRC_PORT", "6667"))
|
||||
IRC_NICK = os.environ.get("IRC_NICK", "grafana-bot")
|
||||
IRC_CHANNEL = os.environ.get("IRC_CHANNEL", "#alerts")
|
||||
PRINT_WEB_URL = os.environ.get("PRINT_WEB_URL", "http://10.0.57.16:5200/api/print/alert")
|
||||
|
||||
_buffer_lock = threading.Lock()
|
||||
_buffer = {} # fingerprint -> {"alert": dict, "first_seen": float, "last_seen": float}
|
||||
_last_flush_time = time.time()
|
||||
_stats = {"webhooks_received": 0, "irc_sent": 0, "print_immediate": 0,
|
||||
"digest_flushed": 0, "buffer_dedup": 0, "buffer_added": 0,
|
||||
"buffer_resolved": 0, "started_at": time.time()}
|
||||
|
||||
def send_irc(message):
|
||||
"""Connect, handle PING, join, send, quit."""
|
||||
try:
|
||||
sock = socket.create_connection((IRC_HOST, IRC_PORT), timeout=15)
|
||||
sock.sendall(f"NICK {IRC_NICK}\r\n".encode())
|
||||
@@ -1200,52 +1383,137 @@ data:
|
||||
time.sleep(0.5)
|
||||
sock.sendall(b"QUIT :alert delivered\r\n")
|
||||
sock.close()
|
||||
_stats["irc_sent"] += 1
|
||||
return True
|
||||
except Exception as e:
|
||||
print(f"[irc-notify] IRC send failed: {e}", file=sys.stderr)
|
||||
return False
|
||||
|
||||
def send_thermal_print(alert):
|
||||
if not PRINT_ENABLED: return
|
||||
labels = alert.get("labels", {})
|
||||
annotations = alert.get("annotations", {})
|
||||
status = alert.get("status", "firing").upper()
|
||||
summary = annotations.get("summary", "")
|
||||
description = annotations.get("description", "")
|
||||
runbook = annotations.get("runbook", "")
|
||||
# Build a useful message: summary + description + runbook steps
|
||||
parts = []
|
||||
if summary: parts.append(summary)
|
||||
if description and description != summary: parts.append(description)
|
||||
if runbook: parts.append("STEPS: " + runbook)
|
||||
message = " | ".join(parts) if parts else labels.get("alertname", "Unknown alert")
|
||||
payload = {
|
||||
"title": labels.get("alertname", "Unknown"),
|
||||
"severity": labels.get("severity", "warning").capitalize(),
|
||||
"host": labels.get("instance", labels.get("host", "unknown")),
|
||||
"message": message,
|
||||
"eventId": alert.get("fingerprint", ""),
|
||||
"source": "Grafana",
|
||||
"status": "RESOLVED" if status == "RESOLVED" else "PROBLEM",
|
||||
"acknowledged": False
|
||||
}
|
||||
def post_thermal(payload, kind):
|
||||
if not THERMAL_PRINT_ENABLED:
|
||||
print(f"[irc-notify] thermal disabled; skip {kind} ({payload.get('title','?')[:40]})", file=sys.stderr)
|
||||
return False
|
||||
try:
|
||||
req = Request(PRINT_WEB_URL, data=json.dumps(payload).encode("utf-8"),
|
||||
headers={"Content-Type": "application/json"}, method="POST")
|
||||
resp = urlopen(req, timeout=10)
|
||||
print(f"[irc-notify] Thermal print sent: {resp.read().decode()}", file=sys.stderr)
|
||||
if kind == "immediate": _stats["print_immediate"] += 1
|
||||
print(f"[irc-notify] thermal {kind} sent: {payload.get('title','?')[:50]}", file=sys.stderr)
|
||||
return True
|
||||
except Exception as e:
|
||||
print(f"[irc-notify] Thermal print failed: {e}", file=sys.stderr)
|
||||
|
||||
def should_print(alert):
|
||||
labels = alert.get("labels", {})
|
||||
if labels.get("alert_channel") == "thermal_print": return True
|
||||
if labels.get("severity", "").lower() in ("critical", "disaster"): return True
|
||||
if alert.get("status", "").upper() == "RESOLVED": return False
|
||||
print(f"[irc-notify] thermal {kind} failed: {e}", file=sys.stderr)
|
||||
return False
|
||||
|
||||
def fingerprint_of(alert):
    """Return the key used to deduplicate an alert in the digest buffer.

    The webhook's own "fingerprint" field wins when it is non-empty.
    Otherwise a key of the form "<alertname>/<namespace>/<target>" is built
    from the labels, where <target> is the first non-empty label among
    pod, instance, deployment, statefulset, namespace (empty if none).
    """
    explicit = alert.get("fingerprint", "")
    if explicit:
        return explicit

    labels = alert.get("labels", {})
    target = ""
    for key in ("pod", "instance", "deployment", "statefulset", "namespace"):
        candidate = labels.get(key)
        if candidate:
            target = candidate
            break

    alertname = labels.get("alertname", "?")
    namespace = labels.get("namespace", "")
    return f"{alertname}/{namespace}/{target}"
|
||||
|
||||
def is_critical(alert):
    """True when the alert's severity label is critical, disaster, or page (case-insensitive)."""
    severity = alert.get("labels", {}).get("severity", "")
    return severity.lower() in ("critical", "disaster", "page")
|
||||
|
||||
def is_immediate_label(alert):
    """True when the alert carries the label alert_channel=thermal_print_immediate."""
    labels = alert.get("labels", {})
    return labels.get("alert_channel") == "thermal_print_immediate"
|
||||
|
||||
def is_batched_label(alert):
    """True when the alert carries the label alert_channel=thermal_print."""
    labels = alert.get("labels", {})
    return labels.get("alert_channel") == "thermal_print"
|
||||
|
||||
def add_to_digest(alert):
    """Record one webhook alert in the digest buffer.

    - status "resolved": drop the fingerprint from the buffer if present.
    - fingerprint already buffered: refresh the stored alert and last_seen.
    - otherwise: insert a new entry.

    Returns True only when a new fingerprint was inserted (the buffer grew);
    False for a resolution, a dedup, or when thermal printing is disabled.
    """
    if not THERMAL_PRINT_ENABLED:
        return False

    key = fingerprint_of(alert)
    resolved = alert.get("status", "firing").lower() == "resolved"

    with _buffer_lock:
        known = key in _buffer

        if resolved:
            if known:
                del _buffer[key]
                _stats["buffer_resolved"] += 1
            return False

        if known:
            # Mutate the existing entry in place so first_seen is preserved.
            _buffer[key]["last_seen"] = time.time()
            _buffer[key]["alert"] = alert
            _stats["buffer_dedup"] += 1
            return False

        _buffer[key] = {"alert": alert, "first_seen": time.time(), "last_seen": time.time()}
        _stats["buffer_added"] += 1
        return True
|
||||
|
||||
def build_digest_payload():
    """Render the current digest buffer as a single alert payload dict.

    Buffered alerts are grouped by alertname; each group becomes one line
    listing its severities, its size, and up to five targets (with a
    "(+N)" suffix for the remainder). Returns None when the buffer is
    empty. The buffer itself is not modified.
    """
    # Copy the entries under the lock, format outside it.
    with _buffer_lock:
        entries = list(_buffer.values())
    if not entries:
        return None

    grouped = defaultdict(list)
    for entry in entries:
        alertname = entry["alert"].get("labels", {}).get("alertname", "Unknown")
        grouped[alertname].append(entry)

    summary_lines = []
    for alertname in sorted(grouped):
        members = grouped[alertname]

        shown = []
        for member in members[:5]:
            member_labels = member["alert"].get("labels", {})
            shown.append(member_labels.get("pod") or member_labels.get("instance")
                         or member_labels.get("deployment") or member_labels.get("statefulset")
                         or member_labels.get("namespace") or "?")

        if len(members) > 5:
            overflow = f" (+{len(members)-5})"
        else:
            overflow = ""

        severities = sorted({m["alert"].get("labels", {}).get("severity", "warning") for m in members})
        summary_lines.append(f"[{'/'.join(severities)}] {alertname} x{len(members)}: {', '.join(shown)}{overflow}")

    now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    title = f"Alert digest: {len(entries)} firing"
    body_lines = [f"=== {title} ===", f"as of {now}", ""]
    body_lines.extend(summary_lines)
    body_lines.extend([
        "",
        "Stream: #alerts (IRC) | Triage: grafana-noc1.iamworkin.lan",
        "Force-flush: POST irc-notify.monitoring.svc:9119/flush",
    ])

    return {
        "title": title,
        "severity": "Warning",
        "host": "monitoring",
        "message": "\n".join(body_lines),
        "eventId": f"digest-{int(time.time())}",
        "source": "Grafana digest",
        "status": "PROBLEM",
        "acknowledged": False,
    }
|
||||
|
||||
def flush_digest():
    """Send the buffered alerts as one digest and remove them from the buffer.

    Returns True when the digest POST succeeded; False when the buffer was
    empty, thermal printing is disabled, or the POST failed.

    Only entries that already existed when the flush started are removed.
    The previous unconditional clear() also discarded alerts that another
    thread added while the (up to 10s) POST was in flight, so those alerts
    were never printed. An alert that slips in between the snapshot and the
    payload build may appear in two digests; that is preferred over losing it.

    As before, entries are removed even when delivery fails, so the
    buffer-full branch of digest_loop cannot retry every tick; still-firing
    alerts re-enter the buffer on their next webhook.
    """
    # Entry dicts are updated in place on dedup, so object identity tells us
    # whether a fingerprint still maps to the entry we snapshotted.
    with _buffer_lock:
        snapshot = dict(_buffer)

    payload = build_digest_payload()
    if payload is None:
        print("[irc-notify] flush: buffer empty, no digest sent", file=sys.stderr)
        return False

    sent = post_thermal(payload, "digest")

    with _buffer_lock:
        for fp, entry in snapshot.items():
            if _buffer.get(fp) is entry:
                del _buffer[fp]
        if sent:
            _stats["digest_flushed"] += 1

    if not sent and snapshot:
        print(f"[irc-notify] flush: digest not delivered; dropped {len(snapshot)} buffered alert(s)", file=sys.stderr)
    return sent
|
||||
|
||||
def digest_loop():
    """Background loop that flushes the digest on a timer or when it fills up.

    Every 15 seconds: flush if BATCH_INTERVAL_MIN minutes have passed since
    the last flush, otherwise flush if the buffer holds at least
    BATCH_MAX_PENDING entries. Either flush resets the interval timer to the
    time sampled at the start of the tick. Any exception is logged and the
    loop resumes after 60 seconds. Never returns.
    """
    global _last_flush_time
    while True:
        try:
            tick = time.time()
            interval_due = (tick - _last_flush_time) >= BATCH_INTERVAL_MIN * 60
            # `or` short-circuits, so the buffer size is only consulted when
            # the interval has not elapsed.
            if interval_due or len(_buffer) >= BATCH_MAX_PENDING:
                if interval_due:
                    print(f"[irc-notify] digest tick: interval reached ({BATCH_INTERVAL_MIN}m); buffer={len(_buffer)}", file=sys.stderr)
                else:
                    print(f"[irc-notify] digest tick: buffer full ({len(_buffer)}); force flush", file=sys.stderr)
                flush_digest()
                _last_flush_time = tick
            time.sleep(15)
        except Exception as e:
            print(f"[irc-notify] digest loop error: {e}", file=sys.stderr)
            time.sleep(60)
|
||||
|
||||
class Handler(BaseHTTPRequestHandler):
|
||||
def do_POST(self):
|
||||
if self.path == "/flush":
|
||||
ok = flush_digest()
|
||||
self.send_response(200); self.send_header("Content-Type", "application/json"); self.end_headers()
|
||||
self.wfile.write(json.dumps({"flushed": ok, "buffer_after": len(_buffer)}).encode())
|
||||
return
|
||||
_stats["webhooks_received"] += 1
|
||||
length = int(self.headers.get("Content-Length", 0))
|
||||
body = json.loads(self.rfile.read(length)) if length else {}
|
||||
for alert in body.get("alerts", []):
|
||||
@@ -1260,22 +1528,56 @@ data:
|
||||
msg = f"{icon}{sev_tag} {name}: {summary}"
|
||||
if desc: msg += f"\n {desc}"
|
||||
send_irc(msg)
|
||||
if should_print(alert): send_thermal_print(alert)
|
||||
self.send_response(200)
|
||||
self.send_header("Content-Type", "application/json")
|
||||
self.end_headers()
|
||||
# Thermal routing — EVERYTHING (including criticals) goes into
|
||||
# the hourly digest. Only the explicit `alert_channel=thermal_print_immediate`
|
||||
# label bypasses, and even that flushes-the-current-digest rather
|
||||
# than printing a standalone job, so the same fingerprint can't
|
||||
# spam the printer per webhook cycle.
|
||||
if status == "RESOLVED":
|
||||
add_to_digest(alert) # removes from buffer
|
||||
continue
|
||||
if is_immediate_label(alert):
|
||||
# Explicit opt-in for "paper this NOW" — first arrival of a
|
||||
# new fingerprint triggers an immediate digest flush; repeat
|
||||
# webhooks for the same fingerprint dedupe in the buffer
|
||||
# until the next interval or until the alert resolves.
|
||||
new_in_buffer = add_to_digest(alert)
|
||||
if new_in_buffer:
|
||||
global _last_flush_time
|
||||
flush_digest()
|
||||
_last_flush_time = time.time()
|
||||
elif is_critical(alert) or is_batched_label(alert):
|
||||
add_to_digest(alert)
|
||||
# else: IRC-only (warnings without thermal_print label)
|
||||
self.send_response(200); self.send_header("Content-Type", "application/json"); self.end_headers()
|
||||
self.wfile.write(b'{"status":"ok"}')
|
||||
|
||||
def do_GET(self):
|
||||
self.send_response(200)
|
||||
self.send_header("Content-Type", "application/json")
|
||||
self.end_headers()
|
||||
self.wfile.write(json.dumps({"service":"irc-notify","thermal_print":PRINT_ENABLED}).encode())
|
||||
self.send_response(200); self.send_header("Content-Type", "application/json"); self.end_headers()
|
||||
with _buffer_lock:
|
||||
alertnames = sorted({it["alert"].get("labels", {}).get("alertname", "?") for it in _buffer.values()})
|
||||
depth = len(_buffer)
|
||||
info = {
|
||||
"service": "irc-notify",
|
||||
"config": {"thermal_print_enabled": THERMAL_PRINT_ENABLED,
|
||||
"batch_interval_min": BATCH_INTERVAL_MIN,
|
||||
"batch_max_pending": BATCH_MAX_PENDING,
|
||||
"irc_target": f"{IRC_HOST}:{IRC_PORT} {IRC_CHANNEL}",
|
||||
"print_web_url": PRINT_WEB_URL},
|
||||
"buffer": {"depth": depth, "alertnames": alertnames,
|
||||
"seconds_since_last_flush": int(time.time() - _last_flush_time),
|
||||
"seconds_until_next_flush": max(0, int(BATCH_INTERVAL_MIN*60 - (time.time() - _last_flush_time)))},
|
||||
"stats": _stats,
|
||||
}
|
||||
self.wfile.write(json.dumps(info, indent=2).encode())
|
||||
|
||||
def log_message(self, format, *args):
|
||||
print(f"[irc-notify] {args[0]}", file=sys.stderr)
|
||||
|
||||
if __name__ == "__main__":
|
||||
threading.Thread(target=digest_loop, daemon=True).start()
|
||||
server = HTTPServer(("0.0.0.0", 9119), Handler)
|
||||
print(f"IRC alert relay :9119 -> {IRC_HOST}:{IRC_PORT} {IRC_CHANNEL} (thermal: {PRINT_ENABLED})")
|
||||
print(f"[irc-notify] :9119 -> IRC {IRC_HOST}:{IRC_PORT} {IRC_CHANNEL} | thermal={'ON' if THERMAL_PRINT_ENABLED else 'OFF'} | digest={BATCH_INTERVAL_MIN}m max={BATCH_MAX_PENDING}", file=sys.stderr)
|
||||
server.serve_forever()
|
||||
|
||||
# =============================================================================
|
||||
@@ -3362,6 +3664,39 @@ data:
|
||||
relativeTimeRange: {from: 120, to: 0}
|
||||
datasourceUid: __expr__
|
||||
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [600], type: gt}}], refId: C}
|
||||
- orgId: 1
|
||||
name: CI Runners
|
||||
folder: CI Alerts
|
||||
interval: 1m
|
||||
rules:
|
||||
- uid: linux-runner-offline
|
||||
title: LinuxRunnerOffline
|
||||
condition: C
|
||||
for: 5m
|
||||
noDataState: OK
|
||||
execErrState: Error
|
||||
annotations:
|
||||
summary: "Linux CI runner offline: {{ $labels.deployment }}"
|
||||
description: "A github-runner namespace Deployment has 0 ready replicas for more than 5 minutes. CI jobs targeting that repo will queue until the runner pod restarts and re-registers."
|
||||
runbook: "1. kubectl -n github-runner get pods -l app.kubernetes.io/name={{ $labels.deployment }} 2. kubectl -n github-runner logs -l app.kubernetes.io/name={{ $labels.deployment }} --tail=50 3. Verify PAT repo access if registration returns 404 4. Verify no RWO PVC is shared by scaled runners"
|
||||
labels:
|
||||
severity: warning
|
||||
service: github-runner
|
||||
alert_channel: irc
|
||||
team: ci
|
||||
data:
|
||||
- refId: A
|
||||
relativeTimeRange: {from: 300, to: 0}
|
||||
datasourceUid: prometheus
|
||||
# `== bool 0` yields 1 for a deployment with zero ready replicas and 0 otherwise.
# A plain `== 0` filter only ever returns samples whose value is 0, which the
# `gt 0` threshold in refId C can never match, so the rule would never alert.
model: {expr: 'kube_deployment_status_replicas_ready{namespace="github-runner",deployment=~"github-runner(|-(sharedpos|puppet|signage|dms|telephony|print-web|chat|mysql|kiosk-linux))"} == bool 0', instant: true, refId: A}
|
||||
- refId: B
|
||||
relativeTimeRange: {from: 300, to: 0}
|
||||
datasourceUid: __expr__
|
||||
model: {type: reduce, expression: A, reducer: last, refId: B}
|
||||
- refId: C
|
||||
relativeTimeRange: {from: 300, to: 0}
|
||||
datasourceUid: __expr__
|
||||
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [0], type: gt}}], refId: C}
|
||||
- orgId: 1
|
||||
name: Infrastructure
|
||||
folder: AI Stack Alerts
|
||||
@@ -3394,6 +3729,32 @@ data:
|
||||
relativeTimeRange: {from: 120, to: 0}
|
||||
datasourceUid: __expr__
|
||||
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [1], type: lt}}], refId: C}
|
||||
- uid: macmini-runner-offline
|
||||
title: MacMiniRunnerOffline
|
||||
condition: C
|
||||
for: 10m
|
||||
noDataState: Alerting
|
||||
execErrState: OK
|
||||
annotations:
|
||||
summary: Mac mini GitHub runner offline
|
||||
description: "One or more macmini-* GitHub Actions runners have not reported online for more than 10 minutes. LaunchDaemons survive reboot and do not require the bluejay GUI session."
|
||||
runbook: "1. ssh fcadmin@macmini.iamworkin.lan 2. launchctl print system/io.flowercore.github-runner-<slug> 3. Check /Users/fcadmin/Library/Logs/github-runners/<slug>/stderr.log 4. Re-register the repo runner if .runner is missing"
|
||||
labels:
|
||||
severity: warning
|
||||
service: github-runner
|
||||
data:
|
||||
- refId: A
|
||||
relativeTimeRange: {from: 600, to: 0}
|
||||
datasourceUid: prometheus
|
||||
model: {expr: 'min(flowercore_github_runner_online{runner=~"macmini-.*"} or vector(0))', instant: true, refId: A}
|
||||
- refId: B
|
||||
relativeTimeRange: {from: 600, to: 0}
|
||||
datasourceUid: __expr__
|
||||
model: {type: reduce, expression: A, reducer: last, refId: B}
|
||||
- refId: C
|
||||
relativeTimeRange: {from: 600, to: 0}
|
||||
datasourceUid: __expr__
|
||||
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [1], type: lt}}], refId: C}
|
||||
- uid: high-cpu
|
||||
title: High CPU (>85%)
|
||||
condition: C
|
||||
|
||||
@@ -188,13 +188,24 @@ spec:
|
||||
- name: kube-multus
|
||||
image: ghcr.io/k8snetworkplumbingwg/multus-cni:snapshot-thick
|
||||
command: [ "/usr/src/multus-cni/bin/multus-daemon" ]
|
||||
# 2026-05-11: upstream default of 50Mi memory limit OOM-cascades when
|
||||
# an operator-owned namespace accumulates >100 pending pods retrying
|
||||
# CNI ADD. RemoteDesktop emitted 219 orphan rd-browser-only pods
|
||||
# (missing OwnerReferences), kubelet's CNI ADD avalanche pushed multus
|
||||
# over 50Mi, OOMKilled, restarted with even bigger backlog → loop.
|
||||
# 21h cluster outage. See FlowerCore.Notes:
|
||||
# feedback_multus_50mi_limit_oom_orphan_pod_avalanche.md
|
||||
# 1Gi limit / 512Mi request comfortably handles a 200+ pod CNI
|
||||
# catchup burst on 64GB nodes (nodes are <25% used in steady-state).
|
||||
# Drop back toward 256Mi only after MultusMemoryPressure alert
|
||||
# proves steady-state working set sits well below 200Mi.
|
||||
resources:
|
||||
requests:
|
||||
cpu: "100m"
|
||||
memory: "50Mi"
|
||||
memory: "512Mi"
|
||||
limits:
|
||||
cpu: "100m"
|
||||
memory: "50Mi"
|
||||
memory: "1Gi"
|
||||
securityContext:
|
||||
privileged: true
|
||||
terminationMessagePolicy: FallbackToLogsOnError
|
||||
|
||||
@@ -24,7 +24,16 @@
|
||||
# (10.0.57.16:5200), public internet 80/443 (excluding RFC1918), and
|
||||
# fc-signage:5190 for the signage AAT lane.
|
||||
# - Ingress: Traefik (4444 + 8089 ACME-solver-style), intra-pod,
|
||||
# telephony / gitea / fc-system / fc-signage namespaces on 4444.
|
||||
# telephony / gitea / fc-system / fc-signage / github-runner namespaces
|
||||
# on 4444.
|
||||
#
|
||||
# 2026-05-25: added github-runner ingress on 4444 so CI jobs running in
|
||||
# self-hosted runner pods (e.g. FlowerCore.Print.Web `help-screenshots`)
|
||||
# can reach the grid. Without this allow, the session POST to
|
||||
# `selenium-hub.selenium.svc.cluster.local:4444` was DNAT'd to the hub
|
||||
# pod IP and then dropped at the Calico ingress hook — Selenium UI showed
|
||||
# 0/4 sessions while the .NET HTTP client timed out at 60s. Same family
|
||||
# as `feedback_netpol_dnat_backend_port`, wrong-source-namespace flavor.
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: NetworkPolicy
|
||||
metadata:
|
||||
@@ -203,6 +212,13 @@ spec:
|
||||
ports:
|
||||
- port: 4444
|
||||
protocol: TCP
|
||||
- from:
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: github-runner
|
||||
ports:
|
||||
- port: 4444
|
||||
protocol: TCP
|
||||
podSelector: {}
|
||||
policyTypes:
|
||||
- Ingress
|
||||
|
||||
427
apps/selenium/selenium-grid.yaml
Normal file
427
apps/selenium/selenium-grid.yaml
Normal file
@@ -0,0 +1,427 @@
|
||||
# Selenium Grid 4 — RKE2 deployment
|
||||
#
|
||||
# Hub + chrome + firefox + edge browser nodes serving fleet-wide AAT runs from
|
||||
# the GitHub Actions self-hosted runners. ArgoCD owns this namespace from
|
||||
# 2026-05-25 (`infra-selenium` Application; previously these resources were
|
||||
# orphan kubectl-applied since 2026-03-15).
|
||||
#
|
||||
# Endpoints:
|
||||
# - Internal cluster: http://selenium-hub.selenium.svc.cluster.local:4444
|
||||
# - LAN LoadBalancer (MetalLB): http://10.0.56.208:4444
|
||||
# - Traefik public: https://selenium.iamworkin.lan
|
||||
#
|
||||
# Browser maxSessions:
|
||||
# - chrome 2 (bumped from 1 on 2026-05-25 morning-routine — AAT-heavy
|
||||
# Print.Web help-screenshots was the global bottleneck;
|
||||
# see commit history for ops/runner-replica-rightsize)
|
||||
# - firefox 1
|
||||
# - edge 1
|
||||
#
|
||||
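# Screenshots + video recording write to NFS via the chrome video sidecar
# (NOTE: this manifest itself only mounts a pod-local `selenium-videos`
# emptyDir at /videos for the sidecar; no NFS volume is declared here).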
|
||||
# See: CLAUDE.md "Selenium Grid & Visual AAT Testing" + bluejay-infra ADR notes.
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
app: selenium-hub
|
||||
app.kubernetes.io/name: selenium-hub
|
||||
app.kubernetes.io/part-of: selenium-grid
|
||||
name: selenium-hub
|
||||
namespace: selenium
|
||||
spec:
|
||||
ports:
|
||||
- name: web
|
||||
port: 4444
|
||||
targetPort: 4444
|
||||
- name: publish
|
||||
port: 4442
|
||||
targetPort: 4442
|
||||
- name: subscribe
|
||||
port: 4443
|
||||
targetPort: 4443
|
||||
selector:
|
||||
app: selenium-hub
|
||||
type: ClusterIP
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
annotations:
|
||||
metallb.io/ip-allocated-from-pool: bluejay-pool
|
||||
metallb.universe.tf/loadBalancerIPs: 10.0.56.208
|
||||
labels:
|
||||
app: selenium-hub
|
||||
component: external-access
|
||||
name: selenium-hub-external
|
||||
namespace: selenium
|
||||
spec:
|
||||
clusterIP: 10.43.90.147
|
||||
clusterIPs:
|
||||
- 10.43.90.147
|
||||
externalTrafficPolicy: Local
|
||||
healthCheckNodePort: 32213
|
||||
ports:
|
||||
- name: web
|
||||
nodePort: 32411
|
||||
port: 4444
|
||||
targetPort: 4444
|
||||
- name: publish
|
||||
nodePort: 32068
|
||||
port: 4442
|
||||
targetPort: 4442
|
||||
- name: subscribe
|
||||
nodePort: 31000
|
||||
port: 4443
|
||||
targetPort: 4443
|
||||
selector:
|
||||
app: selenium-hub
|
||||
type: LoadBalancer
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
labels:
|
||||
app: selenium-hub
|
||||
app.kubernetes.io/name: selenium-hub
|
||||
app.kubernetes.io/part-of: selenium-grid
|
||||
name: selenium-hub
|
||||
namespace: selenium
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: selenium-hub
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: selenium-hub
|
||||
app.kubernetes.io/name: selenium-hub
|
||||
app.kubernetes.io/part-of: selenium-grid
|
||||
spec:
|
||||
containers:
|
||||
- env:
|
||||
- name: SE_NODE_SESSION_TIMEOUT
|
||||
value: '300'
|
||||
- name: SE_SESSION_REQUEST_TIMEOUT
|
||||
value: '300'
|
||||
- name: SE_SESSION_RETRY_INTERVAL
|
||||
value: '5'
|
||||
- name: JAVA_OPTS
|
||||
value: -Xmx512m
|
||||
image: selenium/hub:4.27.0
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /wd/hub/status
|
||||
port: 4444
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 15
|
||||
timeoutSeconds: 5
|
||||
name: selenium-hub
|
||||
ports:
|
||||
- containerPort: 4444
|
||||
name: web
|
||||
- containerPort: 4442
|
||||
name: publish
|
||||
- containerPort: 4443
|
||||
name: subscribe
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /wd/hub/status
|
||||
port: 4444
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 5
|
||||
# Hub baseline working set ~766Mi on 2026-05-25 (75% of prior 1Gi
|
||||
# limit). Bump to 1.5Gi / 1Gi to keep ~50% headroom; matches the
|
||||
# stampede-buffer pattern documented for multus
|
||||
# (feedback_k8s_cni_multus_sizing). CPU left alone — observed 54m
|
||||
# against a 500m limit, no contention.
|
||||
resources:
|
||||
limits:
|
||||
cpu: 500m
|
||||
memory: 1536Mi
|
||||
requests:
|
||||
cpu: 250m
|
||||
memory: 1Gi
|
||||
---
|
||||
# Chrome browser node for the Selenium Grid, plus an ffmpeg video sidecar.
# Registers with the hub over the event bus (publish 4442 / subscribe 4443).
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: selenium-node-chrome
    app.kubernetes.io/name: selenium-node-chrome
    app.kubernetes.io/part-of: selenium-grid
  name: selenium-node-chrome
  namespace: selenium
spec:
  replicas: 1
  selector:
    matchLabels:
      app: selenium-node-chrome
  template:
    metadata:
      labels:
        app: selenium-node-chrome
        app.kubernetes.io/name: selenium-node-chrome
        app.kubernetes.io/part-of: selenium-grid
    spec:
      containers:
      - env:
        - name: SE_EVENT_BUS_HOST
          value: selenium-hub
        - name: SE_EVENT_BUS_PUBLISH_PORT
          value: '4442'
        - name: SE_EVENT_BUS_SUBSCRIBE_PORT
          value: '4443'
        - name: SE_NODE_MAX_SESSIONS
          value: '2'
        # Must be 'true' for SE_NODE_MAX_SESSIONS above to be honoured: without
        # the override the node keeps its CPU-derived recommended maximum, which
        # under the 1-CPU limit below is presumably 1 (confirm on the Grid UI).
        # The Firefox and Edge nodes already set this to 'true'.
        - name: SE_NODE_OVERRIDE_MAX_SESSIONS
          value: 'true'
        - name: SE_VNC_NO_PASSWORD
          value: '1'
        - name: SE_SCREEN_WIDTH
          value: '1920'
        - name: SE_SCREEN_HEIGHT
          value: '1080'
        - name: SE_NODE_SESSION_TIMEOUT
          value: '300'
        image: selenium/node-chrome:4.27.0
        livenessProbe:
          httpGet:
            path: /status
            port: 5555
          initialDelaySeconds: 30
          periodSeconds: 15
          # Explicit timeout (Kubernetes default is 1s) to match the hub and
          # Firefox node probes and avoid restarts while a session is busy.
          timeoutSeconds: 5
        name: selenium-chrome
        ports:
        - containerPort: 5555
          name: node
        readinessProbe:
          httpGet:
            path: /status
            port: 5555
          initialDelaySeconds: 15
          periodSeconds: 5
          timeoutSeconds: 5
        # Chromium-based browser node. Bumped from 1Gi -> 2Gi (req 512Mi
        # -> 1Gi) on 2026-05-25 — Edge had 51 OOMKills in 5d on the
        # original 1Gi cap (~1 OOM every 2.4h), and Chrome at maxSessions=2
        # was running 684Mi idle on the same cap. Matches the Firefox node's
        # tested-stable 2Gi limit. CPU unchanged.
        resources:
          limits:
            cpu: '1'
            memory: 2Gi
          requests:
            cpu: 500m
            memory: 1Gi
        volumeMounts:
        - mountPath: /dev/shm
          name: dshm
      # Video recorder sidecar; DISPLAY_CONTAINER_NAME=localhost points it at
      # the browser container in the same pod. Recordings land in /videos.
      - env:
        - name: DISPLAY_CONTAINER_NAME
          value: localhost
        - name: SE_SCREEN_WIDTH
          value: '1920'
        - name: SE_SCREEN_HEIGHT
          value: '1080'
        - name: SE_VIDEO_FILE_NAME
          value: auto
        - name: SE_VIDEO_UPLOAD_ENABLED
          value: 'false'
        image: selenium/video:ffmpeg-7.1-20250101
        name: video
        resources:
          limits:
            cpu: 500m
            memory: 768Mi
          requests:
            cpu: 250m
            memory: 384Mi
        volumeMounts:
        - mountPath: /videos
          name: selenium-videos
      volumes:
      # Memory-backed /dev/shm for the browser.
      - emptyDir:
          medium: Memory
          sizeLimit: 2Gi
        name: dshm
      # Pod-local scratch space for recordings (lost when the pod is replaced).
      - emptyDir:
          sizeLimit: 5Gi
        name: selenium-videos
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
labels:
|
||||
app: selenium-node-firefox
|
||||
app.kubernetes.io/name: selenium-node-firefox
|
||||
app.kubernetes.io/part-of: selenium-grid
|
||||
name: selenium-node-firefox
|
||||
namespace: selenium
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: selenium-node-firefox
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: selenium-node-firefox
|
||||
app.kubernetes.io/name: selenium-node-firefox
|
||||
app.kubernetes.io/part-of: selenium-grid
|
||||
spec:
|
||||
containers:
|
||||
- env:
|
||||
- name: SE_EVENT_BUS_HOST
|
||||
value: selenium-hub
|
||||
- name: SE_EVENT_BUS_PUBLISH_PORT
|
||||
value: '4442'
|
||||
- name: SE_EVENT_BUS_SUBSCRIBE_PORT
|
||||
value: '4443'
|
||||
- name: SE_NODE_MAX_SESSIONS
|
||||
value: '1'
|
||||
- name: SE_NODE_OVERRIDE_MAX_SESSIONS
|
||||
value: 'true'
|
||||
- name: SE_VNC_NO_PASSWORD
|
||||
value: '1'
|
||||
- name: SE_START_VNC
|
||||
value: 'false'
|
||||
- name: SE_SCREEN_WIDTH
|
||||
value: '1920'
|
||||
- name: SE_SCREEN_HEIGHT
|
||||
value: '1080'
|
||||
- name: SE_NODE_SESSION_TIMEOUT
|
||||
value: '300'
|
||||
image: selenium/node-firefox:4.27.0
|
||||
livenessProbe:
|
||||
failureThreshold: 5
|
||||
httpGet:
|
||||
path: /status
|
||||
port: 5555
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 15
|
||||
timeoutSeconds: 5
|
||||
name: selenium-firefox
|
||||
ports:
|
||||
- containerPort: 5555
|
||||
name: node
|
||||
readinessProbe:
|
||||
failureThreshold: 5
|
||||
httpGet:
|
||||
path: /status
|
||||
port: 5555
|
||||
initialDelaySeconds: 15
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 5
|
||||
resources:
|
||||
limits:
|
||||
cpu: '1'
|
||||
memory: 2Gi
|
||||
requests:
|
||||
cpu: 500m
|
||||
memory: 1Gi
|
||||
volumeMounts:
|
||||
- mountPath: /dev/shm
|
||||
name: dshm
|
||||
volumes:
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 2Gi
|
||||
name: dshm
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
labels:
|
||||
app: selenium-node-edge
|
||||
app.kubernetes.io/name: selenium-node-edge
|
||||
app.kubernetes.io/part-of: selenium-grid
|
||||
name: selenium-node-edge
|
||||
namespace: selenium
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: selenium-node-edge
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: selenium-node-edge
|
||||
app.kubernetes.io/name: selenium-node-edge
|
||||
app.kubernetes.io/part-of: selenium-grid
|
||||
spec:
|
||||
containers:
|
||||
- env:
|
||||
- name: SE_EVENT_BUS_HOST
|
||||
value: selenium-hub
|
||||
- name: SE_EVENT_BUS_PUBLISH_PORT
|
||||
value: '4442'
|
||||
- name: SE_EVENT_BUS_SUBSCRIBE_PORT
|
||||
value: '4443'
|
||||
- name: SE_NODE_MAX_SESSIONS
|
||||
value: '1'
|
||||
- name: SE_NODE_OVERRIDE_MAX_SESSIONS
|
||||
value: 'true'
|
||||
- name: SE_VNC_NO_PASSWORD
|
||||
value: '1'
|
||||
- name: SE_SCREEN_WIDTH
|
||||
value: '1920'
|
||||
- name: SE_SCREEN_HEIGHT
|
||||
value: '1080'
|
||||
- name: SE_NODE_SESSION_TIMEOUT
|
||||
value: '300'
|
||||
image: selenium/node-edge:4.27.0
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /status
|
||||
port: 5555
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 15
|
||||
name: selenium-edge
|
||||
ports:
|
||||
- containerPort: 5555
|
||||
name: node
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /status
|
||||
port: 5555
|
||||
initialDelaySeconds: 15
|
||||
periodSeconds: 5
|
||||
# Chromium-based browser node. Bumped from 1Gi -> 2Gi (req 512Mi
|
||||
# -> 1Gi) on 2026-05-25 — Edge had 51 OOMKills in 5d on the
|
||||
# original 1Gi cap (~1 OOM every 2.4h), and Chrome at maxSessions=2
|
||||
# was running 684Mi idle on the same cap. Matches the Firefox node's
|
||||
# tested-stable 2Gi limit. CPU unchanged.
|
||||
resources:
|
||||
limits:
|
||||
cpu: '1'
|
||||
memory: 2Gi
|
||||
requests:
|
||||
cpu: 500m
|
||||
memory: 1Gi
|
||||
volumeMounts:
|
||||
- mountPath: /dev/shm
|
||||
name: dshm
|
||||
volumes:
|
||||
- emptyDir:
|
||||
medium: Memory
|
||||
sizeLimit: 2Gi
|
||||
name: dshm
|
||||
---
|
||||
apiVersion: traefik.io/v1alpha1
|
||||
kind: IngressRoute
|
||||
metadata:
|
||||
name: selenium-hub
|
||||
namespace: selenium
|
||||
spec:
|
||||
entryPoints:
|
||||
- websecure
|
||||
routes:
|
||||
- kind: Rule
|
||||
match: Host(`selenium.iamworkin.lan`)
|
||||
services:
|
||||
- name: selenium-hub
|
||||
port: 4444
|
||||
tls:
|
||||
secretName: selenium-tls
|
||||
@@ -127,10 +127,13 @@ spec:
|
||||
initContainers:
|
||||
- name: fix-data-perms
|
||||
image: busybox:latest
|
||||
# Also chown /shared-tts (hostPath /tmp/tts-audio) so the non-root
|
||||
# app user (uid 1654) can write Piper .sln16 files that Asterisk
|
||||
# reads at /var/lib/asterisk/sounds/tts. World-readable (755) is
|
||||
# fine — Asterisk runs as a different uid in the other pod.
|
||||
# Must run as root to chown the hostPath /tmp/tts-audio that may be
|
||||
# root-owned after node reboot. Pod-level runAsNonRoot:true would
|
||||
# otherwise inherit and chown would fail with EPERM (see Notes memory
|
||||
# feedback_hostpath_initcontainer_chown_perms).
|
||||
securityContext:
|
||||
runAsUser: 0
|
||||
runAsNonRoot: false
|
||||
command: ["sh", "-c", "chown -R 1654:1654 /data && chown 1654:1654 /shared-tts && chmod 0755 /shared-tts"]
|
||||
volumeMounts:
|
||||
- name: telephony-data
|
||||
|
||||
@@ -28,9 +28,12 @@ Source: `D:\git\FlowerCore\FlowerCore.WorldBuilder` (master)
|
||||
Memory: `feedback_rke2_image_import_per_node_scp`.
|
||||
3. **Bump image tag** in `worldbuilder.yaml` and git push.
|
||||
ArgoCD ApplicationSet picks up within ~3 minutes.
|
||||
4. **First production render** — open `https://worldbuilder.iamworkin.lan`,
|
||||
create World → Character → Storyboard → ExportJob, confirm artifact
|
||||
downloads. ComfyUI lives on BLUEJAY-WS at `http://10.0.56.20:8188`.
|
||||
4. **First production render** — open
|
||||
`https://worldbuilder.iamworkin.lan/studio/c32e0000-0000-4000-8000-000000000004`
|
||||
and confirm the Cyberpunk Blue Jay demo prompt loads with five seeded fake
|
||||
generated images. This Sprint 32 visitor-safe profile uses
|
||||
`ClientMode=fake`; switch the image-generation env vars back to ComfyUI only
|
||||
for an operator-owned GPU render lane.
|
||||
|
||||
## Health probes
|
||||
|
||||
@@ -53,8 +56,13 @@ Source: `D:\git\FlowerCore\FlowerCore.WorldBuilder` (master)
|
||||
|
||||
## Image generation backend
|
||||
|
||||
`FlowerCore:WorldBuilder:ImageGeneration:BaseUrl=http://10.0.56.20:8188` —
|
||||
ComfyUI runs on BLUEJAY-WS Windows (R9700 / gfx1201 / ROCm 7.2.1). Pod reaches
|
||||
the workstation directly across the 10.0.56.0/24 VLAN (no Podman-style host-
|
||||
filter issues — K8s pods route via Calico, which is L3-routed across the
|
||||
VLAN).
|
||||
Sprint 32 pins the Kubernetes profile to
|
||||
`FlowerCore:WorldBuilder:ImageGeneration:ClientMode=fake` with
|
||||
`BaseUrl=http://127.0.0.1:1`. That keeps the public/internal visitor demo
|
||||
deterministic, avoids GPU exposure, and still exercises the studio/gallery
|
||||
surface with persisted generated-image metadata.
|
||||
|
||||
The previous ComfyUI backend target was `http://10.0.56.20:8188` on
|
||||
BLUEJAY-WS (R9700 / gfx1201 / ROCm 7.2.1). Re-enable it only in an
|
||||
operator-owned follow-up that also verifies workstation reachability and image
|
||||
import freshness.
|
||||
|
||||
@@ -16,7 +16,11 @@ kind: Namespace
|
||||
metadata:
|
||||
name: fc-worldbuilder
|
||||
labels:
|
||||
app.kubernetes.io/name: fc-worldbuilder
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
flowercore.io/tenant-id: system
|
||||
flowercore.io/created-by: bluejay-infra
|
||||
---
|
||||
# SQLite DB + generated image gallery + PDF/PNG exports.
|
||||
# Longhorn RWO — single replica with `Recreate` rollout strategy keeps it safe.
|
||||
@@ -25,6 +29,13 @@ kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: worldbuilder-data
|
||||
namespace: fc-worldbuilder
|
||||
labels:
|
||||
app.kubernetes.io/name: worldbuilder-data
|
||||
app.kubernetes.io/component: storage
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
flowercore.io/tenant-id: system
|
||||
flowercore.io/created-by: bluejay-infra
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
@@ -40,7 +51,13 @@ metadata:
|
||||
namespace: fc-worldbuilder
|
||||
labels:
|
||||
app.kubernetes.io/name: worldbuilder-web
|
||||
app.kubernetes.io/component: web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
flowercore.io/tenant-id: system
|
||||
flowercore.io/created-by: bluejay-infra
|
||||
annotations:
|
||||
flowercore.io/traceability-standard: k8s-pod-ownership-and-traceability-standard
|
||||
spec:
|
||||
replicas: 1
|
||||
revisionHistoryLimit: 3
|
||||
@@ -54,11 +71,16 @@ spec:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: worldbuilder-web
|
||||
app.kubernetes.io/component: web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
flowercore.io/tenant-id: system
|
||||
flowercore.io/created-by: bluejay-infra
|
||||
annotations:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "8080"
|
||||
prometheus.io/path: "/metrics/prometheus"
|
||||
flowercore.io/audit-trace-id: "worldbuilder-runtime-demo"
|
||||
spec:
|
||||
securityContext:
|
||||
fsGroup: 1654
|
||||
@@ -92,11 +114,14 @@ spec:
|
||||
value: "/data/gallery"
|
||||
- name: FlowerCore__WorldBuilder__Export__RootPath
|
||||
value: "/data/exports"
|
||||
# ComfyUI on BLUEJAY-WS (R9700 / gfx1201 / ROCm 7.2.1).
|
||||
# Visitor-safe Sprint 32 profile: fake backend keeps public demo
|
||||
# rendering deterministic and avoids exposing BLUEJAY-WS GPU.
|
||||
- name: FlowerCore__WorldBuilder__ImageGeneration__BaseUrl
|
||||
value: "http://10.0.56.20:8188"
|
||||
value: "http://127.0.0.1:1"
|
||||
- name: FlowerCore__WorldBuilder__ImageGeneration__ClientMode
|
||||
value: "comfyui"
|
||||
value: "fake"
|
||||
- name: FlowerCore__WorldBuilder__ImageGeneration__BackendId
|
||||
value: "fake"
|
||||
resources:
|
||||
# Cluster CPU-request budget runs hot (99% on all 3 nodes at deploy
|
||||
# time) while actual CPU usage is well below capacity. Idle Blazor
|
||||
@@ -165,7 +190,11 @@ metadata:
|
||||
namespace: fc-worldbuilder
|
||||
labels:
|
||||
app.kubernetes.io/name: worldbuilder-web
|
||||
app.kubernetes.io/component: web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
flowercore.io/tenant-id: system
|
||||
flowercore.io/created-by: bluejay-infra
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
@@ -180,6 +209,13 @@ kind: Certificate
|
||||
metadata:
|
||||
name: worldbuilder-web-tls
|
||||
namespace: fc-worldbuilder
|
||||
labels:
|
||||
app.kubernetes.io/name: worldbuilder-web-tls
|
||||
app.kubernetes.io/component: ingress
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
flowercore.io/tenant-id: system
|
||||
flowercore.io/created-by: bluejay-infra
|
||||
spec:
|
||||
secretName: worldbuilder-web-tls
|
||||
issuerRef:
|
||||
@@ -200,6 +236,13 @@ kind: IngressRoute
|
||||
metadata:
|
||||
name: worldbuilder-web
|
||||
namespace: fc-worldbuilder
|
||||
labels:
|
||||
app.kubernetes.io/name: worldbuilder-web
|
||||
app.kubernetes.io/component: ingress
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
flowercore.io/tenant-id: system
|
||||
flowercore.io/created-by: bluejay-infra
|
||||
spec:
|
||||
entryPoints:
|
||||
- websecure
|
||||
|
||||
@@ -305,15 +305,17 @@ spec:
|
||||
path: /
|
||||
port: 8080
|
||||
initialDelaySeconds: 60
|
||||
timeoutSeconds: 5
|
||||
timeoutSeconds: 15
|
||||
periodSeconds: 10
|
||||
failureThreshold: 3
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
port: 8080
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 5
|
||||
timeoutSeconds: 15
|
||||
failureThreshold: 3
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
|
||||
84
docs/runbooks/openvoxserver-quadlet-durability.md
Normal file
84
docs/runbooks/openvoxserver-quadlet-durability.md
Normal file
@@ -0,0 +1,84 @@
|
||||
# openvoxserver Quadlet Durability
|
||||
|
||||
This runbook documents the noc1 `openvoxserver` durability fix for the Puppet control-repo deploy path. The service is a noc1 host artifact, not an ArgoCD application, so discovery always starts on noc1 rather than in `apps/*`.
|
||||
|
||||
## Current State
|
||||
|
||||
As of the Sprint 32 Cx-12 apply on 2026-05-17:
|
||||
|
||||
- `/etc/containers/systemd/openvoxserver.container` has a `GIT_SSH_COMMAND` environment entry that points at the persisted serverdata deploy key.
|
||||
- `/etc/systemd/system/openvoxserver-safeconfig.service` is enabled and active, and reapplies `git config --global --add safe.directory *` inside the running container.
|
||||
- `/opt/puppet/r10k-deploy.sh` self-heals before each fetch by setting `safe.directory`, the repo-local `core.sshCommand`, and the persisted `known_hosts` file when needed.
|
||||
- `puppet-deploy.service` exits `0/SUCCESS` after the apply and the control repo reports `HEAD == origin/master`.
|
||||
- `systemctl cat openvoxserver` does not currently resolve to a generated unit on noc1. The container is running through Podman with `restart=always`, so destructive recreate smoke must not run until the generated unit is present.
|
||||
|
||||
## Discovery
|
||||
|
||||
Run every command through noc1 as `fcadmin`; do not assume BLUEJAY-WS can reach container-local surfaces directly.
|
||||
|
||||
```bash
|
||||
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "hostname && sudo -n true"
|
||||
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo find /etc/containers/systemd /usr/share/containers/systemd /etc/systemd/system -name 'openvoxserver*' 2>/dev/null"
|
||||
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo sed -n '1,220p' /etc/containers/systemd/openvoxserver.container"
|
||||
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo systemctl cat puppet-deploy.service"
|
||||
```
|
||||
|
||||
If a future noc1 profile manages these files, update the Puppet control repo and let `puppet-deploy.service` apply the change. On 2026-05-17, host `puppet` was not installed, so Cx-12 used a direct noc1 host edit.
|
||||
|
||||
## Durable Fix Shape
|
||||
|
||||
The Quadlet keeps the deploy key as a path reference only:
|
||||
|
||||
```ini
|
||||
Environment=GIT_SSH_COMMAND=ssh -i /opt/puppetlabs/server/data/puppetserver/.puppet-deploy-key -o StrictHostKeyChecking=yes -o IdentitiesOnly=yes -o UserKnownHostsFile=/opt/puppetlabs/server/data/puppetserver/.known_hosts
|
||||
```
|
||||
|
||||
The safeconfig service is intentionally independent of `openvoxserver.service` until the generated unit exists. It waits for the `openvoxserver` container name and then runs:
|
||||
|
||||
```bash
|
||||
/usr/bin/podman exec openvoxserver git config --global --add safe.directory '*'
|
||||
```
|
||||
|
||||
The deploy script self-heals inside the container before it fetches the control repo:
|
||||
|
||||
```bash
|
||||
git config --global --add safe.directory "*" 2>/dev/null || true
|
||||
DEPLOY_KEY="/opt/puppetlabs/server/data/puppetserver/.puppet-deploy-key"
|
||||
KNOWN_HOSTS="/opt/puppetlabs/server/data/puppetserver/.known_hosts"
|
||||
REPO="/etc/puppetlabs/code/environments/production"
|
||||
export GIT_SSH_COMMAND="ssh -i $DEPLOY_KEY -o StrictHostKeyChecking=yes -o IdentitiesOnly=yes -o UserKnownHostsFile=$KNOWN_HOSTS"
|
||||
git -C "$REPO" config core.sshCommand "$GIT_SSH_COMMAND" 2>/dev/null || true
|
||||
```
|
||||
|
||||
## Validation
|
||||
|
||||
Non-destructive validation:
|
||||
|
||||
```bash
|
||||
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo grep -n 'GIT_SSH_COMMAND' /etc/containers/systemd/openvoxserver.container"
|
||||
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo systemctl status openvoxserver-safeconfig.service --no-pager -l"
|
||||
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo systemctl start puppet-deploy.service && sudo systemctl status puppet-deploy.service --no-pager -l"
|
||||
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo podman exec openvoxserver git -C /etc/puppetlabs/code/environments/production config --get core.sshCommand"
|
||||
```
|
||||
|
||||
Destructive recreate smoke is opt-in only:
|
||||
|
||||
```bash
|
||||
scp -i ~/.ssh/fcadmin_ed25519 scripts/monitoring/openvox-recreate-smoke.sh fcadmin@10.0.56.10:/tmp/openvox-recreate-smoke.sh
|
||||
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "chmod +x /tmp/openvox-recreate-smoke.sh && sudo OPENVOX_RECREATE_SMOKE=1 /tmp/openvox-recreate-smoke.sh"
|
||||
```
|
||||
|
||||
Do not run the smoke during normal sprint work. It stops and removes the production container before starting it again through systemd, and it now refuses to continue unless `systemctl cat openvoxserver` succeeds.
|
||||
|
||||
## Credential Rotation Note
|
||||
|
||||
When rotating the Puppet deploy key, update the persisted serverdata copy on noc1:
|
||||
|
||||
```bash
|
||||
sudo install -m 0600 -o root -g root <new-deploy-key> /opt/puppet/serverdata/.puppet-deploy-key
|
||||
sudo podman exec openvoxserver sh -c "ssh-keyscan github.com > /opt/puppetlabs/server/data/puppetserver/.known_hosts"
|
||||
sudo systemctl start openvoxserver-safeconfig.service
|
||||
sudo systemctl start puppet-deploy.service
|
||||
```
|
||||
|
||||
Never commit the deploy key or print it in logs.
|
||||
48
scripts/monitoring/openvox-recreate-smoke.sh
Executable file
48
scripts/monitoring/openvox-recreate-smoke.sh
Executable file
@@ -0,0 +1,48 @@
|
||||
#!/usr/bin/env bash
# Destructive recreate smoke for the openvoxserver container.
#
# Stops the systemd unit, removes the container, starts it again through
# systemd, runs puppet-deploy.service, and then checks that the control repo
# inside the container is reachable and still wired to the persisted deploy key.
#
# Environment:
#   OPENVOX_RECREATE_SMOKE=1  required opt-in; anything else exits 64.
#   SUDO                      privilege wrapper, defaults to "sudo".
#
# Exit codes:
#   64  opt-in flag not set
#   65  no systemd unit resolves for openvoxserver (no verified recreate path)
#   1   core.sshCommand does not reference the persisted deploy key
#   any other non-zero: a command failed under `set -e`
set -euo pipefail

if [ "${OPENVOX_RECREATE_SMOKE:-}" != "1" ]; then
  echo "SKIP: set OPENVOX_RECREATE_SMOKE=1 to run the destructive openvoxserver recreate smoke." >&2
  exit 64
fi

# $SUDO is expanded unquoted on purpose so that SUDO="" (already root) works.
SUDO="${SUDO:-sudo}"
REPO="/etc/puppetlabs/code/environments/production"
CORE_SSH_COMMAND_FRAGMENT=".puppet-deploy-key"

# Refuse to remove the container unless systemd can bring it back.
if ! $SUDO systemctl cat openvoxserver >/dev/null 2>&1; then
  echo "SKIP: systemctl cat openvoxserver failed; refusing to remove a container without a verified systemd recreate path." >&2
  exit 65
fi

before="$($SUDO podman exec openvoxserver git -C "$REPO" rev-parse --short HEAD)"
echo "Before recreate: $before"

$SUDO systemctl stop openvoxserver
# Best effort: the container may already be gone once the unit has stopped.
$SUDO podman rm openvoxserver 2>/dev/null || true
$SUDO systemctl start openvoxserver

# Fixed settle time for the server inside the container to come up.
sleep 50

$SUDO systemctl start puppet-deploy.service
sleep 5

# Informational only. `systemctl status` exits non-zero (3) whenever the unit
# is not active, which is the normal resting state of a deploy job that has
# already run to completion; under `set -e` that would abort the smoke before
# any of the real checks below. A failed deploy is still caught by
# `systemctl start` above and by the git checks that follow.
$SUDO systemctl status puppet-deploy.service --no-pager -l || true

after="$($SUDO podman exec openvoxserver git -C "$REPO" rev-parse --short origin/master)"
echo "After recreate origin/master: $after"

# Host-side view of the deployed control repo must contain the profile manifests.
$SUDO test -d /opt/puppet/code/environments/production/site-modules/profile/manifests

core_ssh="$($SUDO podman exec openvoxserver git -C "$REPO" config --get core.sshCommand)"
case "$core_ssh" in
  *"$CORE_SSH_COMMAND_FRAGMENT"*) ;;
  *)
    echo "FAIL: core.sshCommand does not reference the persisted deploy key." >&2
    exit 1
    ;;
esac

$SUDO podman exec openvoxserver git -C "$REPO" status --short --branch

echo "PASS: openvoxserver recreate smoke completed without git safety or deploy-key failure."
|
||||
206
tests/bluejay-infra-lint/DivoomPiDeployArtifactTests.cs
Normal file
206
tests/bluejay-infra-lint/DivoomPiDeployArtifactTests.cs
Normal file
@@ -0,0 +1,206 @@
|
||||
using FluentAssertions;
|
||||
using Xunit;
|
||||
|
||||
namespace BluejayInfraLint.Tests;
|
||||
|
||||
[Trait("Category", "Unit")]
|
||||
public sealed class DivoomPiDeployArtifactTests
|
||||
{
|
||||
private static readonly string Root = FindRepoRoot();
|
||||
private static readonly string DmRoot = Path.Combine(Root, "apps", "fc-divoom-dm-pi-device");
|
||||
private static readonly string TvRoot = Path.Combine(Root, "apps", "fc-divoom-tv-pi");
|
||||
|
||||
/// <summary>
/// Relative paths (using '/' separators) that must exist under the
/// fc-divoom-dm-pi-device app directory.
/// </summary>
public static TheoryData<string> DmRequiredArtifacts =>
    new TheoryData<string>
    {
        "README.md",
        "hiera/edge2-divoom-dm-device.overlay.yaml",
        "puppet/profile/pi/service/divoom_dm_device.pp",
        "puppet/templates/divoom-device-registration.json.epp",
        "puppet/templates/flowercore-divoom-dm-agent.service.epp",
    };
|
||||
|
||||
/// <summary>
/// Relative paths (using '/' separators) that must exist under the
/// fc-divoom-tv-pi app directory.
/// </summary>
public static TheoryData<string> TvRequiredArtifacts =>
    new TheoryData<string>
    {
        "README.md",
        "hiera/example-divoom-tv-pi.iamworkin.lan.yaml",
        "puppet/profile/pi/service/divoom_tv.pp",
        "systemd/flowercore-divoom-tv.service",
        "systemd/flowercore-divoom-tv-hdmi.service",
        "systemd/99-flowercore-divoom-tv-hdmi.rules",
        "scripts/flowercore-divoom-tv-prelaunch.sh",
        "scripts/flowercore-divoom-tv-launch.sh",
        "scripts/flowercore-divoom-tv-hdmi-respond.sh",
    };
|
||||
|
||||
/// <summary>
/// Each path listed in <see cref="DmRequiredArtifacts"/> must exist on disk
/// beneath <c>DmRoot</c>.
/// </summary>
/// <param name="relativePath">Artifact path relative to <c>DmRoot</c>, written with '/' separators.</param>
[Theory]
[MemberData(nameof(DmRequiredArtifacts))]
public void DmDeviceArtifacts_ArePresent(string relativePath)
{
    // Theory rows use '/'; convert to the host separator before combining.
    var platformRelativePath = relativePath.Replace('/', Path.DirectorySeparatorChar);
    var artifactPath = Path.Combine(DmRoot, platformRelativePath);

    // The original relative path doubles as the failure reason.
    File.Exists(artifactPath).Should().BeTrue(relativePath);
}
|
||||
|
||||
/// <summary>
/// Each path listed in <see cref="TvRequiredArtifacts"/> must exist on disk
/// beneath <c>TvRoot</c>.
/// </summary>
/// <param name="relativePath">Artifact path relative to <c>TvRoot</c>, written with '/' separators.</param>
[Theory]
[MemberData(nameof(TvRequiredArtifacts))]
public void TvPiArtifacts_ArePresent(string relativePath)
{
    // Theory rows use '/'; convert to the host separator before combining.
    var platformRelativePath = relativePath.Replace('/', Path.DirectorySeparatorChar);
    var artifactPath = Path.Combine(TvRoot, platformRelativePath);

    // The original relative path doubles as the failure reason.
    File.Exists(artifactPath).Should().BeTrue(relativePath);
}
|
||||
|
||||
/// <summary>
/// The DM device README must contain the phrases that mark it as a
/// Puppet/systemd deployment rather than a Kubernetes application.
/// </summary>
[Fact]
public void DmDeviceReadme_DeclaresPuppetSystemdNotKubernetes()
{
    string readmeText = ReadDm("README.md");

    readmeText.Should()
        .Contain("not a Kubernetes application")
        .And.Contain("profile::pi::service::divoom")
        .And.Contain("no K8s surface");
}
|
||||
|
||||
/// <summary>
/// The edge2 Hiera overlay must contain the existing fc-pimanager / fc-divoom
/// entries and the stopped/disabled settings for the DM device service.
/// </summary>
[Fact]
public void DmHieraOverlay_PreservesExistingEdge2DivoomService()
{
    string overlayYaml = ReadDm("hiera/edge2-divoom-dm-device.overlay.yaml");

    overlayYaml.Should()
        .Contain("fc-pimanager:")
        .And.Contain("fc-divoom:")
        .And.Contain("enabled: true")
        .And.Contain("profile::pi::service::divoom_dm_device::service_enabled: false")
        .And.Contain("profile::pi::service::divoom_dm_device::service_ensure: 'stopped'");
}
|
||||
|
||||
/// <summary>
/// The DM device Puppet profile must declare disabled/stopped parameter
/// defaults and pass those parameters to its service resource.
/// </summary>
[Fact]
public void DmPuppetProfile_DefaultsToStoppedDisabledService()
{
    string manifest = ReadDm("puppet/profile/pi/service/divoom_dm_device.pp");

    manifest.Should()
        .Contain("Boolean $service_enabled = false")
        .And.Contain("Enum['running', 'stopped'] $service_ensure = 'stopped'")
        .And.Contain("service { $service_name:")
        .And.Contain("ensure => $service_ensure")
        .And.Contain("enable => $service_enabled");
}
|
||||
|
||||
/// <summary>
/// The DM device Puppet profile must not reference, declare, or notify the
/// existing <c>flowercore-divoom.service</c> unit.
/// </summary>
[Fact]
public void DmPuppetProfile_DoesNotManageLiveDivoomWebUnit()
{
    string manifest = ReadDm("puppet/profile/pi/service/divoom_dm_device.pp");

    manifest.Should()
        .NotContain("Service['flowercore-divoom.service']")
        .And.NotContain("service { 'flowercore-divoom.service'")
        .And.NotContain("notify => Service");
}
|
||||
|
||||
/// <summary>
/// The DM agent unit template must be conditioned on the bt-link, bt-reset and
/// audio-link wrapper scripts, start the agent in Pi mode, and never mention
/// <c>flowercore-divoom.service</c>.
/// </summary>
[Fact]
public void DmAgentUnit_IsSeparateAndGatedByExistingWrappers()
{
    string unitTemplate = ReadDm("puppet/templates/flowercore-divoom-dm-agent.service.epp");

    unitTemplate.Should()
        .Contain("ConditionPathExists=<%= $divoom_install_dir %>/bt-link.sh")
        .And.Contain("ConditionPathExists=<%= $divoom_install_dir %>/bt-reset.sh")
        .And.Contain("ConditionPathExists=<%= $divoom_install_dir %>/audio-link.sh")
        .And.Contain("ExecStart=<%= $agent_binary_path %> --mode=Pi")
        .And.NotContain("flowercore-divoom.service");
}
|
||||
|
||||
/// <summary>
/// The device registration JSON template must contain the candidate-channel,
/// render-proof, preserve-existing-service and FM-radio policy fields.
/// </summary>
[Fact]
public void DmRegistration_CarriesRenderProofAndSafetyPolicy()
{
    string registrationTemplate = ReadDm("puppet/templates/divoom-device-registration.json.epp");

    registrationTemplate.Should()
        .Contain("\"candidateChannels\": <%= $bt_channels_json %>")
        .And.Contain("\"deviceInfoIsRenderProof\": false")
        .And.Contain("\"visibleRenderProofRequired\": <%= $visible_render_proof_required %>")
        .And.Contain("\"preserveExistingService\": \"flowercore-divoom.service\"")
        .And.Contain("\"doNotEnableFmRadio\": true");
}
|
||||
|
||||
/// <summary>
/// The TV systemd unit must contain the binary-exists condition, the private
/// runtime directory settings, the prelaunch/launch wrappers and the memory
/// cap, and must not contain the literal <c>/tmp</c>.
/// </summary>
[Fact]
public void TvService_UsesAvaloniaHdmiSafetyGates()
{
    string unitText = ReadTv("systemd/flowercore-divoom-tv.service");

    unitText.Should()
        .Contain("ConditionPathExists=/opt/flowercore/divoom-tv/FlowerCore.Divoom.Tv")
        .And.Contain("Environment=XDG_RUNTIME_DIR=/run/fc-divoom-tv")
        .And.Contain("RuntimeDirectoryMode=0700")
        .And.Contain("ExecStartPre=/usr/local/bin/flowercore-divoom-tv-prelaunch.sh")
        .And.Contain("ExecStart=/usr/local/bin/flowercore-divoom-tv-launch.sh")
        .And.Contain("MemoryMax=2G")
        .And.Contain("PrivateTmp=true")
        .And.NotContain("/tmp");
}
|
||||
|
||||
// The launcher script must probe for cage, exec through it, log the direct-launch
// fallback, and pass the HDMI target and presentation-mode arguments.
[Fact]
public void TvLauncher_PrefersCageAndFallsBackToDirectLaunch()
{
    var script = ReadTv("scripts/flowercore-divoom-tv-launch.sh");

    string[] requiredFragments =
    {
        "command -v cage",
        "exec cage --",
        "launching FlowerCore.Divoom.Tv directly",
        "--target=hdmi",
        "--presentation-mode=${PRESENTATION_MODE}",
    };

    foreach (var fragment in requiredFragments)
    {
        script.Should().Contain(fragment);
    }
}
|
||||
|
||||
// The udev rule must match HDMI connectors and start the hotplug service; the
// responder script must sleep before restarting the TV renderer unit.
[Fact]
public void TvHotplugRule_SettlesAndRestartsRenderer()
{
    var rule = ReadTv("systemd/99-flowercore-divoom-tv-hdmi.rules");
    var responder = ReadTv("scripts/flowercore-divoom-tv-hdmi-respond.sh");

    foreach (var ruleFragment in new[] { "KERNEL==\"card?-HDMI-A-?\"", "start flowercore-divoom-tv-hdmi.service" })
    {
        rule.Should().Contain(ruleFragment);
    }

    foreach (var responderFragment in new[] { "sleep 2", "systemctl restart flowercore-divoom-tv.service" })
    {
        responder.Should().Contain(responderFragment);
    }
}
|
||||
|
||||
// The TV Puppet profile must declare the package list, reference the static
// unit/launcher/udev artifacts, and default $service_enabled to false.
[Fact]
public void TvPuppetProfile_InstallsCageAndStaticArtifacts()
{
    var profile = ReadTv("puppet/profile/pi/service/divoom_tv.pp");

    string[] requiredFragments =
    {
        "package { ['cage', 'libgbm1', 'libdrm2', 'libxkbcommon0', 'fonts-dejavu-core']",
        "'profile/pi/fc_divoom_tv/flowercore-divoom-tv.service'",
        "'profile/pi/fc_divoom_tv/flowercore-divoom-tv-launch.sh'",
        "profile/pi/fc_divoom_tv/99-flowercore-divoom-tv-hdmi.rules",
        "Boolean $service_enabled = false",
    };

    foreach (var fragment in requiredFragments)
    {
        profile.Should().Contain(fragment);
    }
}
|
||||
|
||||
// No file under the DM or TV artifact roots may contain a Kubernetes Deployment,
// IngressRoute, Certificate, or OnePasswordItem manifest marker.
[Fact]
public void DivoomArtifacts_DoNotAddKubernetesWorkloads()
{
    var artifactPaths = Directory.GetFiles(DmRoot, "*", SearchOption.AllDirectories)
        .Concat(Directory.GetFiles(TvRoot, "*", SearchOption.AllDirectories));

    string[] forbiddenKinds =
    {
        "kind: Deployment",
        "kind: IngressRoute",
        "kind: Certificate",
        "kind: OnePasswordItem",
    };

    foreach (var path in artifactPaths)
    {
        var text = File.ReadAllText(path);

        foreach (var forbiddenKind in forbiddenKinds)
        {
            // Pass the path as the reason so a failure names the offending file
            // instead of only dumping its contents.
            text.Should().NotContain(forbiddenKind, "{0} must not declare Kubernetes workloads", path);
        }
    }
}
|
||||
|
||||
// Reads a file below DmRoot; relativePath uses '/' separators, which are
// converted to the platform separator before combining.
private static string ReadDm(string relativePath)
{
    var platformRelativePath = relativePath.Replace('/', Path.DirectorySeparatorChar);
    var fullPath = Path.Combine(DmRoot, platformRelativePath);
    return File.ReadAllText(fullPath);
}
|
||||
|
||||
// Reads a file below TvRoot; relativePath uses '/' separators, which are
// converted to the platform separator before combining.
private static string ReadTv(string relativePath)
{
    var platformRelativePath = relativePath.Replace('/', Path.DirectorySeparatorChar);
    var fullPath = Path.Combine(TvRoot, platformRelativePath);
    return File.ReadAllText(fullPath);
}
|
||||
|
||||
// Walks upward from AppContext.BaseDirectory and returns the first directory that
// contains both an "apps" directory and a README.md file.
// Throws DirectoryNotFoundException when no ancestor matches.
private static string FindRepoRoot()
{
    for (var candidate = new DirectoryInfo(AppContext.BaseDirectory);
         candidate is not null;
         candidate = candidate.Parent)
    {
        var hasAppsDirectory = Directory.Exists(Path.Combine(candidate.FullName, "apps"));
        if (hasAppsDirectory && File.Exists(Path.Combine(candidate.FullName, "README.md")))
        {
            return candidate.FullName;
        }
    }

    throw new DirectoryNotFoundException("Could not find bluejay-infra root.");
}
|
||||
}
|
||||
@@ -13,8 +13,8 @@ public sealed class FleetManifestLintTests
|
||||
|
||||
private static readonly HashSet<string> PublicReadOnlyHosts = new(StringComparer.Ordinal)
|
||||
{
|
||||
"brochure.flowercore.io",
|
||||
"dist.flowercore.io",
|
||||
"dns.iamworkin.lan",
|
||||
};
|
||||
|
||||
// Public hosts that allow a tightly bounded write surface in addition to
|
||||
@@ -54,6 +54,45 @@ public sealed class FleetManifestLintTests
|
||||
"ttsreader-piper",
|
||||
};
|
||||
|
||||
// Expected runner Deployment name -> repository URL that the deployment's
// REPO_URL environment variable must carry.
private static readonly IReadOnlyDictionary<string, string> LinuxRunnerRepos = new Dictionary<string, string>(StringComparer.Ordinal)
{
    { "github-runner", "https://github.com/astoltz/FlowerCore.Common" },
    { "github-runner-sharedpos", "https://github.com/astoltz/FlowerCore.Shared.Pos" },
    { "github-runner-puppet", "https://github.com/astoltz/FlowerCore.Puppet" },
    { "github-runner-signage", "https://github.com/astoltz/FlowerCore.Signage" },
    { "github-runner-dms", "https://github.com/astoltz/FlowerCore.DMS" },
    { "github-runner-telephony", "https://github.com/astoltz/FlowerCore.Telephony" },
    { "github-runner-print-web", "https://github.com/astoltz/FlowerCore.Print.Web" },
    { "github-runner-chat", "https://github.com/astoltz/FlowerCore.Chat" },
    { "github-runner-mysql", "https://github.com/astoltz/FlowerCore.MySQL" },
    { "github-runner-kiosk-linux", "https://github.com/astoltz/FlowerCore.Kiosk.Linux" },
    { "github-runner-updater", "https://github.com/astoltz/FlowerCore.Updater" },
};
|
||||
|
||||
// Runner Deployments that the lint tests treat as scaled (multi-replica);
// the base "github-runner" deployment is deliberately not listed.
private static readonly HashSet<string> ScaledLinuxRunnerDeployments = new(
    new[]
    {
        "github-runner-sharedpos",
        "github-runner-puppet",
        "github-runner-signage",
        "github-runner-dms",
        "github-runner-telephony",
        "github-runner-print-web",
        "github-runner-chat",
        "github-runner-mysql",
        "github-runner-kiosk-linux",
        "github-runner-updater",
    },
    StringComparer.Ordinal);
|
||||
|
||||
// Environment variable -> value that every runner container must set so that
// home, .NET, NuGet, cache and tool paths all live under /home/runner.
private static readonly IReadOnlyDictionary<string, string> WritableRunnerEnv = new Dictionary<string, string>(StringComparer.Ordinal)
{
    { "HOME", "/home/runner" },
    { "DOTNET_INSTALL_DIR", "/home/runner/.dotnet" },
    { "DOTNET_CLI_HOME", "/home/runner" },
    { "NUGET_PACKAGES", "/home/runner/.nuget/packages" },
    { "XDG_CACHE_HOME", "/home/runner/.cache" },
    { "RUNNER_TOOL_CACHE", "/home/runner/_tool" },
};
|
||||
|
||||
[Fact]
|
||||
public void IngressRoutes_MustKeepServiceReferencesInTheSameNamespace()
|
||||
{
|
||||
@@ -187,6 +226,279 @@ public sealed class FleetManifestLintTests
|
||||
violations.Should().BeEmpty();
|
||||
}
|
||||
|
||||
// Every entry in LinuxRunnerRepos must exist as a runner Deployment whose single
// main container is repo-scoped, ephemeral, non-root, carries the fleet labels,
// and reads ACCESS_TOKEN from the github-runner-token secret (key "credential").
[Fact]
public void GitHubRunnerFleet_MustRegisterRequiredReposAsRepoScopedDeployments()
{
    var deployments = GitHubRunnerDeployments();

    foreach (var (deploymentName, repoUrl) in LinuxRunnerRepos)
    {
        deployments.Should().ContainKey(deploymentName);

        var container = deployments[deploymentName].MainContainerMappings().Should().ContainSingle().Subject;

        var literalEnv = new (string Name, string Expected)[]
        {
            ("REPO_URL", repoUrl),
            ("EPHEMERAL", "true"),
            ("LABELS", "self-hosted,linux,fc-build-linux"),
            ("RUN_AS_ROOT", "false"),
        };

        foreach (var (name, expected) in literalEnv)
        {
            EnvValue(container, name).Should().Be(expected);
        }

        // The token must be a secret reference, never an inline value.
        EnvValue(container, "ACCESS_TOKEN").Should().BeNull("ACCESS_TOKEN must come from github-runner-token Secret, not a literal");
        EnvSecretName(container, "ACCESS_TOKEN").Should().Be("github-runner-token");
        EnvSecretKey(container, "ACCESS_TOKEN").Should().Be("credential");
    }
}
|
||||
|
||||
// Every runner Deployment's main container must set the WritableRunnerEnv values
// and mount runner-home, nuget-cache and tmp at the expected paths.
[Fact]
public void GitHubRunnerFleet_MustSetWritableNonRootDotnetAndCachePaths()
{
    foreach (var deployment in GitHubRunnerDeployments().Values)
    {
        var container = deployment.MainContainerMappings().Should().ContainSingle().Subject;

        foreach (var (variable, expectedValue) in WritableRunnerEnv)
        {
            EnvValue(container, variable).Should().Be(expectedValue, $"{deployment.Name} must keep .NET paths writable for uid 1001");
        }

        // volumeMount name -> mountPath; missing scalars collapse to the empty string.
        var volumeMounts = ManifestNodeExtensions.MappingSequence(container, "volumeMounts");
        var mounts = volumeMounts.ToDictionary(
            mount => ManifestNodeExtensions.Scalar(mount, "name") ?? string.Empty,
            mount => ManifestNodeExtensions.Scalar(mount, "mountPath") ?? string.Empty,
            StringComparer.Ordinal);

        mounts.Should().Contain("runner-home", "/home/runner");
        mounts.Should().Contain("nuget-cache", "/home/runner/.nuget/packages");
        mounts.Should().Contain("tmp", "/tmp");
    }
}
|
||||
|
||||
// Scaled runner Deployments must run at least two replicas, must not reference any
// PersistentVolumeClaim, and must back "nuget-cache" with an emptyDir. The base
// "github-runner" Deployment must stay at one replica with exactly one PVC,
// "github-runner-nuget-cache".
[Fact]
public void GitHubRunnerFleet_MustAvoidRwoMultiAttachForScaledDeployments()
{
    var deployments = GitHubRunnerDeployments();

    foreach (var deploymentName in ScaledLinuxRunnerDeployments)
    {
        // Assert presence first so a missing manifest fails with a readable message
        // rather than a KeyNotFoundException from the indexer.
        deployments.Should().ContainKey(deploymentName, "every scaled runner must have a Deployment manifest");
        var deployment = deployments[deploymentName];

        // Scaled runners must have >= 2 replicas (avoid single-pod bottleneck).
        // Individual deployments may be tuned upward per CI activity — see
        // "runners: right-size replica counts per 14d CI activity (#24)".
        ReplicaCount(deployment).Should().BeGreaterOrEqualTo(2, $"{deploymentName} is in the scaled set and must run with at least 2 replicas");

        var volumes = deployment.MappingSequence("spec", "template", "spec", "volumes");
        var claimNames = volumes
            .Select(volume => ManifestNodeExtensions.Scalar(volume, "persistentVolumeClaim", "claimName"))
            .Where(value => !string.IsNullOrWhiteSpace(value))
            .ToList();

        claimNames.Should().BeEmpty($"{deploymentName} is scaled and must not share a RWO PVC");
        volumes.Should().Contain(volume =>
            string.Equals(ManifestNodeExtensions.Scalar(volume, "name"), "nuget-cache", StringComparison.Ordinal)
            && ManifestNodeExtensions.Mapping(volume, "emptyDir") != null);
    }

    deployments.Should().ContainKey("github-runner", "the base runner Deployment must exist");
    var common = deployments["github-runner"];
    ReplicaCount(common).Should().Be(1);
    common.MappingSequence("spec", "template", "spec", "volumes")
        .Select(volume => ManifestNodeExtensions.Scalar(volume, "persistentVolumeClaim", "claimName"))
        .Where(value => !string.IsNullOrWhiteSpace(value))
        .Should()
        .ContainSingle()
        .Which
        .Should()
        .Be("github-runner-nuget-cache");
}
|
||||
|
||||
// Fails when any GitHub-runner Deployment's pod spec names a forbidden
// operator-workstation host through nodeName, nodeSelector (key or value),
// nodeAffinity (required or preferred terms, matchExpressions or matchFields),
// or tolerations (key or value).
[Fact]
public void Runners_MustNotPinToOperatorWorkstationHosts()
{
    // CRITICAL SAFETY (operator directive 2026-05-26): BLUEJAY-WS is the
    // operator's primary workstation — host of the 1Password Connect
    // bearer token, fcadmin SSH keys to noc1, signing CA private keys,
    // and source for every FC repo. A self-hosted GitHub Actions runner
    // there would execute arbitrary PR code with that local access.
    // Build-side analog of the Sprint 9 NEW safe-account exclusion gate
    // (Puppet GPO/AppLocker/WDAC/audit-forwarder modules refuse to apply
    // on BLUEJAY-WS). This lint asserts no GitHub-runner Deployment in
    // apps/github-runner/ pins to a forbidden operator-workstation host
    // via nodeName, nodeSelector, nodeAffinity, or tolerations.
    // Existing legacy `bluejay-ws-sandbox-1` GitHub-registered runner is
    // out of scope here (it's a runtime registration, not a K8s
    // Deployment) — see CLAUDE.md "Common Mistakes" entry and
    // feedback_bluejay_ws_never_public_runner.md.
    var forbiddenHostPatterns = new[]
    {
        "bluejay-ws",
        "BLUEJAY-WS",
        "bluejay-ws.iamworkin.lan",
        "iamworkin-ws",
    };

    // Case-insensitive substring match against every forbidden pattern.
    bool ContainsForbidden(string? value) =>
        !string.IsNullOrWhiteSpace(value)
        && forbiddenHostPatterns.Any(pattern => value!.Contains(pattern, StringComparison.OrdinalIgnoreCase));

    var violations = GitHubRunnerDeployments().Values.SelectMany(deployment =>
    {
        var local = new List<string>();
        var podSpec = ManifestNodeExtensions.Mapping(deployment.Root, "spec", "template", "spec");
        if (podSpec is null)
        {
            return local;
        }

        // nodeName: pins the pod to a specific node by name.
        var nodeName = ManifestNodeExtensions.Scalar(podSpec, "nodeName");
        if (ContainsForbidden(nodeName))
        {
            local.Add($"{deployment.Name} sets nodeName='{nodeName}' which targets a forbidden operator-workstation host.");
        }

        // nodeSelector: dict of label → value pinning the pod to nodes
        // carrying matching labels. Examples that would trip this:
        //   kubernetes.io/hostname: bluejay-ws
        //   flowercore.io/host: bluejay-ws.iamworkin.lan
        // Keys are checked as well, mirroring the toleration check below.
        var nodeSelector = ManifestNodeExtensions.Mapping(podSpec, "nodeSelector");
        if (nodeSelector is not null)
        {
            foreach (var entry in nodeSelector.Children)
            {
                var key = entry.Key is YamlScalarNode keyScalar ? keyScalar.Value : null;
                var value = entry.Value is YamlScalarNode valueScalar ? valueScalar.Value : null;
                if (ContainsForbidden(key) || ContainsForbidden(value))
                {
                    local.Add($"{deployment.Name} has nodeSelector entry '{key}: {value}' which targets a forbidden operator-workstation host.");
                }
            }
        }

        // nodeAffinity: collect the required nodeSelectorTerms plus the
        // "preference" term of every preferred entry, so a soft pin is caught too.
        var affinityTerms = ManifestNodeExtensions
            .MappingSequence(podSpec, "affinity", "nodeAffinity", "requiredDuringSchedulingIgnoredDuringExecution", "nodeSelectorTerms")
            .ToList();
        foreach (var preferred in ManifestNodeExtensions.MappingSequence(podSpec, "affinity", "nodeAffinity", "preferredDuringSchedulingIgnoredDuringExecution"))
        {
            var preference = ManifestNodeExtensions.Mapping(preferred, "preference");
            if (preference is not null)
            {
                affinityTerms.Add(preference);
            }
        }

        foreach (var term in affinityTerms)
        {
            // matchExpressions select on node labels; matchFields select on node
            // fields (metadata.name), which is the direct way to name a node.
            foreach (var requirementKind in new[] { "matchExpressions", "matchFields" })
            {
                foreach (var requirement in ManifestNodeExtensions.MappingSequence(term, requirementKind))
                {
                    var key = ManifestNodeExtensions.Scalar(requirement, "key");
                    foreach (var valueNode in ManifestNodeExtensions.ScalarSequence(requirement, "values"))
                    {
                        if (ContainsForbidden(valueNode))
                        {
                            local.Add($"{deployment.Name} has nodeAffinity {requirementKind} entry '{key}' value '{valueNode}' which targets a forbidden operator-workstation host.");
                        }
                    }
                }
            }
        }

        // tolerations: scheduling onto a tainted operator-workstation
        // node would let the runner run there. Forbid any toleration
        // key or value that names the workstation.
        foreach (var toleration in ManifestNodeExtensions.MappingSequence(podSpec, "tolerations"))
        {
            var key = ManifestNodeExtensions.Scalar(toleration, "key");
            var value = ManifestNodeExtensions.Scalar(toleration, "value");
            if (ContainsForbidden(key))
            {
                local.Add($"{deployment.Name} has toleration key '{key}' which targets a forbidden operator-workstation host.");
            }

            if (ContainsForbidden(value))
            {
                local.Add($"{deployment.Name} has toleration value '{value}' which targets a forbidden operator-workstation host.");
            }
        }

        return local;
    }).ToList();

    violations.Should().BeEmpty("BLUEJAY-WS / iamworkin-ws must never host a fleet GitHub Actions runner; see CLAUDE.md 'Registering BLUEJAY-WS as a fleet GitHub Actions runner' and feedback_bluejay_ws_never_public_runner.md");
}
|
||||
|
||||
// noc-monitoring.yaml must contain the runner-offline alert names, the
// ready-replica metric, the runner deployment regex, and the alert routing markers.
[Fact]
public void Monitoring_MustAlertWhenLinuxRunnerDeploymentIsUnavailable()
{
    var monitoringPath = Path.Combine(Inventory.BluejayRoot, "apps", "monitoring", "noc-monitoring.yaml");
    var monitoring = File.ReadAllText(monitoringPath);

    string[] requiredFragments =
    {
        "MacMiniRunnerOffline",
        "LinuxRunnerOffline",
        "kube_deployment_status_replicas_ready",
        "github-runner(|-(sharedpos|puppet|signage|dms|telephony|print-web|chat|mysql|kiosk-linux))",
        "folder: CI Alerts",
        "uid: linux-runner-offline",
        "alert_channel: irc",
    };

    foreach (var fragment in requiredFragments)
    {
        monitoring.Should().Contain(fragment);
    }
}
|
||||
|
||||
// The generic restart and replica-mismatch expressions in noc-monitoring.yaml must
// filter out the github-runner namespace and point at the dedicated runner alerts.
[Fact]
public void Monitoring_GenericKubernetesAlerts_MustExcludeEphemeralGithubRunnerNamespace()
{
    var monitoringPath = Path.Combine(Inventory.BluejayRoot, "apps", "monitoring", "noc-monitoring.yaml");
    var monitoring = File.ReadAllText(monitoringPath);

    string[] requiredFragments =
    {
        "kube_pod_container_status_restarts_total{namespace!=\"github-runner\"}",
        "and on(namespace, pod) kube_pod_info",
        "kube_deployment_spec_replicas{namespace!=\"github-runner\"} != kube_deployment_status_replicas_available{namespace!=\"github-runner\"}",
        "dedicated LinuxRunnerOffline/MacMiniRunnerOffline alerts",
    };

    foreach (var fragment in requiredFragments)
    {
        monitoring.Should().Contain(fragment);
    }
}
|
||||
|
||||
// noc-monitoring.yaml must list the health endpoints below and must not probe the
// bare print.iamworkin.lan root URL.
[Fact]
public void Monitoring_BlackboxTargetsForOidcSensitiveServices_MustUseAnonymousHealthRoutesWhenAvailable()
{
    var monitoringPath = Path.Combine(Inventory.BluejayRoot, "apps", "monitoring", "noc-monitoring.yaml");
    var monitoring = File.ReadAllText(monitoringPath);

    string[] healthTargets =
    {
        "https://chat.iamworkin.lan/healthz",
        "https://dist.iamworkin.lan/healthz",
        "https://dms.iamworkin.lan/healthz",
        "https://print.iamworkin.lan/healthz",
        "https://knowledge.iamworkin.lan/healthz",
        "https://library.iamworkin.lan/health",
        "https://aistation.iamworkin.lan/healthz",
    };

    foreach (var target in healthTargets)
    {
        monitoring.Should().Contain(target);
    }

    monitoring.Should().NotContain("https://print.iamworkin.lan/\"");
}
|
||||
|
||||
// Any Deployment container that enables both auth and OIDC and probes /healthz
// (readiness or startup) must sit in a pod annotated with
// flowercore.io/healthz-auth-policy: allow-anonymous.
[Fact]
public void OidcEnforcedDeployments_WithHttpHealthzProbes_MustDeclareAnonymousHealthzContract()
{
    static bool IsTrue(string? flag) => string.Equals(flag, "true", StringComparison.OrdinalIgnoreCase);

    var violations = new List<string>();

    foreach (var document in Inventory.Documents.Where(candidate => candidate.Kind == "Deployment"))
    {
        foreach (var container in document.MainContainerMappings())
        {
            if (!IsTrue(EnvValue(container, "FlowerCore__Auth__Enabled")))
            {
                continue;
            }

            if (!IsTrue(EnvValue(container, "FlowerCore__Auth__Oidc__Enabled")))
            {
                continue;
            }

            var probesHealthz = ProbeHttpGetPath(container, "readinessProbe") == "/healthz"
                || ProbeHttpGetPath(container, "startupProbe") == "/healthz";
            if (!probesHealthz)
            {
                continue;
            }

            var policy = PodAnnotation(document, "flowercore.io/healthz-auth-policy");
            if (string.Equals(policy, "allow-anonymous", StringComparison.Ordinal))
            {
                continue;
            }

            var containerName = ManifestNodeExtensions.Scalar(container, "name") ?? "<unnamed>";
            violations.Add($"{document.Descriptor} container '{containerName}' enforces OIDC while probing /healthz but lacks flowercore.io/healthz-auth-policy: allow-anonymous.");
        }
    }

    violations.Should().BeEmpty();
}
|
||||
|
||||
// The knowledge/knowledge-web Deployment must enable auth and OIDC, probe
// /healthz for readiness, and carry the allow-anonymous healthz annotation.
[Fact]
public void Knowledge_OidcEnforcement_MustKeepHealthzAnonymousContractVisibleInManifest()
{
    var knowledge = Inventory.Documents.Single(document =>
        document.Kind == "Deployment"
        && document.Namespace == "knowledge"
        && document.Name == "knowledge-web");
    var container = knowledge.MainContainerMappings().Should().ContainSingle().Subject;

    foreach (var authFlag in new[] { "FlowerCore__Auth__Enabled", "FlowerCore__Auth__Oidc__Enabled" })
    {
        EnvValue(container, authFlag).Should().Be("true");
    }

    ProbeHttpGetPath(container, "readinessProbe").Should().Be("/healthz");
    PodAnnotation(knowledge, "flowercore.io/healthz-auth-policy").Should().Be("allow-anonymous");
}
|
||||
|
||||
// The fc-distribution/fc-distribution Deployment must enable OIDC and auth, probe
// /healthz for readiness, and carry the allow-anonymous healthz annotation.
[Fact]
public void Distribution_OidcEnforcement_MustKeepHealthzAnonymousContractVisibleInManifest()
{
    var distribution = Inventory.Documents.Single(document =>
        document.Kind == "Deployment"
        && document.Namespace == "fc-distribution"
        && document.Name == "fc-distribution");
    var container = distribution.MainContainerMappings().Should().ContainSingle().Subject;

    foreach (var authFlag in new[] { "FlowerCore__Auth__Oidc__Enabled", "FlowerCore__Auth__Enabled" })
    {
        EnvValue(container, authFlag).Should().Be("true");
    }

    ProbeHttpGetPath(container, "readinessProbe").Should().Be("/healthz");
    PodAnnotation(distribution, "flowercore.io/healthz-auth-policy").Should().Be("allow-anonymous");
}
|
||||
|
||||
[Fact]
|
||||
public void StatefulSets_WithVolumeClaimTemplates_MustDeclareFilesystemDefaults()
|
||||
{
|
||||
@@ -291,6 +603,324 @@ public sealed class FleetManifestLintTests
|
||||
violations.Should().BeEmpty();
|
||||
}
|
||||
|
||||
// apps/fc-devicemgmt must exist, contain exactly the expected *.yaml files, and
// each expected file must appear among the parsed fc-devicemgmt documents.
[Fact]
public void FcDeviceManagement_MustShipExpectedManifestSet()
{
    var appRoot = Path.Combine(Inventory.BluejayRoot, "apps", "fc-devicemgmt");
    Directory.Exists(appRoot).Should().BeTrue("Sprint 8 Cx-5 owns apps/fc-devicemgmt.");

    var expectedFiles = new[]
    {
        "1password-item.yaml",
        "argocd-application.yaml",
        "certificate-web.yaml",
        "clusterrole-operator.yaml",
        "clusterrolebinding-operator.yaml",
        "deployment-operator.yaml",
        "deployment-web.yaml",
        "ingressroute-web.yaml",
        "namespace.yaml",
        "network-policy.yaml",
        "service-web.yaml",
        "serviceaccount-operator.yaml",
    };

    Directory.GetFiles(appRoot, "*.yaml")
        .Select(Path.GetFileName)
        .Should()
        .BeEquivalentTo(expectedFiles);

    // Resolve the document list once instead of once per expected file.
    var documents = FcDeviceManagementDocuments().ToList();

    foreach (var expectedFile in expectedFiles)
    {
        // The reason names the file so a failure says which manifest is missing.
        documents
            .Should()
            .Contain(
                document => document.RelativePath == $"fc-devicemgmt/{expectedFile}",
                "{0} must yield at least one parsed manifest document",
                expectedFile);
    }
}
|
||||
|
||||
// Every fc-devicemgmt object must carry the five required labels; Deployments must
// also carry them on the pod template, plus a flowercore.io/audit-trace-id annotation.
[Fact]
public void FcDeviceManagement_ObjectsMustCarryStandardTraceabilityLabels()
{
    string[] requiredLabels =
    {
        "app.kubernetes.io/name",
        "app.kubernetes.io/part-of",
        "app.kubernetes.io/managed-by",
        "flowercore.io/tenant-id",
        "flowercore.io/created-by",
    };

    // Labels missing from the object's own metadata.
    var missingObjectLabels =
        from document in FcDeviceManagementDocuments()
        from label in requiredLabels
        where string.IsNullOrWhiteSpace(document.Scalar("metadata", "labels", label))
        select $"{document.Descriptor} is missing metadata.labels['{label}'].";

    // Labels missing from a Deployment's pod template.
    var missingPodTemplateLabels =
        from document in FcDeviceManagementDocuments()
        where document.Kind == "Deployment"
        from label in requiredLabels
        where string.IsNullOrWhiteSpace(document.Scalar("spec", "template", "metadata", "labels", label))
        select $"{document.Descriptor} pod template is missing metadata.labels['{label}'].";

    // Deployments whose pod template lacks the audit trace annotation.
    var missingAuditTraceIds =
        from document in FcDeviceManagementDocuments()
        where document.Kind == "Deployment"
        where string.IsNullOrWhiteSpace(document.Scalar("spec", "template", "metadata", "annotations", "flowercore.io/audit-trace-id"))
        select $"{document.Descriptor} pod template is missing flowercore.io/audit-trace-id.";

    var violations = missingObjectLabels
        .Concat(missingPodTemplateLabels)
        .Concat(missingAuditTraceIds)
        .ToList();

    violations.Should().BeEmpty();
}
|
||||
|
||||
// The fc-devicemgmt manifests must not use a Traefik certResolver, must mention
// update.flowercore.io only alongside the disabled-until-Q-OIDC-1 marker, must route
// devices.iamworkin.lan but not update.flowercore.io, and must issue the web
// certificate from the step-ca-acme ClusterIssuer for devices.iamworkin.lan only.
[Fact]
public void FcDeviceManagement_IngressMustUseCertManagerAndKeepPublicHostDisabled()
{
    var appText = string.Join(
        Environment.NewLine,
        Directory.GetFiles(Path.Combine(Inventory.BluejayRoot, "apps", "fc-devicemgmt"), "*.yaml")
            .Select(File.ReadAllText));

    appText.Should().NotContain("certResolver");
    appText.Should().Contain("update.flowercore.io");
    appText.Should().Contain("disabled-until-Q-OIDC-1");

    FcDeviceManagementDocuments()
        .Where(document => document.Kind == "IngressRoute")
        .SelectMany(document => document.MappingSequence("spec", "routes"))
        .Select(route => ManifestNodeExtensions.Scalar(route, "match") ?? string.Empty)
        .Should()
        .Contain(match => match.Contains("Host(`devices.iamworkin.lan`)", StringComparison.Ordinal))
        .And.NotContain(match => match.Contains("Host(`update.flowercore.io`)", StringComparison.Ordinal));

    var certificate = FcDeviceManagementDocuments()
        .Single(document => document.Kind == "Certificate" && document.Name == "fc-devicemgmt-web-tls");

    certificate.Scalar("spec", "issuerRef", "name").Should().Be("step-ca-acme");
    certificate.Scalar("spec", "issuerRef", "kind").Should().Be("ClusterIssuer");

    // ContainSingle(string) treats its argument as the failure reason, not as the
    // expected element, so the value has to be asserted through .Which.
    ManifestNodeExtensions.ScalarSequence(certificate.Root, "spec", "dnsNames")
        .Should()
        .ContainSingle()
        .Which
        .Should()
        .Be("devices.iamworkin.lan");
}
|
||||
|
||||
// The operator ClusterRole must mention the devices.flowercore.io group, a wildcard,
// "deployments" and "get"; the operator Deployment must mention the owner-deployment
// environment variable and its own name.
[Fact]
public void FcDeviceManagement_OperatorRbacMustCoverDevicesAndOwnerLookup()
{
    var clusterRole = FcDeviceManagementDocuments()
        .Single(document => document.Kind == "ClusterRole" && document.Name == "fc-devicemgmt-operator");
    var allScalars = clusterRole.AllScalars().ToList();

    foreach (var requiredScalar in new[] { "devices.flowercore.io", "*", "deployments", "get" })
    {
        allScalars.Should().Contain(requiredScalar);
    }

    var operatorDeployment = FcDeviceManagementDocuments()
        .Single(document => document.Kind == "Deployment" && document.Name == "fc-devicemgmt-operator");

    foreach (var requiredScalar in new[] { "FLOWERCORE_KUBERNETES_OWNER_DEPLOYMENT", "fc-devicemgmt-operator" })
    {
        operatorDeployment.AllScalars().Should().Contain(requiredScalar);
    }
}
|
||||
|
||||
// Runtime secrets must come from the fc-devicemgmt-runtime OnePasswordItem: no Secret
// documents, secretKeyRef usage present, and no inline secret material markers.
[Fact]
public void FcDeviceManagement_RuntimeSecretsMustUseOnePasswordItemPattern()
{
    var item = FcDeviceManagementDocuments()
        .Single(document => document.Kind == "OnePasswordItem" && document.Name == "fc-devicemgmt-runtime");

    item.Scalar("spec", "itemPath")
        .Should()
        .Be("vaults/IAmWorkin/items/FlowerCore DeviceManagement Runtime");

    // Concatenate the raw YAML so textual markers can be checked across all files.
    var appRoot = Path.Combine(Inventory.BluejayRoot, "apps", "fc-devicemgmt");
    var yamlContents = Directory.GetFiles(appRoot, "*.yaml").Select(File.ReadAllText);
    var appText = string.Join(Environment.NewLine, yamlContents);

    FcDeviceManagementDocuments().Should().NotContain(document => document.Kind == "Secret");

    foreach (var requiredFragment in new[] { "secretKeyRef:", "secretName: fc-devicemgmt-runtime" })
    {
        appText.Should().Contain(requiredFragment);
    }

    foreach (var forbiddenFragment in new[] { "stringData:", "from-literal", "tls.key:" })
    {
        appText.Should().NotContain(forbiddenFragment);
    }
}
|
||||
|
||||
// Exactly two NetworkPolicies must exist; together they must mention the listed
// CIDRs and egress ports, and exactly one policy must reference 10.0.56.200 while
// allowing egress on 80/443/8080/8443.
[Fact]
public void FcDeviceManagement_NetworkPoliciesMustAllowLanAgentsSynologyAndDnatPorts()
{
    var policies = FcDeviceManagementDocuments()
        .Where(document => document.Kind == "NetworkPolicy")
        .ToList();

    policies.Should().HaveCount(2);

    var combinedScalars = policies.SelectMany(policy => policy.AllScalars()).ToList();
    string[] requiredCidrs =
    {
        "10.0.56.0/24",
        "10.0.57.0/24",
        "10.0.58.0/24",
        "10.0.68.0/27",
        "10.0.58.3/32",
    };

    foreach (var cidr in requiredCidrs)
    {
        combinedScalars.Should().Contain(cidr);
    }

    var combinedEgressPorts = policies.SelectMany(policy => policy.EgressPorts()).ToHashSet(StringComparer.Ordinal);
    combinedEgressPorts.Should().Contain(new[] { "80", "443", "8080", "8443", "2049", "111" });

    var traefikVipPolicies = policies
        .Where(policy => policy.AllScalars().Any(value => value.Contains("10.0.56.200", StringComparison.Ordinal)))
        .ToList();

    traefikVipPolicies.Should().ContainSingle();
    traefikVipPolicies[0].EgressPorts().Should().Contain(new[] { "80", "443", "8080", "8443" });
}
|
||||
|
||||
// The infra-fc-devicemgmt Argo CD Application must live in the argocd namespace,
// source apps/fc-devicemgmt from the in-cluster Gitea repo, and target the
// fc-devicemgmt namespace.
[Fact]
public void FcDeviceManagement_ArgocdApplicationMustMatchApplicationSetDiscoveryConventions()
{
    var application = FcDeviceManagementDocuments()
        .Single(document => document.Kind == "Application" && document.Name == "infra-fc-devicemgmt");

    application.Namespace.Should().Be("argocd");

    var repoUrl = application.Scalar("spec", "source", "repoURL");
    repoUrl.Should().Be("http://gitea-clusterip.gitea.svc.cluster.local:3000/bluejay/bluejay-infra.git");

    var sourcePath = application.Scalar("spec", "source", "path");
    sourcePath.Should().Be("apps/fc-devicemgmt");

    var destinationNamespace = application.Scalar("spec", "destination", "namespace");
    destinationNamespace.Should().Be("fc-devicemgmt");
}
|
||||
|
||||
// For dns, media and distribution: the Deployment must enable auth and OIDC, use the
// expected audience/client id, reference the expected optional client secret, probe
// /healthz for readiness, and use /healthz for any startup or liveness probe present.
[Fact]
public void OidcFlipServices_AreGitOpsManagedWithHealthzProbes()
{
    var deployments = new (string App, string Name, string Slug, string Secret)[]
    {
        ("fc-dns", "dns-web", "dns", "dns-oidc-client"),
        ("fc-media", "fc-media-web", "media", "media-oidc-client"),
        ("fc-distribution", "fc-distribution", "distribution", "distribution-oidc-client"),
    };

    foreach (var (app, name, slug, secret) in deployments)
    {
        var deployment = AppDocuments(app)
            .Single(document => document.Kind == "Deployment" && document.Name == name);
        var container = deployment.MainContainerMappings().Should().ContainSingle().Subject;

        EnvValue(container, "FlowerCore__Auth__Enabled").Should().Be("true");
        EnvValue(container, "FlowerCore__Auth__Oidc__Enabled").Should().Be("true");

        // Audience wins when set; otherwise fall back to the client id.
        var audienceOrClientId = EnvValue(container, "FlowerCore__Auth__Oidc__Audience")
            ?? EnvValue(container, "FlowerCore__Auth__Oidc__ClientId");
        audienceOrClientId.Should().Be(slug);

        EnvSecretName(container, "FlowerCore__Auth__Oidc__ClientSecret").Should().Be(secret);
        EnvSecretOptional(container, "FlowerCore__Auth__Oidc__ClientSecret").Should().Be("true");

        ProbePath(container, "readinessProbe").Should().Be("/healthz");

        // Startup and liveness probes are optional, but must hit /healthz when declared.
        foreach (var optionalProbe in new[] { "startupProbe", "livenessProbe" })
        {
            if (ProbePath(container, optionalProbe) is { } optionalProbePath)
            {
                optionalProbePath.Should().Be("/healthz");
            }
        }
    }
}
|
||||
|
||||
// Each OIDC-flipped app must ship a OnePasswordItem with the expected name whose
// spec.itemPath points at the matching IAmWorkin vault item.
[Fact]
public void OidcFlipServices_UseOnePasswordItemClientSecrets()
{
    var expectedItems = new Dictionary<string, (string Name, string ItemPath)>(StringComparer.Ordinal)
    {
        { "fc-dns", ("dns-oidc-client", "vaults/IAmWorkin/items/dns-oidc-client") },
        { "fc-media", ("media-oidc-client", "vaults/IAmWorkin/items/media-oidc-client") },
        { "fc-distribution", ("distribution-oidc-client", "vaults/IAmWorkin/items/distribution-oidc-client") },
    };

    foreach (var (app, expectedItem) in expectedItems)
    {
        var item = AppDocuments(app)
            .Single(document => document.Kind == "OnePasswordItem" && document.Name == expectedItem.Name);

        item.Scalar("spec", "itemPath").Should().Be(expectedItem.ItemPath);
    }
}
|
||||
|
||||
// Pins the image tags of the dns, media and distribution Deployments, the storage
// class and size of the dns and media PVCs, and the media host paths.
[Fact]
public void DnsAndMediaGitOpsAdoption_PreservesLiveStorageAndImageShape()
{
    // --- fc-dns ---
    var dnsDeployment = AppDocuments("fc-dns")
        .Single(document => document.Kind == "Deployment" && document.Name == "dns-web");
    var dnsContainer = dnsDeployment.MainContainerMappings().Should().ContainSingle().Subject;
    var dnsPvc = AppDocuments("fc-dns")
        .Single(document => document.Kind == "PersistentVolumeClaim" && document.Name == "dns-web-data");

    var dnsImage = ManifestNodeExtensions.Scalar(dnsContainer, "image");
    dnsImage.Should().Be("localhost/fc-dns-web:v20260604-oidc-proper");
    dnsPvc.Scalar("spec", "storageClassName").Should().Be("longhorn");
    dnsPvc.Scalar("spec", "resources", "requests", "storage").Should().Be("1Gi");

    // --- fc-media ---
    var mediaDeployment = AppDocuments("fc-media")
        .Single(document => document.Kind == "Deployment" && document.Name == "fc-media-web");
    var mediaContainer = mediaDeployment.MainContainerMappings().Should().ContainSingle().Subject;
    var mediaPvc = AppDocuments("fc-media")
        .Single(document => document.Kind == "PersistentVolumeClaim" && document.Name == "fc-media-data");

    var mediaImage = ManifestNodeExtensions.Scalar(mediaContainer, "image");
    mediaImage.Should().Be("localhost/fc-media-web:v20260604-oidc-proper");
    mediaPvc.Scalar("spec", "storageClassName").Should().Be("longhorn");
    mediaPvc.Scalar("spec", "resources", "requests", "storage").Should().Be("20Gi");

    string[] mediaHostPaths =
    {
        "/volume1/kubernetes/fc-media-transcodes",
        "/volume1/kubernetes/fc-media-inbox",
        "/volume1/video",
    };
    mediaDeployment.AllScalars().Should().Contain(mediaHostPaths);

    // --- fc-distribution ---
    var distributionDeployment = AppDocuments("fc-distribution")
        .Single(document => document.Kind == "Deployment" && document.Name == "fc-distribution");
    var distributionContainer = distributionDeployment.MainContainerMappings().Should().ContainSingle().Subject;

    var distributionImage = ManifestNodeExtensions.Scalar(distributionContainer, "image");
    distributionImage.Should().Be("localhost/fc-distribution:v20260604-oidc-root-anon");
}
|
||||
|
||||
/// <summary>
/// Reads <c>apps/monitoring/noc-monitoring.yaml</c> and checks that the dns, dist and
/// media hosts are probed at <c>/healthz</c> and never at the bare root URL.
/// </summary>
[Fact]
public void MonitoringProbes_UseHealthzForOidcGatedHosts()
{
    var manifestPath = Path.Combine(Inventory.BluejayRoot, "apps", "monitoring", "noc-monitoring.yaml");
    var manifestText = File.ReadAllText(manifestPath);

    string[] healthzTargets =
    {
        "\"https://dns.iamworkin.lan/healthz\"",
        "\"https://dist.iamworkin.lan/healthz\"",
        "\"https://media.iamworkin.lan/healthz\"",
    };
    string[] bareRootTargets =
    {
        "\"https://dns.iamworkin.lan/\"",
        "\"https://dist.iamworkin.lan/\"",
        "\"https://media.iamworkin.lan/\"",
    };

    // All presence checks run before any absence check.
    foreach (var target in healthzTargets)
    {
        manifestText.Should().Contain(target);
    }

    foreach (var target in bareRootTargets)
    {
        manifestText.Should().NotContain(target);
    }
}
|
||||
|
||||
/// <summary>
/// Checks that the single route of the <c>fc-distribution-public</c> IngressRoute matches
/// host <c>dist.flowercore.io</c>, allows GET and HEAD, and does not mention POST.
/// </summary>
[Fact]
public void DistributionPublicIngress_KeepsGetHeadMethodAllowlist()
{
    var distributionDocs = AppDocuments("fc-distribution");
    var ingress = distributionDocs.Single(doc =>
        doc.Kind == "IngressRoute" && doc.Name == "fc-distribution-public");

    var onlyRoute = ingress.MappingSequence("spec", "routes").Should().ContainSingle().Subject;
    var matchRule = ManifestNodeExtensions.Scalar(onlyRoute, "match");

    var requiredClauses = new[]
    {
        "Host(`dist.flowercore.io`)",
        "Method(`GET`)",
        "Method(`HEAD`)",
    };
    foreach (var clause in requiredClauses)
    {
        matchRule.Should().Contain(clause);
    }

    matchRule.Should().NotContain("Method(`POST`)");
}
|
||||
|
||||
/// <summary>
/// Checks that the <c>dns-web</c> and <c>fc-media-web</c> IngressRoutes each have exactly one
/// route, and that the route's <c>match</c> is the expected internal host rule.
/// </summary>
[Fact]
public void DnsAndMediaIngressRoutes_MatchLiveInternalHosts()
{
    // Resolve both routes before asserting on either match expression.
    var dnsIngress = AppDocuments("fc-dns")
        .Single(doc => doc.Kind == "IngressRoute" && doc.Name == "dns-web");
    var dnsOnlyRoute = dnsIngress.MappingSequence("spec", "routes").Should().ContainSingle().Subject;

    var mediaIngress = AppDocuments("fc-media")
        .Single(doc => doc.Kind == "IngressRoute" && doc.Name == "fc-media-web");
    var mediaOnlyRoute = mediaIngress.MappingSequence("spec", "routes").Should().ContainSingle().Subject;

    var dnsMatch = ManifestNodeExtensions.Scalar(dnsOnlyRoute, "match");
    dnsMatch.Should().Be("Host(`dns.iamworkin.lan`)");

    var mediaMatch = ManifestNodeExtensions.Scalar(mediaOnlyRoute, "match");
    mediaMatch.Should().Be("Host(`media.iamworkin.lan`)");
}
|
||||
|
||||
private static IEnumerable<string> ProbeViolations(
|
||||
ManifestDocument document,
|
||||
YamlMappingNode container,
|
||||
@@ -314,6 +944,83 @@ public sealed class FleetManifestLintTests
|
||||
$"{document.Descriptor} container '{containerName}' still uses {probeKey}.httpGet on /health.",
|
||||
};
|
||||
}
|
||||
|
||||
/// <summary>
/// Collects every <c>Deployment</c> document in the <c>github-runner</c> namespace,
/// keyed by document name with ordinal key comparison.
/// </summary>
/// <returns>A name-to-document lookup of the matching deployments.</returns>
private static IReadOnlyDictionary<string, ManifestDocument> GitHubRunnerDeployments()
{
    var runnerDeployments =
        from candidate in Inventory.Documents
        where candidate.Kind == "Deployment" && candidate.Namespace == "github-runner"
        select candidate;

    // ToDictionary throws if two deployments share a name.
    return runnerDeployments.ToDictionary(deployment => deployment.Name, StringComparer.Ordinal);
}
|
||||
|
||||
/// <summary>
/// Reads <c>spec.replicas</c> from <paramref name="document"/> as an integer.
/// </summary>
/// <param name="document">The manifest document to inspect.</param>
/// <returns>
/// The parsed replica count, or 1 when the scalar cannot be parsed as an integer
/// (including when it is null).
/// </returns>
private static int ReplicaCount(ManifestDocument document)
{
    var rawReplicas = document.Scalar("spec", "replicas");

    // Manifest values are machine-readable, so parse with the invariant culture rather
    // than whatever culture the test host happens to run under.
    return int.TryParse(
        rawReplicas,
        System.Globalization.NumberStyles.Integer,
        System.Globalization.CultureInfo.InvariantCulture,
        out var replicas)
        ? replicas
        : 1;
}
|
||||
|
||||
/// <summary>
/// Returns the literal <c>value</c> of the env entry called <paramref name="name"/>,
/// or <c>null</c> when the container has no such entry.
/// </summary>
private static string? EnvValue(YamlMappingNode container, string name)
{
    var entry = EnvMapping(container, name);
    if (entry is null)
    {
        return null;
    }

    return ManifestNodeExtensions.Scalar(entry, "value");
}
|
||||
|
||||
/// <summary>
/// Returns <c>valueFrom.secretKeyRef.name</c> of the env entry called
/// <paramref name="name"/>, or <c>null</c> when the container has no such entry.
/// </summary>
private static string? EnvSecretName(YamlMappingNode container, string name)
{
    var entry = EnvMapping(container, name);
    if (entry is null)
    {
        return null;
    }

    return ManifestNodeExtensions.Scalar(entry, "valueFrom", "secretKeyRef", "name");
}
|
||||
|
||||
/// <summary>
/// Returns <c>valueFrom.secretKeyRef.key</c> of the env entry called
/// <paramref name="name"/>, or <c>null</c> when the container has no such entry.
/// </summary>
private static string? EnvSecretKey(YamlMappingNode container, string name)
{
    var entry = EnvMapping(container, name);
    if (entry is null)
    {
        return null;
    }

    return ManifestNodeExtensions.Scalar(entry, "valueFrom", "secretKeyRef", "key");
}
|
||||
|
||||
/// <summary>
/// Returns the raw scalar at <c>valueFrom.secretKeyRef.optional</c> of the env entry called
/// <paramref name="name"/>, or <c>null</c> when the container has no such entry.
/// </summary>
private static string? EnvSecretOptional(YamlMappingNode container, string name)
{
    var entry = EnvMapping(container, name);
    if (entry is null)
    {
        return null;
    }

    return ManifestNodeExtensions.Scalar(entry, "valueFrom", "secretKeyRef", "optional");
}
|
||||
|
||||
/// <summary>
/// Looks up the scalar at <c>&lt;probeKey&gt;.httpGet.path</c> on <paramref name="container"/>.
/// </summary>
/// <param name="container">The container mapping to inspect.</param>
/// <param name="probeKey">The probe key, e.g. <c>livenessProbe</c>.</param>
private static string? ProbePath(YamlMappingNode container, string probeKey)
    => ManifestNodeExtensions.Scalar(container, probeKey, "httpGet", "path");
|
||||
|
||||
/// <summary>
/// Returns the inventory documents whose relative path starts with <c>&lt;app&gt;/</c>
/// (ordinal comparison), in inventory order.
/// </summary>
/// <param name="app">The app directory name, without a trailing slash.</param>
private static IReadOnlyList<ManifestDocument> AppDocuments(string app)
{
    var appPrefix = $"{app}/";
    var matches = new List<ManifestDocument>();

    foreach (var candidate in Inventory.Documents)
    {
        if (candidate.RelativePath.StartsWith(appPrefix, StringComparison.Ordinal))
        {
            matches.Add(candidate);
        }
    }

    return matches;
}
|
||||
|
||||
/// <summary>
/// Finds the entry of the container's <c>env</c> sequence whose <c>name</c> scalar equals
/// <paramref name="name"/> (ordinal comparison).
/// </summary>
/// <returns>The matching env mapping, or <c>null</c> when there is none.</returns>
private static YamlMappingNode? EnvMapping(YamlMappingNode container, string name)
{
    bool IsNamed(YamlMappingNode entry)
    {
        var entryName = ManifestNodeExtensions.Scalar(entry, "name");
        return string.Equals(entryName, name, StringComparison.Ordinal);
    }

    var envEntries = ManifestNodeExtensions.MappingSequence(container, "env");

    // SingleOrDefault throws when more than one entry carries the same name.
    return envEntries.SingleOrDefault(IsNamed);
}
|
||||
|
||||
/// <summary>
/// Looks up the scalar at <c>spec.template.metadata.annotations.&lt;name&gt;</c>
/// on <paramref name="document"/>.
/// </summary>
private static string? PodAnnotation(ManifestDocument document, string name)
    => document.Scalar("spec", "template", "metadata", "annotations", name);
|
||||
|
||||
/// <summary>
/// Returns the <c>path</c> scalar of the <c>httpGet</c> mapping under
/// <paramref name="probeKey"/>, or <c>null</c> when either mapping is missing.
/// </summary>
private static string? ProbeHttpGetPath(YamlMappingNode container, string probeKey)
{
    if (!ManifestNodeExtensions.TryGetMapping(container, probeKey, out var probe))
    {
        return null;
    }

    if (!ManifestNodeExtensions.TryGetMapping(probe, "httpGet", out var httpGet))
    {
        return null;
    }

    return ManifestNodeExtensions.Scalar(httpGet, "path");
}
|
||||
|
||||
/// <summary>
/// Returns the inventory documents whose relative path starts with <c>fc-devicemgmt/</c>.
/// </summary>
private static IReadOnlyList<ManifestDocument> FcDeviceManagementDocuments()
{
    // AppDocuments appends the trailing slash and applies the same ordinal prefix filter.
    return AppDocuments("fc-devicemgmt");
}
|
||||
}
|
||||
|
||||
internal sealed class ManifestInventory
|
||||
@@ -537,6 +1244,22 @@ internal sealed record ManifestDocument(
|
||||
.ToList();
|
||||
}
|
||||
|
||||
/// <summary>
/// Returns only the entries of the pod spec's <c>containers</c> sequence; initContainers
/// are not included.
/// </summary>
/// <remarks>
/// Use this when asserting properties of the primary container (env, image, volumeMounts)
/// where an initContainer would be a false-positive match — e.g. the GitHub runner image's
/// <c>setup-runner-home</c> initContainer should not count toward the single-container
/// assertions on the runner deployments.
/// </remarks>
/// <returns>The container mappings, or an empty list when there is no pod spec.</returns>
public IReadOnlyList<YamlMappingNode> MainContainerMappings()
{
    if (PodSpec() is { } spec)
    {
        return ManifestNodeExtensions.MappingSequence(spec, "containers").ToList();
    }

    return Array.Empty<YamlMappingNode>();
}
|
||||
|
||||
public IReadOnlyList<ContainerSpec> ContainerSpecs()
|
||||
{
|
||||
return ContainerMappings()
|
||||
|
||||
99
tests/bluejay-infra-lint/OpenVoxServerDurabilityTests.cs
Normal file
99
tests/bluejay-infra-lint/OpenVoxServerDurabilityTests.cs
Normal file
@@ -0,0 +1,99 @@
|
||||
using FluentAssertions;
|
||||
using Xunit;
|
||||
|
||||
namespace BluejayInfraLint.Tests;
|
||||
|
||||
/// <summary>
/// Text-level lint tests for the openvoxserver durability runbook
/// (<c>docs/runbooks/openvoxserver-quadlet-durability.md</c>) and its recreate smoke script
/// (<c>scripts/monitoring/openvox-recreate-smoke.sh</c>). The tests only read the files and
/// check for required or forbidden substrings; nothing is executed.
/// </summary>
[Trait("Category", "Unit")]
public sealed class OpenVoxServerDurabilityTests
{
    private static readonly string Root = FindRepoRoot();
    private static readonly string RunbookPath = Path.Combine(Root, "docs", "runbooks", "openvoxserver-quadlet-durability.md");
    private static readonly string SmokePath = Path.Combine(Root, "scripts", "monitoring", "openvox-recreate-smoke.sh");

    /// <summary>The runbook must contain the host-artifact and non-ArgoCD marker phrases.</summary>
    [Fact]
    public void Runbook_DocumentsHostArtifactAndNonArgoPath()
    {
        var runbook = File.ReadAllText(RunbookPath);

        runbook.Should().Contain("noc1 host artifact");
        runbook.Should().Contain("not an ArgoCD application");
        runbook.Should().Contain("systemctl cat openvoxserver");
        runbook.Should().Contain("/etc/containers/systemd/openvoxserver.container");
    }

    /// <summary>The runbook must contain the Sprint 32 Cx-12 live-apply marker phrases.</summary>
    [Fact]
    public void Runbook_DocumentsCx12LiveApplyState()
    {
        var runbook = File.ReadAllText(RunbookPath);

        runbook.Should().Contain("Sprint 32 Cx-12");
        runbook.Should().Contain("openvoxserver-safeconfig.service");
        runbook.Should().Contain("/opt/puppet/r10k-deploy.sh");
        runbook.Should().Contain("HEAD == origin/master");
    }

    /// <summary>
    /// The smoke script must mention the <c>OPENVOX_RECREATE_SMOKE</c> variable and
    /// <c>exit 64</c>, and the variable must first appear before the service stop command.
    /// </summary>
    [Fact]
    public void SmokeScript_IsExplicitlyOptIn()
    {
        var smoke = File.ReadAllText(SmokePath);

        smoke.Should().Contain("OPENVOX_RECREATE_SMOKE");
        smoke.Should().Contain("exit 64");
        AssertAppearsBefore(smoke, "OPENVOX_RECREATE_SMOKE", "systemctl stop openvoxserver");
    }

    /// <summary>
    /// The smoke script must check the generated systemd unit (<c>systemctl cat</c>) and carry
    /// the refusal message, and the check must first appear before the container removal.
    /// </summary>
    [Fact]
    public void SmokeScript_RequiresGeneratedSystemdUnitBeforeRemovingContainer()
    {
        var smoke = File.ReadAllText(SmokePath);

        smoke.Should().Contain("systemctl cat openvoxserver");
        smoke.Should().Contain("refusing to remove a container without a verified systemd recreate path");
        AssertAppearsBefore(smoke, "systemctl cat openvoxserver", "podman rm openvoxserver");
    }

    /// <summary>
    /// Neither artifact may contain a private-key header or one of the listed
    /// <c>*-latest</c> runner labels (case-insensitive match).
    /// </summary>
    [Fact]
    public void Artifacts_DoNotStoreSecretsOrPaidRunnerLabels()
    {
        var forbidden = new[]
        {
            "BEGIN OPENSSH PRIVATE KEY",
            "BEGIN RSA PRIVATE KEY",
            "ubuntu-latest",
            "windows-latest",
            "macos-latest",
        };

        var violations = new[] { RunbookPath, SmokePath }
            .SelectMany(path =>
            {
                var text = File.ReadAllText(path);
                return forbidden
                    .Where(token => text.Contains(token, StringComparison.OrdinalIgnoreCase))
                    .Select(token => $"{Path.GetRelativePath(Root, path)} contains forbidden token {token}");
            })
            .ToList();

        violations.Should().BeEmpty();
    }

    /// <summary>
    /// Asserts that the first occurrence of <paramref name="earlier"/> precedes the first
    /// occurrence of <paramref name="later"/> in <paramref name="text"/>.
    /// </summary>
    /// <remarks>
    /// <c>IndexOf</c> reports a missing marker as -1. Comparing that sentinel directly yields a
    /// misleading "expected N to be less than -1" failure, so presence is asserted first with an
    /// explicit reason.
    /// </remarks>
    private static void AssertAppearsBefore(string text, string earlier, string later)
    {
        var earlierIndex = text.IndexOf(earlier, StringComparison.Ordinal);
        var laterIndex = text.IndexOf(later, StringComparison.Ordinal);

        laterIndex.Should().NotBe(-1, "the script is expected to contain '{0}'", later);
        earlierIndex.Should().BeLessThan(laterIndex, "'{0}' must appear before '{1}'", earlier, later);
    }

    /// <summary>
    /// Walks up from the test binary's directory to the first ancestor that has an <c>apps</c>
    /// directory, a <c>scripts</c> directory and a <c>README.md</c> file.
    /// </summary>
    /// <exception cref="DirectoryNotFoundException">No ancestor directory matches.</exception>
    private static string FindRepoRoot()
    {
        var current = new DirectoryInfo(AppContext.BaseDirectory);
        while (current is not null)
        {
            if (Directory.Exists(Path.Combine(current.FullName, "apps"))
                && Directory.Exists(Path.Combine(current.FullName, "scripts"))
                && File.Exists(Path.Combine(current.FullName, "README.md")))
            {
                return current.FullName;
            }

            current = current.Parent;
        }

        throw new DirectoryNotFoundException("Could not find bluejay-infra root.");
    }
}
|
||||
269
tests/bluejay-infra-lint/PiSignagePlayerArtifactTests.cs
Normal file
269
tests/bluejay-infra-lint/PiSignagePlayerArtifactTests.cs
Normal file
@@ -0,0 +1,269 @@
|
||||
using System.Text.Json;
|
||||
using FluentAssertions;
|
||||
using Xunit;
|
||||
|
||||
namespace BluejayInfraLint.Tests;
|
||||
|
||||
/// <summary>
/// Text-level lint tests for the artifacts under <c>apps/fc-signage-pi-player</c>: systemd
/// units, udev rule, shell scripts and the Chromium policy file. Each test reads a file and
/// checks for required substrings (or JSON properties); nothing is executed.
/// </summary>
[Trait("Category", "Unit")]
public sealed class PiSignagePlayerArtifactTests
{
    private static readonly string Root = FindRepoRoot();
    private static readonly string AppRoot = Path.Combine(Root, "apps", "fc-signage-pi-player");

    /// <summary>Paths, relative to the app root, that must exist in the repository.</summary>
    public static TheoryData<string> RequiredArtifacts => new TheoryData<string>
    {
        "README.md",
        "systemd/flowercore-signage-player-pi.service",
        "systemd/flowercore-signage-player-pi-hdmi.service",
        "systemd/flowercore-signage-bootstrap.service",
        "systemd/flowercore-signage-renew.service",
        "systemd/flowercore-signage-renew.timer",
        "systemd/flowercore-signage-detect-display.service",
        "systemd/flowercore-signage-detect-display.timer",
        "systemd/99-flowercore-signage-hdmi.rules",
        "chromium-policies/flowercore-signage.json",
        "scripts/flowercore-signage-launch.sh",
        "scripts/flowercore-signage-prelaunch.sh",
        "scripts/flowercore-signage-bootstrap.sh",
        "scripts/flowercore-signage-renew-cert.sh",
        "scripts/flowercore-signage-hdmi-respond.sh",
        "scripts/fc-signage-detect-display",
    };

    [Theory]
    [MemberData(nameof(RequiredArtifacts))]
    public void RequiredArtifacts_ArePresent(string relativePath)
    {
        var artifactExists = File.Exists(Path.Combine(AppRoot, relativePath));

        // The relative path doubles as the failure reason.
        artifactExists.Should().BeTrue(relativePath);
    }

    [Fact]
    public void PlayerService_UsesExpectedRestartAndMemoryGuards()
    {
        var playerUnit = Read("systemd/flowercore-signage-player-pi.service");

        ShouldContainEach(
            playerUnit,
            "Restart=always",
            "RestartSec=10s",
            "StartLimitBurst=5",
            "StartLimitIntervalSec=300s",
            "MemoryMax=2G");
    }

    [Fact]
    public void PlayerService_IsGatedByNodeIdentityAndMtlsCertificate()
    {
        var playerUnit = Read("systemd/flowercore-signage-player-pi.service");

        ShouldContainEach(
            playerUnit,
            "ConditionPathExists=/etc/flowercore/signage-node.json",
            "ConditionPathExists=/etc/fc-signage-player/client.p12",
            "ExecStartPre=/usr/local/bin/flowercore-signage-prelaunch.sh");
    }

    [Fact]
    public void LaunchScript_TriesEmbedThenFallsBackToBarePlayerRoute()
    {
        var launchScript = Read("scripts/flowercore-signage-launch.sh");

        ShouldContainEach(
            launchScript,
            "/player/${NODE_ID}/embed?token=${CERT_THUMB}",
            "url-divergence.log",
            "/player/${NODE_ID}?token=${CERT_THUMB}");
    }

    [Fact]
    public void LaunchScript_DisablesChromiumPromptsAndRuntimeUpdates()
    {
        var launchScript = Read("scripts/flowercore-signage-launch.sh");

        ShouldContainEach(
            launchScript,
            "--noerrdialogs",
            "--disable-infobars",
            "--password-store=basic",
            "--check-for-update-interval=2592000");
    }

    [Fact]
    public void PrelaunchScript_AbortsWhenRequiredFilesAreMissing()
    {
        var prelaunchScript = Read("scripts/flowercore-signage-prelaunch.sh");

        ShouldContainEach(
            prelaunchScript,
            "for f in /etc/flowercore/signage-node.json /etc/fc-signage-player/client.p12 /etc/fc-signage-player/client.p12.pass",
            "exit 1",
            "-checkend $((7*24*3600))");
    }

    [Fact]
    public void BootstrapScript_IsIdempotentWhenAlreadyEnrolled()
    {
        var bootstrapScript = Read("scripts/flowercore-signage-bootstrap.sh");

        ShouldContainEach(bootstrapScript, "already enrolled", "exit 0", ".enrolledAt");
    }

    [Fact]
    public void BootstrapScript_GeneratesStableMachineIdFromUuid()
    {
        var bootstrapScript = Read("scripts/flowercore-signage-bootstrap.sh");

        ShouldContainEach(bootstrapScript, "uuidgen", "cut -c1-16", "machineId");
    }

    [Fact]
    public void BootstrapScript_RetriesRegisterOnceForFirstCallRace()
    {
        var bootstrapScript = Read("scripts/flowercore-signage-bootstrap.sh");

        ShouldContainEach(
            bootstrapScript,
            "for attempt in 1 2",
            "register attempt $attempt returned",
            "sleep 5");
    }

    [Fact]
    public void BootstrapScript_SupportsSetupCodeAndApprovalPollingBudget()
    {
        var bootstrapScript = Read("scripts/flowercore-signage-bootstrap.sh");

        ShouldContainEach(
            bootstrapScript,
            "signage-setup-code",
            "approve-via-setup-code",
            "+ 1800",
            "sleep 15");
    }

    [Fact]
    public void BootstrapScript_CsrSubjectIdentifiesPiPlayer()
    {
        var bootstrapScript = Read("scripts/flowercore-signage-bootstrap.sh");

        ShouldContainEach(bootstrapScript, "/CN=${NODE_ID}/O=FlowerCore/OU=SignagePlayer-Pi");
    }

    [Fact]
    public void BootstrapScript_PersistsCertificateAsP12WithRestrictivePermissions()
    {
        var bootstrapScript = Read("scripts/flowercore-signage-bootstrap.sh");

        ShouldContainEach(
            bootstrapScript,
            "openssl pkcs12 -export",
            "client.p12.pass",
            "chmod 0600",
            "chmod 0640");
    }

    [Fact]
    public void RenewScript_OnlyRunsWhenCertHasLessThanThirtyDays()
    {
        var renewScript = Read("scripts/flowercore-signage-renew-cert.sh");

        ShouldContainEach(renewScript, "-checkend $((30*24*3600))", "exit 0", "/renew");
    }

    [Fact]
    public void RenewScript_AtomicallySwapsNewCertificateFiles()
    {
        var renewScript = Read("scripts/flowercore-signage-renew-cert.sh");

        ShouldContainEach(
            renewScript,
            "client.key.new",
            "mv \"$CERT_DIR/client.key.new\" \"$CERT_DIR/client.key\"",
            "mv \"$CERT_DIR/client.p12.new\" \"$CERT_DIR/client.p12\"");
    }

    [Fact]
    public void HdmiRule_RestartsPlayerAndRunsCapabilityDetection()
    {
        // Both files are read before any assertion runs.
        var udevRule = Read("systemd/99-flowercore-signage-hdmi.rules");
        var hdmiResponder = Read("scripts/flowercore-signage-hdmi-respond.sh");

        ShouldContainEach(
            udevRule,
            "KERNEL==\"card?-HDMI-A-?\"",
            "start flowercore-signage-player-pi-hdmi.service");
        ShouldContainEach(
            hdmiResponder,
            "sleep 2",
            "start flowercore-signage-detect-display.service",
            "restart flowercore-signage-player-pi.service");
    }

    [Fact]
    public void DetectDisplayServiceAndTimer_RunAtBootAndDaily()
    {
        // Both files are read before any assertion runs.
        var detectService = Read("systemd/flowercore-signage-detect-display.service");
        var detectTimer = Read("systemd/flowercore-signage-detect-display.timer");

        ShouldContainEach(detectService, "ExecStart=/usr/local/bin/fc-signage-detect-display");
        ShouldContainEach(detectTimer, "OnBootSec=30s", "OnCalendar=daily", "RandomizedDelaySec=1h");
    }

    [Fact]
    public void DetectDisplayScript_EmitsDisconnectedProfileWhenNoHdmiIsPresent()
    {
        var detectScript = Read("scripts/fc-signage-detect-display");

        ShouldContainEach(detectScript, "displayConnected: false", "No HDMI display detected");
    }

    [Fact]
    public void DetectDisplayScript_ParsesEdidForHdrResolutionAndAudio()
    {
        var detectScript = Read("scripts/fc-signage-detect-display");

        ShouldContainEach(
            detectScript,
            "edid-decode",
            "HDR (Static|Dynamic) Metadata Block",
            "maxResolution",
            "hasAudioOutput");
    }

    [Fact]
    public void DetectDisplayScript_TriesBothForwardCompatibleCapabilityEndpoints()
    {
        var detectScript = Read("scripts/fc-signage-detect-display");

        ShouldContainEach(
            detectScript,
            "/api/v1/nodes/${NODE_ID}/capabilities",
            "/api/v1/displays/${NODE_ID}/capability-profile",
            "no endpoint accepted the profile");
    }

    [Fact]
    public void ChromiumPolicy_IsValidJsonAndDisablesCredentialPrompts()
    {
        var policyJson = Read("chromium-policies/flowercore-signage.json");

        // Parse throws on malformed JSON, which fails the test.
        using var policy = JsonDocument.Parse(policyJson);
        var settings = policy.RootElement;

        var disabledFlags = new[]
        {
            "AutofillAddressEnabled",
            "AutofillCreditCardEnabled",
            "PasswordManagerEnabled",
        };
        foreach (var flag in disabledFlags)
        {
            settings.GetProperty(flag).GetBoolean().Should().BeFalse();
        }

        var firstBlockedExtension = settings.GetProperty("ExtensionInstallBlocklist")[0].GetString();
        firstBlockedExtension.Should().Be("*");
    }

    [Fact]
    public void RenewalTimer_UsesDailyCadenceWithTwoHourJitter()
    {
        var renewTimer = Read("systemd/flowercore-signage-renew.timer");

        ShouldContainEach(renewTimer, "OnCalendar=daily", "RandomizedDelaySec=2h", "Persistent=true");
    }

    /// <summary>
    /// Asserts, in the order given, that <paramref name="text"/> contains every fragment.
    /// The first missing fragment fails the test.
    /// </summary>
    private static void ShouldContainEach(string text, params string[] fragments)
    {
        foreach (var fragment in fragments)
        {
            text.Should().Contain(fragment);
        }
    }

    /// <summary>
    /// Reads an artifact under the app root. <paramref name="relativePath"/> uses forward
    /// slashes, which are converted to the platform separator before combining.
    /// </summary>
    private static string Read(string relativePath)
    {
        var platformRelativePath = relativePath.Replace('/', Path.DirectorySeparatorChar);
        var fullPath = Path.Combine(AppRoot, platformRelativePath);
        return File.ReadAllText(fullPath);
    }

    /// <summary>
    /// Walks up from the test binary's directory to the first ancestor that has an <c>apps</c>
    /// directory and a <c>README.md</c> file.
    /// </summary>
    /// <exception cref="DirectoryNotFoundException">No ancestor directory matches.</exception>
    private static string FindRepoRoot()
    {
        for (var candidate = new DirectoryInfo(AppContext.BaseDirectory); candidate is not null; candidate = candidate.Parent)
        {
            var hasAppsDirectory = Directory.Exists(Path.Combine(candidate.FullName, "apps"));
            if (hasAppsDirectory && File.Exists(Path.Combine(candidate.FullName, "README.md")))
            {
                return candidate.FullName;
            }
        }

        throw new DirectoryNotFoundException("Could not find bluejay-infra root.");
    }
}
|
||||
@@ -1,6 +1,6 @@
|
||||
package bluejayinfra.public_method_allowlist
|
||||
|
||||
public_hosts := {"dist.flowercore.io", "dns.iamworkin.lan"}
|
||||
public_hosts := {"brochure.flowercore.io", "dist.flowercore.io", "dns.iamworkin.lan"}
|
||||
|
||||
deny[msg] {
|
||||
input.kind == "IngressRoute"
|
||||
|
||||
Reference in New Issue
Block a user