Compare commits

...

27 Commits

Author SHA1 Message Date
Andrew Stoltz
8e2c960be3 deploy(dns): align l4 image and auth gate 2026-06-12 12:10:23 -05:00
Andrew Stoltz
c482b66187 deploy(worldbuilder): bump image to v202606121657-35aaa2c-gpu (L2 UI sweep)
Ships the L2 pilot UI sweep to worldbuilder.iamworkin.lan: the dashboard
fc-component fix (missing-styles), ComfyUI local detection, and the rebuilt
About page. Image imported to rke2-server (10.0.56.11) + rke2-agent1
(10.0.56.12). rke2-agent2/10.0.56.13 is retired and was not used.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-12 12:01:16 -05:00
Andrew Stoltz
bacb756173 feat(fc-desktop): OnePasswordItem CRD for remotedesktop-oidc-client (L9 flip-readiness, gate stays OFF)
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-12 11:31:07 -05:00
Andrew Stoltz
8a576c95ed deploy(fc-ttsreader): v20260612-readalong-corrections
TtsReader master@355a9c6: global pronunciation correction memory
(/corrections + REST/MCP), public read-along embed manifests with
fc-reader single-file cue windows (Common@639e233), mood gathering
timelines, listening-note capture, approved-only render contract fix,
and Codex Phase 14.2 rehearsal cue sheets (#42). Tests 1609/1609.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-12 10:07:37 -05:00
Andrew Stoltz
41c2243f09 deploy(intranet): roll screenshot metadata image 2026-06-12 01:15:23 -05:00
Andrew Stoltz
c21e602e4d deploy(intranet): roll page reading profile image 2026-06-12 00:34:21 -05:00
Andrew Stoltz
9f6b71c400 deploy(intranet): roll remotedesktop api ref image 2026-06-11 19:23:07 -05:00
Andrew Stoltz
26f90acf1f deploy(intranet): roll platform badge image 2026-06-11 18:59:25 -05:00
Andrew Stoltz
ab00d22657 deploy(worldbuilder): roll route fix image 2026-06-11 16:17:17 -05:00
Andrew Stoltz
c1a43c64b3 deploy(worldbuilder): enable live gpu backend 2026-06-11 16:05:40 -05:00
Andrew Stoltz
7103658342 deploy(intranet): roll regroup follow-through image 2026-06-11 15:58:12 -05:00
Andrew Stoltz
6b12b2bb49 deploy(intranet): roll operator depth image 2026-06-11 15:06:08 -05:00
Andrew Stoltz
a4c9e44a36 fix(runners): disable self-update in k8s pods 2026-06-11 14:57:00 -05:00
Andrew Stoltz
9674a9555e deploy(intranet): roll article depth image 2026-06-11 14:27:24 -05:00
Andrew Stoltz
318252da76 deploy(devicemgmt): roll healthz web image 2026-06-11 14:27:14 -05:00
Andrew Stoltz
3798b7c00e deploy(devicemgmt): enable web runtime 2026-06-11 14:21:51 -05:00
Andrew Stoltz
2707f1ae1e deploy(intranet): roll regroup catalog image 2026-06-11 12:32:40 -05:00
Andrew Stoltz
a7e7c1ae72 deploy(intranet): roll content quality image 2026-06-10 20:13:56 -05:00
Andrew Stoltz
c8df788d72 deploy(intranet): roll webmail health image 2026-06-10 19:15:44 -05:00
Andrew Stoltz
b1a4d7120e deploy(intranet): roll registry health image 2026-06-10 19:10:31 -05:00
Andrew Stoltz
4b57b8e939 fix(intranet): align search deploy config 2026-06-10 19:01:08 -05:00
Andrew Stoltz
70f36c546b deploy(intranet): roll hardening image 2026-06-10 18:58:09 -05:00
Robot
cdbddd71af fc-devicemgmt: stage fresh web image v20260610-bluejay (master 1614fce)
Image built from current DM master (network/BT command plane + Blue Jay
UI.Components restyle) and imported on rke2-server + rke2-agent1.
Deployment stays parked at replicas: 0 — gap 1 is wider than previously
noted (the fc-mysql Operator deployment itself is absent, so instance
CRDs would not reconcile) and gap 2 (1P runtime item) is still open.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-10 16:57:43 -05:00
Andrew Stoltz
81ac1f3e4f authentik: align volumeClaimTemplates TypeMeta with SSA-created live object
StatefulSet/authentik-postgres has been eternally OutOfSync since ~Sprint 65
even though 'kubectl diff --server-side --field-manager=argocd-controller'
shows zero real change. The STS was created via ServerSideApply, so the live
object carries apiVersion/kind inside volumeClaimTemplates[]; git omitting
them makes ArgoCD's normalized diff disagree forever. Declare them in git.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-10 15:18:29 -05:00
b842738a0e Merge pull request 'Sprint 63 Cx-10: align hardening probe paths with live routes' (#44) from codex/s63-cx10 into main
Sprint 63 Cx-10 live-proof fix after Traefik curls found three stale probe-path annotations. Local lint 100/100; git diff --check clean; no Gitea statuses attached.
2026-06-05 03:02:14 +00:00
Andrew Stoltz
f0cb7a5e81 fix(hardening): align probe-path annotations with live health routes 2026-06-04 22:01:04 -05:00
ac0f665323 Merge pull request 'Draft: Sprint 62 Cx-10 broader exposure hardening' (#43) from codex/s62-cx10 into main
Sprint 63 Cx-10 reconcile-first merge after local lint proof: 100/100 passed, no Gitea statuses attached, CRLF diff check clean.
2026-06-05 02:51:37 +00:00
15 changed files with 931 additions and 547 deletions

View File

@@ -1,448 +1,453 @@
# Authentik OIDC backend
# ArgoCD-managed. BlueJay Lab.
#
# Stack:
# - PostgreSQL 16 StatefulSet (single replica, Longhorn RWO 5Gi)
# - Redis 7 Deployment (no persistence — session/cache only)
# - Authentik server + worker Deployments (image ghcr.io/goauthentik/server:2024.12.3)
# - Media PVC shared between server + worker (Longhorn RWO 2Gi)
# - Certificate via step-ca-acme ClusterIssuer
# - Traefik IngressRoute at id.iamworkin.lan
#
# Secrets come from 1Password item "authentik-credentials" (IAmWorkin vault, id y6i74ch22q5wvm7znquq4nhhcu)
# via the OnePasswordItem CRD, materialized into k8s Secret authentik/authentik-credentials.
#
# Why the discovery URL is /application/o/pimanager/ : Authentik issues per-application OIDC providers.
# The pimanager OIDC application/provider is created after the cluster pods are healthy (manual or
# via API once the bootstrap token is available — see Notes substrate).
---
apiVersion: v1
kind: Namespace
metadata:
name: authentik
labels:
app.kubernetes.io/part-of: bluejay-infra
---
# 1Password operator pulls the authentik-credentials item into a k8s Secret of the same name.
# Field labels in 1P become Secret keys: AUTHENTIK_SECRET_KEY, POSTGRES_PASSWORD, REDIS_PASSWORD,
# BOOTSTRAP_ADMIN_PASSWORD, BOOTSTRAP_ADMIN_TOKEN, BOOTSTRAP_ADMIN_EMAIL.
apiVersion: onepassword.com/v1
kind: OnePasswordItem
metadata:
name: authentik-credentials
namespace: authentik
spec:
itemPath: "vaults/IAmWorkin/items/authentik-credentials"
---
# Shared media volume for server + worker pods.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: authentik-media
namespace: authentik
spec:
storageClassName: longhorn
accessModes: [ReadWriteOnce]
resources:
requests:
storage: 2Gi
---
# PostgreSQL 16 StatefulSet — Authentik's primary store.
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: authentik-postgres
namespace: authentik
labels:
app: authentik-postgres
argocd.argoproj.io/instance: infra-authentik
spec:
persistentVolumeClaimRetentionPolicy:
whenDeleted: Retain
whenScaled: Retain
podManagementPolicy: OrderedReady
serviceName: authentik-postgres
replicas: 1
revisionHistoryLimit: 10
selector:
matchLabels:
app: authentik-postgres
template:
metadata:
labels:
app: authentik-postgres
spec:
containers:
- name: postgres
image: postgres:16-alpine
ports:
- containerPort: 5432
name: postgres
env:
- name: POSTGRES_USER
value: authentik
- name: POSTGRES_PASSWORD
valueFrom:
secretKeyRef:
name: authentik-credentials
key: POSTGRES_PASSWORD
- name: POSTGRES_DB
value: authentik
- name: POSTGRES_INITDB_ARGS
value: "--encoding=UTF-8 --lc-collate=C --lc-ctype=C"
- name: PGDATA
value: /var/lib/postgresql/data/pgdata
readinessProbe:
exec:
command: ["pg_isready", "-U", "authentik"]
initialDelaySeconds: 5
periodSeconds: 5
livenessProbe:
exec:
command: ["pg_isready", "-U", "authentik"]
initialDelaySeconds: 30
periodSeconds: 30
resources:
requests: { cpu: 100m, memory: 256Mi }
limits: { cpu: 1000m, memory: 1Gi }
volumeMounts:
- name: pgdata
mountPath: /var/lib/postgresql/data
volumeClaimTemplates:
- metadata:
name: pgdata
spec:
storageClassName: longhorn
accessModes: [ReadWriteOnce]
volumeMode: Filesystem
resources:
requests:
storage: 5Gi
---
apiVersion: v1
kind: Service
metadata:
name: authentik-postgres
namespace: authentik
spec:
clusterIP: None
selector:
app: authentik-postgres
ports:
- name: postgres
port: 5432
targetPort: 5432
---
# Redis 7 — session storage + Celery broker. No persistence needed (cache).
apiVersion: apps/v1
kind: Deployment
metadata:
name: authentik-redis
namespace: authentik
labels:
app: authentik-redis
argocd.argoproj.io/instance: infra-authentik
spec:
replicas: 1
strategy:
type: Recreate
selector:
matchLabels:
app: authentik-redis
template:
metadata:
labels:
app: authentik-redis
spec:
containers:
- name: redis
image: redis:7-alpine
args:
- "--save"
- ""
- "--appendonly"
- "no"
- "--requirepass"
- "$(REDIS_PASSWORD)"
env:
- name: REDIS_PASSWORD
valueFrom:
secretKeyRef:
name: authentik-credentials
key: REDIS_PASSWORD
ports:
- containerPort: 6379
name: redis
readinessProbe:
tcpSocket: { port: 6379 }
initialDelaySeconds: 5
periodSeconds: 5
livenessProbe:
tcpSocket: { port: 6379 }
initialDelaySeconds: 30
periodSeconds: 30
resources:
requests: { cpu: 50m, memory: 64Mi }
limits: { cpu: 500m, memory: 256Mi }
---
apiVersion: v1
kind: Service
metadata:
name: authentik-redis
namespace: authentik
spec:
selector:
app: authentik-redis
ports:
- name: redis
port: 6379
targetPort: 6379
---
# Authentik server Deployment — HTTP frontend on :9000.
apiVersion: apps/v1
kind: Deployment
metadata:
name: authentik-server
namespace: authentik
labels:
app: authentik-server
argocd.argoproj.io/instance: infra-authentik
spec:
replicas: 1
strategy:
type: Recreate # shares /media RWO PVC with worker
selector:
matchLabels:
app: authentik-server
template:
metadata:
labels:
app: authentik-server
spec:
securityContext:
# The Authentik image runs as uid 1000 ("authentik"), but the Longhorn PVC is
# mounted root:root by default. Setting fsGroup makes the kubelet recursively
# chgrp + chmod g+rwx the volume so the non-root container can mkdir
# /media/public during the tenant_files migration.
fsGroup: 1000
containers:
- name: server
image: ghcr.io/goauthentik/server:2024.12.3
args: ["server"]
ports:
- containerPort: 9000
name: http
- containerPort: 9443
name: https
env:
- name: AUTHENTIK_SECRET_KEY
valueFrom:
secretKeyRef:
name: authentik-credentials
key: AUTHENTIK_SECRET_KEY
- name: AUTHENTIK_REDIS__HOST
value: authentik-redis
- name: AUTHENTIK_REDIS__PASSWORD
valueFrom:
secretKeyRef:
name: authentik-credentials
key: REDIS_PASSWORD
- name: AUTHENTIK_POSTGRESQL__HOST
value: authentik-postgres
- name: AUTHENTIK_POSTGRESQL__NAME
value: authentik
- name: AUTHENTIK_POSTGRESQL__USER
value: authentik
- name: AUTHENTIK_POSTGRESQL__PASSWORD
valueFrom:
secretKeyRef:
name: authentik-credentials
key: POSTGRES_PASSWORD
- name: AUTHENTIK_BOOTSTRAP_PASSWORD
valueFrom:
secretKeyRef:
name: authentik-credentials
key: BOOTSTRAP_ADMIN_PASSWORD
- name: AUTHENTIK_BOOTSTRAP_TOKEN
valueFrom:
secretKeyRef:
name: authentik-credentials
key: BOOTSTRAP_ADMIN_TOKEN
- name: AUTHENTIK_BOOTSTRAP_EMAIL
valueFrom:
secretKeyRef:
name: authentik-credentials
key: BOOTSTRAP_ADMIN_EMAIL
- name: AUTHENTIK_DISABLE_UPDATE_CHECK
value: "true"
- name: AUTHENTIK_ERROR_REPORTING__ENABLED
value: "false"
- name: AUTHENTIK_LOG_LEVEL
value: info
# First-boot Authentik can take 3+ min on the migration phase
# (waiting on DB lock while worker also runs migrations). Initial
# delays are generous so kubelet doesn't kill the pod mid-migration;
# periodSeconds keeps post-startup probing responsive.
readinessProbe:
httpGet:
path: /-/health/ready/
port: 9000
initialDelaySeconds: 60
periodSeconds: 10
timeoutSeconds: 5
failureThreshold: 12
livenessProbe:
httpGet:
path: /-/health/live/
port: 9000
initialDelaySeconds: 300
periodSeconds: 30
timeoutSeconds: 10
failureThreshold: 3
startupProbe:
httpGet:
path: /-/health/live/
port: 9000
initialDelaySeconds: 30
periodSeconds: 15
timeoutSeconds: 10
failureThreshold: 40 # 30s + 40*15s = 10.5 min budget
resources:
requests: { cpu: 150m, memory: 512Mi }
limits: { cpu: 1500m, memory: 1Gi }
volumeMounts:
- name: media
mountPath: /media
volumes:
- name: media
persistentVolumeClaim:
claimName: authentik-media
---
# Authentik worker Deployment — runs Celery background tasks.
apiVersion: apps/v1
kind: Deployment
metadata:
name: authentik-worker
namespace: authentik
labels:
app: authentik-worker
argocd.argoproj.io/instance: infra-authentik
spec:
replicas: 1
strategy:
type: Recreate # shares /media RWO PVC with server
selector:
matchLabels:
app: authentik-worker
template:
metadata:
labels:
app: authentik-worker
spec:
securityContext:
# Same as server pod — non-root uid 1000 needs PVC group write.
fsGroup: 1000
containers:
- name: worker
image: ghcr.io/goauthentik/server:2024.12.3
args: ["worker"]
env:
- name: AUTHENTIK_SECRET_KEY
valueFrom:
secretKeyRef:
name: authentik-credentials
key: AUTHENTIK_SECRET_KEY
- name: AUTHENTIK_REDIS__HOST
value: authentik-redis
- name: AUTHENTIK_REDIS__PASSWORD
valueFrom:
secretKeyRef:
name: authentik-credentials
key: REDIS_PASSWORD
- name: AUTHENTIK_POSTGRESQL__HOST
value: authentik-postgres
- name: AUTHENTIK_POSTGRESQL__NAME
value: authentik
- name: AUTHENTIK_POSTGRESQL__USER
value: authentik
- name: AUTHENTIK_POSTGRESQL__PASSWORD
valueFrom:
secretKeyRef:
name: authentik-credentials
key: POSTGRES_PASSWORD
- name: AUTHENTIK_DISABLE_UPDATE_CHECK
value: "true"
- name: AUTHENTIK_ERROR_REPORTING__ENABLED
value: "false"
- name: AUTHENTIK_LOG_LEVEL
value: info
resources:
requests: { cpu: 100m, memory: 256Mi }
limits: { cpu: 1000m, memory: 768Mi }
volumeMounts:
- name: media
mountPath: /media
volumes:
- name: media
persistentVolumeClaim:
claimName: authentik-media
---
apiVersion: v1
kind: Service
metadata:
name: authentik-server
namespace: authentik
spec:
selector:
app: authentik-server
ports:
- name: http
port: 9000
targetPort: 9000
- name: https
port: 9443
targetPort: 9443
---
# step-ca leaf certificate for id.iamworkin.lan.
# The step-ca container's resolver uses pfSense Unbound, so the public A record for id.iamworkin.lan
# MUST exist before this Certificate is applied; otherwise cert-manager's HTTP-01 challenge will
# silently back off for 2h. The record was added 2026-05-25 via scripts/pfsense-add-id-host.py.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: authentik-tls
namespace: authentik
spec:
secretName: authentik-tls
dnsNames:
- id.iamworkin.lan
issuerRef:
name: step-ca-acme
kind: ClusterIssuer
---
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
name: authentik
namespace: authentik
spec:
entryPoints: [websecure]
routes:
- match: Host(`id.iamworkin.lan`)
kind: Rule
services:
- name: authentik-server
port: 9000
tls:
secretName: authentik-tls
# Authentik OIDC backend
# ArgoCD-managed. BlueJay Lab.
#
# Stack:
# - PostgreSQL 16 StatefulSet (single replica, Longhorn RWO 5Gi)
# - Redis 7 Deployment (no persistence — session/cache only)
# - Authentik server + worker Deployments (image ghcr.io/goauthentik/server:2024.12.3)
# - Media PVC shared between server + worker (Longhorn RWO 2Gi)
# - Certificate via step-ca-acme ClusterIssuer
# - Traefik IngressRoute at id.iamworkin.lan
#
# Secrets come from 1Password item "authentik-credentials" (IAmWorkin vault, id y6i74ch22q5wvm7znquq4nhhcu)
# via the OnePasswordItem CRD, materialized into k8s Secret authentik/authentik-credentials.
#
# Why the discovery URL is /application/o/pimanager/ : Authentik issues per-application OIDC providers.
# The pimanager OIDC application/provider is created after the cluster pods are healthy (manual or
# via API once the bootstrap token is available — see Notes substrate).
---
apiVersion: v1
kind: Namespace
metadata:
name: authentik
labels:
app.kubernetes.io/part-of: bluejay-infra
---
# 1Password operator pulls the authentik-credentials item into a k8s Secret of the same name.
# Field labels in 1P become Secret keys: AUTHENTIK_SECRET_KEY, POSTGRES_PASSWORD, REDIS_PASSWORD,
# BOOTSTRAP_ADMIN_PASSWORD, BOOTSTRAP_ADMIN_TOKEN, BOOTSTRAP_ADMIN_EMAIL.
apiVersion: onepassword.com/v1
kind: OnePasswordItem
metadata:
name: authentik-credentials
namespace: authentik
spec:
itemPath: "vaults/IAmWorkin/items/authentik-credentials"
---
# Shared media volume for server + worker pods.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: authentik-media
namespace: authentik
spec:
storageClassName: longhorn
accessModes: [ReadWriteOnce]
resources:
requests:
storage: 2Gi
---
# PostgreSQL 16 StatefulSet — Authentik's primary store.
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: authentik-postgres
namespace: authentik
labels:
app: authentik-postgres
argocd.argoproj.io/instance: infra-authentik
spec:
persistentVolumeClaimRetentionPolicy:
whenDeleted: Retain
whenScaled: Retain
podManagementPolicy: OrderedReady
serviceName: authentik-postgres
replicas: 1
revisionHistoryLimit: 10
selector:
matchLabels:
app: authentik-postgres
template:
metadata:
labels:
app: authentik-postgres
spec:
containers:
- name: postgres
image: postgres:16-alpine
ports:
- containerPort: 5432
name: postgres
env:
- name: POSTGRES_USER
value: authentik
- name: POSTGRES_PASSWORD
valueFrom:
secretKeyRef:
name: authentik-credentials
key: POSTGRES_PASSWORD
- name: POSTGRES_DB
value: authentik
- name: POSTGRES_INITDB_ARGS
value: "--encoding=UTF-8 --lc-collate=C --lc-ctype=C"
- name: PGDATA
value: /var/lib/postgresql/data/pgdata
readinessProbe:
exec:
command: ["pg_isready", "-U", "authentik"]
initialDelaySeconds: 5
periodSeconds: 5
livenessProbe:
exec:
command: ["pg_isready", "-U", "authentik"]
initialDelaySeconds: 30
periodSeconds: 30
resources:
requests: { cpu: 100m, memory: 256Mi }
limits: { cpu: 1000m, memory: 1Gi }
volumeMounts:
- name: pgdata
mountPath: /var/lib/postgresql/data
volumeClaimTemplates:
# apiVersion/kind included deliberately: this STS was created via ArgoCD ServerSideApply,
# so the live object carries PVC TypeMeta inside volumeClaimTemplates; omitting it here
# leaves the app eternally OutOfSync even though kubectl SSA dry-run shows no change.
- apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: pgdata
spec:
storageClassName: longhorn
accessModes: [ReadWriteOnce]
volumeMode: Filesystem
resources:
requests:
storage: 5Gi
---
apiVersion: v1
kind: Service
metadata:
name: authentik-postgres
namespace: authentik
spec:
clusterIP: None
selector:
app: authentik-postgres
ports:
- name: postgres
port: 5432
targetPort: 5432
---
# Redis 7 — session storage + Celery broker. No persistence needed (cache).
apiVersion: apps/v1
kind: Deployment
metadata:
name: authentik-redis
namespace: authentik
labels:
app: authentik-redis
argocd.argoproj.io/instance: infra-authentik
spec:
replicas: 1
strategy:
type: Recreate
selector:
matchLabels:
app: authentik-redis
template:
metadata:
labels:
app: authentik-redis
spec:
containers:
- name: redis
image: redis:7-alpine
args:
- "--save"
- ""
- "--appendonly"
- "no"
- "--requirepass"
- "$(REDIS_PASSWORD)"
env:
- name: REDIS_PASSWORD
valueFrom:
secretKeyRef:
name: authentik-credentials
key: REDIS_PASSWORD
ports:
- containerPort: 6379
name: redis
readinessProbe:
tcpSocket: { port: 6379 }
initialDelaySeconds: 5
periodSeconds: 5
livenessProbe:
tcpSocket: { port: 6379 }
initialDelaySeconds: 30
periodSeconds: 30
resources:
requests: { cpu: 50m, memory: 64Mi }
limits: { cpu: 500m, memory: 256Mi }
---
apiVersion: v1
kind: Service
metadata:
name: authentik-redis
namespace: authentik
spec:
selector:
app: authentik-redis
ports:
- name: redis
port: 6379
targetPort: 6379
---
# Authentik server Deployment — HTTP frontend on :9000.
apiVersion: apps/v1
kind: Deployment
metadata:
name: authentik-server
namespace: authentik
labels:
app: authentik-server
argocd.argoproj.io/instance: infra-authentik
spec:
replicas: 1
strategy:
type: Recreate # shares /media RWO PVC with worker
selector:
matchLabels:
app: authentik-server
template:
metadata:
labels:
app: authentik-server
spec:
securityContext:
# The Authentik image runs as uid 1000 ("authentik"), but the Longhorn PVC is
# mounted root:root by default. Setting fsGroup makes the kubelet recursively
# chgrp + chmod g+rwx the volume so the non-root container can mkdir
# /media/public during the tenant_files migration.
fsGroup: 1000
containers:
- name: server
image: ghcr.io/goauthentik/server:2024.12.3
args: ["server"]
ports:
- containerPort: 9000
name: http
- containerPort: 9443
name: https
env:
- name: AUTHENTIK_SECRET_KEY
valueFrom:
secretKeyRef:
name: authentik-credentials
key: AUTHENTIK_SECRET_KEY
- name: AUTHENTIK_REDIS__HOST
value: authentik-redis
- name: AUTHENTIK_REDIS__PASSWORD
valueFrom:
secretKeyRef:
name: authentik-credentials
key: REDIS_PASSWORD
- name: AUTHENTIK_POSTGRESQL__HOST
value: authentik-postgres
- name: AUTHENTIK_POSTGRESQL__NAME
value: authentik
- name: AUTHENTIK_POSTGRESQL__USER
value: authentik
- name: AUTHENTIK_POSTGRESQL__PASSWORD
valueFrom:
secretKeyRef:
name: authentik-credentials
key: POSTGRES_PASSWORD
- name: AUTHENTIK_BOOTSTRAP_PASSWORD
valueFrom:
secretKeyRef:
name: authentik-credentials
key: BOOTSTRAP_ADMIN_PASSWORD
- name: AUTHENTIK_BOOTSTRAP_TOKEN
valueFrom:
secretKeyRef:
name: authentik-credentials
key: BOOTSTRAP_ADMIN_TOKEN
- name: AUTHENTIK_BOOTSTRAP_EMAIL
valueFrom:
secretKeyRef:
name: authentik-credentials
key: BOOTSTRAP_ADMIN_EMAIL
- name: AUTHENTIK_DISABLE_UPDATE_CHECK
value: "true"
- name: AUTHENTIK_ERROR_REPORTING__ENABLED
value: "false"
- name: AUTHENTIK_LOG_LEVEL
value: info
# First-boot Authentik can take 3+ min on the migration phase
# (waiting on DB lock while worker also runs migrations). Initial
# delays are generous so kubelet doesn't kill the pod mid-migration;
# periodSeconds keeps post-startup probing responsive.
readinessProbe:
httpGet:
path: /-/health/ready/
port: 9000
initialDelaySeconds: 60
periodSeconds: 10
timeoutSeconds: 5
failureThreshold: 12
livenessProbe:
httpGet:
path: /-/health/live/
port: 9000
initialDelaySeconds: 300
periodSeconds: 30
timeoutSeconds: 10
failureThreshold: 3
startupProbe:
httpGet:
path: /-/health/live/
port: 9000
initialDelaySeconds: 30
periodSeconds: 15
timeoutSeconds: 10
failureThreshold: 40 # 30s + 40*15s = 10.5 min budget
resources:
requests: { cpu: 150m, memory: 512Mi }
limits: { cpu: 1500m, memory: 1Gi }
volumeMounts:
- name: media
mountPath: /media
volumes:
- name: media
persistentVolumeClaim:
claimName: authentik-media
---
# Authentik worker Deployment — runs Celery background tasks.
apiVersion: apps/v1
kind: Deployment
metadata:
name: authentik-worker
namespace: authentik
labels:
app: authentik-worker
argocd.argoproj.io/instance: infra-authentik
spec:
replicas: 1
strategy:
type: Recreate # shares /media RWO PVC with server
selector:
matchLabels:
app: authentik-worker
template:
metadata:
labels:
app: authentik-worker
spec:
securityContext:
# Same as server pod — non-root uid 1000 needs PVC group write.
fsGroup: 1000
containers:
- name: worker
image: ghcr.io/goauthentik/server:2024.12.3
args: ["worker"]
env:
- name: AUTHENTIK_SECRET_KEY
valueFrom:
secretKeyRef:
name: authentik-credentials
key: AUTHENTIK_SECRET_KEY
- name: AUTHENTIK_REDIS__HOST
value: authentik-redis
- name: AUTHENTIK_REDIS__PASSWORD
valueFrom:
secretKeyRef:
name: authentik-credentials
key: REDIS_PASSWORD
- name: AUTHENTIK_POSTGRESQL__HOST
value: authentik-postgres
- name: AUTHENTIK_POSTGRESQL__NAME
value: authentik
- name: AUTHENTIK_POSTGRESQL__USER
value: authentik
- name: AUTHENTIK_POSTGRESQL__PASSWORD
valueFrom:
secretKeyRef:
name: authentik-credentials
key: POSTGRES_PASSWORD
- name: AUTHENTIK_DISABLE_UPDATE_CHECK
value: "true"
- name: AUTHENTIK_ERROR_REPORTING__ENABLED
value: "false"
- name: AUTHENTIK_LOG_LEVEL
value: info
resources:
requests: { cpu: 100m, memory: 256Mi }
limits: { cpu: 1000m, memory: 768Mi }
volumeMounts:
- name: media
mountPath: /media
volumes:
- name: media
persistentVolumeClaim:
claimName: authentik-media
---
apiVersion: v1
kind: Service
metadata:
name: authentik-server
namespace: authentik
spec:
selector:
app: authentik-server
ports:
- name: http
port: 9000
targetPort: 9000
- name: https
port: 9443
targetPort: 9443
---
# step-ca leaf certificate for id.iamworkin.lan.
# The step-ca container's resolver uses pfSense Unbound, so the public A record for id.iamworkin.lan
# MUST exist before this Certificate is applied; otherwise cert-manager's HTTP-01 challenge will
# silently back off for 2h. The record was added 2026-05-25 via scripts/pfsense-add-id-host.py.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: authentik-tls
namespace: authentik
spec:
secretName: authentik-tls
dnsNames:
- id.iamworkin.lan
issuerRef:
name: step-ca-acme
kind: ClusterIssuer
---
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
name: authentik
namespace: authentik
spec:
entryPoints: [websecure]
routes:
- match: Host(`id.iamworkin.lan`)
kind: Rule
services:
- name: authentik-server
port: 9000
tls:
secretName: authentik-tls

View File

@@ -14,6 +14,20 @@
# cluster-rebuild repeatability. See
# feedback_networkpolicies_belong_in_bluejay_infra.md.
---
# OIDC client secret for the RemoteDesktop end-user sign-in (fleet regroup L9,
# 2026-06-12). The Authentik provider `remotedesktop` already exists; the 1P item
# `remotedesktop-oidc-client` (vault IAmWorkin) carries issuer_url / client_id /
# client_secret, and the 1Password operator mints the same-named K8s Secret that
# k8s/web-deployment.yaml (FlowerCore.RemoteDesktop repo) consumes with
# optional:true. Gate stays OFF (Q-RD-16) — this is flip-READINESS only.
apiVersion: onepassword.com/v1
kind: OnePasswordItem
metadata:
name: remotedesktop-oidc-client
namespace: fc-desktop
spec:
itemPath: "vaults/IAmWorkin/items/remotedesktop-oidc-client"
---
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:

View File

@@ -11,7 +11,7 @@ metadata:
flowercore.io/created-by: bluejay-infra
rules:
- apiGroups:
- devices.flowercore.io
- flowercore.io
resources:
- '*'
verbs:
@@ -23,7 +23,7 @@ rules:
- patch
- delete
- apiGroups:
- devices.flowercore.io
- flowercore.io
resources:
- devices/status
- devices/finalizers
@@ -33,6 +33,8 @@ rules:
- devicepolicies/finalizers
- remotecommands/status
- remotecommands/finalizers
- desiredstatedocuments/status
- desiredstatedocuments/finalizers
verbs:
- get
- update

View File

@@ -0,0 +1,186 @@
# FlowerCore.DeviceManagement CRDs.
#
# These CRDs match the current operator annotations:
# [KubernetesEntity(Group = "flowercore.io", ApiVersion = "v1alpha1", ...)]
# Keep the schemas intentionally permissive until the DeviceManagement operator
# grows enforced CRD validation.
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
name: devices.flowercore.io
labels:
app.kubernetes.io/name: fc-devicemgmt-operator
app.kubernetes.io/component: operator
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
spec:
group: flowercore.io
scope: Namespaced
names:
plural: devices
singular: device
kind: Device
listKind: DeviceList
versions:
- name: v1alpha1
served: true
storage: true
subresources:
status: {}
schema:
openAPIV3Schema:
type: object
properties:
spec:
type: object
x-kubernetes-preserve-unknown-fields: true
status:
type: object
x-kubernetes-preserve-unknown-fields: true
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
name: devicegroups.flowercore.io
labels:
app.kubernetes.io/name: fc-devicemgmt-operator
app.kubernetes.io/component: operator
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
spec:
group: flowercore.io
scope: Namespaced
names:
plural: devicegroups
singular: devicegroup
kind: DeviceGroup
listKind: DeviceGroupList
versions:
- name: v1alpha1
served: true
storage: true
subresources:
status: {}
schema:
openAPIV3Schema:
type: object
properties:
spec:
type: object
x-kubernetes-preserve-unknown-fields: true
status:
type: object
x-kubernetes-preserve-unknown-fields: true
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
name: devicepolicies.flowercore.io
labels:
app.kubernetes.io/name: fc-devicemgmt-operator
app.kubernetes.io/component: operator
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
spec:
group: flowercore.io
scope: Namespaced
names:
plural: devicepolicies
singular: devicepolicy
kind: DevicePolicy
listKind: DevicePolicyList
versions:
- name: v1alpha1
served: true
storage: true
subresources:
status: {}
schema:
openAPIV3Schema:
type: object
properties:
spec:
type: object
x-kubernetes-preserve-unknown-fields: true
status:
type: object
x-kubernetes-preserve-unknown-fields: true
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
name: remotecommands.flowercore.io
labels:
app.kubernetes.io/name: fc-devicemgmt-operator
app.kubernetes.io/component: operator
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
spec:
group: flowercore.io
scope: Namespaced
names:
plural: remotecommands
singular: remotecommand
kind: RemoteCommand
listKind: RemoteCommandList
versions:
- name: v1alpha1
served: true
storage: true
subresources:
status: {}
schema:
openAPIV3Schema:
type: object
properties:
spec:
type: object
x-kubernetes-preserve-unknown-fields: true
status:
type: object
x-kubernetes-preserve-unknown-fields: true
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
name: desiredstatedocuments.flowercore.io
labels:
app.kubernetes.io/name: fc-devicemgmt-operator
app.kubernetes.io/component: operator
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
spec:
group: flowercore.io
scope: Namespaced
names:
plural: desiredstatedocuments
singular: desiredstatedocument
kind: DesiredStateDocument
listKind: DesiredStateDocumentList
versions:
- name: v1alpha1
served: true
storage: true
subresources:
status: {}
schema:
openAPIV3Schema:
type: object
properties:
spec:
type: object
x-kubernetes-preserve-unknown-fields: true
status:
type: object
x-kubernetes-preserve-unknown-fields: true

View File

@@ -5,21 +5,35 @@
# exist yet; import localhost/fc-devicemgmt-web:<tag> to all schedulable RKE2
# nodes before letting ArgoCD sync a live rollout.
#
# SCALED TO 0 — 2026-05-19 morning-routine cleanup.
# The Web pod cannot start until TWO upstream gaps close:
# 1. MySQL DB instance `flowercore_devicemgmt` (user `fc_devicemgmt`) is
# provisioned via fc-mysql Manager. The cluster currently has ZERO
# MySqlInstanceCrds and no `mysql.fc-mysql.svc:3306` Service, so the
# deployment-web container env `FlowerCore__Database__Host=mysql.fc-mysql.svc`
# points at nothing. Provision via the fc-mysql Manager UI/REST/MCP.
# 2. 1Password vault item `IAmWorkin/FlowerCore DeviceManagement Runtime`
# with 5 fields (DB-Password, mtls-ca.pem, mtls-client.crt, mtls-client.key,
# mtls-chain.pem) — see apps/fc-devicemgmt/1password-item.yaml. Mint mTLS
# from step-ca-agent ClusterIssuer per ADR-126; DB-Password must match the
# password configured for the MySQL user.
# Re-enable: change replicas back to 2 after both gaps close. The image tag
# in this file (v20260512-cx5) MAY also need a refresh — it predates the
# Sprint 34 Cl-3 operator fix; Web may have an analogous bug.
# LIVE — 2026-06-11 DeviceManagement product-host enablement.
# The current DeviceManagement Web source is SQLite-backed in Program.cs, so
# Phase 1 production uses a Longhorn RWO PVC mounted at /data, with the SQLite
# database file at /data/devicemgmt.db. The 1Password runtime item stays wired
# in through env for the future MySQL/API-key cutover, but MySQL is not required
# for this first product-host rollout.
# Image v20260611-healthz is built from FlowerCore.DeviceManagement master
# 3c15f3b, which adds the /healthz alias required by fleet monitoring.
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: fc-devicemgmt-web-data
namespace: fc-devicemgmt
labels:
app: fc-devicemgmt-web
app.kubernetes.io/name: fc-devicemgmt-web
app.kubernetes.io/component: web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
spec:
accessModes:
- ReadWriteOnce
storageClassName: longhorn
resources:
requests:
storage: 1Gi
---
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -36,7 +50,7 @@ metadata:
annotations:
flowercore.io/traceability-standard: k8s-pod-ownership-and-traceability-standard
spec:
replicas: 0
replicas: 1
revisionHistoryLimit: 3
selector:
matchLabels:
@@ -64,7 +78,7 @@ spec:
fsGroupChangePolicy: OnRootMismatch
containers:
- name: web
image: localhost/fc-devicemgmt-web:v20260512-cx5
image: localhost/fc-devicemgmt-web:v20260611-healthz
imagePullPolicy: Never
ports:
- name: http
@@ -77,29 +91,21 @@ spec:
value: "Production"
- name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
value: "false"
- name: HOME
value: "/data"
- name: FlowerCore__Service__Name
value: "FlowerCore.DeviceManagement.Web"
- name: FlowerCore__DeviceManagement__DefaultTenantId
value: "system"
- name: FlowerCore__Database__Provider
value: "MySql"
- name: FlowerCore__Database__Host
value: "mysql.fc-mysql.svc"
- name: FlowerCore__Database__Database
value: "flowercore_devicemgmt"
- name: FlowerCore__Database__User
value: "fc_devicemgmt"
value: "Sqlite"
- name: FlowerCore__Database__ConnectionStrings__Sqlite
value: "Data Source=/data/devicemgmt.db"
- name: FlowerCore__Database__Password
valueFrom:
secretKeyRef:
name: fc-devicemgmt-runtime
key: DB-Password
- name: FlowerCore__DeviceManagement__AgentMtls__CaPath
value: "/secrets/devicemgmt-mtls/mtls-ca.pem"
- name: FlowerCore__DeviceManagement__AgentMtls__ClientCertificatePath
value: "/secrets/devicemgmt-mtls/mtls-client.crt"
- name: FlowerCore__DeviceManagement__AgentMtls__ClientKeyPath
value: "/secrets/devicemgmt-mtls/mtls-client.key"
- name: FlowerCore__EventBus__Redis__Configuration
value: "redis.fc-redis.svc:6379"
resources:
@@ -136,19 +142,17 @@ spec:
drop:
- ALL
volumeMounts:
- name: data
mountPath: /data
- name: tmp
mountPath: /tmp
- name: logs
mountPath: /app/logs
- name: devicemgmt-mtls
mountPath: /secrets/devicemgmt-mtls
readOnly: true
volumes:
- name: data
persistentVolumeClaim:
claimName: fc-devicemgmt-web-data
- name: tmp
emptyDir: {}
- name: logs
emptyDir: {}
- name: devicemgmt-mtls
secret:
secretName: fc-devicemgmt-runtime
defaultMode: 0400

View File

@@ -48,7 +48,7 @@ data:
{
"FlowerCore": {
"Auth": {
"Enabled": true,
"Enabled": false,
"Oidc": {
"Enabled": true,
"Audience": "dns",
@@ -111,7 +111,7 @@ spec:
fsGroup: 1654
containers:
- name: dns-web
image: localhost/fc-dns-web:v20260604-oidc-proper
image: localhost/fc-dns-web:v20260612-l4dns-a5d2849
imagePullPolicy: Never
securityContext:
readOnlyRootFilesystem: true
@@ -149,7 +149,7 @@ spec:
key: client_secret
optional: true
- name: FlowerCore__Auth__Enabled
value: "true"
value: "false"
- name: FlowerCore__Auth__Oidc__Enabled
value: "true"
- name: FlowerCore__Auth__Oidc__Audience
@@ -303,7 +303,7 @@ spec:
fsGroup: 1654
containers:
- name: dns-acme-webhook
image: localhost/fc-dns-acme-webhook:v202604290845
image: localhost/fc-dns-acme-webhook:v20260612-l4dns-a5d2849
imagePullPolicy: Never
securityContext:
readOnlyRootFilesystem: true

View File

@@ -42,7 +42,7 @@ spec:
app: messageboard-web
annotations:
fc.flowercore.io/healthz-anon: "true"
fc.flowercore.io/probe-path: "/healthz"
fc.flowercore.io/probe-path: "/health"
prometheus.io/scrape: "true"
prometheus.io/port: "8080"
prometheus.io/path: "/metrics/prometheus"

View File

@@ -525,7 +525,7 @@ spec:
app.kubernetes.io/part-of: flowercore
annotations:
fc.flowercore.io/healthz-anon: "true"
fc.flowercore.io/probe-path: "/healthz"
fc.flowercore.io/probe-path: "/health"
prometheus.io/scrape: "true"
prometheus.io/port: "5217"
prometheus.io/path: "/metrics"
@@ -535,7 +535,7 @@ spec:
fsGroupChangePolicy: OnRootMismatch
containers:
- name: web
image: localhost/fc-ttsreader-web:v20260603-s54cx14-pr29-schema
image: localhost/fc-ttsreader-web:v20260612-readalong-corrections
imagePullPolicy: Never
ports:
- containerPort: 5217

View File

@@ -54,7 +54,7 @@ spec:
metadata:
annotations:
fc.flowercore.io/healthz-anon: "true"
fc.flowercore.io/probe-path: "/healthz"
fc.flowercore.io/probe-path: "/"
labels:
app: updatecenter-web
spec:

View File

@@ -12,6 +12,8 @@ All repo-scoped Linux runners use:
- `ACCESS_TOKEN` from the `github-runner-token` Secret
- `RUN_AS_ROOT=false`
- `EPHEMERAL=true`
- `DISABLE_AUTO_UPDATE=true` so the runner does not self-update and exit inside
the immutable Kubernetes pod
- `LABELS=self-hosted,linux,fc-build-linux`
- writable non-root paths under `/home/runner` for .NET, NuGet, XDG cache, and
Actions tool cache
@@ -131,3 +133,7 @@ from GitHub Actions and verify it lands on an `rke2-linux-*` runner.
value does not change.
- `Multi-Attach` volume error: only the Common runner uses a RWO PVC and it must
stay single-replica. New multi-replica runners use `emptyDir`.
- Runner pods repeatedly registering, downloading a newer Actions runner, then
exiting with code 4: verify `DISABLE_AUTO_UPDATE=true` is present. The image
translates that into `config.sh --disableupdate`; without it, the kubelet
restarts the container after every expected self-update exit and the pod ends
up in CrashLoopBackOff.

View File

@@ -195,6 +195,11 @@ spec:
# fresh registration occurs. Prevents stale runner accumulation.
- name: EPHEMERAL
value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
# Labels used by workflow files: runs-on: [self-hosted, linux, fc-build-linux]
- name: LABELS
value: "self-hosted,linux,fc-build-linux"
@@ -366,6 +371,11 @@ spec:
value: "/tmp/runner/work"
- name: EPHEMERAL
value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS
value: "self-hosted,linux,fc-build-linux"
- name: HOME
@@ -504,6 +514,11 @@ spec:
value: "/tmp/runner/work"
- name: EPHEMERAL
value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS
value: "self-hosted,linux,fc-build-linux"
- name: HOME
@@ -636,6 +651,11 @@ spec:
value: "/tmp/runner/work"
- name: EPHEMERAL
value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS
value: "self-hosted,linux,fc-build-linux"
- name: HOME
@@ -768,6 +788,11 @@ spec:
value: "/tmp/runner/work"
- name: EPHEMERAL
value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS
value: "self-hosted,linux,fc-build-linux"
- name: HOME
@@ -900,6 +925,11 @@ spec:
value: "/tmp/runner/work"
- name: EPHEMERAL
value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS
value: "self-hosted,linux,fc-build-linux"
- name: HOME
@@ -1035,6 +1065,11 @@ spec:
value: "/tmp/runner/work"
- name: EPHEMERAL
value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS
value: "self-hosted,linux,fc-build-linux"
- name: HOME
@@ -1167,6 +1202,11 @@ spec:
value: "/tmp/runner/work"
- name: EPHEMERAL
value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS
value: "self-hosted,linux,fc-build-linux"
- name: HOME
@@ -1299,6 +1339,11 @@ spec:
value: "/tmp/runner/work"
- name: EPHEMERAL
value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS
value: "self-hosted,linux,fc-build-linux"
- name: HOME
@@ -1431,6 +1476,11 @@ spec:
value: "/tmp/runner/work"
- name: EPHEMERAL
value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS
value: "self-hosted,linux,fc-build-linux"
- name: HOME
@@ -1565,6 +1615,11 @@ spec:
value: "/tmp/runner/work"
- name: EPHEMERAL
value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS
value: "self-hosted,linux,fc-build-linux"
- name: HOME
@@ -1699,6 +1754,11 @@ spec:
value: "/tmp/runner/work"
- name: EPHEMERAL
value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS
value: "self-hosted,linux,fc-build-linux"
- name: HOME
@@ -1838,6 +1898,11 @@ spec:
value: "/tmp/runner/work"
- name: EPHEMERAL
value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS
value: "self-hosted,linux,fc-build-linux"
- name: HOME
@@ -1972,6 +2037,11 @@ spec:
value: "/tmp/runner/work"
- name: EPHEMERAL
value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS
value: "self-hosted,linux,fc-build-linux"
- name: HOME
@@ -2106,6 +2176,11 @@ spec:
value: "/tmp/runner/work"
- name: EPHEMERAL
value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS
value: "self-hosted,linux,fc-build-linux"
- name: HOME
@@ -2240,6 +2315,11 @@ spec:
value: "/tmp/runner/work"
- name: EPHEMERAL
value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS
value: "self-hosted,linux,fc-build-linux"
- name: HOME
@@ -2373,6 +2453,11 @@ spec:
value: "/tmp/runner/work"
- name: EPHEMERAL
value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS
value: "self-hosted,linux,fc-build-linux"
- name: HOME
@@ -2507,6 +2592,11 @@ spec:
value: "/tmp/runner/work"
- name: EPHEMERAL
value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS
value: "self-hosted,linux,fc-build-linux"
- name: HOME
@@ -2640,6 +2730,11 @@ spec:
value: "/tmp/runner/work"
- name: EPHEMERAL
value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS
value: "self-hosted,linux,fc-build-linux"
- name: HOME
@@ -2773,6 +2868,11 @@ spec:
value: "/tmp/runner/work"
- name: EPHEMERAL
value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS
value: "self-hosted,linux,fc-build-linux"
- name: HOME
@@ -2906,6 +3006,11 @@ spec:
value: "/tmp/runner/work"
- name: EPHEMERAL
value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS
value: "self-hosted,linux,fc-build-linux"
- name: HOME
@@ -3039,6 +3144,11 @@ spec:
value: "/tmp/runner/work"
- name: EPHEMERAL
value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS
value: "self-hosted,linux,fc-build-linux"
- name: HOME
@@ -3172,6 +3282,11 @@ spec:
value: "/tmp/runner/work"
- name: EPHEMERAL
value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS
value: "self-hosted,linux,fc-build-linux"
- name: HOME
@@ -3306,6 +3421,11 @@ spec:
value: "/tmp/runner/work"
- name: EPHEMERAL
value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS
value: "self-hosted,linux,fc-build-linux"
- name: HOME
@@ -3440,6 +3560,11 @@ spec:
value: "/tmp/runner/work"
- name: EPHEMERAL
value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS
value: "self-hosted,linux,fc-build-linux"
- name: HOME
@@ -3574,6 +3699,11 @@ spec:
value: "/tmp/runner/work"
- name: EPHEMERAL
value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS
value: "self-hosted,linux,fc-build-linux"
- name: HOME
@@ -3708,6 +3838,11 @@ spec:
value: "/tmp/runner/work"
- name: EPHEMERAL
value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS
value: "self-hosted,linux,fc-build-linux"
- name: HOME
@@ -3842,6 +3977,11 @@ spec:
value: "/tmp/runner/work"
- name: EPHEMERAL
value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS
value: "self-hosted,linux,fc-build-linux"
- name: HOME
@@ -3975,6 +4115,11 @@ spec:
value: "/tmp/runner/work"
- name: EPHEMERAL
value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS
value: "self-hosted,linux,fc-build-linux"
- name: HOME
@@ -4109,6 +4254,11 @@ spec:
value: "/tmp/runner/work"
- name: EPHEMERAL
value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS
value: "self-hosted,linux,fc-build-linux"
- name: HOME
@@ -4247,6 +4397,11 @@ spec:
value: "/tmp/runner/work"
- name: EPHEMERAL
value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS
value: "self-hosted,linux,fc-build-linux"
- name: HOME
@@ -4386,6 +4541,11 @@ spec:
value: "/tmp/runner/work"
- name: EPHEMERAL
value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS
value: "self-hosted,linux,fc-build-linux"
- name: HOME
@@ -4521,6 +4681,11 @@ spec:
value: "/tmp/runner/work"
- name: EPHEMERAL
value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS
value: "self-hosted,linux,fc-build-linux"
- name: HOME

View File

@@ -46,7 +46,7 @@ spec:
spec:
containers:
- name: intranet-web
image: localhost/fc-intranet-web:v20260531-ttsreader-bridge
image: localhost/fc-intranet-web:v20260612-screenshot-metadata
imagePullPolicy: Never
ports:
- containerPort: 5300
@@ -60,14 +60,17 @@ spec:
# ≈ 9 hours. BLUEJAY-WS GPU (R9700, 32GB VRAM) does the same work
# in minutes. Memory: feedback_pi5_nomic_embed_slow.
- name: IntranetSearch__OllamaBaseUrl
value: "http://10.0.56.20:11434"
# Sprint E Phase 2α — JSON-file-backed PageReadingOverride persistence
# on the writable PVC at /data. Without this env var the
# intranet falls back to the in-memory store (loses state on
# pod restart). Master's PageReadingOverrideOptions binds
# PageReadingOverrides:FilePath.
- name: PageReadingOverrides__FilePath
value: "/data/page-reading-overrides.json"
value: "http://edge1.iamworkin.lan:11434"
# External Notes corpus roots are not mounted in the live pod today.
# Keep the curated/workflow docs directory active without logging
# repeated /srv/flowercore-notes missing-root warnings.
- name: IntranetSearch__Enabled
value: "false"
# Page-reading override SQLite persistence on the writable PVC at
# /data. This backs pronunciation, notes, corrections, and
# page-profile metadata across pod restarts.
- name: PageReadingOverrides__DatabasePath
value: "/data/page-reading-overrides.db"
- name: KnowledgeFleetSearch__BaseUrl
value: "https://knowledge.iamworkin.lan"
- name: KnowledgeFleetSearch__ApiKey

View File

@@ -12,28 +12,27 @@ Source: `D:\git\FlowerCore\FlowerCore.WorldBuilder` (master)
in pfSense Unbound before this manifest is applied, or cert-manager
HTTP-01 silently exponential-backs-off ~2h.
Memory: `feedback_pfsense_dns_required_for_acme`.
2. **Image import to ALL RKE2 nodes** — pod can schedule to any of
`rke2-server` (10.0.56.11), `rke2-agent1` (10.0.56.12),
`rke2-agent2` (10.0.56.13). Build with:
2. **Image import to ALL Ready RKE2 nodes** — pod can currently schedule to
`rke2-server` (10.0.56.11) and `rke2-agent1` (10.0.56.12). Build with:
```bash
bash deploy/build.sh # in FlowerCore.WorldBuilder repo
podman save localhost/fc-worldbuilder:v<TAG> -o /tmp/fc-worldbuilder-v<TAG>.tar
for h in 10.0.56.11 10.0.56.12 10.0.56.13; do
scp /tmp/fc-worldbuilder-v<TAG>.tar fcadmin@$h:/tmp/
mkdir -p artifacts/deploy
podman save localhost/fc-worldbuilder:v<TAG> -o artifacts/deploy/fc-worldbuilder-v<TAG>.tar
for h in 10.0.56.11 10.0.56.12; do
ssh fcadmin@$h "mkdir -p /home/fcadmin/.fcv"
scp artifacts/deploy/fc-worldbuilder-v<TAG>.tar fcadmin@$h:/home/fcadmin/.fcv/
ssh fcadmin@$h \
"sudo /var/lib/rancher/rke2/bin/ctr -a /run/k3s/containerd/containerd.sock \
-n k8s.io images import /tmp/fc-worldbuilder-v<TAG>.tar"
-n k8s.io images import /home/fcadmin/.fcv/fc-worldbuilder-v<TAG>.tar"
done
```
Memory: `feedback_rke2_image_import_per_node_scp`.
3. **Bump image tag** in `worldbuilder.yaml` and git push.
ArgoCD ApplicationSet picks up within ~3 minutes.
4. **First production render** — open
`https://worldbuilder.iamworkin.lan/studio/c32e0000-0000-4000-8000-000000000004`
and confirm the Cyberpunk Blue Jay demo prompt loads with five seeded fake
generated images. This Sprint 32 visitor-safe profile uses
`ClientMode=fake`; switch the image-generation env vars back to ComfyUI only
for an operator-owned GPU render lane.
4. **First production render** — verify
`https://worldbuilder.iamworkin.lan/healthz`, open
`https://worldbuilder.iamworkin.lan/settings`, and confirm the image backend
reports ComfyUI before running an operator-owned render lane.
## Health probes
@@ -56,13 +55,8 @@ Source: `D:\git\FlowerCore\FlowerCore.WorldBuilder` (master)
## Image generation backend
Sprint 32 pins the Kubernetes profile to
`FlowerCore:WorldBuilder:ImageGeneration:ClientMode=fake` with
`BaseUrl=http://127.0.0.1:1`. That keeps the public/internal visitor demo
deterministic, avoids GPU exposure, and still exercises the studio/gallery
surface with persisted generated-image metadata.
The previous ComfyUI backend target was `http://10.0.56.20:8188` on
BLUEJAY-WS (R9700 / gfx1201 / ROCm 7.2.1). Re-enable it only in an
operator-owned follow-up that also verifies workstation reachability and image
import freshness.
The live internal profile now uses
`FlowerCore:WorldBuilder:ImageGeneration:ClientMode=comfyui` with
`BaseUrl=http://10.0.56.20:8188` on BLUEJAY-WS (R9700 / gfx1201 / ROCm 7.2).
Keep the public host pre-staging disabled unless the five safe-to-expose gates
are rechecked; the live GPU lane is operator-owned and internal-only.

View File

@@ -5,10 +5,10 @@
#
# Image build (BLUEJAY-WS):
# bash deploy/build.sh # in FlowerCore.WorldBuilder repo
# podman save localhost/fc-worldbuilder:v<TAG> -o /tmp/fc-worldbuilder-v<TAG>.tar
# for h in 10.0.56.11 10.0.56.12 10.0.56.13; do
# scp /tmp/fc-worldbuilder-v<TAG>.tar fcadmin@$h:/tmp/
# ssh fcadmin@$h "sudo /var/lib/rancher/rke2/bin/ctr -a /run/k3s/containerd/containerd.sock -n k8s.io images import /tmp/fc-worldbuilder-v<TAG>.tar"
# mkdir -p artifacts/deploy
# podman save localhost/fc-worldbuilder:v<TAG> -o artifacts/deploy/fc-worldbuilder-v<TAG>.tar
# for h in 10.0.56.11 10.0.56.12; do
# ssh fcadmin@$h "mkdir -p /home/fcadmin/.fcv"
# scp artifacts/deploy/fc-worldbuilder-v<TAG>.tar fcadmin@$h:/home/fcadmin/.fcv/
# ssh fcadmin@$h "sudo /var/lib/rancher/rke2/bin/ctr -a /run/k3s/containerd/containerd.sock -n k8s.io images import /home/fcadmin/.fcv/fc-worldbuilder-v<TAG>.tar"
# done
---
apiVersion: v1
@@ -90,7 +90,7 @@ spec:
containers:
- name: web
# Bump tag for each rebuild. Initial deploy: v202605062048
image: localhost/fc-worldbuilder:v202605062048
image: localhost/fc-worldbuilder:v202606121657-35aaa2c-gpu
imagePullPolicy: Never
ports:
- containerPort: 8080
@@ -117,14 +117,16 @@ spec:
value: "/data/gallery"
- name: FlowerCore__WorldBuilder__Export__RootPath
value: "/data/exports"
# Visitor-safe Sprint 32 profile: fake backend keeps public demo
# rendering deterministic and avoids exposing BLUEJAY-WS GPU.
# Operator-approved live GPU lane. Internal-only host targets
# BLUEJAY-WS ComfyUI; keep public host pre-staging disabled below.
- name: FlowerCore__WorldBuilder__ImageGeneration__BaseUrl
value: "http://127.0.0.1:1"
value: "http://10.0.56.20:8188"
- name: FlowerCore__WorldBuilder__ImageGeneration__ClientMode
value: "fake"
value: "comfyui"
- name: FlowerCore__WorldBuilder__ImageGeneration__BackendId
value: "fake"
value: "comfyui"
- name: FlowerCore__WorldBuilder__ImageGeneration__VisitorSafe
value: "false"
resources:
# Cluster CPU-request budget runs hot (99% on all 3 nodes at deploy
# time) while actual CPU usage is well below capacity. Idle Blazor

View File

@@ -87,10 +87,10 @@ public sealed class FleetManifestLintTests
["fc-devicemgmt"] = ("fc-devicemgmt-web", "/healthz"),
["fc-library"] = ("library-web", "/health"),
["fc-llm-bridge"] = ("fc-llm-bridge", "/healthz"),
["fc-messageboard"] = ("messageboard-web", "/healthz"),
["fc-messageboard"] = ("messageboard-web", "/health"),
["fc-retail"] = ("retail-web", "/healthz"),
["fc-ttsreader"] = ("ttsreader-web", "/healthz"),
["fc-updater"] = ("updatecenter-web", "/healthz"),
["fc-ttsreader"] = ("ttsreader-web", "/health"),
["fc-updater"] = ("updatecenter-web", "/"),
["knowledge"] = ("knowledge-web", "/healthz"),
["telephony"] = ("telephony-web", "/health"),
["worldbuilder"] = ("worldbuilder-web", "/healthz"),
@@ -272,6 +272,7 @@ public sealed class FleetManifestLintTests
var container = deployments[expectedRunner.Key].MainContainerMappings().Should().ContainSingle().Subject;
EnvValue(container, "REPO_URL").Should().Be(expectedRunner.Value);
EnvValue(container, "EPHEMERAL").Should().Be("true");
EnvValue(container, "DISABLE_AUTO_UPDATE").Should().Be("true", $"{expectedRunner.Key} must not self-update inside immutable Kubernetes runner pods");
EnvValue(container, "LABELS").Should().Be("self-hosted,linux,fc-build-linux");
EnvValue(container, "RUN_AS_ROOT").Should().Be("false");
EnvValue(container, "ACCESS_TOKEN").Should().BeNull("ACCESS_TOKEN must come from github-runner-token Secret, not a literal");
@@ -649,6 +650,7 @@ public sealed class FleetManifestLintTests
"certificate-web.yaml",
"clusterrole-operator.yaml",
"clusterrolebinding-operator.yaml",
"crds.yaml",
"deployment-operator.yaml",
"deployment-web.yaml",
"ingressroute-web.yaml",
@@ -738,7 +740,8 @@ public sealed class FleetManifestLintTests
.Single(document => document.Kind == "ClusterRole" && document.Name == "fc-devicemgmt-operator");
var allScalars = clusterRole.AllScalars().ToList();
allScalars.Should().Contain("devices.flowercore.io");
allScalars.Should().Contain("flowercore.io");
allScalars.Should().NotContain("devices.flowercore.io");
allScalars.Should().Contain("*");
allScalars.Should().Contain("deployments");
allScalars.Should().Contain("get");
@@ -767,7 +770,7 @@ public sealed class FleetManifestLintTests
FcDeviceManagementDocuments().Should().NotContain(document => document.Kind == "Secret");
appText.Should().Contain("secretKeyRef:");
appText.Should().Contain("secretName: fc-devicemgmt-runtime");
appText.Should().Contain("name: fc-devicemgmt-runtime");
appText.Should().NotContain("stringData:");
appText.Should().NotContain("from-literal");
appText.Should().NotContain("tls.key:");
@@ -864,9 +867,9 @@ public sealed class FleetManifestLintTests
{
var deployments = new[]
{
(App: "fc-dns", Name: "dns-web", Slug: "dns", Secret: "dns-oidc-client"),
(App: "fc-media", Name: "fc-media-web", Slug: "media", Secret: "media-oidc-client"),
(App: "fc-distribution", Name: "fc-distribution", Slug: "distribution", Secret: "distribution-oidc-client"),
(App: "fc-dns", Name: "dns-web", Slug: "dns", Secret: "dns-oidc-client", AuthEnabled: "false"),
(App: "fc-media", Name: "fc-media-web", Slug: "media", Secret: "media-oidc-client", AuthEnabled: "true"),
(App: "fc-distribution", Name: "fc-distribution", Slug: "distribution", Secret: "distribution-oidc-client", AuthEnabled: "true"),
};
foreach (var expected in deployments)
@@ -875,7 +878,7 @@ public sealed class FleetManifestLintTests
.Single(document => document.Kind == "Deployment" && document.Name == expected.Name);
var container = deployment.MainContainerMappings().Should().ContainSingle().Subject;
EnvValue(container, "FlowerCore__Auth__Enabled").Should().Be("true");
EnvValue(container, "FlowerCore__Auth__Enabled").Should().Be(expected.AuthEnabled);
EnvValue(container, "FlowerCore__Auth__Oidc__Enabled").Should().Be("true");
(EnvValue(container, "FlowerCore__Auth__Oidc__Audience") ?? EnvValue(container, "FlowerCore__Auth__Oidc__ClientId"))
.Should()
@@ -924,7 +927,7 @@ public sealed class FleetManifestLintTests
var dnsPvc = AppDocuments("fc-dns")
.Single(document => document.Kind == "PersistentVolumeClaim" && document.Name == "dns-web-data");
ManifestNodeExtensions.Scalar(dnsContainer, "image").Should().Be("localhost/fc-dns-web:v20260604-oidc-proper");
ManifestNodeExtensions.Scalar(dnsContainer, "image").Should().Be("localhost/fc-dns-web:v20260612-l4dns-a5d2849");
dnsPvc.Scalar("spec", "storageClassName").Should().Be("longhorn");
dnsPvc.Scalar("spec", "resources", "requests", "storage").Should().Be("1Gi");