Compare commits

..

23 Commits

Author SHA1 Message Date
Andrew Stoltz
8e2c960be3 deploy(dns): align l4 image and auth gate 2026-06-12 12:10:23 -05:00
Andrew Stoltz
c482b66187 deploy(worldbuilder): bump image to v202606121657-35aaa2c-gpu (L2 UI sweep)
Ships the L2 pilot UI sweep to worldbuilder.iamworkin.lan: the dashboard
fc-component fix (missing-styles), ComfyUI local detection, and the rebuilt
About page. Image imported to rke2-server (10.0.56.11) + rke2-agent1
(10.0.56.12). rke2-agent2/10.0.56.13 is retired and was not used.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-12 12:01:16 -05:00
Andrew Stoltz
bacb756173 feat(fc-desktop): OnePasswordItem CRD for remotedesktop-oidc-client (L9 flip-readiness, gate stays OFF)
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-12 11:31:07 -05:00
Andrew Stoltz
8a576c95ed deploy(fc-ttsreader): v20260612-readalong-corrections
TtsReader master@355a9c6: global pronunciation correction memory
(/corrections + REST/MCP), public read-along embed manifests with
fc-reader single-file cue windows (Common@639e233), mood gathering
timelines, listening-note capture, approved-only render contract fix,
and Codex Phase 14.2 rehearsal cue sheets (#42). Tests 1609/1609.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-12 10:07:37 -05:00
Andrew Stoltz
41c2243f09 deploy(intranet): roll screenshot metadata image 2026-06-12 01:15:23 -05:00
Andrew Stoltz
c21e602e4d deploy(intranet): roll page reading profile image 2026-06-12 00:34:21 -05:00
Andrew Stoltz
9f6b71c400 deploy(intranet): roll remotedesktop api ref image 2026-06-11 19:23:07 -05:00
Andrew Stoltz
26f90acf1f deploy(intranet): roll platform badge image 2026-06-11 18:59:25 -05:00
Andrew Stoltz
ab00d22657 deploy(worldbuilder): roll route fix image 2026-06-11 16:17:17 -05:00
Andrew Stoltz
c1a43c64b3 deploy(worldbuilder): enable live gpu backend 2026-06-11 16:05:40 -05:00
Andrew Stoltz
7103658342 deploy(intranet): roll regroup follow-through image 2026-06-11 15:58:12 -05:00
Andrew Stoltz
6b12b2bb49 deploy(intranet): roll operator depth image 2026-06-11 15:06:08 -05:00
Andrew Stoltz
a4c9e44a36 fix(runners): disable self-update in k8s pods 2026-06-11 14:57:00 -05:00
Andrew Stoltz
9674a9555e deploy(intranet): roll article depth image 2026-06-11 14:27:24 -05:00
Andrew Stoltz
318252da76 deploy(devicemgmt): roll healthz web image 2026-06-11 14:27:14 -05:00
Andrew Stoltz
3798b7c00e deploy(devicemgmt): enable web runtime 2026-06-11 14:21:51 -05:00
Andrew Stoltz
2707f1ae1e deploy(intranet): roll regroup catalog image 2026-06-11 12:32:40 -05:00
Andrew Stoltz
a7e7c1ae72 deploy(intranet): roll content quality image 2026-06-10 20:13:56 -05:00
Andrew Stoltz
c8df788d72 deploy(intranet): roll webmail health image 2026-06-10 19:15:44 -05:00
Andrew Stoltz
b1a4d7120e deploy(intranet): roll registry health image 2026-06-10 19:10:31 -05:00
Andrew Stoltz
4b57b8e939 fix(intranet): align search deploy config 2026-06-10 19:01:08 -05:00
Andrew Stoltz
70f36c546b deploy(intranet): roll hardening image 2026-06-10 18:58:09 -05:00
Robot
cdbddd71af fc-devicemgmt: stage fresh web image v20260610-bluejay (master 1614fce)
Image built from current DM master (network/BT command plane + Blue Jay
UI.Components restyle) and imported on rke2-server + rke2-agent1.
Deployment stays parked at replicas: 0 — gap 1 is wider than previously
noted (the fc-mysql Operator deployment itself is absent, so instance
CRDs would not reconcile) and gap 2 (1P runtime item) is still open.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-10 16:57:43 -05:00
15 changed files with 473 additions and 337 deletions

View File

@@ -2,22 +2,6 @@
Infrastructure manifests for ArgoCD. An `ApplicationSet` in `argocd` namespace watches the `apps/*` directories in this repo and creates one `Application` per subdir (prefixed `infra-<name>`). Infrastructure manifests for ArgoCD. An `ApplicationSet` in `argocd` namespace watches the `apps/*` directories in this repo and creates one `Application` per subdir (prefixed `infra-<name>`).
## Root GitOps ApplicationSet
`argocd/applicationset-bluejay-infra.yaml` is the root of this GitOps tree, but
it is **NOT self-managed** by ArgoCD. Apply it manually when the root generator
or sync policy changes:
```bash
kubectl -n argocd apply -f argocd/applicationset-bluejay-infra.yaml
```
Keep the per-StatefulSet `ignoreDifferences` entries in that file synced with
the live ApplicationSet. They intentionally cover `zabbix-postgres`,
`guac-mysql`, `matrix-postgres`, and `authentik-postgres` so ArgoCD does not
loop forever on server-side-apply `volumeClaimTemplates` status drift. Every new
StatefulSet with `volumeClaimTemplates` needs its own entry appended.
## Adding a new service to the cluster ## Adding a new service to the cluster
Follow these steps in order. **Step 1 must run before step 3** — if you skip it, cert-manager HTTP-01 will silently fail for ~2h per cert (exponential backoff) until someone diagnoses the DNS. Follow these steps in order. **Step 1 must run before step 3** — if you skip it, cert-manager HTTP-01 will silently fail for ~2h per cert (exponential backoff) until someone diagnoses the DNS.

View File

@@ -14,6 +14,20 @@
# cluster-rebuild repeatability. See # cluster-rebuild repeatability. See
# feedback_networkpolicies_belong_in_bluejay_infra.md. # feedback_networkpolicies_belong_in_bluejay_infra.md.
--- ---
# OIDC client secret for the RemoteDesktop end-user sign-in (fleet regroup L9,
# 2026-06-12). The Authentik provider `remotedesktop` already exists; the 1P item
# `remotedesktop-oidc-client` (vault IAmWorkin) carries issuer_url / client_id /
# client_secret, and the 1Password operator mints the same-named K8s Secret that
# k8s/web-deployment.yaml (FlowerCore.RemoteDesktop repo) consumes with
# optional:true. Gate stays OFF (Q-RD-16) — this is flip-READINESS only.
apiVersion: onepassword.com/v1
kind: OnePasswordItem
metadata:
name: remotedesktop-oidc-client
namespace: fc-desktop
spec:
itemPath: "vaults/IAmWorkin/items/remotedesktop-oidc-client"
---
apiVersion: cert-manager.io/v1 apiVersion: cert-manager.io/v1
kind: Certificate kind: Certificate
metadata: metadata:

View File

@@ -11,7 +11,7 @@ metadata:
flowercore.io/created-by: bluejay-infra flowercore.io/created-by: bluejay-infra
rules: rules:
- apiGroups: - apiGroups:
- devices.flowercore.io - flowercore.io
resources: resources:
- '*' - '*'
verbs: verbs:
@@ -23,7 +23,7 @@ rules:
- patch - patch
- delete - delete
- apiGroups: - apiGroups:
- devices.flowercore.io - flowercore.io
resources: resources:
- devices/status - devices/status
- devices/finalizers - devices/finalizers
@@ -33,6 +33,8 @@ rules:
- devicepolicies/finalizers - devicepolicies/finalizers
- remotecommands/status - remotecommands/status
- remotecommands/finalizers - remotecommands/finalizers
- desiredstatedocuments/status
- desiredstatedocuments/finalizers
verbs: verbs:
- get - get
- update - update

View File

@@ -0,0 +1,186 @@
# FlowerCore.DeviceManagement CRDs.
#
# These CRDs match the current operator annotations:
# [KubernetesEntity(Group = "flowercore.io", ApiVersion = "v1alpha1", ...)]
# Keep the schemas intentionally permissive until the DeviceManagement operator
# grows enforced CRD validation.
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
name: devices.flowercore.io
labels:
app.kubernetes.io/name: fc-devicemgmt-operator
app.kubernetes.io/component: operator
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
spec:
group: flowercore.io
scope: Namespaced
names:
plural: devices
singular: device
kind: Device
listKind: DeviceList
versions:
- name: v1alpha1
served: true
storage: true
subresources:
status: {}
schema:
openAPIV3Schema:
type: object
properties:
spec:
type: object
x-kubernetes-preserve-unknown-fields: true
status:
type: object
x-kubernetes-preserve-unknown-fields: true
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
name: devicegroups.flowercore.io
labels:
app.kubernetes.io/name: fc-devicemgmt-operator
app.kubernetes.io/component: operator
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
spec:
group: flowercore.io
scope: Namespaced
names:
plural: devicegroups
singular: devicegroup
kind: DeviceGroup
listKind: DeviceGroupList
versions:
- name: v1alpha1
served: true
storage: true
subresources:
status: {}
schema:
openAPIV3Schema:
type: object
properties:
spec:
type: object
x-kubernetes-preserve-unknown-fields: true
status:
type: object
x-kubernetes-preserve-unknown-fields: true
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
name: devicepolicies.flowercore.io
labels:
app.kubernetes.io/name: fc-devicemgmt-operator
app.kubernetes.io/component: operator
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
spec:
group: flowercore.io
scope: Namespaced
names:
plural: devicepolicies
singular: devicepolicy
kind: DevicePolicy
listKind: DevicePolicyList
versions:
- name: v1alpha1
served: true
storage: true
subresources:
status: {}
schema:
openAPIV3Schema:
type: object
properties:
spec:
type: object
x-kubernetes-preserve-unknown-fields: true
status:
type: object
x-kubernetes-preserve-unknown-fields: true
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
name: remotecommands.flowercore.io
labels:
app.kubernetes.io/name: fc-devicemgmt-operator
app.kubernetes.io/component: operator
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
spec:
group: flowercore.io
scope: Namespaced
names:
plural: remotecommands
singular: remotecommand
kind: RemoteCommand
listKind: RemoteCommandList
versions:
- name: v1alpha1
served: true
storage: true
subresources:
status: {}
schema:
openAPIV3Schema:
type: object
properties:
spec:
type: object
x-kubernetes-preserve-unknown-fields: true
status:
type: object
x-kubernetes-preserve-unknown-fields: true
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
name: desiredstatedocuments.flowercore.io
labels:
app.kubernetes.io/name: fc-devicemgmt-operator
app.kubernetes.io/component: operator
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
spec:
group: flowercore.io
scope: Namespaced
names:
plural: desiredstatedocuments
singular: desiredstatedocument
kind: DesiredStateDocument
listKind: DesiredStateDocumentList
versions:
- name: v1alpha1
served: true
storage: true
subresources:
status: {}
schema:
openAPIV3Schema:
type: object
properties:
spec:
type: object
x-kubernetes-preserve-unknown-fields: true
status:
type: object
x-kubernetes-preserve-unknown-fields: true

View File

@@ -5,21 +5,35 @@
# exist yet; import localhost/fc-devicemgmt-web:<tag> to all schedulable RKE2 # exist yet; import localhost/fc-devicemgmt-web:<tag> to all schedulable RKE2
# nodes before letting ArgoCD sync a live rollout. # nodes before letting ArgoCD sync a live rollout.
# #
# SCALED TO 0 — 2026-05-19 morning-routine cleanup. # LIVE — 2026-06-11 DeviceManagement product-host enablement.
# The Web pod cannot start until TWO upstream gaps close: # The current DeviceManagement Web source is SQLite-backed in Program.cs, so
# 1. MySQL DB instance `flowercore_devicemgmt` (user `fc_devicemgmt`) is # Phase 1 production uses a Longhorn RWO PVC at /data/devicemgmt.db. The
# provisioned via fc-mysql Manager. The cluster currently has ZERO # 1Password runtime item stays mounted through env for future MySQL/API-key
# MySqlInstanceCrds and no `mysql.fc-mysql.svc:3306` Service, so the # cutover, but MySQL is not required for this first product-host rollout.
# deployment-web container env `FlowerCore__Database__Host=mysql.fc-mysql.svc` # Image v20260611-healthz is built from FlowerCore.DeviceManagement master
# points at nothing. Provision via the fc-mysql Manager UI/REST/MCP. # 3c15f3b, which adds the /healthz alias required by fleet monitoring.
# 2. 1Password vault item `IAmWorkin/FlowerCore DeviceManagement Runtime` ---
# with 5 fields (DB-Password, mtls-ca.pem, mtls-client.crt, mtls-client.key, apiVersion: v1
# mtls-chain.pem) — see apps/fc-devicemgmt/1password-item.yaml. Mint mTLS kind: PersistentVolumeClaim
# from step-ca-agent ClusterIssuer per ADR-126; DB-Password must match the metadata:
# password configured for the MySQL user. name: fc-devicemgmt-web-data
# Re-enable: change replicas back to 2 after both gaps close. The image tag namespace: fc-devicemgmt
# in this file (v20260512-cx5) MAY also need a refresh — it predates the labels:
# Sprint 34 Cl-3 operator fix; Web may have an analogous bug. app: fc-devicemgmt-web
app.kubernetes.io/name: fc-devicemgmt-web
app.kubernetes.io/component: web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
spec:
accessModes:
- ReadWriteOnce
storageClassName: longhorn
resources:
requests:
storage: 1Gi
---
apiVersion: apps/v1 apiVersion: apps/v1
kind: Deployment kind: Deployment
metadata: metadata:
@@ -36,7 +50,7 @@ metadata:
annotations: annotations:
flowercore.io/traceability-standard: k8s-pod-ownership-and-traceability-standard flowercore.io/traceability-standard: k8s-pod-ownership-and-traceability-standard
spec: spec:
replicas: 0 replicas: 1
revisionHistoryLimit: 3 revisionHistoryLimit: 3
selector: selector:
matchLabels: matchLabels:
@@ -64,7 +78,7 @@ spec:
fsGroupChangePolicy: OnRootMismatch fsGroupChangePolicy: OnRootMismatch
containers: containers:
- name: web - name: web
image: localhost/fc-devicemgmt-web:v20260512-cx5 image: localhost/fc-devicemgmt-web:v20260611-healthz
imagePullPolicy: Never imagePullPolicy: Never
ports: ports:
- name: http - name: http
@@ -77,29 +91,21 @@ spec:
value: "Production" value: "Production"
- name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT - name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
value: "false" value: "false"
- name: HOME
value: "/data"
- name: FlowerCore__Service__Name - name: FlowerCore__Service__Name
value: "FlowerCore.DeviceManagement.Web" value: "FlowerCore.DeviceManagement.Web"
- name: FlowerCore__DeviceManagement__DefaultTenantId - name: FlowerCore__DeviceManagement__DefaultTenantId
value: "system" value: "system"
- name: FlowerCore__Database__Provider - name: FlowerCore__Database__Provider
value: "MySql" value: "Sqlite"
- name: FlowerCore__Database__Host - name: FlowerCore__Database__ConnectionStrings__Sqlite
value: "mysql.fc-mysql.svc" value: "Data Source=/data/devicemgmt.db"
- name: FlowerCore__Database__Database
value: "flowercore_devicemgmt"
- name: FlowerCore__Database__User
value: "fc_devicemgmt"
- name: FlowerCore__Database__Password - name: FlowerCore__Database__Password
valueFrom: valueFrom:
secretKeyRef: secretKeyRef:
name: fc-devicemgmt-runtime name: fc-devicemgmt-runtime
key: DB-Password key: DB-Password
- name: FlowerCore__DeviceManagement__AgentMtls__CaPath
value: "/secrets/devicemgmt-mtls/mtls-ca.pem"
- name: FlowerCore__DeviceManagement__AgentMtls__ClientCertificatePath
value: "/secrets/devicemgmt-mtls/mtls-client.crt"
- name: FlowerCore__DeviceManagement__AgentMtls__ClientKeyPath
value: "/secrets/devicemgmt-mtls/mtls-client.key"
- name: FlowerCore__EventBus__Redis__Configuration - name: FlowerCore__EventBus__Redis__Configuration
value: "redis.fc-redis.svc:6379" value: "redis.fc-redis.svc:6379"
resources: resources:
@@ -136,19 +142,17 @@ spec:
drop: drop:
- ALL - ALL
volumeMounts: volumeMounts:
- name: data
mountPath: /data
- name: tmp - name: tmp
mountPath: /tmp mountPath: /tmp
- name: logs - name: logs
mountPath: /app/logs mountPath: /app/logs
- name: devicemgmt-mtls
mountPath: /secrets/devicemgmt-mtls
readOnly: true
volumes: volumes:
- name: data
persistentVolumeClaim:
claimName: fc-devicemgmt-web-data
- name: tmp - name: tmp
emptyDir: {} emptyDir: {}
- name: logs - name: logs
emptyDir: {} emptyDir: {}
- name: devicemgmt-mtls
secret:
secretName: fc-devicemgmt-runtime
defaultMode: 0400

View File

@@ -48,7 +48,7 @@ data:
{ {
"FlowerCore": { "FlowerCore": {
"Auth": { "Auth": {
"Enabled": true, "Enabled": false,
"Oidc": { "Oidc": {
"Enabled": true, "Enabled": true,
"Audience": "dns", "Audience": "dns",
@@ -111,7 +111,7 @@ spec:
fsGroup: 1654 fsGroup: 1654
containers: containers:
- name: dns-web - name: dns-web
image: localhost/fc-dns-web:v20260604-oidc-proper image: localhost/fc-dns-web:v20260612-l4dns-a5d2849
imagePullPolicy: Never imagePullPolicy: Never
securityContext: securityContext:
readOnlyRootFilesystem: true readOnlyRootFilesystem: true
@@ -149,7 +149,7 @@ spec:
key: client_secret key: client_secret
optional: true optional: true
- name: FlowerCore__Auth__Enabled - name: FlowerCore__Auth__Enabled
value: "true" value: "false"
- name: FlowerCore__Auth__Oidc__Enabled - name: FlowerCore__Auth__Oidc__Enabled
value: "true" value: "true"
- name: FlowerCore__Auth__Oidc__Audience - name: FlowerCore__Auth__Oidc__Audience
@@ -303,7 +303,7 @@ spec:
fsGroup: 1654 fsGroup: 1654
containers: containers:
- name: dns-acme-webhook - name: dns-acme-webhook
image: localhost/fc-dns-acme-webhook:v202604290845 image: localhost/fc-dns-acme-webhook:v20260612-l4dns-a5d2849
imagePullPolicy: Never imagePullPolicy: Never
securityContext: securityContext:
readOnlyRootFilesystem: true readOnlyRootFilesystem: true

View File

@@ -535,7 +535,7 @@ spec:
fsGroupChangePolicy: OnRootMismatch fsGroupChangePolicy: OnRootMismatch
containers: containers:
- name: web - name: web
image: localhost/fc-ttsreader-web:v20260603-s54cx14-pr29-schema image: localhost/fc-ttsreader-web:v20260612-readalong-corrections
imagePullPolicy: Never imagePullPolicy: Never
ports: ports:
- containerPort: 5217 - containerPort: 5217

View File

@@ -12,6 +12,8 @@ All repo-scoped Linux runners use:
- `ACCESS_TOKEN` from the `github-runner-token` Secret - `ACCESS_TOKEN` from the `github-runner-token` Secret
- `RUN_AS_ROOT=false` - `RUN_AS_ROOT=false`
- `EPHEMERAL=true` - `EPHEMERAL=true`
- `DISABLE_AUTO_UPDATE=true` so the runner does not self-update and exit inside
the immutable Kubernetes pod
- `LABELS=self-hosted,linux,fc-build-linux` - `LABELS=self-hosted,linux,fc-build-linux`
- writable non-root paths under `/home/runner` for .NET, NuGet, XDG cache, and - writable non-root paths under `/home/runner` for .NET, NuGet, XDG cache, and
Actions tool cache Actions tool cache
@@ -24,12 +26,6 @@ original Longhorn ReadWriteOnce NuGet PVC. Every other repo-scoped runner uses
two replicas with per-pod `emptyDir` caches. That is the safe backlog-drain two replicas with per-pod `emptyDir` caches. That is the safe backlog-drain
strategy: no two pods share one RWO PVC. strategy: no two pods share one RWO PVC.
Ephemeral runner pods are expected to register, run one job, deregister, and
exit so the Deployment starts a fresh pod for the next registration token. A
small amount of exit-1/restart churn from token-expiry or no-work windows is
accepted operational noise as long as jobs are not stuck queued and the
repo-scoped runner-offline alerts stay quiet.
Sprint 32 final long-tail wave adds 16 two-replica Deployments: Sprint 32 final long-tail wave adds 16 two-replica Deployments:
`FlowerCore.Knowledge`, `FlowerCore.LlmBridge`, `FlowerCore.Media`, `FlowerCore.Knowledge`, `FlowerCore.LlmBridge`, `FlowerCore.Media`,
`FlowerCore.Presentations`, `FlowerCore.RemoteDesktop`, `FlowerCore.DNS`, `FlowerCore.Presentations`, `FlowerCore.RemoteDesktop`, `FlowerCore.DNS`,
@@ -137,3 +133,7 @@ from GitHub Actions and verify it lands on an `rke2-linux-*` runner.
value does not change. value does not change.
- `Multi-Attach` volume error: only the Common runner uses a RWO PVC and it must - `Multi-Attach` volume error: only the Common runner uses a RWO PVC and it must
stay single-replica. New multi-replica runners use `emptyDir`. stay single-replica. New multi-replica runners use `emptyDir`.
- Runner pods repeatedly registering, downloading a newer Actions runner, then
exiting with code 4: verify `DISABLE_AUTO_UPDATE=true` is present. The image
translates that into `config.sh --disableupdate`; without it, the Deployment
controller sees the expected self-update exit as CrashLoopBackOff.

View File

@@ -195,6 +195,11 @@ spec:
# fresh registration occurs. Prevents stale runner accumulation. # fresh registration occurs. Prevents stale runner accumulation.
- name: EPHEMERAL - name: EPHEMERAL
value: "true" value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
# Labels used by workflow files: runs-on: [self-hosted, linux, fc-build-linux] # Labels used by workflow files: runs-on: [self-hosted, linux, fc-build-linux]
- name: LABELS - name: LABELS
value: "self-hosted,linux,fc-build-linux" value: "self-hosted,linux,fc-build-linux"
@@ -366,6 +371,11 @@ spec:
value: "/tmp/runner/work" value: "/tmp/runner/work"
- name: EPHEMERAL - name: EPHEMERAL
value: "true" value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS - name: LABELS
value: "self-hosted,linux,fc-build-linux" value: "self-hosted,linux,fc-build-linux"
- name: HOME - name: HOME
@@ -504,6 +514,11 @@ spec:
value: "/tmp/runner/work" value: "/tmp/runner/work"
- name: EPHEMERAL - name: EPHEMERAL
value: "true" value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS - name: LABELS
value: "self-hosted,linux,fc-build-linux" value: "self-hosted,linux,fc-build-linux"
- name: HOME - name: HOME
@@ -636,6 +651,11 @@ spec:
value: "/tmp/runner/work" value: "/tmp/runner/work"
- name: EPHEMERAL - name: EPHEMERAL
value: "true" value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS - name: LABELS
value: "self-hosted,linux,fc-build-linux" value: "self-hosted,linux,fc-build-linux"
- name: HOME - name: HOME
@@ -768,6 +788,11 @@ spec:
value: "/tmp/runner/work" value: "/tmp/runner/work"
- name: EPHEMERAL - name: EPHEMERAL
value: "true" value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS - name: LABELS
value: "self-hosted,linux,fc-build-linux" value: "self-hosted,linux,fc-build-linux"
- name: HOME - name: HOME
@@ -900,6 +925,11 @@ spec:
value: "/tmp/runner/work" value: "/tmp/runner/work"
- name: EPHEMERAL - name: EPHEMERAL
value: "true" value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS - name: LABELS
value: "self-hosted,linux,fc-build-linux" value: "self-hosted,linux,fc-build-linux"
- name: HOME - name: HOME
@@ -1035,6 +1065,11 @@ spec:
value: "/tmp/runner/work" value: "/tmp/runner/work"
- name: EPHEMERAL - name: EPHEMERAL
value: "true" value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS - name: LABELS
value: "self-hosted,linux,fc-build-linux" value: "self-hosted,linux,fc-build-linux"
- name: HOME - name: HOME
@@ -1167,6 +1202,11 @@ spec:
value: "/tmp/runner/work" value: "/tmp/runner/work"
- name: EPHEMERAL - name: EPHEMERAL
value: "true" value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS - name: LABELS
value: "self-hosted,linux,fc-build-linux" value: "self-hosted,linux,fc-build-linux"
- name: HOME - name: HOME
@@ -1299,6 +1339,11 @@ spec:
value: "/tmp/runner/work" value: "/tmp/runner/work"
- name: EPHEMERAL - name: EPHEMERAL
value: "true" value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS - name: LABELS
value: "self-hosted,linux,fc-build-linux" value: "self-hosted,linux,fc-build-linux"
- name: HOME - name: HOME
@@ -1431,6 +1476,11 @@ spec:
value: "/tmp/runner/work" value: "/tmp/runner/work"
- name: EPHEMERAL - name: EPHEMERAL
value: "true" value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS - name: LABELS
value: "self-hosted,linux,fc-build-linux" value: "self-hosted,linux,fc-build-linux"
- name: HOME - name: HOME
@@ -1565,6 +1615,11 @@ spec:
value: "/tmp/runner/work" value: "/tmp/runner/work"
- name: EPHEMERAL - name: EPHEMERAL
value: "true" value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS - name: LABELS
value: "self-hosted,linux,fc-build-linux" value: "self-hosted,linux,fc-build-linux"
- name: HOME - name: HOME
@@ -1699,6 +1754,11 @@ spec:
value: "/tmp/runner/work" value: "/tmp/runner/work"
- name: EPHEMERAL - name: EPHEMERAL
value: "true" value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS - name: LABELS
value: "self-hosted,linux,fc-build-linux" value: "self-hosted,linux,fc-build-linux"
- name: HOME - name: HOME
@@ -1838,6 +1898,11 @@ spec:
value: "/tmp/runner/work" value: "/tmp/runner/work"
- name: EPHEMERAL - name: EPHEMERAL
value: "true" value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS - name: LABELS
value: "self-hosted,linux,fc-build-linux" value: "self-hosted,linux,fc-build-linux"
- name: HOME - name: HOME
@@ -1972,6 +2037,11 @@ spec:
value: "/tmp/runner/work" value: "/tmp/runner/work"
- name: EPHEMERAL - name: EPHEMERAL
value: "true" value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS - name: LABELS
value: "self-hosted,linux,fc-build-linux" value: "self-hosted,linux,fc-build-linux"
- name: HOME - name: HOME
@@ -2106,6 +2176,11 @@ spec:
value: "/tmp/runner/work" value: "/tmp/runner/work"
- name: EPHEMERAL - name: EPHEMERAL
value: "true" value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS - name: LABELS
value: "self-hosted,linux,fc-build-linux" value: "self-hosted,linux,fc-build-linux"
- name: HOME - name: HOME
@@ -2240,6 +2315,11 @@ spec:
value: "/tmp/runner/work" value: "/tmp/runner/work"
- name: EPHEMERAL - name: EPHEMERAL
value: "true" value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS - name: LABELS
value: "self-hosted,linux,fc-build-linux" value: "self-hosted,linux,fc-build-linux"
- name: HOME - name: HOME
@@ -2373,6 +2453,11 @@ spec:
value: "/tmp/runner/work" value: "/tmp/runner/work"
- name: EPHEMERAL - name: EPHEMERAL
value: "true" value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS - name: LABELS
value: "self-hosted,linux,fc-build-linux" value: "self-hosted,linux,fc-build-linux"
- name: HOME - name: HOME
@@ -2507,6 +2592,11 @@ spec:
value: "/tmp/runner/work" value: "/tmp/runner/work"
- name: EPHEMERAL - name: EPHEMERAL
value: "true" value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS - name: LABELS
value: "self-hosted,linux,fc-build-linux" value: "self-hosted,linux,fc-build-linux"
- name: HOME - name: HOME
@@ -2640,6 +2730,11 @@ spec:
value: "/tmp/runner/work" value: "/tmp/runner/work"
- name: EPHEMERAL - name: EPHEMERAL
value: "true" value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS - name: LABELS
value: "self-hosted,linux,fc-build-linux" value: "self-hosted,linux,fc-build-linux"
- name: HOME - name: HOME
@@ -2773,6 +2868,11 @@ spec:
value: "/tmp/runner/work" value: "/tmp/runner/work"
- name: EPHEMERAL - name: EPHEMERAL
value: "true" value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS - name: LABELS
value: "self-hosted,linux,fc-build-linux" value: "self-hosted,linux,fc-build-linux"
- name: HOME - name: HOME
@@ -2906,6 +3006,11 @@ spec:
value: "/tmp/runner/work" value: "/tmp/runner/work"
- name: EPHEMERAL - name: EPHEMERAL
value: "true" value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS - name: LABELS
value: "self-hosted,linux,fc-build-linux" value: "self-hosted,linux,fc-build-linux"
- name: HOME - name: HOME
@@ -3039,6 +3144,11 @@ spec:
value: "/tmp/runner/work" value: "/tmp/runner/work"
- name: EPHEMERAL - name: EPHEMERAL
value: "true" value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS - name: LABELS
value: "self-hosted,linux,fc-build-linux" value: "self-hosted,linux,fc-build-linux"
- name: HOME - name: HOME
@@ -3172,6 +3282,11 @@ spec:
value: "/tmp/runner/work" value: "/tmp/runner/work"
- name: EPHEMERAL - name: EPHEMERAL
value: "true" value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS - name: LABELS
value: "self-hosted,linux,fc-build-linux" value: "self-hosted,linux,fc-build-linux"
- name: HOME - name: HOME
@@ -3306,6 +3421,11 @@ spec:
value: "/tmp/runner/work" value: "/tmp/runner/work"
- name: EPHEMERAL - name: EPHEMERAL
value: "true" value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS - name: LABELS
value: "self-hosted,linux,fc-build-linux" value: "self-hosted,linux,fc-build-linux"
- name: HOME - name: HOME
@@ -3440,6 +3560,11 @@ spec:
value: "/tmp/runner/work" value: "/tmp/runner/work"
- name: EPHEMERAL - name: EPHEMERAL
value: "true" value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS - name: LABELS
value: "self-hosted,linux,fc-build-linux" value: "self-hosted,linux,fc-build-linux"
- name: HOME - name: HOME
@@ -3574,6 +3699,11 @@ spec:
value: "/tmp/runner/work" value: "/tmp/runner/work"
- name: EPHEMERAL - name: EPHEMERAL
value: "true" value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS - name: LABELS
value: "self-hosted,linux,fc-build-linux" value: "self-hosted,linux,fc-build-linux"
- name: HOME - name: HOME
@@ -3708,6 +3838,11 @@ spec:
value: "/tmp/runner/work" value: "/tmp/runner/work"
- name: EPHEMERAL - name: EPHEMERAL
value: "true" value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS - name: LABELS
value: "self-hosted,linux,fc-build-linux" value: "self-hosted,linux,fc-build-linux"
- name: HOME - name: HOME
@@ -3842,6 +3977,11 @@ spec:
value: "/tmp/runner/work" value: "/tmp/runner/work"
- name: EPHEMERAL - name: EPHEMERAL
value: "true" value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS - name: LABELS
value: "self-hosted,linux,fc-build-linux" value: "self-hosted,linux,fc-build-linux"
- name: HOME - name: HOME
@@ -3975,6 +4115,11 @@ spec:
value: "/tmp/runner/work" value: "/tmp/runner/work"
- name: EPHEMERAL - name: EPHEMERAL
value: "true" value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS - name: LABELS
value: "self-hosted,linux,fc-build-linux" value: "self-hosted,linux,fc-build-linux"
- name: HOME - name: HOME
@@ -4109,6 +4254,11 @@ spec:
value: "/tmp/runner/work" value: "/tmp/runner/work"
- name: EPHEMERAL - name: EPHEMERAL
value: "true" value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS - name: LABELS
value: "self-hosted,linux,fc-build-linux" value: "self-hosted,linux,fc-build-linux"
- name: HOME - name: HOME
@@ -4247,6 +4397,11 @@ spec:
value: "/tmp/runner/work" value: "/tmp/runner/work"
- name: EPHEMERAL - name: EPHEMERAL
value: "true" value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS - name: LABELS
value: "self-hosted,linux,fc-build-linux" value: "self-hosted,linux,fc-build-linux"
- name: HOME - name: HOME
@@ -4386,6 +4541,11 @@ spec:
value: "/tmp/runner/work" value: "/tmp/runner/work"
- name: EPHEMERAL - name: EPHEMERAL
value: "true" value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS - name: LABELS
value: "self-hosted,linux,fc-build-linux" value: "self-hosted,linux,fc-build-linux"
- name: HOME - name: HOME
@@ -4521,6 +4681,11 @@ spec:
value: "/tmp/runner/work" value: "/tmp/runner/work"
- name: EPHEMERAL - name: EPHEMERAL
value: "true" value: "true"
# The runner image must not self-update inside an immutable
# Kubernetes pod. Without this, GitHub runner auto-update exits
# with code 4 and the Deployment falls into CrashLoopBackOff.
- name: DISABLE_AUTO_UPDATE
value: "true"
- name: LABELS - name: LABELS
value: "self-hosted,linux,fc-build-linux" value: "self-hosted,linux,fc-build-linux"
- name: HOME - name: HOME

View File

@@ -46,7 +46,7 @@ spec:
spec: spec:
containers: containers:
- name: intranet-web - name: intranet-web
image: localhost/fc-intranet-web:v20260531-ttsreader-bridge image: localhost/fc-intranet-web:v20260612-screenshot-metadata
imagePullPolicy: Never imagePullPolicy: Never
ports: ports:
- containerPort: 5300 - containerPort: 5300
@@ -60,14 +60,17 @@ spec:
# ≈ 9 hours. BLUEJAY-WS GPU (R9700, 32GB VRAM) does the same work # ≈ 9 hours. BLUEJAY-WS GPU (R9700, 32GB VRAM) does the same work
# in minutes. Memory: feedback_pi5_nomic_embed_slow. # in minutes. Memory: feedback_pi5_nomic_embed_slow.
- name: IntranetSearch__OllamaBaseUrl - name: IntranetSearch__OllamaBaseUrl
value: "http://10.0.56.20:11434" value: "http://edge1.iamworkin.lan:11434"
# Sprint E Phase 2α — JSON-file-backed PageReadingOverride persistence # External Notes corpus roots are not mounted in the live pod today.
# on the writable PVC at /data. Without this env var the # Keep the curated/workflow docs directory active without logging
# intranet falls back to the in-memory store (loses state on # repeated /srv/flowercore-notes missing-root warnings.
# pod restart). Master's PageReadingOverrideOptions binds - name: IntranetSearch__Enabled
# PageReadingOverrides:FilePath. value: "false"
- name: PageReadingOverrides__FilePath # Page-reading override SQLite persistence on the writable PVC at
value: "/data/page-reading-overrides.json" # /data. This backs pronunciation, notes, corrections, and
# page-profile metadata across pod restarts.
- name: PageReadingOverrides__DatabasePath
value: "/data/page-reading-overrides.db"
- name: KnowledgeFleetSearch__BaseUrl - name: KnowledgeFleetSearch__BaseUrl
value: "https://knowledge.iamworkin.lan" value: "https://knowledge.iamworkin.lan"
- name: KnowledgeFleetSearch__ApiKey - name: KnowledgeFleetSearch__ApiKey

View File

@@ -843,9 +843,7 @@ data:
rules: rules:
- alert: PiManagerDown - alert: PiManagerDown
expr: up{job="pimanager-app"} == 0 expr: up{job="pimanager-app"} == 0
# Sprint 67: delayed behind NodeDown's critical page so a powered-off for: 3m
# Pi does not create the first duplicate page for the same host.
for: 8m
labels: labels:
severity: warning severity: warning
annotations: annotations:
@@ -1244,58 +1242,6 @@ data:
summary: "Marquee animation duration drifting > 10% on {{ $labels.renderer }} ({{ $labels.phase }})" summary: "Marquee animation duration drifting > 10% on {{ $labels.renderer }} ({{ $labels.phase }})"
description: "Median observed cycle duration deviates from target DurationMs by >10%. Could indicate browser tab throttling, GPU pressure, or phase-advancement bug." description: "Median observed cycle duration deviates from target DurationMs by >10%. Could indicate browser tab throttling, GPU pressure, or phase-advancement bug."
# ============================================================
# Update Center public-edge probes
# Live-mirrored from FlowerCore.Notes/scripts/monitoring/alerts.yml.
# This K8s ConfigMap is the future migration target; live Prometheus
# still reads the canonical Notes file from noc1 Podman.
# ============================================================
- name: update_center
rules:
# Critical only when the edge is genuinely unreachable. A Cloudflare
# HTTP 429 means the prober hit a rate-limit, not that real clients
# are down, so the warning rule below owns that signal.
- alert: UpdateCenterPublicEdgeDown
expr: |
(probe_success{job="probe-update-center-public-edge"} == 0)
unless on(instance)
(probe_http_status_code{job="probe-update-center-public-edge"} == 429)
for: 10m
labels:
severity: critical
service: update-center
alert_channel: irc
annotations:
summary: "Update Center public edge probe failed for {{ $labels.instance }}"
description: >-
The external probe for {{ $labels.instance }} failed for 10 minutes with a
non-2xx status that is not a rate-limit. Public Update Center clients may be
unable to fetch manifest schema metadata through Cloudflare.
runbook: >-
1. curl -sk https://{{ $labels.instance }}/api/v1/manifests/_schema
2. Verify Cloudflare DNS record is proxied and targets the current public edge IP
3. kubectl -n fc-updater get ingressroute updatecenter-web-public secret cf-origin-flowercore-io
4. Check Traefik logs for Method() or TLS secret errors
- alert: UpdateCenterPublicEdgeRateLimited
expr: probe_http_status_code{job="probe-update-center-public-edge"} == 429
for: 15m
labels:
severity: warning
service: update-center
alert_channel: irc
annotations:
summary: "Cloudflare is rate-limiting (HTTP 429) the public-edge probe for {{ $labels.instance }}"
description: >-
The blackbox prober receives HTTP 429 from Cloudflare for {{ $labels.instance }}
while the origin is healthy. This is a Cloudflare rate-limit / WAF condition on
the public hostname, not an outage.
runbook: >-
1. curl -sk https://{{ $labels.instance }}/api/v1/manifests/_schema (expect 200 from a normal client)
2. Review Cloudflare rate-limit / WAF rules for the hostname; the 5m-cadence prober is tripping a 429
3. Add a Cloudflare rate-limit exception for the prober source IP or the /api/v1/manifests/_schema path
4. Confirm whether the singular host update.flowercore.io is still required, or only updates.flowercore.io
# ============================================================================= # =============================================================================
# ConfigMap: Blackbox Exporter Configuration # ConfigMap: Blackbox Exporter Configuration
# ============================================================================= # =============================================================================

View File

@@ -12,28 +12,27 @@ Source: `D:\git\FlowerCore\FlowerCore.WorldBuilder` (master)
in pfSense Unbound before this manifest is applied, or cert-manager in pfSense Unbound before this manifest is applied, or cert-manager
HTTP-01 silently exponential-backs-off ~2h. HTTP-01 silently exponential-backs-off ~2h.
Memory: `feedback_pfsense_dns_required_for_acme`. Memory: `feedback_pfsense_dns_required_for_acme`.
2. **Image import to ALL RKE2 nodes** — pod can schedule to any of 2. **Image import to ALL Ready RKE2 nodes** — pod can currently schedule to
`rke2-server` (10.0.56.11), `rke2-agent1` (10.0.56.12), `rke2-server` (10.0.56.11) and `rke2-agent1` (10.0.56.12). Build with:
`rke2-agent2` (10.0.56.13). Build with:
```bash ```bash
bash deploy/build.sh # in FlowerCore.WorldBuilder repo bash deploy/build.sh # in FlowerCore.WorldBuilder repo
podman save localhost/fc-worldbuilder:v<TAG> -o /tmp/fc-worldbuilder-v<TAG>.tar mkdir -p artifacts/deploy
for h in 10.0.56.11 10.0.56.12 10.0.56.13; do podman save localhost/fc-worldbuilder:v<TAG> -o artifacts/deploy/fc-worldbuilder-v<TAG>.tar
scp /tmp/fc-worldbuilder-v<TAG>.tar fcadmin@$h:/tmp/ for h in 10.0.56.11 10.0.56.12; do
ssh fcadmin@$h "mkdir -p /home/fcadmin/.fcv"
scp artifacts/deploy/fc-worldbuilder-v<TAG>.tar fcadmin@$h:/home/fcadmin/.fcv/
ssh fcadmin@$h \ ssh fcadmin@$h \
"sudo /var/lib/rancher/rke2/bin/ctr -a /run/k3s/containerd/containerd.sock \ "sudo /var/lib/rancher/rke2/bin/ctr -a /run/k3s/containerd/containerd.sock \
-n k8s.io images import /tmp/fc-worldbuilder-v<TAG>.tar" -n k8s.io images import /home/fcadmin/.fcv/fc-worldbuilder-v<TAG>.tar"
done done
``` ```
Memory: `feedback_rke2_image_import_per_node_scp`. Memory: `feedback_rke2_image_import_per_node_scp`.
3. **Bump image tag** in `worldbuilder.yaml` and git push. 3. **Bump image tag** in `worldbuilder.yaml` and git push.
ArgoCD ApplicationSet picks up within ~3 minutes. ArgoCD ApplicationSet picks up within ~3 minutes.
4. **First production render** — open 4. **First production render** — verify
`https://worldbuilder.iamworkin.lan/studio/c32e0000-0000-4000-8000-000000000004` `https://worldbuilder.iamworkin.lan/healthz`, open
and confirm the Cyberpunk Blue Jay demo prompt loads with five seeded fake `https://worldbuilder.iamworkin.lan/settings`, and confirm the image backend
generated images. This Sprint 32 visitor-safe profile uses reports ComfyUI before running an operator-owned render lane.
`ClientMode=fake`; switch the image-generation env vars back to ComfyUI only
for an operator-owned GPU render lane.
## Health probes ## Health probes
@@ -56,13 +55,8 @@ Source: `D:\git\FlowerCore\FlowerCore.WorldBuilder` (master)
## Image generation backend ## Image generation backend
Sprint 32 pins the Kubernetes profile to The live internal profile now uses
`FlowerCore:WorldBuilder:ImageGeneration:ClientMode=fake` with `FlowerCore:WorldBuilder:ImageGeneration:ClientMode=comfyui` with
`BaseUrl=http://127.0.0.1:1`. That keeps the public/internal visitor demo `BaseUrl=http://10.0.56.20:8188` on BLUEJAY-WS (R9700 / gfx1201 / ROCm 7.2).
deterministic, avoids GPU exposure, and still exercises the studio/gallery Keep the public host pre-staging disabled unless the five safe-to-expose gates
surface with persisted generated-image metadata. are rechecked; the live GPU lane is operator-owned and internal-only.
The previous ComfyUI backend target was `http://10.0.56.20:8188` on
BLUEJAY-WS (R9700 / gfx1201 / ROCm 7.2.1). Re-enable it only in an
operator-owned follow-up that also verifies workstation reachability and image
import freshness.

View File

@@ -5,10 +5,10 @@
# #
# Image build (BLUEJAY-WS): # Image build (BLUEJAY-WS):
# bash deploy/build.sh # in FlowerCore.WorldBuilder repo # bash deploy/build.sh # in FlowerCore.WorldBuilder repo
# podman save localhost/fc-worldbuilder:v<TAG> -o /tmp/fc-worldbuilder-v<TAG>.tar # podman save localhost/fc-worldbuilder:v<TAG> -o artifacts/deploy/fc-worldbuilder-v<TAG>.tar
# for h in 10.0.56.11 10.0.56.12 10.0.56.13; do # for h in 10.0.56.11 10.0.56.12; do
# scp /tmp/fc-worldbuilder-v<TAG>.tar fcadmin@$h:/tmp/ # scp artifacts/deploy/fc-worldbuilder-v<TAG>.tar fcadmin@$h:/home/fcadmin/.fcv/
# ssh fcadmin@$h "sudo /var/lib/rancher/rke2/bin/ctr -a /run/k3s/containerd/containerd.sock -n k8s.io images import /tmp/fc-worldbuilder-v<TAG>.tar" # ssh fcadmin@$h "sudo /var/lib/rancher/rke2/bin/ctr -a /run/k3s/containerd/containerd.sock -n k8s.io images import /home/fcadmin/.fcv/fc-worldbuilder-v<TAG>.tar"
# done # done
--- ---
apiVersion: v1 apiVersion: v1
@@ -90,7 +90,7 @@ spec:
containers: containers:
- name: web - name: web
# Bump tag for each rebuild. Initial deploy: v202605062048 # Bump tag for each rebuild. Initial deploy: v202605062048
image: localhost/fc-worldbuilder:v202605062048 image: localhost/fc-worldbuilder:v202606121657-35aaa2c-gpu
imagePullPolicy: Never imagePullPolicy: Never
ports: ports:
- containerPort: 8080 - containerPort: 8080
@@ -117,14 +117,16 @@ spec:
value: "/data/gallery" value: "/data/gallery"
- name: FlowerCore__WorldBuilder__Export__RootPath - name: FlowerCore__WorldBuilder__Export__RootPath
value: "/data/exports" value: "/data/exports"
# Visitor-safe Sprint 32 profile: fake backend keeps public demo # Operator-approved live GPU lane. Internal-only host targets
# rendering deterministic and avoids exposing BLUEJAY-WS GPU. # BLUEJAY-WS ComfyUI; keep public host pre-staging disabled below.
- name: FlowerCore__WorldBuilder__ImageGeneration__BaseUrl - name: FlowerCore__WorldBuilder__ImageGeneration__BaseUrl
value: "http://127.0.0.1:1" value: "http://10.0.56.20:8188"
- name: FlowerCore__WorldBuilder__ImageGeneration__ClientMode - name: FlowerCore__WorldBuilder__ImageGeneration__ClientMode
value: "fake" value: "comfyui"
- name: FlowerCore__WorldBuilder__ImageGeneration__BackendId - name: FlowerCore__WorldBuilder__ImageGeneration__BackendId
value: "fake" value: "comfyui"
- name: FlowerCore__WorldBuilder__ImageGeneration__VisitorSafe
value: "false"
resources: resources:
# Cluster CPU-request budget runs hot (99% on all 3 nodes at deploy # Cluster CPU-request budget runs hot (99% on all 3 nodes at deploy
# time) while actual CPU usage is well below capacity. Idle Blazor # time) while actual CPU usage is well below capacity. Idle Blazor

View File

@@ -1,74 +0,0 @@
apiVersion: argoproj.io/v1alpha1
kind: ApplicationSet
metadata:
annotations:
argocd.argoproj.io/refresh: "true"
name: bluejay-infra
namespace: argocd
spec:
generators:
- git:
directories:
- path: apps/*
repoURL: http://gitea-clusterip.gitea.svc:3000/bluejay/bluejay-infra.git
revision: main
template:
metadata: {}
spec:
destination: {}
project: ""
goTemplate: true
goTemplateOptions:
- missingkey=error
template:
metadata:
name: infra-{{.path.basename}}
spec:
destination:
server: https://kubernetes.default.svc
ignoreDifferences:
- group: apps
jqPathExpressions:
- .spec.volumeClaimTemplates[]?.status
jsonPointers:
- /spec/volumeClaimTemplates
kind: StatefulSet
name: zabbix-postgres
namespace: zabbix
- group: apps
jqPathExpressions:
- .spec.volumeClaimTemplates[]?.status
jsonPointers:
- /spec/volumeClaimTemplates
kind: StatefulSet
name: guac-mysql
namespace: guacamole
- group: apps
jqPathExpressions:
- .spec.volumeClaimTemplates[]?.status
jsonPointers:
- /spec/volumeClaimTemplates
kind: StatefulSet
name: matrix-postgres
namespace: matrix
- group: apps
jqPathExpressions:
- .spec.volumeClaimTemplates[]?.status
jsonPointers:
- /spec/volumeClaimTemplates
kind: StatefulSet
name: authentik-postgres
namespace: authentik
project: default
source:
path: '{{.path.path}}'
repoURL: http://gitea-clusterip.gitea.svc:3000/bluejay/bluejay-infra.git
targetRevision: main
syncPolicy:
automated:
prune: true
selfHeal: true
syncOptions:
- CreateNamespace=true
- ServerSideApply=true
- RespectIgnoreDifferences=true

View File

@@ -272,6 +272,7 @@ public sealed class FleetManifestLintTests
var container = deployments[expectedRunner.Key].MainContainerMappings().Should().ContainSingle().Subject; var container = deployments[expectedRunner.Key].MainContainerMappings().Should().ContainSingle().Subject;
EnvValue(container, "REPO_URL").Should().Be(expectedRunner.Value); EnvValue(container, "REPO_URL").Should().Be(expectedRunner.Value);
EnvValue(container, "EPHEMERAL").Should().Be("true"); EnvValue(container, "EPHEMERAL").Should().Be("true");
EnvValue(container, "DISABLE_AUTO_UPDATE").Should().Be("true", $"{expectedRunner.Key} must not self-update inside immutable Kubernetes runner pods");
EnvValue(container, "LABELS").Should().Be("self-hosted,linux,fc-build-linux"); EnvValue(container, "LABELS").Should().Be("self-hosted,linux,fc-build-linux");
EnvValue(container, "RUN_AS_ROOT").Should().Be("false"); EnvValue(container, "RUN_AS_ROOT").Should().Be("false");
EnvValue(container, "ACCESS_TOKEN").Should().BeNull("ACCESS_TOKEN must come from github-runner-token Secret, not a literal"); EnvValue(container, "ACCESS_TOKEN").Should().BeNull("ACCESS_TOKEN must come from github-runner-token Secret, not a literal");
@@ -468,99 +469,6 @@ public sealed class FleetManifestLintTests
monitoring.Should().Contain("dedicated LinuxRunnerOffline/MacMiniRunnerOffline alerts"); monitoring.Should().Contain("dedicated LinuxRunnerOffline/MacMiniRunnerOffline alerts");
} }
[Fact]
public void GithubRunnerReadme_DocumentsAcceptedEphemeralExitChurn()
{
var readme = File.ReadAllText(Path.Combine(Inventory.BluejayRoot, "apps", "github-runner", "README.md"));
readme.Should().Contain("Ephemeral runner pods");
readme.Should().Contain("exit-1/restart churn");
readme.Should().Contain("accepted operational noise");
readme.Should().Contain("repo-scoped runner-offline alerts stay quiet");
}
[Fact]
public void Monitoring_PiManagerDownDelayAndUpdateCenterRateLimit_MatchCanonicalAlerts()
{
var notesAlerts = File.ReadAllText(Path.Combine(
Inventory.WorkspaceRoot,
"FlowerCore.Notes",
"scripts",
"monitoring",
"alerts.yml"));
var monitoring = File.ReadAllText(Path.Combine(Inventory.BluejayRoot, "apps", "monitoring", "noc-monitoring.yaml"));
notesAlerts.Should().Contain("# Sprint 67: keep this warning behind NodeDown's 5m critical page");
notesAlerts.Should().Contain("- alert: PiManagerDown");
notesAlerts.Should().Contain("for: 8m");
monitoring.Should().Contain("# Sprint 67: delayed behind NodeDown's critical page");
monitoring.Should().Contain("- alert: PiManagerDown");
monitoring.Should().Contain("for: 8m");
notesAlerts.Should().Contain("- alert: UpdateCenterPublicEdgeRateLimited");
notesAlerts.Should().Contain("expr: probe_http_status_code{job=\"probe-update-center-public-edge\"} == 429");
notesAlerts.Should().Contain("for: 15m");
monitoring.Should().Contain("- alert: UpdateCenterPublicEdgeRateLimited");
monitoring.Should().Contain("expr: probe_http_status_code{job=\"probe-update-center-public-edge\"} == 429");
monitoring.Should().Contain("for: 15m");
monitoring.Should().Contain("severity: warning");
}
[Fact]
public void ApplicationSetExport_MustRemainManualRootOfGitOpsTree()
{
var readme = File.ReadAllText(Path.Combine(Inventory.BluejayRoot, "README.md"));
var appsetPath = Path.Combine(Inventory.BluejayRoot, "argocd", "applicationset-bluejay-infra.yaml");
File.Exists(appsetPath).Should().BeTrue();
var appset = File.ReadAllText(appsetPath);
appset.Should().Contain("kind: ApplicationSet");
appset.Should().Contain("name: bluejay-infra");
appset.Should().NotContain("\nstatus:");
appset.Should().NotContain("managedFields:");
readme.Should().Contain("root of this GitOps tree");
readme.Should().Contain("NOT self-managed");
readme.Should().Contain("kubectl -n argocd apply -f argocd/applicationset-bluejay-infra.yaml");
}
[Fact]
public void ApplicationSetExport_MustDiscoverAppsDirectoryOnMain()
{
var appset = File.ReadAllText(Path.Combine(Inventory.BluejayRoot, "argocd", "applicationset-bluejay-infra.yaml"));
appset.Should().Contain("path: apps/*");
appset.Should().Contain("revision: main");
appset.Should().Contain("repoURL: http://gitea-clusterip.gitea.svc:3000/bluejay/bluejay-infra.git");
appset.Should().Contain("path: '{{.path.path}}'");
appset.Should().Contain("targetRevision: main");
appset.Should().Contain("ServerSideApply=true");
appset.Should().Contain("RespectIgnoreDifferences=true");
}
[Fact]
public void ApplicationSetExport_MustPreserveStatefulSetIgnoreDifferences()
{
var appset = File.ReadAllText(Path.Combine(Inventory.BluejayRoot, "argocd", "applicationset-bluejay-infra.yaml"));
appset.Should().Contain("jsonPointers:");
appset.Should().Contain("- /spec/volumeClaimTemplates");
appset.Should().Contain(".spec.volumeClaimTemplates[]?.status");
Regex.Matches(appset, "kind: StatefulSet").Should().HaveCount(4);
foreach (var (name, ns) in new[]
{
("zabbix-postgres", "zabbix"),
("guac-mysql", "guacamole"),
("matrix-postgres", "matrix"),
("authentik-postgres", "authentik"),
})
{
appset.Should().Contain($"name: {name}");
appset.Should().Contain($"namespace: {ns}");
}
}
[Fact] [Fact]
public void Monitoring_BlackboxTargetsForOidcSensitiveServices_MustUseAnonymousHealthRoutesWhenAvailable() public void Monitoring_BlackboxTargetsForOidcSensitiveServices_MustUseAnonymousHealthRoutesWhenAvailable()
{ {
@@ -742,6 +650,7 @@ public sealed class FleetManifestLintTests
"certificate-web.yaml", "certificate-web.yaml",
"clusterrole-operator.yaml", "clusterrole-operator.yaml",
"clusterrolebinding-operator.yaml", "clusterrolebinding-operator.yaml",
"crds.yaml",
"deployment-operator.yaml", "deployment-operator.yaml",
"deployment-web.yaml", "deployment-web.yaml",
"ingressroute-web.yaml", "ingressroute-web.yaml",
@@ -831,7 +740,8 @@ public sealed class FleetManifestLintTests
.Single(document => document.Kind == "ClusterRole" && document.Name == "fc-devicemgmt-operator"); .Single(document => document.Kind == "ClusterRole" && document.Name == "fc-devicemgmt-operator");
var allScalars = clusterRole.AllScalars().ToList(); var allScalars = clusterRole.AllScalars().ToList();
allScalars.Should().Contain("devices.flowercore.io"); allScalars.Should().Contain("flowercore.io");
allScalars.Should().NotContain("devices.flowercore.io");
allScalars.Should().Contain("*"); allScalars.Should().Contain("*");
allScalars.Should().Contain("deployments"); allScalars.Should().Contain("deployments");
allScalars.Should().Contain("get"); allScalars.Should().Contain("get");
@@ -860,7 +770,7 @@ public sealed class FleetManifestLintTests
FcDeviceManagementDocuments().Should().NotContain(document => document.Kind == "Secret"); FcDeviceManagementDocuments().Should().NotContain(document => document.Kind == "Secret");
appText.Should().Contain("secretKeyRef:"); appText.Should().Contain("secretKeyRef:");
appText.Should().Contain("secretName: fc-devicemgmt-runtime"); appText.Should().Contain("name: fc-devicemgmt-runtime");
appText.Should().NotContain("stringData:"); appText.Should().NotContain("stringData:");
appText.Should().NotContain("from-literal"); appText.Should().NotContain("from-literal");
appText.Should().NotContain("tls.key:"); appText.Should().NotContain("tls.key:");
@@ -957,9 +867,9 @@ public sealed class FleetManifestLintTests
{ {
var deployments = new[] var deployments = new[]
{ {
(App: "fc-dns", Name: "dns-web", Slug: "dns", Secret: "dns-oidc-client"), (App: "fc-dns", Name: "dns-web", Slug: "dns", Secret: "dns-oidc-client", AuthEnabled: "false"),
(App: "fc-media", Name: "fc-media-web", Slug: "media", Secret: "media-oidc-client"), (App: "fc-media", Name: "fc-media-web", Slug: "media", Secret: "media-oidc-client", AuthEnabled: "true"),
(App: "fc-distribution", Name: "fc-distribution", Slug: "distribution", Secret: "distribution-oidc-client"), (App: "fc-distribution", Name: "fc-distribution", Slug: "distribution", Secret: "distribution-oidc-client", AuthEnabled: "true"),
}; };
foreach (var expected in deployments) foreach (var expected in deployments)
@@ -968,7 +878,7 @@ public sealed class FleetManifestLintTests
.Single(document => document.Kind == "Deployment" && document.Name == expected.Name); .Single(document => document.Kind == "Deployment" && document.Name == expected.Name);
var container = deployment.MainContainerMappings().Should().ContainSingle().Subject; var container = deployment.MainContainerMappings().Should().ContainSingle().Subject;
EnvValue(container, "FlowerCore__Auth__Enabled").Should().Be("true"); EnvValue(container, "FlowerCore__Auth__Enabled").Should().Be(expected.AuthEnabled);
EnvValue(container, "FlowerCore__Auth__Oidc__Enabled").Should().Be("true"); EnvValue(container, "FlowerCore__Auth__Oidc__Enabled").Should().Be("true");
(EnvValue(container, "FlowerCore__Auth__Oidc__Audience") ?? EnvValue(container, "FlowerCore__Auth__Oidc__ClientId")) (EnvValue(container, "FlowerCore__Auth__Oidc__Audience") ?? EnvValue(container, "FlowerCore__Auth__Oidc__ClientId"))
.Should() .Should()
@@ -1017,7 +927,7 @@ public sealed class FleetManifestLintTests
var dnsPvc = AppDocuments("fc-dns") var dnsPvc = AppDocuments("fc-dns")
.Single(document => document.Kind == "PersistentVolumeClaim" && document.Name == "dns-web-data"); .Single(document => document.Kind == "PersistentVolumeClaim" && document.Name == "dns-web-data");
ManifestNodeExtensions.Scalar(dnsContainer, "image").Should().Be("localhost/fc-dns-web:v20260604-oidc-proper"); ManifestNodeExtensions.Scalar(dnsContainer, "image").Should().Be("localhost/fc-dns-web:v20260612-l4dns-a5d2849");
dnsPvc.Scalar("spec", "storageClassName").Should().Be("longhorn"); dnsPvc.Scalar("spec", "storageClassName").Should().Be("longhorn");
dnsPvc.Scalar("spec", "resources", "requests", "storage").Should().Be("1Gi"); dnsPvc.Scalar("spec", "resources", "requests", "storage").Should().Be("1Gi");