Compare commits
23 Commits
codex/s67-
...
codex/l4-w
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8e2c960be3 | ||
|
|
c482b66187 | ||
|
|
bacb756173 | ||
|
|
8a576c95ed | ||
|
|
41c2243f09 | ||
|
|
c21e602e4d | ||
|
|
9f6b71c400 | ||
|
|
26f90acf1f | ||
|
|
ab00d22657 | ||
|
|
c1a43c64b3 | ||
|
|
7103658342 | ||
|
|
6b12b2bb49 | ||
|
|
a4c9e44a36 | ||
|
|
9674a9555e | ||
|
|
318252da76 | ||
|
|
3798b7c00e | ||
|
|
2707f1ae1e | ||
|
|
a7e7c1ae72 | ||
|
|
c8df788d72 | ||
|
|
b1a4d7120e | ||
|
|
4b57b8e939 | ||
|
|
70f36c546b | ||
|
|
cdbddd71af |
16
README.md
16
README.md
@@ -2,22 +2,6 @@
|
||||
|
||||
Infrastructure manifests for ArgoCD. An `ApplicationSet` in the `argocd` namespace watches the `apps/*` directories in this repo and creates one `Application` per subdir (prefixed `infra-<name>`).
|
||||
|
||||
## Root GitOps ApplicationSet
|
||||
|
||||
`argocd/applicationset-bluejay-infra.yaml` is the root of this GitOps tree, but
|
||||
it is **NOT self-managed** by ArgoCD. Apply it manually when the root generator
|
||||
or sync policy changes:
|
||||
|
||||
```bash
|
||||
kubectl -n argocd apply -f argocd/applicationset-bluejay-infra.yaml
|
||||
```
|
||||
|
||||
Keep the per-StatefulSet `ignoreDifferences` entries in that file synced with
|
||||
the live ApplicationSet. They intentionally cover `zabbix-postgres`,
|
||||
`guac-mysql`, `matrix-postgres`, and `authentik-postgres` so ArgoCD does not
|
||||
loop forever on server-side-apply `volumeClaimTemplates` status drift. Every new
|
||||
StatefulSet with `volumeClaimTemplates` needs its own entry appended.
|
||||
|
||||
## Adding a new service to the cluster
|
||||
|
||||
Follow these steps in order. **Step 1 must run before step 3** — if you skip it, cert-manager HTTP-01 will silently fail for ~2h per cert (exponential backoff) until someone diagnoses the DNS.
|
||||
|
||||
@@ -14,6 +14,20 @@
|
||||
# cluster-rebuild repeatability. See
|
||||
# feedback_networkpolicies_belong_in_bluejay_infra.md.
|
||||
---
|
||||
# OIDC client secret for the RemoteDesktop end-user sign-in (fleet regroup L9,
|
||||
# 2026-06-12). The Authentik provider `remotedesktop` already exists; the 1P item
|
||||
# `remotedesktop-oidc-client` (vault IAmWorkin) carries issuer_url / client_id /
|
||||
# client_secret, and the 1Password operator mints the same-named K8s Secret that
|
||||
# k8s/web-deployment.yaml (FlowerCore.RemoteDesktop repo) consumes with
|
||||
# optional:true. Gate stays OFF (Q-RD-16) — this is flip-READINESS only.
|
||||
apiVersion: onepassword.com/v1
|
||||
kind: OnePasswordItem
|
||||
metadata:
|
||||
name: remotedesktop-oidc-client
|
||||
namespace: fc-desktop
|
||||
spec:
|
||||
itemPath: "vaults/IAmWorkin/items/remotedesktop-oidc-client"
|
||||
---
|
||||
apiVersion: cert-manager.io/v1
|
||||
kind: Certificate
|
||||
metadata:
|
||||
|
||||
@@ -11,7 +11,7 @@ metadata:
|
||||
flowercore.io/created-by: bluejay-infra
|
||||
rules:
|
||||
- apiGroups:
|
||||
- devices.flowercore.io
|
||||
- flowercore.io
|
||||
resources:
|
||||
- '*'
|
||||
verbs:
|
||||
@@ -23,7 +23,7 @@ rules:
|
||||
- patch
|
||||
- delete
|
||||
- apiGroups:
|
||||
- devices.flowercore.io
|
||||
- flowercore.io
|
||||
resources:
|
||||
- devices/status
|
||||
- devices/finalizers
|
||||
@@ -33,6 +33,8 @@ rules:
|
||||
- devicepolicies/finalizers
|
||||
- remotecommands/status
|
||||
- remotecommands/finalizers
|
||||
- desiredstatedocuments/status
|
||||
- desiredstatedocuments/finalizers
|
||||
verbs:
|
||||
- get
|
||||
- update
|
||||
|
||||
186
apps/fc-devicemgmt/crds.yaml
Normal file
186
apps/fc-devicemgmt/crds.yaml
Normal file
@@ -0,0 +1,186 @@
|
||||
# FlowerCore.DeviceManagement CRDs.
|
||||
#
|
||||
# These CRDs match the current operator annotations:
|
||||
# [KubernetesEntity(Group = "flowercore.io", ApiVersion = "v1alpha1", ...)]
|
||||
# Keep the schemas intentionally permissive until the DeviceManagement operator
|
||||
# grows enforced CRD validation.
|
||||
---
|
||||
apiVersion: apiextensions.k8s.io/v1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
name: devices.flowercore.io
|
||||
labels:
|
||||
app.kubernetes.io/name: fc-devicemgmt-operator
|
||||
app.kubernetes.io/component: operator
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
flowercore.io/tenant-id: system
|
||||
flowercore.io/created-by: bluejay-infra
|
||||
spec:
|
||||
group: flowercore.io
|
||||
scope: Namespaced
|
||||
names:
|
||||
plural: devices
|
||||
singular: device
|
||||
kind: Device
|
||||
listKind: DeviceList
|
||||
versions:
|
||||
- name: v1alpha1
|
||||
served: true
|
||||
storage: true
|
||||
subresources:
|
||||
status: {}
|
||||
schema:
|
||||
openAPIV3Schema:
|
||||
type: object
|
||||
properties:
|
||||
spec:
|
||||
type: object
|
||||
x-kubernetes-preserve-unknown-fields: true
|
||||
status:
|
||||
type: object
|
||||
x-kubernetes-preserve-unknown-fields: true
|
||||
---
|
||||
apiVersion: apiextensions.k8s.io/v1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
name: devicegroups.flowercore.io
|
||||
labels:
|
||||
app.kubernetes.io/name: fc-devicemgmt-operator
|
||||
app.kubernetes.io/component: operator
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
flowercore.io/tenant-id: system
|
||||
flowercore.io/created-by: bluejay-infra
|
||||
spec:
|
||||
group: flowercore.io
|
||||
scope: Namespaced
|
||||
names:
|
||||
plural: devicegroups
|
||||
singular: devicegroup
|
||||
kind: DeviceGroup
|
||||
listKind: DeviceGroupList
|
||||
versions:
|
||||
- name: v1alpha1
|
||||
served: true
|
||||
storage: true
|
||||
subresources:
|
||||
status: {}
|
||||
schema:
|
||||
openAPIV3Schema:
|
||||
type: object
|
||||
properties:
|
||||
spec:
|
||||
type: object
|
||||
x-kubernetes-preserve-unknown-fields: true
|
||||
status:
|
||||
type: object
|
||||
x-kubernetes-preserve-unknown-fields: true
|
||||
---
|
||||
apiVersion: apiextensions.k8s.io/v1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
name: devicepolicies.flowercore.io
|
||||
labels:
|
||||
app.kubernetes.io/name: fc-devicemgmt-operator
|
||||
app.kubernetes.io/component: operator
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
flowercore.io/tenant-id: system
|
||||
flowercore.io/created-by: bluejay-infra
|
||||
spec:
|
||||
group: flowercore.io
|
||||
scope: Namespaced
|
||||
names:
|
||||
plural: devicepolicies
|
||||
singular: devicepolicy
|
||||
kind: DevicePolicy
|
||||
listKind: DevicePolicyList
|
||||
versions:
|
||||
- name: v1alpha1
|
||||
served: true
|
||||
storage: true
|
||||
subresources:
|
||||
status: {}
|
||||
schema:
|
||||
openAPIV3Schema:
|
||||
type: object
|
||||
properties:
|
||||
spec:
|
||||
type: object
|
||||
x-kubernetes-preserve-unknown-fields: true
|
||||
status:
|
||||
type: object
|
||||
x-kubernetes-preserve-unknown-fields: true
|
||||
---
|
||||
apiVersion: apiextensions.k8s.io/v1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
name: remotecommands.flowercore.io
|
||||
labels:
|
||||
app.kubernetes.io/name: fc-devicemgmt-operator
|
||||
app.kubernetes.io/component: operator
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
flowercore.io/tenant-id: system
|
||||
flowercore.io/created-by: bluejay-infra
|
||||
spec:
|
||||
group: flowercore.io
|
||||
scope: Namespaced
|
||||
names:
|
||||
plural: remotecommands
|
||||
singular: remotecommand
|
||||
kind: RemoteCommand
|
||||
listKind: RemoteCommandList
|
||||
versions:
|
||||
- name: v1alpha1
|
||||
served: true
|
||||
storage: true
|
||||
subresources:
|
||||
status: {}
|
||||
schema:
|
||||
openAPIV3Schema:
|
||||
type: object
|
||||
properties:
|
||||
spec:
|
||||
type: object
|
||||
x-kubernetes-preserve-unknown-fields: true
|
||||
status:
|
||||
type: object
|
||||
x-kubernetes-preserve-unknown-fields: true
|
||||
---
|
||||
apiVersion: apiextensions.k8s.io/v1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
name: desiredstatedocuments.flowercore.io
|
||||
labels:
|
||||
app.kubernetes.io/name: fc-devicemgmt-operator
|
||||
app.kubernetes.io/component: operator
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
flowercore.io/tenant-id: system
|
||||
flowercore.io/created-by: bluejay-infra
|
||||
spec:
|
||||
group: flowercore.io
|
||||
scope: Namespaced
|
||||
names:
|
||||
plural: desiredstatedocuments
|
||||
singular: desiredstatedocument
|
||||
kind: DesiredStateDocument
|
||||
listKind: DesiredStateDocumentList
|
||||
versions:
|
||||
- name: v1alpha1
|
||||
served: true
|
||||
storage: true
|
||||
subresources:
|
||||
status: {}
|
||||
schema:
|
||||
openAPIV3Schema:
|
||||
type: object
|
||||
properties:
|
||||
spec:
|
||||
type: object
|
||||
x-kubernetes-preserve-unknown-fields: true
|
||||
status:
|
||||
type: object
|
||||
x-kubernetes-preserve-unknown-fields: true
|
||||
@@ -5,21 +5,35 @@
|
||||
# exist yet; import localhost/fc-devicemgmt-web:<tag> to all schedulable RKE2
|
||||
# nodes before letting ArgoCD sync a live rollout.
|
||||
#
|
||||
# SCALED TO 0 — 2026-05-19 morning-routine cleanup.
|
||||
# The Web pod cannot start until TWO upstream gaps close:
|
||||
# 1. MySQL DB instance `flowercore_devicemgmt` (user `fc_devicemgmt`) is
|
||||
# provisioned via fc-mysql Manager. The cluster currently has ZERO
|
||||
# MySqlInstanceCrds and no `mysql.fc-mysql.svc:3306` Service, so the
|
||||
# deployment-web container env `FlowerCore__Database__Host=mysql.fc-mysql.svc`
|
||||
# points at nothing. Provision via the fc-mysql Manager UI/REST/MCP.
|
||||
# 2. 1Password vault item `IAmWorkin/FlowerCore DeviceManagement Runtime`
|
||||
# with 5 fields (DB-Password, mtls-ca.pem, mtls-client.crt, mtls-client.key,
|
||||
# mtls-chain.pem) — see apps/fc-devicemgmt/1password-item.yaml. Mint mTLS
|
||||
# from step-ca-agent ClusterIssuer per ADR-126; DB-Password must match the
|
||||
# password configured for the MySQL user.
|
||||
# Re-enable: change replicas back to 2 after both gaps close. The image tag
|
||||
# in this file (v20260512-cx5) MAY also need a refresh — it predates the
|
||||
# Sprint 34 Cl-3 operator fix; Web may have an analogous bug.
|
||||
# LIVE — 2026-06-11 DeviceManagement product-host enablement.
|
||||
# The current DeviceManagement Web source is SQLite-backed in Program.cs, so
|
||||
# Phase 1 production uses a Longhorn RWO PVC at /data/devicemgmt.db. The
|
||||
# 1Password runtime item stays mounted through env for future MySQL/API-key
|
||||
# cutover, but MySQL is not required for this first product-host rollout.
|
||||
# Image v20260611-healthz is built from FlowerCore.DeviceManagement master
|
||||
# 3c15f3b, which adds the /healthz alias required by fleet monitoring.
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: fc-devicemgmt-web-data
|
||||
namespace: fc-devicemgmt
|
||||
labels:
|
||||
app: fc-devicemgmt-web
|
||||
app.kubernetes.io/name: fc-devicemgmt-web
|
||||
app.kubernetes.io/component: web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
app.kubernetes.io/managed-by: argocd
|
||||
flowercore.io/tenant-id: system
|
||||
flowercore.io/created-by: bluejay-infra
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
storageClassName: longhorn
|
||||
resources:
|
||||
requests:
|
||||
storage: 1Gi
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
@@ -36,7 +50,7 @@ metadata:
|
||||
annotations:
|
||||
flowercore.io/traceability-standard: k8s-pod-ownership-and-traceability-standard
|
||||
spec:
|
||||
replicas: 0
|
||||
replicas: 1
|
||||
revisionHistoryLimit: 3
|
||||
selector:
|
||||
matchLabels:
|
||||
@@ -64,7 +78,7 @@ spec:
|
||||
fsGroupChangePolicy: OnRootMismatch
|
||||
containers:
|
||||
- name: web
|
||||
image: localhost/fc-devicemgmt-web:v20260512-cx5
|
||||
image: localhost/fc-devicemgmt-web:v20260611-healthz
|
||||
imagePullPolicy: Never
|
||||
ports:
|
||||
- name: http
|
||||
@@ -77,29 +91,21 @@ spec:
|
||||
value: "Production"
|
||||
- name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
|
||||
value: "false"
|
||||
- name: HOME
|
||||
value: "/data"
|
||||
- name: FlowerCore__Service__Name
|
||||
value: "FlowerCore.DeviceManagement.Web"
|
||||
- name: FlowerCore__DeviceManagement__DefaultTenantId
|
||||
value: "system"
|
||||
- name: FlowerCore__Database__Provider
|
||||
value: "MySql"
|
||||
- name: FlowerCore__Database__Host
|
||||
value: "mysql.fc-mysql.svc"
|
||||
- name: FlowerCore__Database__Database
|
||||
value: "flowercore_devicemgmt"
|
||||
- name: FlowerCore__Database__User
|
||||
value: "fc_devicemgmt"
|
||||
value: "Sqlite"
|
||||
- name: FlowerCore__Database__ConnectionStrings__Sqlite
|
||||
value: "Data Source=/data/devicemgmt.db"
|
||||
- name: FlowerCore__Database__Password
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: fc-devicemgmt-runtime
|
||||
key: DB-Password
|
||||
- name: FlowerCore__DeviceManagement__AgentMtls__CaPath
|
||||
value: "/secrets/devicemgmt-mtls/mtls-ca.pem"
|
||||
- name: FlowerCore__DeviceManagement__AgentMtls__ClientCertificatePath
|
||||
value: "/secrets/devicemgmt-mtls/mtls-client.crt"
|
||||
- name: FlowerCore__DeviceManagement__AgentMtls__ClientKeyPath
|
||||
value: "/secrets/devicemgmt-mtls/mtls-client.key"
|
||||
- name: FlowerCore__EventBus__Redis__Configuration
|
||||
value: "redis.fc-redis.svc:6379"
|
||||
resources:
|
||||
@@ -136,19 +142,17 @@ spec:
|
||||
drop:
|
||||
- ALL
|
||||
volumeMounts:
|
||||
- name: data
|
||||
mountPath: /data
|
||||
- name: tmp
|
||||
mountPath: /tmp
|
||||
- name: logs
|
||||
mountPath: /app/logs
|
||||
- name: devicemgmt-mtls
|
||||
mountPath: /secrets/devicemgmt-mtls
|
||||
readOnly: true
|
||||
volumes:
|
||||
- name: data
|
||||
persistentVolumeClaim:
|
||||
claimName: fc-devicemgmt-web-data
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
- name: logs
|
||||
emptyDir: {}
|
||||
- name: devicemgmt-mtls
|
||||
secret:
|
||||
secretName: fc-devicemgmt-runtime
|
||||
defaultMode: 0400
|
||||
|
||||
@@ -48,7 +48,7 @@ data:
|
||||
{
|
||||
"FlowerCore": {
|
||||
"Auth": {
|
||||
"Enabled": true,
|
||||
"Enabled": false,
|
||||
"Oidc": {
|
||||
"Enabled": true,
|
||||
"Audience": "dns",
|
||||
@@ -111,7 +111,7 @@ spec:
|
||||
fsGroup: 1654
|
||||
containers:
|
||||
- name: dns-web
|
||||
image: localhost/fc-dns-web:v20260604-oidc-proper
|
||||
image: localhost/fc-dns-web:v20260612-l4dns-a5d2849
|
||||
imagePullPolicy: Never
|
||||
securityContext:
|
||||
readOnlyRootFilesystem: true
|
||||
@@ -149,7 +149,7 @@ spec:
|
||||
key: client_secret
|
||||
optional: true
|
||||
- name: FlowerCore__Auth__Enabled
|
||||
value: "true"
|
||||
value: "false"
|
||||
- name: FlowerCore__Auth__Oidc__Enabled
|
||||
value: "true"
|
||||
- name: FlowerCore__Auth__Oidc__Audience
|
||||
@@ -303,7 +303,7 @@ spec:
|
||||
fsGroup: 1654
|
||||
containers:
|
||||
- name: dns-acme-webhook
|
||||
image: localhost/fc-dns-acme-webhook:v202604290845
|
||||
image: localhost/fc-dns-acme-webhook:v20260612-l4dns-a5d2849
|
||||
imagePullPolicy: Never
|
||||
securityContext:
|
||||
readOnlyRootFilesystem: true
|
||||
|
||||
@@ -535,7 +535,7 @@ spec:
|
||||
fsGroupChangePolicy: OnRootMismatch
|
||||
containers:
|
||||
- name: web
|
||||
image: localhost/fc-ttsreader-web:v20260603-s54cx14-pr29-schema
|
||||
image: localhost/fc-ttsreader-web:v20260612-readalong-corrections
|
||||
imagePullPolicy: Never
|
||||
ports:
|
||||
- containerPort: 5217
|
||||
|
||||
@@ -12,6 +12,8 @@ All repo-scoped Linux runners use:
|
||||
- `ACCESS_TOKEN` from the `github-runner-token` Secret
|
||||
- `RUN_AS_ROOT=false`
|
||||
- `EPHEMERAL=true`
|
||||
- `DISABLE_AUTO_UPDATE=true` so the runner does not self-update and exit inside
|
||||
the immutable Kubernetes pod
|
||||
- `LABELS=self-hosted,linux,fc-build-linux`
|
||||
- writable non-root paths under `/home/runner` for .NET, NuGet, XDG cache, and
|
||||
Actions tool cache
|
||||
@@ -24,12 +26,6 @@ original Longhorn ReadWriteOnce NuGet PVC. Every other repo-scoped runner uses
|
||||
two replicas with per-pod `emptyDir` caches. That is the safe backlog-drain
|
||||
strategy: no two pods share one RWO PVC.
|
||||
|
||||
Ephemeral runner pods are expected to register, run one job, deregister, and
|
||||
exit so the Deployment starts a fresh pod for the next registration token. A
|
||||
small amount of exit-1/restart churn from token-expiry or no-work windows is
|
||||
accepted operational noise as long as jobs are not stuck queued and the
|
||||
repo-scoped runner-offline alerts stay quiet.
|
||||
|
||||
Sprint 32 final long-tail wave adds 16 two-replica Deployments:
|
||||
`FlowerCore.Knowledge`, `FlowerCore.LlmBridge`, `FlowerCore.Media`,
|
||||
`FlowerCore.Presentations`, `FlowerCore.RemoteDesktop`, `FlowerCore.DNS`,
|
||||
@@ -137,3 +133,7 @@ from GitHub Actions and verify it lands on an `rke2-linux-*` runner.
|
||||
value does not change.
|
||||
- `Multi-Attach` volume error: only the Common runner uses an RWO PVC and it must
|
||||
stay single-replica. New multi-replica runners use `emptyDir`.
|
||||
- Runner pods repeatedly registering, downloading a newer Actions runner, then
|
||||
exiting with code 4: verify `DISABLE_AUTO_UPDATE=true` is present. The image
|
||||
translates that into `config.sh --disableupdate`; without it, the Deployment
|
||||
controller sees the expected self-update exit as CrashLoopBackOff.
|
||||
|
||||
@@ -195,6 +195,11 @@ spec:
|
||||
# fresh registration occurs. Prevents stale runner accumulation.
|
||||
- name: EPHEMERAL
|
||||
value: "true"
|
||||
# The runner image must not self-update inside an immutable
|
||||
# Kubernetes pod. Without this, GitHub runner auto-update exits
|
||||
# with code 4 and the Deployment falls into CrashLoopBackOff.
|
||||
- name: DISABLE_AUTO_UPDATE
|
||||
value: "true"
|
||||
# Labels used by workflow files: runs-on: [self-hosted, linux, fc-build-linux]
|
||||
- name: LABELS
|
||||
value: "self-hosted,linux,fc-build-linux"
|
||||
@@ -366,6 +371,11 @@ spec:
|
||||
value: "/tmp/runner/work"
|
||||
- name: EPHEMERAL
|
||||
value: "true"
|
||||
# The runner image must not self-update inside an immutable
|
||||
# Kubernetes pod. Without this, GitHub runner auto-update exits
|
||||
# with code 4 and the Deployment falls into CrashLoopBackOff.
|
||||
- name: DISABLE_AUTO_UPDATE
|
||||
value: "true"
|
||||
- name: LABELS
|
||||
value: "self-hosted,linux,fc-build-linux"
|
||||
- name: HOME
|
||||
@@ -504,6 +514,11 @@ spec:
|
||||
value: "/tmp/runner/work"
|
||||
- name: EPHEMERAL
|
||||
value: "true"
|
||||
# The runner image must not self-update inside an immutable
|
||||
# Kubernetes pod. Without this, GitHub runner auto-update exits
|
||||
# with code 4 and the Deployment falls into CrashLoopBackOff.
|
||||
- name: DISABLE_AUTO_UPDATE
|
||||
value: "true"
|
||||
- name: LABELS
|
||||
value: "self-hosted,linux,fc-build-linux"
|
||||
- name: HOME
|
||||
@@ -636,6 +651,11 @@ spec:
|
||||
value: "/tmp/runner/work"
|
||||
- name: EPHEMERAL
|
||||
value: "true"
|
||||
# The runner image must not self-update inside an immutable
|
||||
# Kubernetes pod. Without this, GitHub runner auto-update exits
|
||||
# with code 4 and the Deployment falls into CrashLoopBackOff.
|
||||
- name: DISABLE_AUTO_UPDATE
|
||||
value: "true"
|
||||
- name: LABELS
|
||||
value: "self-hosted,linux,fc-build-linux"
|
||||
- name: HOME
|
||||
@@ -768,6 +788,11 @@ spec:
|
||||
value: "/tmp/runner/work"
|
||||
- name: EPHEMERAL
|
||||
value: "true"
|
||||
# The runner image must not self-update inside an immutable
|
||||
# Kubernetes pod. Without this, GitHub runner auto-update exits
|
||||
# with code 4 and the Deployment falls into CrashLoopBackOff.
|
||||
- name: DISABLE_AUTO_UPDATE
|
||||
value: "true"
|
||||
- name: LABELS
|
||||
value: "self-hosted,linux,fc-build-linux"
|
||||
- name: HOME
|
||||
@@ -900,6 +925,11 @@ spec:
|
||||
value: "/tmp/runner/work"
|
||||
- name: EPHEMERAL
|
||||
value: "true"
|
||||
# The runner image must not self-update inside an immutable
|
||||
# Kubernetes pod. Without this, GitHub runner auto-update exits
|
||||
# with code 4 and the Deployment falls into CrashLoopBackOff.
|
||||
- name: DISABLE_AUTO_UPDATE
|
||||
value: "true"
|
||||
- name: LABELS
|
||||
value: "self-hosted,linux,fc-build-linux"
|
||||
- name: HOME
|
||||
@@ -1035,6 +1065,11 @@ spec:
|
||||
value: "/tmp/runner/work"
|
||||
- name: EPHEMERAL
|
||||
value: "true"
|
||||
# The runner image must not self-update inside an immutable
|
||||
# Kubernetes pod. Without this, GitHub runner auto-update exits
|
||||
# with code 4 and the Deployment falls into CrashLoopBackOff.
|
||||
- name: DISABLE_AUTO_UPDATE
|
||||
value: "true"
|
||||
- name: LABELS
|
||||
value: "self-hosted,linux,fc-build-linux"
|
||||
- name: HOME
|
||||
@@ -1167,6 +1202,11 @@ spec:
|
||||
value: "/tmp/runner/work"
|
||||
- name: EPHEMERAL
|
||||
value: "true"
|
||||
# The runner image must not self-update inside an immutable
|
||||
# Kubernetes pod. Without this, GitHub runner auto-update exits
|
||||
# with code 4 and the Deployment falls into CrashLoopBackOff.
|
||||
- name: DISABLE_AUTO_UPDATE
|
||||
value: "true"
|
||||
- name: LABELS
|
||||
value: "self-hosted,linux,fc-build-linux"
|
||||
- name: HOME
|
||||
@@ -1299,6 +1339,11 @@ spec:
|
||||
value: "/tmp/runner/work"
|
||||
- name: EPHEMERAL
|
||||
value: "true"
|
||||
# The runner image must not self-update inside an immutable
|
||||
# Kubernetes pod. Without this, GitHub runner auto-update exits
|
||||
# with code 4 and the Deployment falls into CrashLoopBackOff.
|
||||
- name: DISABLE_AUTO_UPDATE
|
||||
value: "true"
|
||||
- name: LABELS
|
||||
value: "self-hosted,linux,fc-build-linux"
|
||||
- name: HOME
|
||||
@@ -1431,6 +1476,11 @@ spec:
|
||||
value: "/tmp/runner/work"
|
||||
- name: EPHEMERAL
|
||||
value: "true"
|
||||
# The runner image must not self-update inside an immutable
|
||||
# Kubernetes pod. Without this, GitHub runner auto-update exits
|
||||
# with code 4 and the Deployment falls into CrashLoopBackOff.
|
||||
- name: DISABLE_AUTO_UPDATE
|
||||
value: "true"
|
||||
- name: LABELS
|
||||
value: "self-hosted,linux,fc-build-linux"
|
||||
- name: HOME
|
||||
@@ -1565,6 +1615,11 @@ spec:
|
||||
value: "/tmp/runner/work"
|
||||
- name: EPHEMERAL
|
||||
value: "true"
|
||||
# The runner image must not self-update inside an immutable
|
||||
# Kubernetes pod. Without this, GitHub runner auto-update exits
|
||||
# with code 4 and the Deployment falls into CrashLoopBackOff.
|
||||
- name: DISABLE_AUTO_UPDATE
|
||||
value: "true"
|
||||
- name: LABELS
|
||||
value: "self-hosted,linux,fc-build-linux"
|
||||
- name: HOME
|
||||
@@ -1699,6 +1754,11 @@ spec:
|
||||
value: "/tmp/runner/work"
|
||||
- name: EPHEMERAL
|
||||
value: "true"
|
||||
# The runner image must not self-update inside an immutable
|
||||
# Kubernetes pod. Without this, GitHub runner auto-update exits
|
||||
# with code 4 and the Deployment falls into CrashLoopBackOff.
|
||||
- name: DISABLE_AUTO_UPDATE
|
||||
value: "true"
|
||||
- name: LABELS
|
||||
value: "self-hosted,linux,fc-build-linux"
|
||||
- name: HOME
|
||||
@@ -1838,6 +1898,11 @@ spec:
|
||||
value: "/tmp/runner/work"
|
||||
- name: EPHEMERAL
|
||||
value: "true"
|
||||
# The runner image must not self-update inside an immutable
|
||||
# Kubernetes pod. Without this, GitHub runner auto-update exits
|
||||
# with code 4 and the Deployment falls into CrashLoopBackOff.
|
||||
- name: DISABLE_AUTO_UPDATE
|
||||
value: "true"
|
||||
- name: LABELS
|
||||
value: "self-hosted,linux,fc-build-linux"
|
||||
- name: HOME
|
||||
@@ -1972,6 +2037,11 @@ spec:
|
||||
value: "/tmp/runner/work"
|
||||
- name: EPHEMERAL
|
||||
value: "true"
|
||||
# The runner image must not self-update inside an immutable
|
||||
# Kubernetes pod. Without this, GitHub runner auto-update exits
|
||||
# with code 4 and the Deployment falls into CrashLoopBackOff.
|
||||
- name: DISABLE_AUTO_UPDATE
|
||||
value: "true"
|
||||
- name: LABELS
|
||||
value: "self-hosted,linux,fc-build-linux"
|
||||
- name: HOME
|
||||
@@ -2106,6 +2176,11 @@ spec:
|
||||
value: "/tmp/runner/work"
|
||||
- name: EPHEMERAL
|
||||
value: "true"
|
||||
# The runner image must not self-update inside an immutable
|
||||
# Kubernetes pod. Without this, GitHub runner auto-update exits
|
||||
# with code 4 and the Deployment falls into CrashLoopBackOff.
|
||||
- name: DISABLE_AUTO_UPDATE
|
||||
value: "true"
|
||||
- name: LABELS
|
||||
value: "self-hosted,linux,fc-build-linux"
|
||||
- name: HOME
|
||||
@@ -2240,6 +2315,11 @@ spec:
|
||||
value: "/tmp/runner/work"
|
||||
- name: EPHEMERAL
|
||||
value: "true"
|
||||
# The runner image must not self-update inside an immutable
|
||||
# Kubernetes pod. Without this, GitHub runner auto-update exits
|
||||
# with code 4 and the Deployment falls into CrashLoopBackOff.
|
||||
- name: DISABLE_AUTO_UPDATE
|
||||
value: "true"
|
||||
- name: LABELS
|
||||
value: "self-hosted,linux,fc-build-linux"
|
||||
- name: HOME
|
||||
@@ -2373,6 +2453,11 @@ spec:
|
||||
value: "/tmp/runner/work"
|
||||
- name: EPHEMERAL
|
||||
value: "true"
|
||||
# The runner image must not self-update inside an immutable
|
||||
# Kubernetes pod. Without this, GitHub runner auto-update exits
|
||||
# with code 4 and the Deployment falls into CrashLoopBackOff.
|
||||
- name: DISABLE_AUTO_UPDATE
|
||||
value: "true"
|
||||
- name: LABELS
|
||||
value: "self-hosted,linux,fc-build-linux"
|
||||
- name: HOME
|
||||
@@ -2507,6 +2592,11 @@ spec:
|
||||
value: "/tmp/runner/work"
|
||||
- name: EPHEMERAL
|
||||
value: "true"
|
||||
# The runner image must not self-update inside an immutable
|
||||
# Kubernetes pod. Without this, GitHub runner auto-update exits
|
||||
# with code 4 and the Deployment falls into CrashLoopBackOff.
|
||||
- name: DISABLE_AUTO_UPDATE
|
||||
value: "true"
|
||||
- name: LABELS
|
||||
value: "self-hosted,linux,fc-build-linux"
|
||||
- name: HOME
|
||||
@@ -2640,6 +2730,11 @@ spec:
|
||||
value: "/tmp/runner/work"
|
||||
- name: EPHEMERAL
|
||||
value: "true"
|
||||
# The runner image must not self-update inside an immutable
|
||||
# Kubernetes pod. Without this, GitHub runner auto-update exits
|
||||
# with code 4 and the Deployment falls into CrashLoopBackOff.
|
||||
- name: DISABLE_AUTO_UPDATE
|
||||
value: "true"
|
||||
- name: LABELS
|
||||
value: "self-hosted,linux,fc-build-linux"
|
||||
- name: HOME
|
||||
@@ -2773,6 +2868,11 @@ spec:
|
||||
value: "/tmp/runner/work"
|
||||
- name: EPHEMERAL
|
||||
value: "true"
|
||||
# The runner image must not self-update inside an immutable
|
||||
# Kubernetes pod. Without this, GitHub runner auto-update exits
|
||||
# with code 4 and the Deployment falls into CrashLoopBackOff.
|
||||
- name: DISABLE_AUTO_UPDATE
|
||||
value: "true"
|
||||
- name: LABELS
|
||||
value: "self-hosted,linux,fc-build-linux"
|
||||
- name: HOME
|
||||
@@ -2906,6 +3006,11 @@ spec:
|
||||
value: "/tmp/runner/work"
|
||||
- name: EPHEMERAL
|
||||
value: "true"
|
||||
# The runner image must not self-update inside an immutable
|
||||
# Kubernetes pod. Without this, GitHub runner auto-update exits
|
||||
# with code 4 and the Deployment falls into CrashLoopBackOff.
|
||||
- name: DISABLE_AUTO_UPDATE
|
||||
value: "true"
|
||||
- name: LABELS
|
||||
value: "self-hosted,linux,fc-build-linux"
|
||||
- name: HOME
|
||||
@@ -3039,6 +3144,11 @@ spec:
|
||||
value: "/tmp/runner/work"
|
||||
- name: EPHEMERAL
|
||||
value: "true"
|
||||
# The runner image must not self-update inside an immutable
|
||||
# Kubernetes pod. Without this, GitHub runner auto-update exits
|
||||
# with code 4 and the Deployment falls into CrashLoopBackOff.
|
||||
- name: DISABLE_AUTO_UPDATE
|
||||
value: "true"
|
||||
- name: LABELS
|
||||
value: "self-hosted,linux,fc-build-linux"
|
||||
- name: HOME
|
||||
@@ -3172,6 +3282,11 @@ spec:
|
||||
value: "/tmp/runner/work"
|
||||
- name: EPHEMERAL
|
||||
value: "true"
|
||||
# The runner image must not self-update inside an immutable
|
||||
# Kubernetes pod. Without this, GitHub runner auto-update exits
|
||||
# with code 4 and the Deployment falls into CrashLoopBackOff.
|
||||
- name: DISABLE_AUTO_UPDATE
|
||||
value: "true"
|
||||
- name: LABELS
|
||||
value: "self-hosted,linux,fc-build-linux"
|
||||
- name: HOME
|
||||
@@ -3306,6 +3421,11 @@ spec:
|
||||
value: "/tmp/runner/work"
|
||||
- name: EPHEMERAL
|
||||
value: "true"
|
||||
# The runner image must not self-update inside an immutable
|
||||
# Kubernetes pod. Without this, GitHub runner auto-update exits
|
||||
# with code 4 and the Deployment falls into CrashLoopBackOff.
|
||||
- name: DISABLE_AUTO_UPDATE
|
||||
value: "true"
|
||||
- name: LABELS
|
||||
value: "self-hosted,linux,fc-build-linux"
|
||||
- name: HOME
|
||||
@@ -3440,6 +3560,11 @@ spec:
|
||||
value: "/tmp/runner/work"
|
||||
- name: EPHEMERAL
|
||||
value: "true"
|
||||
# The runner image must not self-update inside an immutable
|
||||
# Kubernetes pod. Without this, GitHub runner auto-update exits
|
||||
# with code 4 and the Deployment falls into CrashLoopBackOff.
|
||||
- name: DISABLE_AUTO_UPDATE
|
||||
value: "true"
|
||||
- name: LABELS
|
||||
value: "self-hosted,linux,fc-build-linux"
|
||||
- name: HOME
|
||||
@@ -3574,6 +3699,11 @@ spec:
|
||||
value: "/tmp/runner/work"
|
||||
- name: EPHEMERAL
|
||||
value: "true"
|
||||
# The runner image must not self-update inside an immutable
|
||||
# Kubernetes pod. Without this, GitHub runner auto-update exits
|
||||
# with code 4 and the Deployment falls into CrashLoopBackOff.
|
||||
- name: DISABLE_AUTO_UPDATE
|
||||
value: "true"
|
||||
- name: LABELS
|
||||
value: "self-hosted,linux,fc-build-linux"
|
||||
- name: HOME
|
||||
@@ -3708,6 +3838,11 @@ spec:
|
||||
value: "/tmp/runner/work"
|
||||
- name: EPHEMERAL
|
||||
value: "true"
|
||||
# The runner image must not self-update inside an immutable
|
||||
# Kubernetes pod. Without this, GitHub runner auto-update exits
|
||||
# with code 4 and the Deployment falls into CrashLoopBackOff.
|
||||
- name: DISABLE_AUTO_UPDATE
|
||||
value: "true"
|
||||
- name: LABELS
|
||||
value: "self-hosted,linux,fc-build-linux"
|
||||
- name: HOME
|
||||
@@ -3842,6 +3977,11 @@ spec:
|
||||
value: "/tmp/runner/work"
|
||||
- name: EPHEMERAL
|
||||
value: "true"
|
||||
# The runner image must not self-update inside an immutable
|
||||
# Kubernetes pod. Without this, GitHub runner auto-update exits
|
||||
# with code 4 and the Deployment falls into CrashLoopBackOff.
|
||||
- name: DISABLE_AUTO_UPDATE
|
||||
value: "true"
|
||||
- name: LABELS
|
||||
value: "self-hosted,linux,fc-build-linux"
|
||||
- name: HOME
|
||||
@@ -3975,6 +4115,11 @@ spec:
|
||||
value: "/tmp/runner/work"
|
||||
- name: EPHEMERAL
|
||||
value: "true"
|
||||
# The runner image must not self-update inside an immutable
|
||||
# Kubernetes pod. Without this, GitHub runner auto-update exits
|
||||
# with code 4 and the Deployment falls into CrashLoopBackOff.
|
||||
- name: DISABLE_AUTO_UPDATE
|
||||
value: "true"
|
||||
- name: LABELS
|
||||
value: "self-hosted,linux,fc-build-linux"
|
||||
- name: HOME
|
||||
@@ -4109,6 +4254,11 @@ spec:
|
||||
value: "/tmp/runner/work"
|
||||
- name: EPHEMERAL
|
||||
value: "true"
|
||||
# The runner image must not self-update inside an immutable
|
||||
# Kubernetes pod. Without this, GitHub runner auto-update exits
|
||||
# with code 4 and the Deployment falls into CrashLoopBackOff.
|
||||
- name: DISABLE_AUTO_UPDATE
|
||||
value: "true"
|
||||
- name: LABELS
|
||||
value: "self-hosted,linux,fc-build-linux"
|
||||
- name: HOME
|
||||
@@ -4247,6 +4397,11 @@ spec:
|
||||
value: "/tmp/runner/work"
|
||||
- name: EPHEMERAL
|
||||
value: "true"
|
||||
# The runner image must not self-update inside an immutable
|
||||
# Kubernetes pod. Without this, GitHub runner auto-update exits
|
||||
# with code 4 and the Deployment falls into CrashLoopBackOff.
|
||||
- name: DISABLE_AUTO_UPDATE
|
||||
value: "true"
|
||||
- name: LABELS
|
||||
value: "self-hosted,linux,fc-build-linux"
|
||||
- name: HOME
|
||||
@@ -4386,6 +4541,11 @@ spec:
|
||||
value: "/tmp/runner/work"
|
||||
- name: EPHEMERAL
|
||||
value: "true"
|
||||
# The runner image must not self-update inside an immutable
|
||||
# Kubernetes pod. Without this, GitHub runner auto-update exits
|
||||
# with code 4 and the Deployment falls into CrashLoopBackOff.
|
||||
- name: DISABLE_AUTO_UPDATE
|
||||
value: "true"
|
||||
- name: LABELS
|
||||
value: "self-hosted,linux,fc-build-linux"
|
||||
- name: HOME
|
||||
@@ -4521,6 +4681,11 @@ spec:
|
||||
value: "/tmp/runner/work"
|
||||
- name: EPHEMERAL
|
||||
value: "true"
|
||||
# The runner image must not self-update inside an immutable
|
||||
# Kubernetes pod. Without this, GitHub runner auto-update exits
|
||||
# with code 4 and the Deployment falls into CrashLoopBackOff.
|
||||
- name: DISABLE_AUTO_UPDATE
|
||||
value: "true"
|
||||
- name: LABELS
|
||||
value: "self-hosted,linux,fc-build-linux"
|
||||
- name: HOME
|
||||
|
||||
@@ -46,7 +46,7 @@ spec:
|
||||
spec:
|
||||
containers:
|
||||
- name: intranet-web
|
||||
image: localhost/fc-intranet-web:v20260531-ttsreader-bridge
|
||||
image: localhost/fc-intranet-web:v20260612-screenshot-metadata
|
||||
imagePullPolicy: Never
|
||||
ports:
|
||||
- containerPort: 5300
|
||||
@@ -60,14 +60,17 @@ spec:
|
||||
# ≈ 9 hours. BLUEJAY-WS GPU (R9700, 32GB VRAM) does the same work
|
||||
# in minutes. Memory: feedback_pi5_nomic_embed_slow.
|
||||
- name: IntranetSearch__OllamaBaseUrl
|
||||
value: "http://10.0.56.20:11434"
|
||||
# Sprint E Phase 2α — JSON-file-backed PageReadingOverride persistence
|
||||
# on the writable PVC at /data. Without this env var the
|
||||
# intranet falls back to the in-memory store (loses state on
|
||||
# pod restart). Master's PageReadingOverrideOptions binds
|
||||
# PageReadingOverrides:FilePath.
|
||||
- name: PageReadingOverrides__FilePath
|
||||
value: "/data/page-reading-overrides.json"
|
||||
value: "http://edge1.iamworkin.lan:11434"
|
||||
# External Notes corpus roots are not mounted in the live pod today.
|
||||
# Keep the curated/workflow docs directory active without logging
|
||||
# repeated /srv/flowercore-notes missing-root warnings.
|
||||
- name: IntranetSearch__Enabled
|
||||
value: "false"
|
||||
# Page-reading override SQLite persistence on the writable PVC at
|
||||
# /data. This backs pronunciation, notes, corrections, and
|
||||
# page-profile metadata across pod restarts.
|
||||
- name: PageReadingOverrides__DatabasePath
|
||||
value: "/data/page-reading-overrides.db"
|
||||
- name: KnowledgeFleetSearch__BaseUrl
|
||||
value: "https://knowledge.iamworkin.lan"
|
||||
- name: KnowledgeFleetSearch__ApiKey
|
||||
|
||||
@@ -843,9 +843,7 @@ data:
|
||||
rules:
|
||||
- alert: PiManagerDown
|
||||
expr: up{job="pimanager-app"} == 0
|
||||
# Sprint 67: delayed behind NodeDown's critical page so a powered-off
|
||||
# Pi does not create the first duplicate page for the same host.
|
||||
for: 8m
|
||||
for: 3m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
@@ -1244,58 +1242,6 @@ data:
|
||||
summary: "Marquee animation duration drifting > 10% on {{ $labels.renderer }} ({{ $labels.phase }})"
|
||||
description: "Median observed cycle duration deviates from target DurationMs by >10%. Could indicate browser tab throttling, GPU pressure, or phase-advancement bug."
|
||||
|
||||
# ============================================================
|
||||
# Update Center public-edge probes
|
||||
# Live-mirrored from FlowerCore.Notes/scripts/monitoring/alerts.yml.
|
||||
# This K8s ConfigMap is the future migration target; live Prometheus
|
||||
# still reads the canonical Notes file from noc1 Podman.
|
||||
# ============================================================
|
||||
- name: update_center
|
||||
rules:
|
||||
# Critical only when the edge is genuinely unreachable. A Cloudflare
|
||||
# HTTP 429 means the prober hit a rate-limit, not that real clients
|
||||
# are down, so the warning rule below owns that signal.
|
||||
- alert: UpdateCenterPublicEdgeDown
|
||||
expr: |
|
||||
(probe_success{job="probe-update-center-public-edge"} == 0)
|
||||
unless on(instance)
|
||||
(probe_http_status_code{job="probe-update-center-public-edge"} == 429)
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
service: update-center
|
||||
alert_channel: irc
|
||||
annotations:
|
||||
summary: "Update Center public edge probe failed for {{ $labels.instance }}"
|
||||
description: >-
|
||||
The external probe for {{ $labels.instance }} failed for 10 minutes with a
|
||||
non-2xx status that is not a rate-limit. Public Update Center clients may be
|
||||
unable to fetch manifest schema metadata through Cloudflare.
|
||||
runbook: >-
|
||||
1. curl -sk https://{{ $labels.instance }}/api/v1/manifests/_schema
|
||||
2. Verify Cloudflare DNS record is proxied and targets the current public edge IP
|
||||
3. kubectl -n fc-updater get ingressroute updatecenter-web-public secret cf-origin-flowercore-io
|
||||
4. Check Traefik logs for Method() or TLS secret errors
|
||||
|
||||
- alert: UpdateCenterPublicEdgeRateLimited
|
||||
expr: probe_http_status_code{job="probe-update-center-public-edge"} == 429
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
service: update-center
|
||||
alert_channel: irc
|
||||
annotations:
|
||||
summary: "Cloudflare is rate-limiting (HTTP 429) the public-edge probe for {{ $labels.instance }}"
|
||||
description: >-
|
||||
The blackbox prober receives HTTP 429 from Cloudflare for {{ $labels.instance }}
|
||||
while the origin is healthy. This is a Cloudflare rate-limit / WAF condition on
|
||||
the public hostname, not an outage.
|
||||
runbook: >-
|
||||
1. curl -sk https://{{ $labels.instance }}/api/v1/manifests/_schema (expect 200 from a normal client)
|
||||
2. Review Cloudflare rate-limit / WAF rules for the hostname; the 5m-cadence prober is tripping a 429
|
||||
3. Add a Cloudflare rate-limit exception for the prober source IP or the /api/v1/manifests/_schema path
|
||||
4. Confirm whether the singular host update.flowercore.io is still required, or only updates.flowercore.io
|
||||
|
||||
# =============================================================================
|
||||
# ConfigMap: Blackbox Exporter Configuration
|
||||
# =============================================================================
|
||||
|
||||
@@ -12,28 +12,27 @@ Source: `D:\git\FlowerCore\FlowerCore.WorldBuilder` (master)
|
||||
in pfSense Unbound before this manifest is applied, or cert-manager
|
||||
HTTP-01 silently backs off exponentially for ~2h.
|
||||
Memory: `feedback_pfsense_dns_required_for_acme`.
|
||||
2. **Image import to ALL RKE2 nodes** — pod can schedule to any of
|
||||
`rke2-server` (10.0.56.11), `rke2-agent1` (10.0.56.12),
|
||||
`rke2-agent2` (10.0.56.13). Build with:
|
||||
2. **Image import to ALL Ready RKE2 nodes** — pod can currently schedule to
|
||||
`rke2-server` (10.0.56.11) and `rke2-agent1` (10.0.56.12). Build with:
|
||||
```bash
|
||||
bash deploy/build.sh # in FlowerCore.WorldBuilder repo
|
||||
podman save localhost/fc-worldbuilder:v<TAG> -o /tmp/fc-worldbuilder-v<TAG>.tar
|
||||
for h in 10.0.56.11 10.0.56.12 10.0.56.13; do
|
||||
scp /tmp/fc-worldbuilder-v<TAG>.tar fcadmin@$h:/tmp/
|
||||
mkdir -p artifacts/deploy
|
||||
podman save localhost/fc-worldbuilder:v<TAG> -o artifacts/deploy/fc-worldbuilder-v<TAG>.tar
|
||||
for h in 10.0.56.11 10.0.56.12; do
|
||||
ssh fcadmin@$h "mkdir -p /home/fcadmin/.fcv"
|
||||
scp artifacts/deploy/fc-worldbuilder-v<TAG>.tar fcadmin@$h:/home/fcadmin/.fcv/
|
||||
ssh fcadmin@$h \
|
||||
"sudo /var/lib/rancher/rke2/bin/ctr -a /run/k3s/containerd/containerd.sock \
|
||||
-n k8s.io images import /tmp/fc-worldbuilder-v<TAG>.tar"
|
||||
-n k8s.io images import /home/fcadmin/.fcv/fc-worldbuilder-v<TAG>.tar"
|
||||
done
|
||||
```
|
||||
Memory: `feedback_rke2_image_import_per_node_scp`.
|
||||
3. **Bump image tag** in `worldbuilder.yaml` and git push.
|
||||
ArgoCD ApplicationSet picks up within ~3 minutes.
|
||||
4. **First production render** — open
|
||||
`https://worldbuilder.iamworkin.lan/studio/c32e0000-0000-4000-8000-000000000004`
|
||||
and confirm the Cyberpunk Blue Jay demo prompt loads with five seeded fake
|
||||
generated images. This Sprint 32 visitor-safe profile uses
|
||||
`ClientMode=fake`; switch the image-generation env vars back to ComfyUI only
|
||||
for an operator-owned GPU render lane.
|
||||
4. **First production render** — verify
|
||||
`https://worldbuilder.iamworkin.lan/healthz`, open
|
||||
`https://worldbuilder.iamworkin.lan/settings`, and confirm the image backend
|
||||
reports ComfyUI before running an operator-owned render lane.
|
||||
|
||||
## Health probes
|
||||
|
||||
@@ -56,13 +55,8 @@ Source: `D:\git\FlowerCore\FlowerCore.WorldBuilder` (master)
|
||||
|
||||
## Image generation backend
|
||||
|
||||
Sprint 32 pins the Kubernetes profile to
|
||||
`FlowerCore:WorldBuilder:ImageGeneration:ClientMode=fake` with
|
||||
`BaseUrl=http://127.0.0.1:1`. That keeps the public/internal visitor demo
|
||||
deterministic, avoids GPU exposure, and still exercises the studio/gallery
|
||||
surface with persisted generated-image metadata.
|
||||
|
||||
The previous ComfyUI backend target was `http://10.0.56.20:8188` on
|
||||
BLUEJAY-WS (R9700 / gfx1201 / ROCm 7.2.1). Re-enable it only in an
|
||||
operator-owned follow-up that also verifies workstation reachability and image
|
||||
import freshness.
|
||||
The live internal profile now uses
|
||||
`FlowerCore:WorldBuilder:ImageGeneration:ClientMode=comfyui` with
|
||||
`BaseUrl=http://10.0.56.20:8188` on BLUEJAY-WS (R9700 / gfx1201 / ROCm 7.2).
|
||||
Keep the public host pre-staging disabled unless the five safe-to-expose gates
|
||||
are rechecked; the live GPU lane is operator-owned and internal-only.
|
||||
|
||||
@@ -5,10 +5,10 @@
|
||||
#
|
||||
# Image build (BLUEJAY-WS):
|
||||
# bash deploy/build.sh # in FlowerCore.WorldBuilder repo
|
||||
# podman save localhost/fc-worldbuilder:v<TAG> -o /tmp/fc-worldbuilder-v<TAG>.tar
|
||||
# for h in 10.0.56.11 10.0.56.12 10.0.56.13; do
|
||||
# scp /tmp/fc-worldbuilder-v<TAG>.tar fcadmin@$h:/tmp/
|
||||
# ssh fcadmin@$h "sudo /var/lib/rancher/rke2/bin/ctr -a /run/k3s/containerd/containerd.sock -n k8s.io images import /tmp/fc-worldbuilder-v<TAG>.tar"
|
||||
# podman save localhost/fc-worldbuilder:v<TAG> -o artifacts/deploy/fc-worldbuilder-v<TAG>.tar
|
||||
# for h in 10.0.56.11 10.0.56.12; do
|
||||
# scp artifacts/deploy/fc-worldbuilder-v<TAG>.tar fcadmin@$h:/home/fcadmin/.fcv/
|
||||
# ssh fcadmin@$h "sudo /var/lib/rancher/rke2/bin/ctr -a /run/k3s/containerd/containerd.sock -n k8s.io images import /home/fcadmin/.fcv/fc-worldbuilder-v<TAG>.tar"
|
||||
# done
|
||||
---
|
||||
apiVersion: v1
|
||||
@@ -90,7 +90,7 @@ spec:
|
||||
containers:
|
||||
- name: web
|
||||
# Bump tag for each rebuild. Initial deploy: v202605062048
|
||||
image: localhost/fc-worldbuilder:v202605062048
|
||||
image: localhost/fc-worldbuilder:v202606121657-35aaa2c-gpu
|
||||
imagePullPolicy: Never
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
@@ -117,14 +117,16 @@ spec:
|
||||
value: "/data/gallery"
|
||||
- name: FlowerCore__WorldBuilder__Export__RootPath
|
||||
value: "/data/exports"
|
||||
# Visitor-safe Sprint 32 profile: fake backend keeps public demo
|
||||
# rendering deterministic and avoids exposing BLUEJAY-WS GPU.
|
||||
# Operator-approved live GPU lane. Internal-only host targets
|
||||
# BLUEJAY-WS ComfyUI; keep public host pre-staging disabled below.
|
||||
- name: FlowerCore__WorldBuilder__ImageGeneration__BaseUrl
|
||||
value: "http://127.0.0.1:1"
|
||||
value: "http://10.0.56.20:8188"
|
||||
- name: FlowerCore__WorldBuilder__ImageGeneration__ClientMode
|
||||
value: "fake"
|
||||
value: "comfyui"
|
||||
- name: FlowerCore__WorldBuilder__ImageGeneration__BackendId
|
||||
value: "fake"
|
||||
value: "comfyui"
|
||||
- name: FlowerCore__WorldBuilder__ImageGeneration__VisitorSafe
|
||||
value: "false"
|
||||
resources:
|
||||
# Cluster CPU-request budget runs hot (99% on all 3 nodes at deploy
|
||||
# time) while actual CPU usage is well below capacity. Idle Blazor
|
||||
|
||||
@@ -1,74 +0,0 @@
|
||||
apiVersion: argoproj.io/v1alpha1
|
||||
kind: ApplicationSet
|
||||
metadata:
|
||||
annotations:
|
||||
argocd.argoproj.io/refresh: "true"
|
||||
name: bluejay-infra
|
||||
namespace: argocd
|
||||
spec:
|
||||
generators:
|
||||
- git:
|
||||
directories:
|
||||
- path: apps/*
|
||||
repoURL: http://gitea-clusterip.gitea.svc:3000/bluejay/bluejay-infra.git
|
||||
revision: main
|
||||
template:
|
||||
metadata: {}
|
||||
spec:
|
||||
destination: {}
|
||||
project: ""
|
||||
goTemplate: true
|
||||
goTemplateOptions:
|
||||
- missingkey=error
|
||||
template:
|
||||
metadata:
|
||||
name: infra-{{.path.basename}}
|
||||
spec:
|
||||
destination:
|
||||
server: https://kubernetes.default.svc
|
||||
ignoreDifferences:
|
||||
- group: apps
|
||||
jqPathExpressions:
|
||||
- .spec.volumeClaimTemplates[]?.status
|
||||
jsonPointers:
|
||||
- /spec/volumeClaimTemplates
|
||||
kind: StatefulSet
|
||||
name: zabbix-postgres
|
||||
namespace: zabbix
|
||||
- group: apps
|
||||
jqPathExpressions:
|
||||
- .spec.volumeClaimTemplates[]?.status
|
||||
jsonPointers:
|
||||
- /spec/volumeClaimTemplates
|
||||
kind: StatefulSet
|
||||
name: guac-mysql
|
||||
namespace: guacamole
|
||||
- group: apps
|
||||
jqPathExpressions:
|
||||
- .spec.volumeClaimTemplates[]?.status
|
||||
jsonPointers:
|
||||
- /spec/volumeClaimTemplates
|
||||
kind: StatefulSet
|
||||
name: matrix-postgres
|
||||
namespace: matrix
|
||||
- group: apps
|
||||
jqPathExpressions:
|
||||
- .spec.volumeClaimTemplates[]?.status
|
||||
jsonPointers:
|
||||
- /spec/volumeClaimTemplates
|
||||
kind: StatefulSet
|
||||
name: authentik-postgres
|
||||
namespace: authentik
|
||||
project: default
|
||||
source:
|
||||
path: '{{.path.path}}'
|
||||
repoURL: http://gitea-clusterip.gitea.svc:3000/bluejay/bluejay-infra.git
|
||||
targetRevision: main
|
||||
syncPolicy:
|
||||
automated:
|
||||
prune: true
|
||||
selfHeal: true
|
||||
syncOptions:
|
||||
- CreateNamespace=true
|
||||
- ServerSideApply=true
|
||||
- RespectIgnoreDifferences=true
|
||||
@@ -272,6 +272,7 @@ public sealed class FleetManifestLintTests
|
||||
var container = deployments[expectedRunner.Key].MainContainerMappings().Should().ContainSingle().Subject;
|
||||
EnvValue(container, "REPO_URL").Should().Be(expectedRunner.Value);
|
||||
EnvValue(container, "EPHEMERAL").Should().Be("true");
|
||||
EnvValue(container, "DISABLE_AUTO_UPDATE").Should().Be("true", $"{expectedRunner.Key} must not self-update inside immutable Kubernetes runner pods");
|
||||
EnvValue(container, "LABELS").Should().Be("self-hosted,linux,fc-build-linux");
|
||||
EnvValue(container, "RUN_AS_ROOT").Should().Be("false");
|
||||
EnvValue(container, "ACCESS_TOKEN").Should().BeNull("ACCESS_TOKEN must come from github-runner-token Secret, not a literal");
|
||||
@@ -468,99 +469,6 @@ public sealed class FleetManifestLintTests
|
||||
monitoring.Should().Contain("dedicated LinuxRunnerOffline/MacMiniRunnerOffline alerts");
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void GithubRunnerReadme_DocumentsAcceptedEphemeralExitChurn()
|
||||
{
|
||||
var readme = File.ReadAllText(Path.Combine(Inventory.BluejayRoot, "apps", "github-runner", "README.md"));
|
||||
|
||||
readme.Should().Contain("Ephemeral runner pods");
|
||||
readme.Should().Contain("exit-1/restart churn");
|
||||
readme.Should().Contain("accepted operational noise");
|
||||
readme.Should().Contain("repo-scoped runner-offline alerts stay quiet");
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Monitoring_PiManagerDownDelayAndUpdateCenterRateLimit_MatchCanonicalAlerts()
|
||||
{
|
||||
var notesAlerts = File.ReadAllText(Path.Combine(
|
||||
Inventory.WorkspaceRoot,
|
||||
"FlowerCore.Notes",
|
||||
"scripts",
|
||||
"monitoring",
|
||||
"alerts.yml"));
|
||||
var monitoring = File.ReadAllText(Path.Combine(Inventory.BluejayRoot, "apps", "monitoring", "noc-monitoring.yaml"));
|
||||
|
||||
notesAlerts.Should().Contain("# Sprint 67: keep this warning behind NodeDown's 5m critical page");
|
||||
notesAlerts.Should().Contain("- alert: PiManagerDown");
|
||||
notesAlerts.Should().Contain("for: 8m");
|
||||
monitoring.Should().Contain("# Sprint 67: delayed behind NodeDown's critical page");
|
||||
monitoring.Should().Contain("- alert: PiManagerDown");
|
||||
monitoring.Should().Contain("for: 8m");
|
||||
|
||||
notesAlerts.Should().Contain("- alert: UpdateCenterPublicEdgeRateLimited");
|
||||
notesAlerts.Should().Contain("expr: probe_http_status_code{job=\"probe-update-center-public-edge\"} == 429");
|
||||
notesAlerts.Should().Contain("for: 15m");
|
||||
monitoring.Should().Contain("- alert: UpdateCenterPublicEdgeRateLimited");
|
||||
monitoring.Should().Contain("expr: probe_http_status_code{job=\"probe-update-center-public-edge\"} == 429");
|
||||
monitoring.Should().Contain("for: 15m");
|
||||
monitoring.Should().Contain("severity: warning");
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ApplicationSetExport_MustRemainManualRootOfGitOpsTree()
|
||||
{
|
||||
var readme = File.ReadAllText(Path.Combine(Inventory.BluejayRoot, "README.md"));
|
||||
var appsetPath = Path.Combine(Inventory.BluejayRoot, "argocd", "applicationset-bluejay-infra.yaml");
|
||||
|
||||
File.Exists(appsetPath).Should().BeTrue();
|
||||
var appset = File.ReadAllText(appsetPath);
|
||||
|
||||
appset.Should().Contain("kind: ApplicationSet");
|
||||
appset.Should().Contain("name: bluejay-infra");
|
||||
appset.Should().NotContain("\nstatus:");
|
||||
appset.Should().NotContain("managedFields:");
|
||||
readme.Should().Contain("root of this GitOps tree");
|
||||
readme.Should().Contain("NOT self-managed");
|
||||
readme.Should().Contain("kubectl -n argocd apply -f argocd/applicationset-bluejay-infra.yaml");
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ApplicationSetExport_MustDiscoverAppsDirectoryOnMain()
|
||||
{
|
||||
var appset = File.ReadAllText(Path.Combine(Inventory.BluejayRoot, "argocd", "applicationset-bluejay-infra.yaml"));
|
||||
|
||||
appset.Should().Contain("path: apps/*");
|
||||
appset.Should().Contain("revision: main");
|
||||
appset.Should().Contain("repoURL: http://gitea-clusterip.gitea.svc:3000/bluejay/bluejay-infra.git");
|
||||
appset.Should().Contain("path: '{{.path.path}}'");
|
||||
appset.Should().Contain("targetRevision: main");
|
||||
appset.Should().Contain("ServerSideApply=true");
|
||||
appset.Should().Contain("RespectIgnoreDifferences=true");
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ApplicationSetExport_MustPreserveStatefulSetIgnoreDifferences()
|
||||
{
|
||||
var appset = File.ReadAllText(Path.Combine(Inventory.BluejayRoot, "argocd", "applicationset-bluejay-infra.yaml"));
|
||||
|
||||
appset.Should().Contain("jsonPointers:");
|
||||
appset.Should().Contain("- /spec/volumeClaimTemplates");
|
||||
appset.Should().Contain(".spec.volumeClaimTemplates[]?.status");
|
||||
Regex.Matches(appset, "kind: StatefulSet").Should().HaveCount(4);
|
||||
|
||||
foreach (var (name, ns) in new[]
|
||||
{
|
||||
("zabbix-postgres", "zabbix"),
|
||||
("guac-mysql", "guacamole"),
|
||||
("matrix-postgres", "matrix"),
|
||||
("authentik-postgres", "authentik"),
|
||||
})
|
||||
{
|
||||
appset.Should().Contain($"name: {name}");
|
||||
appset.Should().Contain($"namespace: {ns}");
|
||||
}
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Monitoring_BlackboxTargetsForOidcSensitiveServices_MustUseAnonymousHealthRoutesWhenAvailable()
|
||||
{
|
||||
@@ -742,6 +650,7 @@ public sealed class FleetManifestLintTests
|
||||
"certificate-web.yaml",
|
||||
"clusterrole-operator.yaml",
|
||||
"clusterrolebinding-operator.yaml",
|
||||
"crds.yaml",
|
||||
"deployment-operator.yaml",
|
||||
"deployment-web.yaml",
|
||||
"ingressroute-web.yaml",
|
||||
@@ -831,7 +740,8 @@ public sealed class FleetManifestLintTests
|
||||
.Single(document => document.Kind == "ClusterRole" && document.Name == "fc-devicemgmt-operator");
|
||||
var allScalars = clusterRole.AllScalars().ToList();
|
||||
|
||||
allScalars.Should().Contain("devices.flowercore.io");
|
||||
allScalars.Should().Contain("flowercore.io");
|
||||
allScalars.Should().NotContain("devices.flowercore.io");
|
||||
allScalars.Should().Contain("*");
|
||||
allScalars.Should().Contain("deployments");
|
||||
allScalars.Should().Contain("get");
|
||||
@@ -860,7 +770,7 @@ public sealed class FleetManifestLintTests
|
||||
|
||||
FcDeviceManagementDocuments().Should().NotContain(document => document.Kind == "Secret");
|
||||
appText.Should().Contain("secretKeyRef:");
|
||||
appText.Should().Contain("secretName: fc-devicemgmt-runtime");
|
||||
appText.Should().Contain("name: fc-devicemgmt-runtime");
|
||||
appText.Should().NotContain("stringData:");
|
||||
appText.Should().NotContain("from-literal");
|
||||
appText.Should().NotContain("tls.key:");
|
||||
@@ -957,9 +867,9 @@ public sealed class FleetManifestLintTests
|
||||
{
|
||||
var deployments = new[]
|
||||
{
|
||||
(App: "fc-dns", Name: "dns-web", Slug: "dns", Secret: "dns-oidc-client"),
|
||||
(App: "fc-media", Name: "fc-media-web", Slug: "media", Secret: "media-oidc-client"),
|
||||
(App: "fc-distribution", Name: "fc-distribution", Slug: "distribution", Secret: "distribution-oidc-client"),
|
||||
(App: "fc-dns", Name: "dns-web", Slug: "dns", Secret: "dns-oidc-client", AuthEnabled: "false"),
|
||||
(App: "fc-media", Name: "fc-media-web", Slug: "media", Secret: "media-oidc-client", AuthEnabled: "true"),
|
||||
(App: "fc-distribution", Name: "fc-distribution", Slug: "distribution", Secret: "distribution-oidc-client", AuthEnabled: "true"),
|
||||
};
|
||||
|
||||
foreach (var expected in deployments)
|
||||
@@ -968,7 +878,7 @@ public sealed class FleetManifestLintTests
|
||||
.Single(document => document.Kind == "Deployment" && document.Name == expected.Name);
|
||||
var container = deployment.MainContainerMappings().Should().ContainSingle().Subject;
|
||||
|
||||
EnvValue(container, "FlowerCore__Auth__Enabled").Should().Be("true");
|
||||
EnvValue(container, "FlowerCore__Auth__Enabled").Should().Be(expected.AuthEnabled);
|
||||
EnvValue(container, "FlowerCore__Auth__Oidc__Enabled").Should().Be("true");
|
||||
(EnvValue(container, "FlowerCore__Auth__Oidc__Audience") ?? EnvValue(container, "FlowerCore__Auth__Oidc__ClientId"))
|
||||
.Should()
|
||||
@@ -1017,7 +927,7 @@ public sealed class FleetManifestLintTests
|
||||
var dnsPvc = AppDocuments("fc-dns")
|
||||
.Single(document => document.Kind == "PersistentVolumeClaim" && document.Name == "dns-web-data");
|
||||
|
||||
ManifestNodeExtensions.Scalar(dnsContainer, "image").Should().Be("localhost/fc-dns-web:v20260604-oidc-proper");
|
||||
ManifestNodeExtensions.Scalar(dnsContainer, "image").Should().Be("localhost/fc-dns-web:v20260612-l4dns-a5d2849");
|
||||
dnsPvc.Scalar("spec", "storageClassName").Should().Be("longhorn");
|
||||
dnsPvc.Scalar("spec", "resources", "requests", "storage").Should().Be("1Gi");
|
||||
|
||||
|
||||
Reference in New Issue
Block a user