Compare commits
9 Commits
codex/s60-
...
codex/s67-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
13d8ca8c1a | ||
|
|
b0a3ef7448 | ||
|
|
81ac1f3e4f | ||
| b842738a0e | |||
|
|
f0cb7a5e81 | ||
| ac0f665323 | |||
|
|
c4b08f41ab | ||
|
|
417d3830ae | ||
| cb4ea13e7a |
16
README.md
16
README.md
@@ -2,6 +2,22 @@
|
||||
|
||||
Infrastructure manifests for ArgoCD. An `ApplicationSet` in `argocd` namespace watches the `apps/*` directories in this repo and creates one `Application` per subdir (prefixed `infra-<name>`).
|
||||
|
||||
## Root GitOps ApplicationSet
|
||||
|
||||
`argocd/applicationset-bluejay-infra.yaml` is the root of this GitOps tree, but
|
||||
it is **NOT self-managed** by ArgoCD. Apply it manually when the root generator
|
||||
or sync policy changes:
|
||||
|
||||
```bash
|
||||
kubectl -n argocd apply -f argocd/applicationset-bluejay-infra.yaml
|
||||
```
|
||||
|
||||
Keep the per-StatefulSet `ignoreDifferences` entries in that file synced with
|
||||
the live ApplicationSet. They intentionally cover `zabbix-postgres`,
|
||||
`guac-mysql`, `matrix-postgres`, and `authentik-postgres` so ArgoCD does not
|
||||
loop forever on server-side-apply `volumeClaimTemplates` status drift. Every new
|
||||
StatefulSet with `volumeClaimTemplates` needs its own entry appended.
|
||||
|
||||
## Adding a new service to the cluster
|
||||
|
||||
Follow these steps in order. **Step 1 must run before step 3** — if you skip it, cert-manager HTTP-01 will silently fail for ~2h per cert (exponential backoff) until someone diagnoses the DNS.
|
||||
|
||||
@@ -113,7 +113,12 @@ spec:
|
||||
- name: pgdata
|
||||
mountPath: /var/lib/postgresql/data
|
||||
volumeClaimTemplates:
|
||||
- metadata:
|
||||
# apiVersion/kind included deliberately: this STS was created via ArgoCD ServerSideApply,
|
||||
# so the live object carries PVC TypeMeta inside volumeClaimTemplates; omitting it here
|
||||
# leaves the app eternally OutOfSync even though kubectl SSA dry-run shows no change.
|
||||
- apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: pgdata
|
||||
spec:
|
||||
storageClassName: longhorn
|
||||
|
||||
@@ -46,6 +46,8 @@ spec:
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
fc.flowercore.io/healthz-anon: "true"
|
||||
fc.flowercore.io/probe-path: "/healthz"
|
||||
prometheus.io/path: /metrics/prometheus
|
||||
prometheus.io/port: "5000"
|
||||
prometheus.io/scrape: "true"
|
||||
@@ -54,6 +56,7 @@ spec:
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
spec:
|
||||
containers:
|
||||
# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: aistation-web-config
|
||||
@@ -167,3 +170,26 @@ spec:
|
||||
port: 80
|
||||
tls:
|
||||
secretName: aistation-web-tls
|
||||
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
|
||||
# When the operator decides to expose aistation-web publicly, uncomment + update the host,
|
||||
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
|
||||
#
|
||||
# --- IngressRoute ---
|
||||
# apiVersion: traefik.io/v1alpha1
|
||||
# kind: IngressRoute
|
||||
# metadata:
|
||||
# name: aistation-web-public
|
||||
# namespace: fc-aistation
|
||||
# spec:
|
||||
# entryPoints: [websecure]
|
||||
# routes:
|
||||
# - match: Host(`aistation.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
|
||||
# kind: Rule
|
||||
# middlewares:
|
||||
# - name: aistation-web-public-profile-header # injects entitlement profile
|
||||
# services:
|
||||
# - name: aistation-web
|
||||
# port: 80
|
||||
# tls: {}
|
||||
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
|
||||
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).
|
||||
|
||||
@@ -112,6 +112,8 @@ spec:
|
||||
app.kubernetes.io/name: chat-web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
annotations:
|
||||
fc.flowercore.io/healthz-anon: "true"
|
||||
fc.flowercore.io/probe-path: "/healthz"
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "8080"
|
||||
prometheus.io/path: "/metrics/prometheus"
|
||||
@@ -128,6 +130,7 @@ spec:
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 8080
|
||||
# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: chat-web-config
|
||||
|
||||
@@ -51,3 +51,26 @@ spec:
|
||||
port: 8080
|
||||
tls:
|
||||
secretName: remotedesktop-web-tls
|
||||
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
|
||||
# When the operator decides to expose remotedesktop-web publicly, uncomment + update the host,
|
||||
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
|
||||
#
|
||||
# --- IngressRoute ---
|
||||
# apiVersion: traefik.io/v1alpha1
|
||||
# kind: IngressRoute
|
||||
# metadata:
|
||||
# name: remotedesktop-web-public
|
||||
# namespace: fc-desktop
|
||||
# spec:
|
||||
# entryPoints: [websecure]
|
||||
# routes:
|
||||
# - match: Host(`desktop.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
|
||||
# kind: Rule
|
||||
# middlewares:
|
||||
# - name: remotedesktop-web-public-profile-header # injects entitlement profile
|
||||
# services:
|
||||
# - name: remotedesktop-web
|
||||
# port: 8080
|
||||
# tls: {}
|
||||
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
|
||||
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).
|
||||
|
||||
@@ -52,6 +52,8 @@ spec:
|
||||
flowercore.io/tenant-id: system
|
||||
flowercore.io/created-by: bluejay-infra
|
||||
annotations:
|
||||
fc.flowercore.io/healthz-anon: "true"
|
||||
fc.flowercore.io/probe-path: "/healthz"
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "8080"
|
||||
prometheus.io/path: "/metrics"
|
||||
@@ -67,6 +69,7 @@ spec:
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 8080
|
||||
# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
|
||||
env:
|
||||
- name: ASPNETCORE_URLS
|
||||
value: "http://+:8080"
|
||||
|
||||
@@ -30,3 +30,26 @@ spec:
|
||||
port: 80
|
||||
tls:
|
||||
secretName: dms-web-tls
|
||||
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
|
||||
# When the operator decides to expose dms-web publicly, uncomment + update the host,
|
||||
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
|
||||
#
|
||||
# --- IngressRoute ---
|
||||
# apiVersion: traefik.io/v1alpha1
|
||||
# kind: IngressRoute
|
||||
# metadata:
|
||||
# name: dms-web-public
|
||||
# namespace: fc-dms
|
||||
# spec:
|
||||
# entryPoints: [websecure]
|
||||
# routes:
|
||||
# - match: Host(`dms.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
|
||||
# kind: Rule
|
||||
# middlewares:
|
||||
# - name: dms-web-public-profile-header # injects entitlement profile
|
||||
# services:
|
||||
# - name: dms-web
|
||||
# port: 80
|
||||
# tls: {}
|
||||
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
|
||||
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).
|
||||
|
||||
@@ -46,6 +46,8 @@ spec:
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
fc.flowercore.io/healthz-anon: "true"
|
||||
fc.flowercore.io/probe-path: "/health"
|
||||
prometheus.io/path: /metrics/prometheus
|
||||
prometheus.io/port: "5000"
|
||||
prometheus.io/scrape: "true"
|
||||
@@ -54,6 +56,7 @@ spec:
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
spec:
|
||||
containers:
|
||||
# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: library-web-config
|
||||
@@ -167,3 +170,26 @@ spec:
|
||||
port: 80
|
||||
tls:
|
||||
secretName: library-web-tls
|
||||
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
|
||||
# When the operator decides to expose library-web publicly, uncomment + update the host,
|
||||
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
|
||||
#
|
||||
# --- IngressRoute ---
|
||||
# apiVersion: traefik.io/v1alpha1
|
||||
# kind: IngressRoute
|
||||
# metadata:
|
||||
# name: library-web-public
|
||||
# namespace: fc-library
|
||||
# spec:
|
||||
# entryPoints: [websecure]
|
||||
# routes:
|
||||
# - match: Host(`library.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
|
||||
# kind: Rule
|
||||
# middlewares:
|
||||
# - name: library-web-public-profile-header # injects entitlement profile
|
||||
# services:
|
||||
# - name: library-web
|
||||
# port: 80
|
||||
# tls: {}
|
||||
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
|
||||
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).
|
||||
|
||||
@@ -83,6 +83,8 @@ spec:
|
||||
app.kubernetes.io/name: fc-llm-bridge
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
annotations:
|
||||
fc.flowercore.io/healthz-anon: "true"
|
||||
fc.flowercore.io/probe-path: "/healthz"
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "8080"
|
||||
prometheus.io/path: "/metrics"
|
||||
@@ -116,6 +118,7 @@ spec:
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
name: http
|
||||
# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
|
||||
env:
|
||||
- name: ASPNETCORE_URLS
|
||||
value: "http://+:8080"
|
||||
@@ -281,3 +284,26 @@ spec:
|
||||
port: 8080
|
||||
tls:
|
||||
secretName: fc-llm-bridge-tls
|
||||
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
|
||||
# When the operator decides to expose fc-llm-bridge publicly, uncomment + update the host,
|
||||
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
|
||||
#
|
||||
# --- IngressRoute ---
|
||||
# apiVersion: traefik.io/v1alpha1
|
||||
# kind: IngressRoute
|
||||
# metadata:
|
||||
# name: fc-llm-bridge-public
|
||||
# namespace: fc-llm-bridge
|
||||
# spec:
|
||||
# entryPoints: [websecure]
|
||||
# routes:
|
||||
# - match: Host(`llm-bridge.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
|
||||
# kind: Rule
|
||||
# middlewares:
|
||||
# - name: fc-llm-bridge-public-profile-header # injects entitlement profile
|
||||
# services:
|
||||
# - name: fc-llm-bridge
|
||||
# port: 8080
|
||||
# tls: {}
|
||||
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
|
||||
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).
|
||||
|
||||
@@ -30,3 +30,26 @@ spec:
|
||||
port: 80
|
||||
tls:
|
||||
secretName: menuboard-web-tls
|
||||
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
|
||||
# When the operator decides to expose menuboard-web publicly, uncomment + update the host,
|
||||
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
|
||||
#
|
||||
# --- IngressRoute ---
|
||||
# apiVersion: traefik.io/v1alpha1
|
||||
# kind: IngressRoute
|
||||
# metadata:
|
||||
# name: menuboard-web-public
|
||||
# namespace: fc-menuboard
|
||||
# spec:
|
||||
# entryPoints: [websecure]
|
||||
# routes:
|
||||
# - match: Host(`menuboard.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
|
||||
# kind: Rule
|
||||
# middlewares:
|
||||
# - name: menuboard-web-public-profile-header # injects entitlement profile
|
||||
# services:
|
||||
# - name: menuboard-web
|
||||
# port: 80
|
||||
# tls: {}
|
||||
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
|
||||
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).
|
||||
|
||||
@@ -41,6 +41,8 @@ spec:
|
||||
labels:
|
||||
app: messageboard-web
|
||||
annotations:
|
||||
fc.flowercore.io/healthz-anon: "true"
|
||||
fc.flowercore.io/probe-path: "/health"
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "8080"
|
||||
prometheus.io/path: "/metrics/prometheus"
|
||||
@@ -52,6 +54,7 @@ spec:
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
name: http
|
||||
# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: messageboard-web-config
|
||||
@@ -141,3 +144,26 @@ spec:
|
||||
port: 80
|
||||
tls:
|
||||
secretName: messageboard-web-tls
|
||||
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
|
||||
# When the operator decides to expose messageboard-web publicly, uncomment + update the host,
|
||||
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
|
||||
#
|
||||
# --- IngressRoute ---
|
||||
# apiVersion: traefik.io/v1alpha1
|
||||
# kind: IngressRoute
|
||||
# metadata:
|
||||
# name: messageboard-web-public
|
||||
# namespace: fc-messageboard
|
||||
# spec:
|
||||
# entryPoints: [websecure]
|
||||
# routes:
|
||||
# - match: Host(`messageboard.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
|
||||
# kind: Rule
|
||||
# middlewares:
|
||||
# - name: messageboard-web-public-profile-header # injects entitlement profile
|
||||
# services:
|
||||
# - name: messageboard-web
|
||||
# port: 80
|
||||
# tls: {}
|
||||
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
|
||||
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).
|
||||
|
||||
@@ -30,3 +30,26 @@ spec:
|
||||
port: 5300
|
||||
tls:
|
||||
secretName: mysql-web-tls
|
||||
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
|
||||
# When the operator decides to expose mysql-web publicly, uncomment + update the host,
|
||||
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
|
||||
#
|
||||
# --- IngressRoute ---
|
||||
# apiVersion: traefik.io/v1alpha1
|
||||
# kind: IngressRoute
|
||||
# metadata:
|
||||
# name: mysql-web-public
|
||||
# namespace: fc-mysql
|
||||
# spec:
|
||||
# entryPoints: [websecure]
|
||||
# routes:
|
||||
# - match: Host(`mysql.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
|
||||
# kind: Rule
|
||||
# middlewares:
|
||||
# - name: mysql-web-public-profile-header # injects entitlement profile
|
||||
# services:
|
||||
# - name: mysql-web
|
||||
# port: 5300
|
||||
# tls: {}
|
||||
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
|
||||
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).
|
||||
|
||||
@@ -30,3 +30,26 @@ spec:
|
||||
port: 5400
|
||||
tls:
|
||||
secretName: php-web-tls
|
||||
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
|
||||
# When the operator decides to expose php-web publicly, uncomment + update the host,
|
||||
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
|
||||
#
|
||||
# --- IngressRoute ---
|
||||
# apiVersion: traefik.io/v1alpha1
|
||||
# kind: IngressRoute
|
||||
# metadata:
|
||||
# name: php-web-public
|
||||
# namespace: fc-php
|
||||
# spec:
|
||||
# entryPoints: [websecure]
|
||||
# routes:
|
||||
# - match: Host(`php.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
|
||||
# kind: Rule
|
||||
# middlewares:
|
||||
# - name: php-web-public-profile-header # injects entitlement profile
|
||||
# services:
|
||||
# - name: php-web
|
||||
# port: 5400
|
||||
# tls: {}
|
||||
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
|
||||
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).
|
||||
|
||||
@@ -30,3 +30,26 @@ spec:
|
||||
port: 80
|
||||
tls:
|
||||
secretName: presentations-web-tls
|
||||
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
|
||||
# When the operator decides to expose presentations-web publicly, uncomment + update the host,
|
||||
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
|
||||
#
|
||||
# --- IngressRoute ---
|
||||
# apiVersion: traefik.io/v1alpha1
|
||||
# kind: IngressRoute
|
||||
# metadata:
|
||||
# name: presentations-web-public
|
||||
# namespace: fc-presentations
|
||||
# spec:
|
||||
# entryPoints: [websecure]
|
||||
# routes:
|
||||
# - match: Host(`presentations.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
|
||||
# kind: Rule
|
||||
# middlewares:
|
||||
# - name: presentations-web-public-profile-header # injects entitlement profile
|
||||
# services:
|
||||
# - name: presentations-web
|
||||
# port: 80
|
||||
# tls: {}
|
||||
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
|
||||
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).
|
||||
|
||||
@@ -46,6 +46,8 @@ spec:
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
fc.flowercore.io/healthz-anon: "true"
|
||||
fc.flowercore.io/probe-path: "/healthz"
|
||||
kubectl.kubernetes.io/restartedAt: "2026-06-02T01:34:08-05:00"
|
||||
prometheus.io/path: /metrics/prometheus
|
||||
prometheus.io/port: "5000"
|
||||
@@ -55,6 +57,7 @@ spec:
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
spec:
|
||||
containers:
|
||||
# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: retail-web-config
|
||||
@@ -168,3 +171,26 @@ spec:
|
||||
port: 80
|
||||
tls:
|
||||
secretName: retail-web-tls
|
||||
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
|
||||
# When the operator decides to expose retail-web publicly, uncomment + update the host,
|
||||
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
|
||||
#
|
||||
# --- IngressRoute ---
|
||||
# apiVersion: traefik.io/v1alpha1
|
||||
# kind: IngressRoute
|
||||
# metadata:
|
||||
# name: retail-web-public
|
||||
# namespace: fc-retail
|
||||
# spec:
|
||||
# entryPoints: [websecure]
|
||||
# routes:
|
||||
# - match: Host(`retail.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
|
||||
# kind: Rule
|
||||
# middlewares:
|
||||
# - name: retail-web-public-profile-header # injects entitlement profile
|
||||
# services:
|
||||
# - name: retail-web
|
||||
# port: 80
|
||||
# tls: {}
|
||||
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
|
||||
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).
|
||||
|
||||
@@ -30,3 +30,26 @@ spec:
|
||||
port: 80
|
||||
tls:
|
||||
secretName: scoreboard-web-tls
|
||||
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
|
||||
# When the operator decides to expose scoreboard-web publicly, uncomment + update the host,
|
||||
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
|
||||
#
|
||||
# --- IngressRoute ---
|
||||
# apiVersion: traefik.io/v1alpha1
|
||||
# kind: IngressRoute
|
||||
# metadata:
|
||||
# name: scoreboard-web-public
|
||||
# namespace: fc-scoreboard
|
||||
# spec:
|
||||
# entryPoints: [websecure]
|
||||
# routes:
|
||||
# - match: Host(`scoreboard.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
|
||||
# kind: Rule
|
||||
# middlewares:
|
||||
# - name: scoreboard-web-public-profile-header # injects entitlement profile
|
||||
# services:
|
||||
# - name: scoreboard-web
|
||||
# port: 80
|
||||
# tls: {}
|
||||
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
|
||||
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).
|
||||
|
||||
@@ -37,3 +37,26 @@ spec:
|
||||
port: 80
|
||||
tls:
|
||||
secretName: segmentdisplay-web-tls
|
||||
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
|
||||
# When the operator decides to expose segmentdisplay-web publicly, uncomment + update the host,
|
||||
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
|
||||
#
|
||||
# --- IngressRoute ---
|
||||
# apiVersion: traefik.io/v1alpha1
|
||||
# kind: IngressRoute
|
||||
# metadata:
|
||||
# name: segmentdisplay-web-public
|
||||
# namespace: fc-segmentdisplay
|
||||
# spec:
|
||||
# entryPoints: [websecure]
|
||||
# routes:
|
||||
# - match: Host(`segmentdisplay.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
|
||||
# kind: Rule
|
||||
# middlewares:
|
||||
# - name: segmentdisplay-web-public-profile-header # injects entitlement profile
|
||||
# services:
|
||||
# - name: segmentdisplay-web
|
||||
# port: 80
|
||||
# tls: {}
|
||||
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
|
||||
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).
|
||||
|
||||
@@ -46,3 +46,26 @@ spec:
|
||||
services:
|
||||
- name: signage-web
|
||||
port: 5190
|
||||
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
|
||||
# When the operator decides to expose signage-web publicly, uncomment + update the host,
|
||||
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
|
||||
#
|
||||
# --- IngressRoute ---
|
||||
# apiVersion: traefik.io/v1alpha1
|
||||
# kind: IngressRoute
|
||||
# metadata:
|
||||
# name: signage-web-public
|
||||
# namespace: fc-signage
|
||||
# spec:
|
||||
# entryPoints: [websecure]
|
||||
# routes:
|
||||
# - match: Host(`signage.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
|
||||
# kind: Rule
|
||||
# middlewares:
|
||||
# - name: signage-web-public-profile-header # injects entitlement profile
|
||||
# services:
|
||||
# - name: signage-web
|
||||
# port: 5190
|
||||
# tls: {}
|
||||
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
|
||||
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).
|
||||
|
||||
@@ -97,6 +97,7 @@ spec:
|
||||
containers:
|
||||
- name: piper
|
||||
image: rhasspy/wyoming-piper:latest
|
||||
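# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
# NOTE(review): this marker sits on the third-party piper container (rhasspy/wyoming-piper), not on ttsreader-web - confirm it is attached to the intended container.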
|
||||
env:
|
||||
- name: PYTHONHTTPSVERIFY
|
||||
value: "0"
|
||||
@@ -523,6 +524,8 @@ spec:
|
||||
app.kubernetes.io/name: ttsreader-web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
annotations:
|
||||
fc.flowercore.io/healthz-anon: "true"
|
||||
fc.flowercore.io/probe-path: "/health"
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "5217"
|
||||
prometheus.io/path: "/metrics"
|
||||
@@ -762,3 +765,26 @@ spec:
|
||||
port: 5217
|
||||
tls:
|
||||
secretName: ttsreader-tls
|
||||
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
|
||||
# When the operator decides to expose ttsreader-web publicly, uncomment + update the host,
|
||||
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
|
||||
#
|
||||
# --- IngressRoute ---
|
||||
# apiVersion: traefik.io/v1alpha1
|
||||
# kind: IngressRoute
|
||||
# metadata:
|
||||
# name: ttsreader-web-public
|
||||
# namespace: fc-ttsreader
|
||||
# spec:
|
||||
# entryPoints: [websecure]
|
||||
# routes:
|
||||
# - match: Host(`ttsreader.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
|
||||
# kind: Rule
|
||||
# middlewares:
|
||||
# - name: ttsreader-web-public-profile-header # injects entitlement profile
|
||||
# services:
|
||||
# - name: ttsreader-web
|
||||
# port: 5217
|
||||
# tls: {}
|
||||
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
|
||||
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).
|
||||
|
||||
@@ -52,6 +52,9 @@ spec:
|
||||
app: updatecenter-web
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
fc.flowercore.io/healthz-anon: "true"
|
||||
fc.flowercore.io/probe-path: "/"
|
||||
labels:
|
||||
app: updatecenter-web
|
||||
spec:
|
||||
@@ -63,6 +66,7 @@ spec:
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
name: http
|
||||
# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
|
||||
env:
|
||||
- name: ASPNETCORE_URLS
|
||||
value: http://+:8080
|
||||
|
||||
@@ -24,6 +24,12 @@ original Longhorn ReadWriteOnce NuGet PVC. Every other repo-scoped runner uses
|
||||
two replicas with per-pod `emptyDir` caches. That is the safe backlog-drain
|
||||
strategy: no two pods share one RWO PVC.
|
||||
|
||||
Ephemeral runner pods are expected to register, run one job, deregister, and
|
||||
exit so the Deployment starts a fresh pod for the next registration token. A
|
||||
small amount of exit-1/restart churn from token-expiry or no-work windows is
|
||||
accepted operational noise as long as jobs are not stuck queued and the
|
||||
repo-scoped runner-offline alerts stay quiet.
|
||||
|
||||
Sprint 32 final long-tail wave adds 16 two-replica Deployments:
|
||||
`FlowerCore.Knowledge`, `FlowerCore.LlmBridge`, `FlowerCore.Media`,
|
||||
`FlowerCore.Presentations`, `FlowerCore.RemoteDesktop`, `FlowerCore.DNS`,
|
||||
|
||||
@@ -90,6 +90,8 @@ spec:
|
||||
app.kubernetes.io/name: knowledge-web
|
||||
app.kubernetes.io/part-of: bluejay-infra
|
||||
annotations:
|
||||
fc.flowercore.io/healthz-anon: "true"
|
||||
fc.flowercore.io/probe-path: "/healthz"
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "8080"
|
||||
prometheus.io/path: "/metrics"
|
||||
@@ -117,6 +119,7 @@ spec:
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
name: http
|
||||
# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
|
||||
env:
|
||||
- name: ASPNETCORE_URLS
|
||||
value: "http://+:8080"
|
||||
@@ -286,3 +289,26 @@ spec:
|
||||
port: 80
|
||||
tls:
|
||||
secretName: knowledge-tls
|
||||
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
|
||||
# When the operator decides to expose knowledge-web publicly, uncomment + update the host,
|
||||
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
|
||||
#
|
||||
# --- IngressRoute ---
|
||||
# apiVersion: traefik.io/v1alpha1
|
||||
# kind: IngressRoute
|
||||
# metadata:
|
||||
# name: knowledge-web-public
|
||||
# namespace: knowledge
|
||||
# spec:
|
||||
# entryPoints: [websecure]
|
||||
# routes:
|
||||
# - match: Host(`knowledge.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
|
||||
# kind: Rule
|
||||
# middlewares:
|
||||
# - name: knowledge-web-public-profile-header # injects entitlement profile
|
||||
# services:
|
||||
# - name: knowledge-web
|
||||
# port: 80
|
||||
# tls: {}
|
||||
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
|
||||
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).
|
||||
|
||||
@@ -843,7 +843,9 @@ data:
|
||||
rules:
|
||||
- alert: PiManagerDown
|
||||
expr: up{job="pimanager-app"} == 0
|
||||
for: 3m
|
||||
# Sprint 67: delayed behind NodeDown's critical page so a powered-off
|
||||
# Pi does not create the first duplicate page for the same host.
|
||||
for: 8m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
@@ -1242,6 +1244,58 @@ data:
|
||||
summary: "Marquee animation duration drifting > 10% on {{ $labels.renderer }} ({{ $labels.phase }})"
|
||||
description: "Median observed cycle duration deviates from target DurationMs by >10%. Could indicate browser tab throttling, GPU pressure, or phase-advancement bug."
|
||||
|
||||
# ============================================================
|
||||
# Update Center public-edge probes
|
||||
# Live-mirrored from FlowerCore.Notes/scripts/monitoring/alerts.yml.
|
||||
# This K8s ConfigMap is the future migration target; live Prometheus
|
||||
# still reads the canonical Notes file from noc1 Podman.
|
||||
# ============================================================
|
||||
- name: update_center
|
||||
rules:
|
||||
# Critical only when the edge is genuinely unreachable. A Cloudflare
|
||||
# HTTP 429 means the prober hit a rate-limit, not that real clients
|
||||
# are down, so the warning rule below owns that signal.
|
||||
- alert: UpdateCenterPublicEdgeDown
|
||||
expr: |
|
||||
(probe_success{job="probe-update-center-public-edge"} == 0)
|
||||
unless on(instance)
|
||||
(probe_http_status_code{job="probe-update-center-public-edge"} == 429)
|
||||
for: 10m
|
||||
labels:
|
||||
severity: critical
|
||||
service: update-center
|
||||
alert_channel: irc
|
||||
annotations:
|
||||
summary: "Update Center public edge probe failed for {{ $labels.instance }}"
|
||||
description: >-
|
||||
The external probe for {{ $labels.instance }} failed for 10 minutes with a
|
||||
non-2xx status that is not a rate-limit. Public Update Center clients may be
|
||||
unable to fetch manifest schema metadata through Cloudflare.
|
||||
runbook: >-
|
||||
1. curl -sk https://{{ $labels.instance }}/api/v1/manifests/_schema
|
||||
2. Verify Cloudflare DNS record is proxied and targets the current public edge IP
|
||||
3. kubectl -n fc-updater get ingressroute/updatecenter-web-public secret/cf-origin-flowercore-io
|
||||
4. Check Traefik logs for Method() or TLS secret errors
|
||||
|
||||
- alert: UpdateCenterPublicEdgeRateLimited
|
||||
expr: probe_http_status_code{job="probe-update-center-public-edge"} == 429
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
service: update-center
|
||||
alert_channel: irc
|
||||
annotations:
|
||||
summary: "Cloudflare is rate-limiting (HTTP 429) the public-edge probe for {{ $labels.instance }}"
|
||||
description: >-
|
||||
The blackbox prober receives HTTP 429 from Cloudflare for {{ $labels.instance }}
|
||||
while the origin is healthy. This is a Cloudflare rate-limit / WAF condition on
|
||||
the public hostname, not an outage.
|
||||
runbook: >-
|
||||
1. curl -sk https://{{ $labels.instance }}/api/v1/manifests/_schema (expect 200 from a normal client)
|
||||
2. Review Cloudflare rate-limit / WAF rules for the hostname; the 5m-cadence prober is tripping a 429
|
||||
3. Add a Cloudflare rate-limit exception for the prober source IP or the /api/v1/manifests/_schema path
|
||||
4. Confirm whether the singular host update.flowercore.io is still required, or only updates.flowercore.io
|
||||
|
||||
# =============================================================================
|
||||
# ConfigMap: Blackbox Exporter Configuration
|
||||
# =============================================================================
|
||||
|
||||
@@ -114,6 +114,9 @@ spec:
|
||||
app: telephony-web
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
fc.flowercore.io/healthz-anon: "true"
|
||||
fc.flowercore.io/probe-path: "/health"
|
||||
labels:
|
||||
app: telephony-web
|
||||
spec:
|
||||
@@ -161,6 +164,7 @@ spec:
|
||||
ports:
|
||||
- containerPort: 5100
|
||||
name: http
|
||||
# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
|
||||
env:
|
||||
- name: Telephony__Twilio__AccountSid
|
||||
valueFrom:
|
||||
@@ -387,4 +391,3 @@ spec:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -77,6 +77,8 @@ spec:
|
||||
flowercore.io/tenant-id: system
|
||||
flowercore.io/created-by: bluejay-infra
|
||||
annotations:
|
||||
fc.flowercore.io/healthz-anon: "true"
|
||||
fc.flowercore.io/probe-path: "/healthz"
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "8080"
|
||||
prometheus.io/path: "/metrics/prometheus"
|
||||
@@ -93,6 +95,7 @@ spec:
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
name: http
|
||||
# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
|
||||
env:
|
||||
- name: ASPNETCORE_URLS
|
||||
value: "http://+:8080"
|
||||
@@ -254,3 +257,26 @@ spec:
|
||||
port: 80
|
||||
tls:
|
||||
secretName: worldbuilder-web-tls
|
||||
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
|
||||
# When the operator decides to expose worldbuilder-web publicly, uncomment + update the host,
|
||||
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
|
||||
#
|
||||
# --- IngressRoute ---
|
||||
# apiVersion: traefik.io/v1alpha1
|
||||
# kind: IngressRoute
|
||||
# metadata:
|
||||
# name: worldbuilder-web-public
|
||||
# namespace: worldbuilder
|
||||
# spec:
|
||||
# entryPoints: [websecure]
|
||||
# routes:
|
||||
# - match: Host(`worldbuilder.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
|
||||
# kind: Rule
|
||||
# middlewares:
|
||||
# - name: worldbuilder-web-public-profile-header # injects entitlement profile
|
||||
# services:
|
||||
# - name: worldbuilder-web
|
||||
# port: 80
|
||||
# tls: {}
|
||||
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
|
||||
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).
|
||||
|
||||
74
argocd/applicationset-bluejay-infra.yaml
Normal file
74
argocd/applicationset-bluejay-infra.yaml
Normal file
@@ -0,0 +1,74 @@
|
||||
apiVersion: argoproj.io/v1alpha1
|
||||
kind: ApplicationSet
|
||||
metadata:
|
||||
annotations:
|
||||
argocd.argoproj.io/refresh: "true"
|
||||
name: bluejay-infra
|
||||
namespace: argocd
|
||||
spec:
|
||||
generators:
|
||||
- git:
|
||||
directories:
|
||||
- path: apps/*
|
||||
repoURL: http://gitea-clusterip.gitea.svc:3000/bluejay/bluejay-infra.git
|
||||
revision: main
|
||||
template:
|
||||
metadata: {}
|
||||
spec:
|
||||
destination: {}
|
||||
project: ""
|
||||
goTemplate: true
|
||||
goTemplateOptions:
|
||||
- missingkey=error
|
||||
template:
|
||||
metadata:
|
||||
name: infra-{{.path.basename}}
|
||||
spec:
|
||||
destination:
|
||||
server: https://kubernetes.default.svc
|
||||
ignoreDifferences:
|
||||
- group: apps
|
||||
jqPathExpressions:
|
||||
- .spec.volumeClaimTemplates[]?.status
|
||||
jsonPointers:
|
||||
- /spec/volumeClaimTemplates
|
||||
kind: StatefulSet
|
||||
name: zabbix-postgres
|
||||
namespace: zabbix
|
||||
- group: apps
|
||||
jqPathExpressions:
|
||||
- .spec.volumeClaimTemplates[]?.status
|
||||
jsonPointers:
|
||||
- /spec/volumeClaimTemplates
|
||||
kind: StatefulSet
|
||||
name: guac-mysql
|
||||
namespace: guacamole
|
||||
- group: apps
|
||||
jqPathExpressions:
|
||||
- .spec.volumeClaimTemplates[]?.status
|
||||
jsonPointers:
|
||||
- /spec/volumeClaimTemplates
|
||||
kind: StatefulSet
|
||||
name: matrix-postgres
|
||||
namespace: matrix
|
||||
- group: apps
|
||||
jqPathExpressions:
|
||||
- .spec.volumeClaimTemplates[]?.status
|
||||
jsonPointers:
|
||||
- /spec/volumeClaimTemplates
|
||||
kind: StatefulSet
|
||||
name: authentik-postgres
|
||||
namespace: authentik
|
||||
project: default
|
||||
source:
|
||||
path: '{{.path.path}}'
|
||||
repoURL: http://gitea-clusterip.gitea.svc:3000/bluejay/bluejay-infra.git
|
||||
targetRevision: main
|
||||
syncPolicy:
|
||||
automated:
|
||||
prune: true
|
||||
selfHeal: true
|
||||
syncOptions:
|
||||
- CreateNamespace=true
|
||||
- ServerSideApply=true
|
||||
- RespectIgnoreDifferences=true
|
||||
@@ -17,21 +17,17 @@ public sealed class FleetManifestLintTests
|
||||
"dist.flowercore.io",
|
||||
};
|
||||
|
||||
// Public hosts that allow a tightly bounded write surface in addition to
|
||||
// GET/HEAD. updatecenter.iamworkin.lan accepts POST /api/v1/checkin/{id}
|
||||
// Hosts that allow a tightly bounded write surface in addition to GET/HEAD.
|
||||
// updatecenter.iamworkin.lan accepts POST /api/v1/checkin/{id}
|
||||
// (bootstrap-JWT) so its allowlist is GET||HEAD||POST||OPTIONS — but
|
||||
// PUT/PATCH/DELETE must still 404 at the route. Anything wider than this
|
||||
// set should fail this lint.
|
||||
//
|
||||
// PUB-1 (2026-05-06): update.flowercore.io / updates.flowercore.io were
|
||||
// added for the Cloudflare-proxied public Update Center edge. They use the
|
||||
// same bounded read-write allowlist as the LAN pair.
|
||||
// PUT/PATCH/DELETE must still 404 at the route. Public
|
||||
// update.flowercore.io remains a GET/HEAD download surface in the
|
||||
// FlowerCore.Updater sibling manifest and is covered by the general
|
||||
// public-method allowlist lint instead of this write-surface rule.
|
||||
private static readonly HashSet<string> PublicReadWriteAllowlistHosts = new(StringComparer.Ordinal)
|
||||
{
|
||||
"updatecenter.iamworkin.lan",
|
||||
"updates.iamworkin.lan",
|
||||
"update.flowercore.io",
|
||||
"updates.flowercore.io",
|
||||
};
|
||||
|
||||
private static readonly HashSet<string> ApiKeyProtectedDeployments = new(StringComparer.Ordinal)
|
||||
@@ -69,7 +65,7 @@ public sealed class FleetManifestLintTests
|
||||
["github-runner-updater"] = "https://github.com/astoltz/FlowerCore.Updater",
|
||||
};
|
||||
|
||||
private static readonly HashSet<string> ScaledLinuxRunnerDeployments = new(StringComparer.Ordinal)
|
||||
private static readonly HashSet<string> RepoScopedLinuxRunnerDeployments = new(StringComparer.Ordinal)
|
||||
{
|
||||
"github-runner-sharedpos",
|
||||
"github-runner-puppet",
|
||||
@@ -83,6 +79,44 @@ public sealed class FleetManifestLintTests
|
||||
"github-runner-updater",
|
||||
};
|
||||
|
||||
/// <summary>
/// Expected hardening metadata per app. The key is the app name handed to
/// <c>AppDocuments</c>; the value names the Deployment that must exist there
/// and the probe path its pod template is expected to advertise through the
/// <c>fc.flowercore.io/probe-path</c> annotation.
/// </summary>
private static readonly IReadOnlyDictionary<string, (string Deployment, string ProbePath)> BroaderHardeningDeployments =
    new Dictionary<string, (string Deployment, string ProbePath)>(StringComparer.Ordinal)
    {
        ["fc-aistation"] = (Deployment: "aistation-web", ProbePath: "/healthz"),
        ["fc-chat"] = (Deployment: "chat-web", ProbePath: "/healthz"),
        ["fc-devicemgmt"] = (Deployment: "fc-devicemgmt-web", ProbePath: "/healthz"),
        ["fc-library"] = (Deployment: "library-web", ProbePath: "/health"),
        ["fc-llm-bridge"] = (Deployment: "fc-llm-bridge", ProbePath: "/healthz"),
        ["fc-messageboard"] = (Deployment: "messageboard-web", ProbePath: "/health"),
        ["fc-retail"] = (Deployment: "retail-web", ProbePath: "/healthz"),
        ["fc-ttsreader"] = (Deployment: "ttsreader-web", ProbePath: "/health"),
        ["fc-updater"] = (Deployment: "updatecenter-web", ProbePath: "/"),
        ["knowledge"] = (Deployment: "knowledge-web", ProbePath: "/healthz"),
        ["telephony"] = (Deployment: "telephony-web", ProbePath: "/health"),
        ["worldbuilder"] = (Deployment: "worldbuilder-web", ProbePath: "/healthz"),
    };
|
||||
|
||||
/// <summary>
/// Apps checked by
/// <c>BroaderHardeningInternalApps_MustOnlyPrestageCommentedPublicMethodAllowlist</c>:
/// their manifests must carry the commented-out public-host pre-staging block
/// and must not define a live IngressRoute whose match mentions
/// <c>.flowercore.io</c>.
/// </summary>
private static readonly HashSet<string> BroaderHardeningInternalPrestageApps =
    new HashSet<string>(StringComparer.Ordinal)
    {
        "fc-aistation",
        "fc-desktop",
        "fc-dms",
        "fc-library",
        "fc-llm-bridge",
        "fc-menuboard",
        "fc-messageboard",
        "fc-mysql",
        "fc-php",
        "fc-presentations",
        "fc-retail",
        "fc-scoreboard",
        "fc-segmentdisplay",
        "fc-signage",
        "fc-ttsreader",
        "knowledge",
        "worldbuilder",
    };
|
||||
|
||||
private static readonly IReadOnlyDictionary<string, string> WritableRunnerEnv = new Dictionary<string, string>(StringComparer.Ordinal)
|
||||
{
|
||||
["HOME"] = "/home/runner",
|
||||
@@ -271,17 +305,17 @@ public sealed class FleetManifestLintTests
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void GitHubRunnerFleet_MustAvoidRwoMultiAttachForScaledDeployments()
|
||||
public void GitHubRunnerFleet_MustAvoidRwoMultiAttachForRepoScopedDeployments()
|
||||
{
|
||||
var deployments = GitHubRunnerDeployments();
|
||||
|
||||
foreach (var deploymentName in ScaledLinuxRunnerDeployments)
|
||||
foreach (var deploymentName in RepoScopedLinuxRunnerDeployments)
|
||||
{
|
||||
var deployment = deployments[deploymentName];
|
||||
// Scaled runners must have >= 2 replicas (avoid single-pod bottleneck).
|
||||
// Individual deployments may be tuned upward per CI activity — see
|
||||
// "runners: right-size replica counts per 14d CI activity (#24)".
|
||||
ReplicaCount(deployment).Should().BeGreaterOrEqualTo(2, $"{deploymentName} is in the scaled set and must run with at least 2 replicas");
|
||||
// Sprint 34 ops trimmed runner load while the cluster was degraded
|
||||
// to two healthy nodes. Repo-scoped runners can be tuned back above
|
||||
// one replica, but they must stay RWO-safe before that happens.
|
||||
ReplicaCount(deployment).Should().BeGreaterOrEqualTo(1, $"{deploymentName} must keep at least one repo-scoped runner online");
|
||||
|
||||
var volumes = deployment.MappingSequence("spec", "template", "spec", "volumes");
|
||||
var claimNames = volumes
|
||||
@@ -289,7 +323,7 @@ public sealed class FleetManifestLintTests
|
||||
.Where(value => !string.IsNullOrWhiteSpace(value))
|
||||
.ToList();
|
||||
|
||||
claimNames.Should().BeEmpty($"{deploymentName} is scaled and must not share a RWO PVC");
|
||||
claimNames.Should().BeEmpty($"{deploymentName} must remain ready for safe multi-replica scaling without sharing a RWO PVC");
|
||||
volumes.Should().Contain(volume =>
|
||||
string.Equals(ManifestNodeExtensions.Scalar(volume, "name"), "nuget-cache", StringComparison.Ordinal)
|
||||
&& ManifestNodeExtensions.Mapping(volume, "emptyDir") != null);
|
||||
@@ -434,6 +468,99 @@ public sealed class FleetManifestLintTests
|
||||
monitoring.Should().Contain("dedicated LinuxRunnerOffline/MacMiniRunnerOffline alerts");
|
||||
}
|
||||
|
||||
/// <summary>
/// Verifies that apps/github-runner/README.md still contains the phrases that
/// document ephemeral runner exit/restart churn as accepted noise.
/// </summary>
[Fact]
public void GithubRunnerReadme_DocumentsAcceptedEphemeralExitChurn()
{
    var readmePath = Path.Combine(Inventory.BluejayRoot, "apps", "github-runner", "README.md");
    var readme = File.ReadAllText(readmePath);

    // Checked in order; the first missing phrase fails the test.
    var requiredPhrases = new[]
    {
        "Ephemeral runner pods",
        "exit-1/restart churn",
        "accepted operational noise",
        "repo-scoped runner-offline alerts stay quiet",
    };

    foreach (var phrase in requiredPhrases)
    {
        readme.Should().Contain(phrase);
    }
}
|
||||
|
||||
/// <summary>
/// Verifies that the PiManagerDown and UpdateCenterPublicEdgeRateLimited alert
/// rules carry the same key settings in both the FlowerCore.Notes alerts.yml
/// and the bluejay noc-monitoring.yaml manifest.
/// </summary>
/// <remarks>
/// The <c>for:</c>, <c>expr:</c> and <c>severity:</c> checks are made against
/// the text of the named rule only. Generic fragments such as "for: 15m" or
/// "severity: warning" can appear in unrelated rules, so a whole-file
/// substring check would keep passing after the rule under test was changed.
/// </remarks>
[Fact]
public void Monitoring_PiManagerDownDelayAndUpdateCenterRateLimit_MatchCanonicalAlerts()
{
    const string RateLimitExpr = "expr: probe_http_status_code{job=\"probe-update-center-public-edge\"} == 429";

    // Returns the text of one rule: from its "- alert: <name>" entry up to the
    // next "- alert:" entry, or to the end of the text for the last rule.
    static string AlertRule(string rulesText, string alertName)
    {
        var header = $"- alert: {alertName}";
        var start = rulesText.IndexOf(header, StringComparison.Ordinal);
        start.Should().BeGreaterOrEqualTo(0, $"the rules text must define '{header}'");

        var next = rulesText.IndexOf("- alert:", start + header.Length, StringComparison.Ordinal);
        return next < 0 ? rulesText[start..] : rulesText[start..next];
    }

    var notesAlerts = File.ReadAllText(Path.Combine(
        Inventory.WorkspaceRoot,
        "FlowerCore.Notes",
        "scripts",
        "monitoring",
        "alerts.yml"));
    var monitoring = File.ReadAllText(Path.Combine(Inventory.BluejayRoot, "apps", "monitoring", "noc-monitoring.yaml"));

    // PiManagerDown: both copies must explain the delay and hold for 8m.
    notesAlerts.Should().Contain("# Sprint 67: keep this warning behind NodeDown's 5m critical page");
    notesAlerts.Should().Contain("- alert: PiManagerDown");
    AlertRule(notesAlerts, "PiManagerDown")
        .Should().Contain("for: 8m", "the canonical PiManagerDown rule itself must wait 8m");
    monitoring.Should().Contain("# Sprint 67: delayed behind NodeDown's critical page");
    monitoring.Should().Contain("- alert: PiManagerDown");
    AlertRule(monitoring, "PiManagerDown")
        .Should().Contain("for: 8m", "the deployed PiManagerDown rule itself must wait 8m");

    // UpdateCenterPublicEdgeRateLimited: same expression and hold time in both copies.
    notesAlerts.Should().Contain("- alert: UpdateCenterPublicEdgeRateLimited");
    var notesRateLimited = AlertRule(notesAlerts, "UpdateCenterPublicEdgeRateLimited");
    notesRateLimited.Should().Contain(RateLimitExpr);
    notesRateLimited.Should().Contain("for: 15m");

    monitoring.Should().Contain("- alert: UpdateCenterPublicEdgeRateLimited");
    var monitoringRateLimited = AlertRule(monitoring, "UpdateCenterPublicEdgeRateLimited");
    monitoringRateLimited.Should().Contain(RateLimitExpr);
    monitoringRateLimited.Should().Contain("for: 15m");
    monitoringRateLimited.Should().Contain("severity: warning");
}
|
||||
|
||||
/// <summary>
/// Verifies that the exported ApplicationSet manifest exists, is a clean
/// export (no status or managedFields), and that the repository README
/// documents it as the manually applied root of the GitOps tree.
/// </summary>
[Fact]
public void ApplicationSetExport_MustRemainManualRootOfGitOpsTree()
{
    var readme = File.ReadAllText(Path.Combine(Inventory.BluejayRoot, "README.md"));
    var appsetPath = Path.Combine(Inventory.BluejayRoot, "argocd", "applicationset-bluejay-infra.yaml");

    File.Exists(appsetPath).Should().BeTrue();
    var appset = File.ReadAllText(appsetPath);

    // Identity of the exported object.
    foreach (var required in new[] { "kind: ApplicationSet", "name: bluejay-infra" })
    {
        appset.Should().Contain(required);
    }

    // Fragments that must not be present in the committed export.
    foreach (var forbidden in new[] { "\nstatus:", "managedFields:" })
    {
        appset.Should().NotContain(forbidden);
    }

    // README wording that explains the manual apply step.
    var readmePhrases = new[]
    {
        "root of this GitOps tree",
        "NOT self-managed",
        "kubectl -n argocd apply -f argocd/applicationset-bluejay-infra.yaml",
    };

    foreach (var phrase in readmePhrases)
    {
        readme.Should().Contain(phrase);
    }
}
|
||||
|
||||
/// <summary>
/// Verifies that the exported ApplicationSet still contains the generator,
/// source and sync-option fragments listed below.
/// </summary>
[Fact]
public void ApplicationSetExport_MustDiscoverAppsDirectoryOnMain()
{
    var appsetPath = Path.Combine(Inventory.BluejayRoot, "argocd", "applicationset-bluejay-infra.yaml");
    var appset = File.ReadAllText(appsetPath);

    // Checked in order; the first missing fragment fails the test.
    var requiredFragments = new[]
    {
        "path: apps/*",
        "revision: main",
        "repoURL: http://gitea-clusterip.gitea.svc:3000/bluejay/bluejay-infra.git",
        "path: '{{.path.path}}'",
        "targetRevision: main",
        "ServerSideApply=true",
        "RespectIgnoreDifferences=true",
    };

    foreach (var fragment in requiredFragments)
    {
        appset.Should().Contain(fragment);
    }
}
|
||||
|
||||
/// <summary>
/// Verifies that the exported ApplicationSet keeps one
/// <c>ignoreDifferences</c> entry per expected StatefulSet, each carrying the
/// volumeClaimTemplates JSON pointer and jq status expression.
/// </summary>
/// <remarks>
/// The pointer and jq expression are counted rather than merely found, so an
/// entry that loses one of them is detected. Names and namespaces are matched
/// as whole lines, so a longer value such as "matrix-postgres-replica" or
/// "matrix-staging" does not satisfy the check by prefix. The test does not
/// verify that a given name and namespace belong to the same entry.
/// </remarks>
[Fact]
public void ApplicationSetExport_MustPreserveStatefulSetIgnoreDifferences()
{
    var appset = File.ReadAllText(Path.Combine(Inventory.BluejayRoot, "argocd", "applicationset-bluejay-infra.yaml"));

    var expectedStatefulSets = new[]
    {
        ("zabbix-postgres", "zabbix"),
        ("guac-mysql", "guacamole"),
        ("matrix-postgres", "matrix"),
        ("authentik-postgres", "authentik"),
    };

    appset.Should().Contain("jsonPointers:");
    Regex.Matches(appset, "kind: StatefulSet").Should().HaveCount(expectedStatefulSets.Length);
    Regex.Matches(appset, Regex.Escape("- /spec/volumeClaimTemplates"))
        .Should().HaveCount(expectedStatefulSets.Length, "every StatefulSet entry must ignore /spec/volumeClaimTemplates");
    Regex.Matches(appset, Regex.Escape(".spec.volumeClaimTemplates[]?.status"))
        .Should().HaveCount(expectedStatefulSets.Length, "every StatefulSet entry must ignore the volumeClaimTemplates status");

    foreach (var (name, ns) in expectedStatefulSets)
    {
        // Whole-line match; tolerates an optional list marker and CRLF line endings.
        var nameLine = $@"^[ \t]*(?:- )?name: {Regex.Escape(name)}[ \t]*\r?$";
        var namespaceLine = $@"^[ \t]*(?:- )?namespace: {Regex.Escape(ns)}[ \t]*\r?$";

        Regex.IsMatch(appset, nameLine, RegexOptions.Multiline)
            .Should().BeTrue($"the ApplicationSet must keep an entry with 'name: {name}'");
        Regex.IsMatch(appset, namespaceLine, RegexOptions.Multiline)
            .Should().BeTrue($"the ApplicationSet must keep an entry with 'namespace: {ns}'");
    }
}
|
||||
|
||||
[Fact]
|
||||
public void Monitoring_BlackboxTargetsForOidcSensitiveServices_MustUseAnonymousHealthRoutesWhenAvailable()
|
||||
{
|
||||
@@ -612,7 +739,6 @@ public sealed class FleetManifestLintTests
|
||||
var expectedFiles = new[]
|
||||
{
|
||||
"1password-item.yaml",
|
||||
"argocd-application.yaml",
|
||||
"certificate-web.yaml",
|
||||
"clusterrole-operator.yaml",
|
||||
"clusterrolebinding-operator.yaml",
|
||||
@@ -768,17 +894,62 @@ public sealed class FleetManifestLintTests
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void FcDeviceManagement_ArgocdApplicationMustMatchApplicationSetDiscoveryConventions()
|
||||
public void FcDeviceManagement_MustRelyOnApplicationSetDiscovery()
|
||||
{
|
||||
var application = FcDeviceManagementDocuments()
|
||||
.Single(document => document.Kind == "Application" && document.Name == "infra-fc-devicemgmt");
|
||||
var documents = FcDeviceManagementDocuments();
|
||||
|
||||
application.Namespace.Should().Be("argocd");
|
||||
application.Scalar("spec", "source", "repoURL")
|
||||
.Should()
|
||||
.Be("http://gitea-clusterip.gitea.svc.cluster.local:3000/bluejay/bluejay-infra.git");
|
||||
application.Scalar("spec", "source", "path").Should().Be("apps/fc-devicemgmt");
|
||||
application.Scalar("spec", "destination", "namespace").Should().Be("fc-devicemgmt");
|
||||
documents.Should().NotContain(document => document.Kind == "Application");
|
||||
|
||||
var ns = documents.Single(document => document.Kind == "Namespace" && document.Name == "fc-devicemgmt");
|
||||
ns.FileText.Should().Contain("ArgoCD discovers this directory as Application `infra-fc-devicemgmt`.");
|
||||
}
|
||||
|
||||
/// <summary>
/// For every entry in <c>BroaderHardeningDeployments</c>, verifies that the
/// named Deployment's pod annotations set
/// <c>fc.flowercore.io/healthz-anon</c> to "true" and
/// <c>fc.flowercore.io/probe-path</c> to the expected probe path.
/// </summary>
[Fact]
public void BroaderHardeningDeployments_MustAnnotateAnonymousHealthProbeIntent()
{
    foreach (var expected in BroaderHardeningDeployments)
    {
        var appName = expected.Key;
        var (deploymentName, probePath) = expected.Value;

        // Exactly one Deployment with the expected name must exist in the app.
        var deployment = AppDocuments(appName)
            .Single(document => document.Kind == "Deployment" && document.Name == deploymentName);

        PodAnnotation(deployment, "fc.flowercore.io/healthz-anon").Should().Be("true");
        PodAnnotation(deployment, "fc.flowercore.io/probe-path").Should().Be(probePath);
    }
}
|
||||
|
||||
/// <summary>
/// For every entry in <c>BroaderHardeningDeployments</c>, verifies that the
/// file containing the named Deployment includes the fc-safe-to-expose marker
/// text about X-Forwarded-Proto handling.
/// </summary>
[Fact]
public void BroaderHardeningDeployments_MustDocumentForwardedProtoAuthPosture()
{
    const string SafeToExposeMarker =
        "fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178)";

    foreach (var expected in BroaderHardeningDeployments)
    {
        var appName = expected.Key;
        var deploymentName = expected.Value.Deployment;

        // Exactly one Deployment with the expected name must exist in the app.
        var deployment = AppDocuments(appName)
            .Single(document => document.Kind == "Deployment" && document.Name == deploymentName);

        deployment.FileText.Should().Contain(SafeToExposeMarker);
    }
}
|
||||
|
||||
/// <summary>
/// For every app in <c>BroaderHardeningInternalPrestageApps</c>, verifies that
/// the manifest text contains the commented-out public-host pre-staging block
/// with a GET/HEAD method allowlist, and that no parsed IngressRoute route has
/// a match expression mentioning <c>.flowercore.io</c>.
/// </summary>
[Fact]
public void BroaderHardeningInternalApps_MustOnlyPrestageCommentedPublicMethodAllowlist()
{
    var prestageMarkers = new[]
    {
        "PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only)",
        "# - match: Host(`",
        "Method(`GET`) || Method(`HEAD`)",
    };

    foreach (var app in BroaderHardeningInternalPrestageApps)
    {
        var documents = AppDocuments(app);
        var text = string.Join(Environment.NewLine, documents.Select(document => document.FileText));

        // The commented pre-staging block must be present in the app's files.
        foreach (var marker in prestageMarkers)
        {
            text.Should().Contain(marker);
        }

        // Match expressions of the IngressRoute documents that were actually parsed.
        var liveRouteMatches = documents
            .Where(document => document.Kind == "IngressRoute")
            .SelectMany(document => document.MappingSequence("spec", "routes"))
            .Select(route => ManifestNodeExtensions.Scalar(route, "match") ?? string.Empty);

        liveRouteMatches
            .Should()
            .NotContain(match => match.Contains(".flowercore.io", StringComparison.Ordinal),
                "Sprint 61 broader hardening only pre-stages commented public hosts for internal-only apps");
    }
}
|
||||
|
||||
[Fact]
|
||||
|
||||
Reference in New Issue
Block a user