Compare commits

..

1 Commits

Author SHA1 Message Date
Andrew Stoltz
90599b0413 fix(auth): harden public infra routes 2026-06-04 13:20:16 -05:00
40 changed files with 674 additions and 1271 deletions

View File

@@ -2,22 +2,6 @@
Infrastructure manifests for ArgoCD. An `ApplicationSet` in `argocd` namespace watches the `apps/*` directories in this repo and creates one `Application` per subdir (prefixed `infra-<name>`).
## Root GitOps ApplicationSet
`argocd/applicationset-bluejay-infra.yaml` is the root of this GitOps tree, but
it is **NOT self-managed** by ArgoCD. Apply it manually when the root generator
or sync policy changes:
```bash
kubectl -n argocd apply -f argocd/applicationset-bluejay-infra.yaml
```
Keep the per-StatefulSet `ignoreDifferences` entries in that file synced with
the live ApplicationSet. They intentionally cover `zabbix-postgres`,
`guac-mysql`, `matrix-postgres`, and `authentik-postgres` so ArgoCD does not
loop forever on server-side-apply `volumeClaimTemplates` status drift. Every new
StatefulSet with `volumeClaimTemplates` needs its own entry appended.
## Adding a new service to the cluster
Follow these steps in order. **Step 1 must run before step 3** — if you skip it, cert-manager HTTP-01 will silently fail for ~2h per cert (exponential backoff) until someone diagnoses the DNS.

View File

@@ -201,6 +201,8 @@ spec:
metadata:
labels:
app: andrew-web
annotations:
flowercore.io/healthz-auth-policy: "allow-anonymous"
spec:
containers:
- name: nginx
@@ -225,12 +227,18 @@ spec:
httpGet:
path: /healthz
port: 80
httpHeaders:
- name: X-Forwarded-Proto
value: https
initialDelaySeconds: 5
periodSeconds: 10
readinessProbe:
httpGet:
path: /healthz
port: 80
httpHeaders:
- name: X-Forwarded-Proto
value: https
initialDelaySeconds: 3
periodSeconds: 5
volumes:
@@ -265,7 +273,7 @@ spec:
entryPoints:
- websecure
routes:
- match: Host(`bluejay.dev`) || Host(`www.bluejay.dev`)
- match: (Host(`bluejay.dev`) || Host(`www.bluejay.dev`)) && (Method(`GET`) || Method(`HEAD`))
kind: Rule
services:
- name: andrew-web

View File

@@ -113,12 +113,7 @@ spec:
- name: pgdata
mountPath: /var/lib/postgresql/data
volumeClaimTemplates:
# apiVersion/kind included deliberately: this STS was created via ArgoCD ServerSideApply,
# so the live object carries PVC TypeMeta inside volumeClaimTemplates; omitting it here
# leaves the app eternally OutOfSync even though kubectl SSA dry-run shows no change.
- apiVersion: v1
kind: PersistentVolumeClaim
metadata:
- metadata:
name: pgdata
spec:
storageClassName: longhorn

View File

@@ -201,6 +201,8 @@ spec:
metadata:
labels:
app: dustin-web
annotations:
flowercore.io/healthz-auth-policy: "allow-anonymous"
spec:
containers:
- name: nginx
@@ -225,12 +227,18 @@ spec:
httpGet:
path: /healthz
port: 80
httpHeaders:
- name: X-Forwarded-Proto
value: https
initialDelaySeconds: 5
periodSeconds: 10
readinessProbe:
httpGet:
path: /healthz
port: 80
httpHeaders:
- name: X-Forwarded-Proto
value: https
initialDelaySeconds: 3
periodSeconds: 5
volumes:
@@ -265,7 +273,7 @@ spec:
entryPoints:
- websecure
routes:
- match: Host(`timeforta.co`) || Host(`www.timeforta.co`)
- match: (Host(`timeforta.co`) || Host(`www.timeforta.co`)) && (Method(`GET`) || Method(`HEAD`))
kind: Rule
services:
- name: dustin-web

View File

@@ -201,6 +201,8 @@ spec:
metadata:
labels:
app: erik-web
annotations:
flowercore.io/healthz-auth-policy: "allow-anonymous"
spec:
containers:
- name: nginx
@@ -225,12 +227,18 @@ spec:
httpGet:
path: /healthz
port: 80
httpHeaders:
- name: X-Forwarded-Proto
value: https
initialDelaySeconds: 5
periodSeconds: 10
readinessProbe:
httpGet:
path: /healthz
port: 80
httpHeaders:
- name: X-Forwarded-Proto
value: https
initialDelaySeconds: 3
periodSeconds: 5
volumes:
@@ -265,7 +273,7 @@ spec:
entryPoints:
- websecure
routes:
- match: Host(`erckak.dev`) || Host(`www.erckak.dev`)
- match: (Host(`erckak.dev`) || Host(`www.erckak.dev`)) && (Method(`GET`) || Method(`HEAD`))
kind: Rule
services:
- name: erik-web

View File

@@ -46,8 +46,6 @@ spec:
template:
metadata:
annotations:
fc.flowercore.io/healthz-anon: "true"
fc.flowercore.io/probe-path: "/healthz"
prometheus.io/path: /metrics/prometheus
prometheus.io/port: "5000"
prometheus.io/scrape: "true"
@@ -56,7 +54,6 @@ spec:
app.kubernetes.io/part-of: flowercore
spec:
containers:
# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
- envFrom:
- configMapRef:
name: aistation-web-config
@@ -170,26 +167,3 @@ spec:
port: 80
tls:
secretName: aistation-web-tls
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
# When the operator decides to expose aistation-web publicly, uncomment + update the host,
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
#
# --- IngressRoute ---
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: aistation-web-public
# namespace: fc-aistation
# spec:
# entryPoints: [websecure]
# routes:
# - match: Host(`aistation.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
# kind: Rule
# middlewares:
# - name: aistation-web-public-profile-header # injects entitlement profile
# services:
# - name: aistation-web
# port: 80
# tls: {}
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).

View File

@@ -112,8 +112,6 @@ spec:
app.kubernetes.io/name: chat-web
app.kubernetes.io/part-of: flowercore
annotations:
fc.flowercore.io/healthz-anon: "true"
fc.flowercore.io/probe-path: "/healthz"
prometheus.io/scrape: "true"
prometheus.io/port: "8080"
prometheus.io/path: "/metrics/prometheus"
@@ -130,7 +128,6 @@ spec:
ports:
- name: http
containerPort: 8080
# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
envFrom:
- configMapRef:
name: chat-web-config

View File

@@ -51,26 +51,3 @@ spec:
port: 8080
tls:
secretName: remotedesktop-web-tls
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
# When the operator decides to expose remotedesktop-web publicly, uncomment + update the host,
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
#
# --- IngressRoute ---
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: remotedesktop-web-public
# namespace: fc-desktop
# spec:
# entryPoints: [websecure]
# routes:
# - match: Host(`desktop.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
# kind: Rule
# middlewares:
# - name: remotedesktop-web-public-profile-header # injects entitlement profile
# services:
# - name: remotedesktop-web
# port: 8080
# tls: {}
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).

View File

@@ -52,8 +52,6 @@ spec:
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
annotations:
fc.flowercore.io/healthz-anon: "true"
fc.flowercore.io/probe-path: "/healthz"
prometheus.io/scrape: "true"
prometheus.io/port: "8080"
prometheus.io/path: "/metrics"
@@ -69,7 +67,6 @@ spec:
ports:
- name: http
containerPort: 8080
# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
env:
- name: ASPNETCORE_URLS
value: "http://+:8080"

View File

@@ -30,26 +30,3 @@ spec:
port: 80
tls:
secretName: dms-web-tls
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
# When the operator decides to expose dms-web publicly, uncomment + update the host,
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
#
# --- IngressRoute ---
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: dms-web-public
# namespace: fc-dms
# spec:
# entryPoints: [websecure]
# routes:
# - match: Host(`dms.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
# kind: Rule
# middlewares:
# - name: dms-web-public-profile-header # injects entitlement profile
# services:
# - name: dms-web
# port: 80
# tls: {}
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).

View File

@@ -203,6 +203,8 @@ spec:
metadata:
labels:
app: fc-landing
annotations:
flowercore.io/healthz-auth-policy: "allow-anonymous"
spec:
containers:
- name: nginx
@@ -227,12 +229,18 @@ spec:
httpGet:
path: /healthz
port: 80
httpHeaders:
- name: X-Forwarded-Proto
value: https
initialDelaySeconds: 5
periodSeconds: 10
readinessProbe:
httpGet:
path: /healthz
port: 80
httpHeaders:
- name: X-Forwarded-Proto
value: https
initialDelaySeconds: 3
periodSeconds: 5
volumes:
@@ -298,7 +306,7 @@ spec:
entryPoints:
- websecure
routes:
- match: Host(`flowercore.io`) || Host(`www.flowercore.io`)
- match: (Host(`flowercore.io`) || Host(`www.flowercore.io`)) && (Method(`GET`) || Method(`HEAD`))
kind: Rule
services:
- name: fc-landing
@@ -316,7 +324,7 @@ spec:
entryPoints:
- web
routes:
- match: Host(`flowercore.io`) || Host(`www.flowercore.io`)
- match: (Host(`flowercore.io`) || Host(`www.flowercore.io`)) && (Method(`GET`) || Method(`HEAD`))
kind: Rule
services:
- name: fc-landing

View File

@@ -46,8 +46,6 @@ spec:
template:
metadata:
annotations:
fc.flowercore.io/healthz-anon: "true"
fc.flowercore.io/probe-path: "/health"
prometheus.io/path: /metrics/prometheus
prometheus.io/port: "5000"
prometheus.io/scrape: "true"
@@ -56,7 +54,6 @@ spec:
app.kubernetes.io/part-of: flowercore
spec:
containers:
# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
- envFrom:
- configMapRef:
name: library-web-config
@@ -170,26 +167,3 @@ spec:
port: 80
tls:
secretName: library-web-tls
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
# When the operator decides to expose library-web publicly, uncomment + update the host,
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
#
# --- IngressRoute ---
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: library-web-public
# namespace: fc-library
# spec:
# entryPoints: [websecure]
# routes:
# - match: Host(`library.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
# kind: Rule
# middlewares:
# - name: library-web-public-profile-header # injects entitlement profile
# services:
# - name: library-web
# port: 80
# tls: {}
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).

View File

@@ -83,8 +83,6 @@ spec:
app.kubernetes.io/name: fc-llm-bridge
app.kubernetes.io/part-of: flowercore
annotations:
fc.flowercore.io/healthz-anon: "true"
fc.flowercore.io/probe-path: "/healthz"
prometheus.io/scrape: "true"
prometheus.io/port: "8080"
prometheus.io/path: "/metrics"
@@ -118,7 +116,6 @@ spec:
ports:
- containerPort: 8080
name: http
# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
env:
- name: ASPNETCORE_URLS
value: "http://+:8080"
@@ -284,26 +281,3 @@ spec:
port: 8080
tls:
secretName: fc-llm-bridge-tls
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
# When the operator decides to expose fc-llm-bridge publicly, uncomment + update the host,
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
#
# --- IngressRoute ---
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: fc-llm-bridge-public
# namespace: fc-llm-bridge
# spec:
# entryPoints: [websecure]
# routes:
# - match: Host(`llm-bridge.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
# kind: Rule
# middlewares:
# - name: fc-llm-bridge-public-profile-header # injects entitlement profile
# services:
# - name: fc-llm-bridge
# port: 80
# tls: {}
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).

View File

@@ -30,26 +30,3 @@ spec:
port: 80
tls:
secretName: menuboard-web-tls
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
# When the operator decides to expose menuboard-web publicly, uncomment + update the host,
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
#
# --- IngressRoute ---
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: menuboard-web-public
# namespace: fc-menuboard
# spec:
# entryPoints: [websecure]
# routes:
# - match: Host(`menuboard.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
# kind: Rule
# middlewares:
# - name: menuboard-web-public-profile-header # injects entitlement profile
# services:
# - name: menuboard-web
# port: 80
# tls: {}
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).

View File

@@ -41,8 +41,6 @@ spec:
labels:
app: messageboard-web
annotations:
fc.flowercore.io/healthz-anon: "true"
fc.flowercore.io/probe-path: "/health"
prometheus.io/scrape: "true"
prometheus.io/port: "8080"
prometheus.io/path: "/metrics/prometheus"
@@ -54,7 +52,6 @@ spec:
ports:
- containerPort: 8080
name: http
# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
envFrom:
- configMapRef:
name: messageboard-web-config
@@ -144,26 +141,3 @@ spec:
port: 80
tls:
secretName: messageboard-web-tls
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
# When the operator decides to expose messageboard-web publicly, uncomment + update the host,
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
#
# --- IngressRoute ---
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: messageboard-web-public
# namespace: fc-messageboard
# spec:
# entryPoints: [websecure]
# routes:
# - match: Host(`messageboard.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
# kind: Rule
# middlewares:
# - name: messageboard-web-public-profile-header # injects entitlement profile
# services:
# - name: messageboard-web
# port: 80
# tls: {}
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).

View File

@@ -30,26 +30,3 @@ spec:
port: 5300
tls:
secretName: mysql-web-tls
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
# When the operator decides to expose mysql-web publicly, uncomment + update the host,
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
#
# --- IngressRoute ---
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: mysql-web-public
# namespace: fc-mysql
# spec:
# entryPoints: [websecure]
# routes:
# - match: Host(`mysql.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
# kind: Rule
# middlewares:
# - name: mysql-web-public-profile-header # injects entitlement profile
# services:
# - name: mysql-web
# port: 80
# tls: {}
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).

View File

@@ -30,26 +30,3 @@ spec:
port: 5400
tls:
secretName: php-web-tls
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
# When the operator decides to expose php-web publicly, uncomment + update the host,
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
#
# --- IngressRoute ---
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: php-web-public
# namespace: fc-php
# spec:
# entryPoints: [websecure]
# routes:
# - match: Host(`php.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
# kind: Rule
# middlewares:
# - name: php-web-public-profile-header # injects entitlement profile
# services:
# - name: php-web
# port: 80
# tls: {}
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).

View File

@@ -30,26 +30,3 @@ spec:
port: 80
tls:
secretName: presentations-web-tls
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
# When the operator decides to expose presentations-web publicly, uncomment + update the host,
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
#
# --- IngressRoute ---
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: presentations-web-public
# namespace: fc-presentations
# spec:
# entryPoints: [websecure]
# routes:
# - match: Host(`presentations.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
# kind: Rule
# middlewares:
# - name: presentations-web-public-profile-header # injects entitlement profile
# services:
# - name: presentations-web
# port: 80
# tls: {}
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).

View File

@@ -46,8 +46,6 @@ spec:
template:
metadata:
annotations:
fc.flowercore.io/healthz-anon: "true"
fc.flowercore.io/probe-path: "/healthz"
kubectl.kubernetes.io/restartedAt: "2026-06-02T01:34:08-05:00"
prometheus.io/path: /metrics/prometheus
prometheus.io/port: "5000"
@@ -57,7 +55,6 @@ spec:
app.kubernetes.io/part-of: flowercore
spec:
containers:
# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
- envFrom:
- configMapRef:
name: retail-web-config
@@ -171,26 +168,3 @@ spec:
port: 80
tls:
secretName: retail-web-tls
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
# When the operator decides to expose retail-web publicly, uncomment + update the host,
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
#
# --- IngressRoute ---
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: retail-web-public
# namespace: fc-retail
# spec:
# entryPoints: [websecure]
# routes:
# - match: Host(`retail.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
# kind: Rule
# middlewares:
# - name: retail-web-public-profile-header # injects entitlement profile
# services:
# - name: retail-web
# port: 80
# tls: {}
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).

View File

@@ -30,26 +30,3 @@ spec:
port: 80
tls:
secretName: scoreboard-web-tls
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
# When the operator decides to expose scoreboard-web publicly, uncomment + update the host,
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
#
# --- IngressRoute ---
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: scoreboard-web-public
# namespace: fc-scoreboard
# spec:
# entryPoints: [websecure]
# routes:
# - match: Host(`scoreboard.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
# kind: Rule
# middlewares:
# - name: scoreboard-web-public-profile-header # injects entitlement profile
# services:
# - name: scoreboard-web
# port: 80
# tls: {}
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).

View File

@@ -37,26 +37,3 @@ spec:
port: 80
tls:
secretName: segmentdisplay-web-tls
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
# When the operator decides to expose segmentdisplay-web publicly, uncomment + update the host,
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
#
# --- IngressRoute ---
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: segmentdisplay-web-public
# namespace: fc-segmentdisplay
# spec:
# entryPoints: [websecure]
# routes:
# - match: Host(`segmentdisplay.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
# kind: Rule
# middlewares:
# - name: segmentdisplay-web-public-profile-header # injects entitlement profile
# services:
# - name: segmentdisplay-web
# port: 80
# tls: {}
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).

View File

@@ -46,26 +46,3 @@ spec:
services:
- name: signage-web
port: 5190
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
# When the operator decides to expose signage-web publicly, uncomment + update the host,
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
#
# --- IngressRoute ---
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: signage-web-public
# namespace: fc-signage
# spec:
# entryPoints: [websecure]
# routes:
# - match: Host(`signage.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
# kind: Rule
# middlewares:
# - name: signage-web-public-profile-header # injects entitlement profile
# services:
# - name: signage-web
# port: 80
# tls: {}
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).

View File

@@ -97,7 +97,6 @@ spec:
containers:
- name: piper
image: rhasspy/wyoming-piper:latest
# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
env:
- name: PYTHONHTTPSVERIFY
value: "0"
@@ -524,8 +523,6 @@ spec:
app.kubernetes.io/name: ttsreader-web
app.kubernetes.io/part-of: flowercore
annotations:
fc.flowercore.io/healthz-anon: "true"
fc.flowercore.io/probe-path: "/health"
prometheus.io/scrape: "true"
prometheus.io/port: "5217"
prometheus.io/path: "/metrics"
@@ -765,26 +762,3 @@ spec:
port: 5217
tls:
secretName: ttsreader-tls
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
# When the operator decides to expose ttsreader-web publicly, uncomment + update the host,
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
#
# --- IngressRoute ---
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: ttsreader-web-public
# namespace: fc-ttsreader
# spec:
# entryPoints: [websecure]
# routes:
# - match: Host(`ttsreader.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
# kind: Rule
# middlewares:
# - name: ttsreader-web-public-profile-header # injects entitlement profile
# services:
# - name: ttsreader-web
# port: 80
# tls: {}
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).

View File

@@ -52,9 +52,6 @@ spec:
app: updatecenter-web
template:
metadata:
annotations:
fc.flowercore.io/healthz-anon: "true"
fc.flowercore.io/probe-path: "/"
labels:
app: updatecenter-web
spec:
@@ -66,7 +63,6 @@ spec:
ports:
- containerPort: 8080
name: http
# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
env:
- name: ASPNETCORE_URLS
value: http://+:8080

View File

@@ -201,6 +201,8 @@ spec:
metadata:
labels:
app: fit-web
annotations:
flowercore.io/healthz-auth-policy: "allow-anonymous"
spec:
containers:
- name: nginx
@@ -225,12 +227,18 @@ spec:
httpGet:
path: /healthz
port: 80
httpHeaders:
- name: X-Forwarded-Proto
value: https
initialDelaySeconds: 5
periodSeconds: 10
readinessProbe:
httpGet:
path: /healthz
port: 80
httpHeaders:
- name: X-Forwarded-Proto
value: https
initialDelaySeconds: 3
periodSeconds: 5
volumes:
@@ -265,7 +273,7 @@ spec:
entryPoints:
- websecure
routes:
- match: Host(`flowerinsider.xyz`) || Host(`www.flowerinsider.xyz`)
- match: (Host(`flowerinsider.xyz`) || Host(`www.flowerinsider.xyz`)) && (Method(`GET`) || Method(`HEAD`))
kind: Rule
services:
- name: fit-web

View File

@@ -257,6 +257,8 @@ spec:
metadata:
labels:
app: flowercore-web
annotations:
flowercore.io/healthz-auth-policy: "allow-anonymous"
spec:
containers:
- name: nginx
@@ -281,12 +283,18 @@ spec:
httpGet:
path: /healthz
port: 80
httpHeaders:
- name: X-Forwarded-Proto
value: https
initialDelaySeconds: 5
periodSeconds: 10
readinessProbe:
httpGet:
path: /healthz
port: 80
httpHeaders:
- name: X-Forwarded-Proto
value: https
initialDelaySeconds: 3
periodSeconds: 5
volumes:

View File

@@ -11,7 +11,7 @@ spec:
entryPoints:
- websecure
routes:
- match: Host(`gitea.flowercore.io`)
- match: Host(`gitea.flowercore.io`) && (Method(`GET`) || Method(`HEAD`) || Method(`POST`) || Method(`OPTIONS`))
kind: Rule
services:
- name: gitea-http

View File

@@ -24,12 +24,6 @@ original Longhorn ReadWriteOnce NuGet PVC. Every other repo-scoped runner uses
two replicas with per-pod `emptyDir` caches. That is the safe backlog-drain
strategy: no two pods share one RWO PVC.
Ephemeral runner pods are expected to register, run one job, deregister, and
exit so the Deployment starts a fresh pod for the next registration token. A
small amount of exit-1/restart churn from token-expiry or no-work windows is
accepted operational noise as long as jobs are not stuck queued and the
repo-scoped runner-offline alerts stay quiet.
Sprint 32 final long-tail wave adds 16 two-replica Deployments:
`FlowerCore.Knowledge`, `FlowerCore.LlmBridge`, `FlowerCore.Media`,
`FlowerCore.Presentations`, `FlowerCore.RemoteDesktop`, `FlowerCore.DNS`,

View File

@@ -90,8 +90,6 @@ spec:
app.kubernetes.io/name: knowledge-web
app.kubernetes.io/part-of: bluejay-infra
annotations:
fc.flowercore.io/healthz-anon: "true"
fc.flowercore.io/probe-path: "/healthz"
prometheus.io/scrape: "true"
prometheus.io/port: "8080"
prometheus.io/path: "/metrics"
@@ -119,7 +117,6 @@ spec:
ports:
- containerPort: 8080
name: http
# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
env:
- name: ASPNETCORE_URLS
value: "http://+:8080"
@@ -289,26 +286,3 @@ spec:
port: 80
tls:
secretName: knowledge-tls
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
# When the operator decides to expose knowledge-web publicly, uncomment + update the host,
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
#
# --- IngressRoute ---
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: knowledge-web-public
# namespace: knowledge
# spec:
# entryPoints: [websecure]
# routes:
# - match: Host(`knowledge.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
# kind: Rule
# middlewares:
# - name: knowledge-web-public-profile-header # injects entitlement profile
# services:
# - name: knowledge-web
# port: 80
# tls: {}
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).

View File

@@ -243,7 +243,7 @@ spec:
entryPoints:
- websecure
routes:
- match: Host(`webmail.flowercore.io`)
- match: Host(`webmail.flowercore.io`) && (Method(`GET`) || Method(`HEAD`) || Method(`POST`) || Method(`OPTIONS`))
kind: Rule
services:
- name: mail-webmail

View File

@@ -479,7 +479,7 @@ spec:
entryPoints:
- websecure
routes:
- match: Host(`element.flowercore.io`)
- match: Host(`element.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
kind: Rule
services:
- name: element-web
@@ -497,7 +497,7 @@ spec:
entryPoints:
- websecure
routes:
- match: Host(`matrix.flowercore.io`)
- match: Host(`matrix.flowercore.io`) && (Method(`GET`) || Method(`HEAD`) || Method(`POST`) || Method(`OPTIONS`))
kind: Rule
services:
- name: synapse

View File

@@ -216,24 +216,19 @@ data:
- job_name: "pimanager-app"
scrape_interval: 15s
metrics_path: /metrics
scheme: https
tls_config:
insecure_skip_verify: true
static_configs:
- targets: ["piez.iamworkin.lan"]
- targets: ["10.0.58.25:5000"]
labels:
instance: "piez"
service: "signalcontrol"
service: "pimanager"
vlan: "home"
device: "pi4-ezconnect"
rig: "signal-b"
- targets: ["pirelay.iamworkin.lan"]
- targets: ["10.0.58.113:5200"]
labels:
instance: "pirelay"
service: "signalcontrol"
service: "pimanager"
vlan: "home"
device: "pi3-ks0212"
rig: "signal-a"
# Epson ET-3750 EcoTank Printer SNMP
- job_name: "snmp-printer"
@@ -493,12 +488,6 @@ data:
- "https://desktop.iamworkin.lan/"
- "https://print.iamworkin.lan/healthz" # root 401 behind API key auth; /healthz anonymous 200
- "https://dns.iamworkin.lan/healthz" # root auth-gated by OIDC; /healthz anonymous 200
- "https://signalcontrol.iamworkin.lan/health" # FlowerCore.SignalControl Pi control plane
- "https://flowercore.iamworkin.lan/healthz" # FlowerCore landing
- "https://replay.iamworkin.lan/healthz" # FlowerCore.Signage replay surface
- "https://worldbuilder.iamworkin.lan/healthz" # FlowerCore.WorldBuilder
- "https://updates.iamworkin.lan/api/v1/manifests/_schema" # UpdateCenter plural LAN alias
- "https://updatecenter-internal.iamworkin.lan/api/v1/manifests/_schema" # internal UC schema route
- "https://chat.iamworkin.lan/healthz" # OIDC staged; keep blackbox off root before enforcement flips
- "https://dist.iamworkin.lan/healthz" # root/admin auth-gated by OIDC; /healthz anonymous 200
- "https://dms.iamworkin.lan/healthz" # future OIDC posture; health route is already anonymous/live
@@ -843,9 +832,7 @@ data:
rules:
- alert: PiManagerDown
expr: up{job="pimanager-app"} == 0
# Sprint 67: delayed behind NodeDown's critical page so a powered-off
# Pi does not create the first duplicate page for the same host.
for: 8m
for: 3m
labels:
severity: warning
annotations:
@@ -924,13 +911,12 @@ data:
# of idle and SNMP times out, so 5m for: would page nightly. A
# genuine printer outage (jam, disconnected) lasts well over 30m.
- alert: EpsonPrinterDown
expr: (max_over_time(up{job="snmp-printer"}[35m]) == bool 0) == 1 and (hour() >= 13 or hour() < 1)
expr: up{job="snmp-printer"} == 0
for: 30m
labels:
severity: info
alert_channel: irc
severity: warning
annotations:
summary: "Epson ET-3750 SNMP unreachable during waking hours (30m)"
summary: "Epson ET-3750 SNMP unreachable for >30m (likely actual fault, not sleep)"
- alert: SynologyDiskLow
expr: hrStorageUsed{job="snmp-nas"} / hrStorageSize{job="snmp-nas"} * 100 > 85
@@ -1244,58 +1230,6 @@ data:
summary: "Marquee animation duration drifting > 10% on {{ $labels.renderer }} ({{ $labels.phase }})"
description: "Median observed cycle duration deviates from target DurationMs by >10%. Could indicate browser tab throttling, GPU pressure, or phase-advancement bug."
# ============================================================
# Update Center public-edge probes
# Live-mirrored from FlowerCore.Notes/scripts/monitoring/alerts.yml.
# This K8s ConfigMap is the future migration target; live Prometheus
# still reads the canonical Notes file from noc1 Podman.
# ============================================================
- name: update_center
rules:
# Critical only when the edge is genuinely unreachable. A Cloudflare
# HTTP 429 means the prober hit a rate-limit, not that real clients
# are down, so the warning rule below owns that signal.
- alert: UpdateCenterPublicEdgeDown
expr: |
(probe_success{job="probe-update-center-public-edge"} == 0)
unless on(instance)
(probe_http_status_code{job="probe-update-center-public-edge"} == 429)
for: 10m
labels:
severity: critical
service: update-center
alert_channel: irc
annotations:
summary: "Update Center public edge probe failed for {{ $labels.instance }}"
description: >-
The external probe for {{ $labels.instance }} failed for 10 minutes with a
non-2xx status that is not a rate-limit. Public Update Center clients may be
unable to fetch manifest schema metadata through Cloudflare.
runbook: >-
1. curl -sk https://{{ $labels.instance }}/api/v1/manifests/_schema
2. Verify Cloudflare DNS record is proxied and targets the current public edge IP
3. kubectl -n fc-updater get ingressroute updatecenter-web-public secret cf-origin-flowercore-io
4. Check Traefik logs for Method() or TLS secret errors
- alert: UpdateCenterPublicEdgeRateLimited
expr: probe_http_status_code{job="probe-update-center-public-edge"} == 429
for: 15m
labels:
severity: warning
service: update-center
alert_channel: irc
annotations:
summary: "Cloudflare is rate-limiting (HTTP 429) the public-edge probe for {{ $labels.instance }}"
description: >-
The blackbox prober receives HTTP 429 from Cloudflare for {{ $labels.instance }}
while the origin is healthy. This is a Cloudflare rate-limit / WAF condition on
the public hostname, not an outage.
runbook: >-
1. curl -sk https://{{ $labels.instance }}/api/v1/manifests/_schema (expect 200 from a normal client)
2. Review Cloudflare rate-limit / WAF rules for the hostname; the 5m-cadence prober is tripping a 429
3. Add a Cloudflare rate-limit exception for the prober source IP or the /api/v1/manifests/_schema path
4. Confirm whether the singular host update.flowercore.io is still required, or only updates.flowercore.io
# =============================================================================
# ConfigMap: Blackbox Exporter Configuration
# =============================================================================

View File

@@ -134,6 +134,8 @@ spec:
metadata:
labels:
app: pki-web
annotations:
flowercore.io/healthz-auth-policy: "allow-anonymous"
spec:
containers:
- name: nginx
@@ -158,12 +160,18 @@ spec:
httpGet:
path: /healthz
port: 80
httpHeaders:
- name: X-Forwarded-Proto
value: https
initialDelaySeconds: 5
periodSeconds: 10
readinessProbe:
httpGet:
path: /healthz
port: 80
httpHeaders:
- name: X-Forwarded-Proto
value: https
initialDelaySeconds: 3
periodSeconds: 5
volumes:
@@ -201,6 +209,7 @@ spec:
dnsNames:
- pki.iamworkin.lan
---
# Internal-only route: if a public twin is ever operator-approved, gate it with Host(`<public-host>`) && (Method(`GET`) || Method(`HEAD`)).
# Traefik IngressRoute
apiVersion: traefik.io/v1alpha1
kind: IngressRoute

View File

@@ -114,9 +114,6 @@ spec:
app: telephony-web
template:
metadata:
annotations:
fc.flowercore.io/healthz-anon: "true"
fc.flowercore.io/probe-path: "/health"
labels:
app: telephony-web
spec:
@@ -164,7 +161,6 @@ spec:
ports:
- containerPort: 5100
name: http
# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
env:
- name: Telephony__Twilio__AccountSid
valueFrom:
@@ -211,12 +207,18 @@ spec:
httpGet:
path: /health
port: 5100
httpHeaders:
- name: X-Forwarded-Proto
value: https
initialDelaySeconds: 30
periodSeconds: 10
readinessProbe:
httpGet:
path: /health
port: 5100
httpHeaders:
- name: X-Forwarded-Proto
value: https
initialDelaySeconds: 10
periodSeconds: 5
volumes:
@@ -260,12 +262,12 @@ spec:
- websecure
routes:
- kind: Rule
match: Host(`telephony.flowercore.io`)
match: Host(`telephony.flowercore.io`) && (Method(`GET`) || Method(`HEAD`) || Method(`POST`) || Method(`OPTIONS`))
services:
- name: telephony-web
port: 5100
- kind: Rule
match: Host(`telephony.iamwork.in`)
match: Host(`telephony.iamwork.in`) && (Method(`GET`) || Method(`HEAD`) || Method(`POST`) || Method(`OPTIONS`))
services:
- name: telephony-web
port: 5100
@@ -391,3 +393,4 @@ spec:

View File

@@ -21,6 +21,7 @@ spec:
basicAuth:
secret: traefik-dashboard-auth
---
# Internal-only route: if a public twin is ever operator-approved, gate it with Host(`<public-host>`) && (Method(`GET`) || Method(`HEAD`)).
# Dashboard IngressRoute
apiVersion: traefik.io/v1alpha1
kind: IngressRoute

View File

@@ -66,7 +66,7 @@ spec:
- websecure
routes:
- kind: Rule
match: Host(`voice.bluejay.dev`)
match: Host(`voice.bluejay.dev`) && (Method(`GET`) || Method(`HEAD`) || Method(`POST`) || Method(`OPTIONS`))
services:
- name: voice-bridge
port: 8766
@@ -84,7 +84,7 @@ spec:
- websecure
routes:
- kind: Rule
match: Host(`voice-ws.bluejay.dev`)
match: Host(`voice-ws.bluejay.dev`) && (Method(`GET`) || Method(`HEAD`))
services:
- name: voice-bridge
port: 8765

View File

@@ -77,8 +77,6 @@ spec:
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
annotations:
fc.flowercore.io/healthz-anon: "true"
fc.flowercore.io/probe-path: "/healthz"
prometheus.io/scrape: "true"
prometheus.io/port: "8080"
prometheus.io/path: "/metrics/prometheus"
@@ -95,7 +93,6 @@ spec:
ports:
- containerPort: 8080
name: http
# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
env:
- name: ASPNETCORE_URLS
value: "http://+:8080"
@@ -257,26 +254,3 @@ spec:
port: 80
tls:
secretName: worldbuilder-web-tls
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
# When the operator decides to expose worldbuilder-web publicly, uncomment + update the host,
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
#
# --- IngressRoute ---
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: worldbuilder-web-public
# namespace: worldbuilder
# spec:
# entryPoints: [websecure]
# routes:
# - match: Host(`worldbuilder.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
# kind: Rule
# middlewares:
# - name: worldbuilder-web-public-profile-header # injects entitlement profile
# services:
# - name: worldbuilder-web
# port: 80
# tls: {}
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).

View File

@@ -344,6 +344,7 @@ spec:
dnsNames:
- zabbix.iamworkin.lan
---
# Internal-only route: if a public twin is ever operator-approved, gate it with Host(`<public-host>`) && (Method(`GET`) || Method(`HEAD`)).
# Traefik IngressRoute
apiVersion: traefik.io/v1alpha1
kind: IngressRoute

View File

@@ -1,74 +0,0 @@
apiVersion: argoproj.io/v1alpha1
kind: ApplicationSet
metadata:
annotations:
argocd.argoproj.io/refresh: "true"
name: bluejay-infra
namespace: argocd
spec:
generators:
- git:
directories:
- path: apps/*
repoURL: http://gitea-clusterip.gitea.svc:3000/bluejay/bluejay-infra.git
revision: main
template:
metadata: {}
spec:
destination: {}
project: ""
goTemplate: true
goTemplateOptions:
- missingkey=error
template:
metadata:
name: infra-{{.path.basename}}
spec:
destination:
server: https://kubernetes.default.svc
ignoreDifferences:
- group: apps
jqPathExpressions:
- .spec.volumeClaimTemplates[]?.status
jsonPointers:
- /spec/volumeClaimTemplates
kind: StatefulSet
name: zabbix-postgres
namespace: zabbix
- group: apps
jqPathExpressions:
- .spec.volumeClaimTemplates[]?.status
jsonPointers:
- /spec/volumeClaimTemplates
kind: StatefulSet
name: guac-mysql
namespace: guacamole
- group: apps
jqPathExpressions:
- .spec.volumeClaimTemplates[]?.status
jsonPointers:
- /spec/volumeClaimTemplates
kind: StatefulSet
name: matrix-postgres
namespace: matrix
- group: apps
jqPathExpressions:
- .spec.volumeClaimTemplates[]?.status
jsonPointers:
- /spec/volumeClaimTemplates
kind: StatefulSet
name: authentik-postgres
namespace: authentik
project: default
source:
path: '{{.path.path}}'
repoURL: http://gitea-clusterip.gitea.svc:3000/bluejay/bluejay-infra.git
targetRevision: main
syncPolicy:
automated:
prune: true
selfHeal: true
syncOptions:
- CreateNamespace=true
- ServerSideApply=true
- RespectIgnoreDifferences=true

View File

@@ -13,21 +13,67 @@ public sealed class FleetManifestLintTests
private static readonly HashSet<string> PublicReadOnlyHosts = new(StringComparer.Ordinal)
{
"bluejay.dev",
"brochure.flowercore.io",
"dist.flowercore.io",
"element.flowercore.io",
"erckak.dev",
"flowercore.io",
"flowerinsider.xyz",
"timeforta.co",
"voice-ws.bluejay.dev",
"www.bluejay.dev",
"www.erckak.dev",
"www.flowercore.io",
"www.flowerinsider.xyz",
"www.timeforta.co",
};
// Hosts that allow a tightly bounded write surface in addition to GET/HEAD.
// updatecenter.iamworkin.lan accepts POST /api/v1/checkin/{id}
// Public hosts that allow a tightly bounded write surface in addition to
// GET/HEAD. updatecenter.iamworkin.lan accepts POST /api/v1/checkin/{id}
// (bootstrap-JWT) so its allowlist is GET||HEAD||POST||OPTIONS — but
// PUT/PATCH/DELETE must still 404 at the route. Public
// update.flowercore.io remains a GET/HEAD download surface in the
// FlowerCore.Updater sibling manifest and is covered by the general
// public-method allowlist lint instead of this write-surface rule.
// PUT/PATCH/DELETE must still 404 at the route. Anything wider than this
// set should fail this lint.
//
// PUB-1 (2026-05-06): update.flowercore.io / updates.flowercore.io were
// added for the Cloudflare-proxied public Update Center edge. They use the
// same bounded read-write allowlist as the LAN pair.
private static readonly HashSet<string> PublicReadWriteAllowlistHosts = new(StringComparer.Ordinal)
{
"chat.flowercore.io",
"gitea.flowercore.io",
"matrix.flowercore.io",
"telephony.flowercore.io",
"telephony.iamwork.in",
"updatecenter.iamworkin.lan",
"updates.iamworkin.lan",
"update.flowercore.io",
"updates.flowercore.io",
"voice.bluejay.dev",
"webmail.flowercore.io",
};
private static readonly IReadOnlyDictionary<string, string> InfraHealthzProbeDeployments = new Dictionary<string, string>(StringComparer.Ordinal)
{
["andrew"] = "andrew-web",
["dustin"] = "dustin-web",
["erik"] = "erik-web",
["fc-landing"] = "fc-landing",
["fit"] = "fit-web",
["flowercore"] = "flowercore-web",
["pki-web"] = "pki-web",
};
private static readonly IReadOnlyDictionary<string, string> InfraForwardedProtoProbeDeployments = new Dictionary<string, string>(StringComparer.Ordinal)
{
["andrew"] = "andrew-web",
["dustin"] = "dustin-web",
["erik"] = "erik-web",
["fc-landing"] = "fc-landing",
["fit"] = "fit-web",
["flowercore"] = "flowercore-web",
["pki-web"] = "pki-web",
["telephony"] = "telephony-web",
};
private static readonly HashSet<string> ApiKeyProtectedDeployments = new(StringComparer.Ordinal)
@@ -65,7 +111,7 @@ public sealed class FleetManifestLintTests
["github-runner-updater"] = "https://github.com/astoltz/FlowerCore.Updater",
};
private static readonly HashSet<string> RepoScopedLinuxRunnerDeployments = new(StringComparer.Ordinal)
private static readonly HashSet<string> ScaledLinuxRunnerDeployments = new(StringComparer.Ordinal)
{
"github-runner-sharedpos",
"github-runner-puppet",
@@ -79,44 +125,6 @@ public sealed class FleetManifestLintTests
"github-runner-updater",
};
private static readonly IReadOnlyDictionary<string, (string Deployment, string ProbePath)> BroaderHardeningDeployments =
new Dictionary<string, (string Deployment, string ProbePath)>(StringComparer.Ordinal)
{
["fc-aistation"] = ("aistation-web", "/healthz"),
["fc-chat"] = ("chat-web", "/healthz"),
["fc-devicemgmt"] = ("fc-devicemgmt-web", "/healthz"),
["fc-library"] = ("library-web", "/health"),
["fc-llm-bridge"] = ("fc-llm-bridge", "/healthz"),
["fc-messageboard"] = ("messageboard-web", "/health"),
["fc-retail"] = ("retail-web", "/healthz"),
["fc-ttsreader"] = ("ttsreader-web", "/health"),
["fc-updater"] = ("updatecenter-web", "/"),
["knowledge"] = ("knowledge-web", "/healthz"),
["telephony"] = ("telephony-web", "/health"),
["worldbuilder"] = ("worldbuilder-web", "/healthz"),
};
private static readonly HashSet<string> BroaderHardeningInternalPrestageApps = new(StringComparer.Ordinal)
{
"fc-aistation",
"fc-desktop",
"fc-dms",
"fc-library",
"fc-llm-bridge",
"fc-menuboard",
"fc-messageboard",
"fc-mysql",
"fc-php",
"fc-presentations",
"fc-retail",
"fc-scoreboard",
"fc-segmentdisplay",
"fc-signage",
"fc-ttsreader",
"knowledge",
"worldbuilder",
};
private static readonly IReadOnlyDictionary<string, string> WritableRunnerEnv = new Dictionary<string, string>(StringComparer.Ordinal)
{
["HOME"] = "/home/runner",
@@ -165,8 +173,13 @@ public sealed class FleetManifestLintTests
}))
.Where(entry => PublicReadOnlyHosts.Any(host => entry.Match.Contains($"Host(`{host}`)", StringComparison.Ordinal)))
.Where(entry => !entry.Match.Contains("Method(`GET`)", StringComparison.Ordinal)
|| !entry.Match.Contains("Method(`HEAD`)", StringComparison.Ordinal))
.Select(entry => $"{entry.Document.Descriptor} is missing an explicit GET/HEAD method allowlist.")
|| !entry.Match.Contains("Method(`HEAD`)", StringComparison.Ordinal)
|| entry.Match.Contains("Method(`POST`)", StringComparison.Ordinal)
|| entry.Match.Contains("Method(`PUT`)", StringComparison.Ordinal)
|| entry.Match.Contains("Method(`PATCH`)", StringComparison.Ordinal)
|| entry.Match.Contains("Method(`DELETE`)", StringComparison.Ordinal)
|| entry.Match.Contains("Method(`OPTIONS`)", StringComparison.Ordinal))
.Select(entry => $"{entry.Document.Descriptor} must explicitly allow GET/HEAD only on a public read-only host.")
.ToList();
violations.Should().BeEmpty();
@@ -305,17 +318,17 @@ public sealed class FleetManifestLintTests
}
[Fact]
public void GitHubRunnerFleet_MustAvoidRwoMultiAttachForRepoScopedDeployments()
public void GitHubRunnerFleet_MustAvoidRwoMultiAttachForScaledDeployments()
{
var deployments = GitHubRunnerDeployments();
foreach (var deploymentName in RepoScopedLinuxRunnerDeployments)
foreach (var deploymentName in ScaledLinuxRunnerDeployments)
{
var deployment = deployments[deploymentName];
// Sprint 34 ops trimmed runner load while the cluster was degraded
// to two healthy nodes. Repo-scoped runners can be tuned back above
// one replica, but they must stay RWO-safe before that happens.
ReplicaCount(deployment).Should().BeGreaterOrEqualTo(1, $"{deploymentName} must keep at least one repo-scoped runner online");
// Scaled runners must have >= 2 replicas (avoid single-pod bottleneck).
// Individual deployments may be tuned upward per CI activity — see
// "runners: right-size replica counts per 14d CI activity (#24)".
ReplicaCount(deployment).Should().BeGreaterOrEqualTo(2, $"{deploymentName} is in the scaled set and must run with at least 2 replicas");
var volumes = deployment.MappingSequence("spec", "template", "spec", "volumes");
var claimNames = volumes
@@ -323,7 +336,7 @@ public sealed class FleetManifestLintTests
.Where(value => !string.IsNullOrWhiteSpace(value))
.ToList();
claimNames.Should().BeEmpty($"{deploymentName} must remain ready for safe multi-replica scaling without sharing a RWO PVC");
claimNames.Should().BeEmpty($"{deploymentName} is scaled and must not share a RWO PVC");
volumes.Should().Contain(volume =>
string.Equals(ManifestNodeExtensions.Scalar(volume, "name"), "nuget-cache", StringComparison.Ordinal)
&& ManifestNodeExtensions.Mapping(volume, "emptyDir") != null);
@@ -468,99 +481,6 @@ public sealed class FleetManifestLintTests
monitoring.Should().Contain("dedicated LinuxRunnerOffline/MacMiniRunnerOffline alerts");
}
[Fact]
public void GithubRunnerReadme_DocumentsAcceptedEphemeralExitChurn()
{
var readme = File.ReadAllText(Path.Combine(Inventory.BluejayRoot, "apps", "github-runner", "README.md"));
readme.Should().Contain("Ephemeral runner pods");
readme.Should().Contain("exit-1/restart churn");
readme.Should().Contain("accepted operational noise");
readme.Should().Contain("repo-scoped runner-offline alerts stay quiet");
}
[Fact]
public void Monitoring_PiManagerDownDelayAndUpdateCenterRateLimit_MatchCanonicalAlerts()
{
var notesAlerts = File.ReadAllText(Path.Combine(
Inventory.WorkspaceRoot,
"FlowerCore.Notes",
"scripts",
"monitoring",
"alerts.yml"));
var monitoring = File.ReadAllText(Path.Combine(Inventory.BluejayRoot, "apps", "monitoring", "noc-monitoring.yaml"));
notesAlerts.Should().Contain("# Sprint 67: keep this warning behind NodeDown's 5m critical page");
notesAlerts.Should().Contain("- alert: PiManagerDown");
notesAlerts.Should().Contain("for: 8m");
monitoring.Should().Contain("# Sprint 67: delayed behind NodeDown's critical page");
monitoring.Should().Contain("- alert: PiManagerDown");
monitoring.Should().Contain("for: 8m");
notesAlerts.Should().Contain("- alert: UpdateCenterPublicEdgeRateLimited");
notesAlerts.Should().Contain("expr: probe_http_status_code{job=\"probe-update-center-public-edge\"} == 429");
notesAlerts.Should().Contain("for: 15m");
monitoring.Should().Contain("- alert: UpdateCenterPublicEdgeRateLimited");
monitoring.Should().Contain("expr: probe_http_status_code{job=\"probe-update-center-public-edge\"} == 429");
monitoring.Should().Contain("for: 15m");
monitoring.Should().Contain("severity: warning");
}
[Fact]
public void ApplicationSetExport_MustRemainManualRootOfGitOpsTree()
{
var readme = File.ReadAllText(Path.Combine(Inventory.BluejayRoot, "README.md"));
var appsetPath = Path.Combine(Inventory.BluejayRoot, "argocd", "applicationset-bluejay-infra.yaml");
File.Exists(appsetPath).Should().BeTrue();
var appset = File.ReadAllText(appsetPath);
appset.Should().Contain("kind: ApplicationSet");
appset.Should().Contain("name: bluejay-infra");
appset.Should().NotContain("\nstatus:");
appset.Should().NotContain("managedFields:");
readme.Should().Contain("root of this GitOps tree");
readme.Should().Contain("NOT self-managed");
readme.Should().Contain("kubectl -n argocd apply -f argocd/applicationset-bluejay-infra.yaml");
}
[Fact]
public void ApplicationSetExport_MustDiscoverAppsDirectoryOnMain()
{
var appset = File.ReadAllText(Path.Combine(Inventory.BluejayRoot, "argocd", "applicationset-bluejay-infra.yaml"));
appset.Should().Contain("path: apps/*");
appset.Should().Contain("revision: main");
appset.Should().Contain("repoURL: http://gitea-clusterip.gitea.svc:3000/bluejay/bluejay-infra.git");
appset.Should().Contain("path: '{{.path.path}}'");
appset.Should().Contain("targetRevision: main");
appset.Should().Contain("ServerSideApply=true");
appset.Should().Contain("RespectIgnoreDifferences=true");
}
[Fact]
public void ApplicationSetExport_MustPreserveStatefulSetIgnoreDifferences()
{
var appset = File.ReadAllText(Path.Combine(Inventory.BluejayRoot, "argocd", "applicationset-bluejay-infra.yaml"));
appset.Should().Contain("jsonPointers:");
appset.Should().Contain("- /spec/volumeClaimTemplates");
appset.Should().Contain(".spec.volumeClaimTemplates[]?.status");
Regex.Matches(appset, "kind: StatefulSet").Should().HaveCount(4);
foreach (var (name, ns) in new[]
{
("zabbix-postgres", "zabbix"),
("guac-mysql", "guacamole"),
("matrix-postgres", "matrix"),
("authentik-postgres", "authentik"),
})
{
appset.Should().Contain($"name: {name}");
appset.Should().Contain($"namespace: {ns}");
}
}
[Fact]
public void Monitoring_BlackboxTargetsForOidcSensitiveServices_MustUseAnonymousHealthRoutesWhenAvailable()
{
@@ -600,6 +520,49 @@ public sealed class FleetManifestLintTests
violations.Should().BeEmpty();
}
[Fact]
public void AuthSafeInfraHealthzProbes_MustDeclareAnonymousHealthzContract()
{
var violations = InfraHealthzProbeDeployments.SelectMany(expected =>
{
var deployment = AppDocuments(expected.Key)
.Single(document => document.Kind == "Deployment" && document.Name == expected.Value);
var hasHealthzProbe = deployment.MainContainerMappings()
.Any(container => ProbeHttpGetPath(container, "readinessProbe") == "/healthz"
|| ProbeHttpGetPath(container, "startupProbe") == "/healthz"
|| ProbeHttpGetPath(container, "livenessProbe") == "/healthz");
return hasHealthzProbe
&& !string.Equals(PodAnnotation(deployment, "flowercore.io/healthz-auth-policy"), "allow-anonymous", StringComparison.Ordinal)
? new[] { $"{deployment.Descriptor} probes /healthz but lacks flowercore.io/healthz-auth-policy: allow-anonymous." }
: Array.Empty<string>();
}).ToList();
violations.Should().BeEmpty();
}
[Fact]
public void AuthSafeInfraHttpProbes_MustSendForwardedProtoHttpsHeader()
{
var violations = InfraForwardedProtoProbeDeployments.SelectMany(expected =>
{
var deployment = AppDocuments(expected.Key)
.Single(document => document.Kind == "Deployment" && document.Name == expected.Value);
return deployment.MainContainerMappings()
.SelectMany(container => new[] { "startupProbe", "readinessProbe", "livenessProbe" }
.Where(probeKey => ProbeHttpGetPath(container, probeKey) is "/healthz" or "/health")
.Where(probeKey => !string.Equals(ProbeHttpGetHeaderValue(container, probeKey, "X-Forwarded-Proto"), "https", StringComparison.Ordinal))
.Select(probeKey =>
{
var containerName = ManifestNodeExtensions.Scalar(container, "name") ?? "<unnamed>";
return $"{deployment.Descriptor} container '{containerName}' {probeKey} is missing X-Forwarded-Proto=https.";
}));
}).ToList();
violations.Should().BeEmpty();
}
[Fact]
public void Knowledge_OidcEnforcement_MustKeepHealthzAnonymousContractVisibleInManifest()
{
@@ -739,6 +702,7 @@ public sealed class FleetManifestLintTests
var expectedFiles = new[]
{
"1password-item.yaml",
"argocd-application.yaml",
"certificate-web.yaml",
"clusterrole-operator.yaml",
"clusterrolebinding-operator.yaml",
@@ -894,62 +858,17 @@ public sealed class FleetManifestLintTests
}
[Fact]
public void FcDeviceManagement_MustRelyOnApplicationSetDiscovery()
public void FcDeviceManagement_ArgocdApplicationMustMatchApplicationSetDiscoveryConventions()
{
var documents = FcDeviceManagementDocuments();
var application = FcDeviceManagementDocuments()
.Single(document => document.Kind == "Application" && document.Name == "infra-fc-devicemgmt");
documents.Should().NotContain(document => document.Kind == "Application");
var ns = documents.Single(document => document.Kind == "Namespace" && document.Name == "fc-devicemgmt");
ns.FileText.Should().Contain("ArgoCD discovers this directory as Application `infra-fc-devicemgmt`.");
}
[Fact]
public void BroaderHardeningDeployments_MustAnnotateAnonymousHealthProbeIntent()
{
foreach (var expected in BroaderHardeningDeployments)
{
var deployment = AppDocuments(expected.Key)
.Single(document => document.Kind == "Deployment" && document.Name == expected.Value.Deployment);
PodAnnotation(deployment, "fc.flowercore.io/healthz-anon").Should().Be("true");
PodAnnotation(deployment, "fc.flowercore.io/probe-path").Should().Be(expected.Value.ProbePath);
}
}
[Fact]
public void BroaderHardeningDeployments_MustDocumentForwardedProtoAuthPosture()
{
foreach (var expected in BroaderHardeningDeployments)
{
var deployment = AppDocuments(expected.Key)
.Single(document => document.Kind == "Deployment" && document.Name == expected.Value.Deployment);
deployment.FileText.Should().Contain(
"fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178)");
}
}
[Fact]
public void BroaderHardeningInternalApps_MustOnlyPrestageCommentedPublicMethodAllowlist()
{
foreach (var app in BroaderHardeningInternalPrestageApps)
{
var documents = AppDocuments(app);
var text = string.Join(Environment.NewLine, documents.Select(document => document.FileText));
text.Should().Contain("PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only)");
text.Should().Contain("# - match: Host(`");
text.Should().Contain("Method(`GET`) || Method(`HEAD`)");
documents
.Where(document => document.Kind == "IngressRoute")
.SelectMany(document => document.MappingSequence("spec", "routes"))
.Select(route => ManifestNodeExtensions.Scalar(route, "match") ?? string.Empty)
application.Namespace.Should().Be("argocd");
application.Scalar("spec", "source", "repoURL")
.Should()
.NotContain(match => match.Contains(".flowercore.io", StringComparison.Ordinal),
"Sprint 61 broader hardening only pre-stages commented public hosts for internal-only apps");
}
.Be("http://gitea-clusterip.gitea.svc.cluster.local:3000/bluejay/bluejay-infra.git");
application.Scalar("spec", "source", "path").Should().Be("apps/fc-devicemgmt");
application.Scalar("spec", "destination", "namespace").Should().Be("fc-devicemgmt");
}
[Fact]
@@ -1186,6 +1105,20 @@ public sealed class FleetManifestLintTests
: null;
}
private static string? ProbeHttpGetHeaderValue(YamlMappingNode container, string probeKey, string name)
{
if (!ManifestNodeExtensions.TryGetMapping(container, probeKey, out var probe)
|| !ManifestNodeExtensions.TryGetMapping(probe, "httpGet", out var httpGet))
{
return null;
}
return ManifestNodeExtensions.MappingSequence(httpGet, "httpHeaders")
.Where(header => string.Equals(ManifestNodeExtensions.Scalar(header, "name"), name, StringComparison.Ordinal))
.Select(header => ManifestNodeExtensions.Scalar(header, "value"))
.SingleOrDefault();
}
private static IReadOnlyList<ManifestDocument> FcDeviceManagementDocuments()
{
return Inventory.Documents