Compare commits

..

1 Commits

Author SHA1 Message Date
Andrew Stoltz
eaba7cd171 fc-desktop: add phase 1 capacity guards 2026-05-20 15:49:20 -05:00
8 changed files with 381 additions and 913 deletions

View File

@@ -1,448 +0,0 @@
# Authentik OIDC backend
# ArgoCD-managed. BlueJay Lab.
#
# Stack:
# - PostgreSQL 16 StatefulSet (single replica, Longhorn RWO 5Gi)
# - Redis 7 Deployment (no persistence — session/cache only)
# - Authentik server + worker Deployments (image ghcr.io/goauthentik/server:2024.12.3)
# - Media PVC shared between server + worker (Longhorn RWO 2Gi)
# - Certificate via step-ca-acme ClusterIssuer
# - Traefik IngressRoute at id.iamworkin.lan
#
# Secrets come from 1Password item "authentik-credentials" (IAmWorkin vault, id y6i74ch22q5wvm7znquq4nhhcu)
# via the OnePasswordItem CRD, materialized into k8s Secret authentik/authentik-credentials.
#
# Why the discovery URL is /application/o/pimanager/ : Authentik issues per-application OIDC providers.
# The pimanager OIDC application/provider is created after the cluster pods are healthy (manual or
# via API once the bootstrap token is available — see Notes substrate).
---
apiVersion: v1
kind: Namespace
metadata:
name: authentik
labels:
app.kubernetes.io/part-of: bluejay-infra
---
# 1Password operator pulls the authentik-credentials item into a k8s Secret of the same name.
# Field labels in 1P become Secret keys: AUTHENTIK_SECRET_KEY, POSTGRES_PASSWORD, REDIS_PASSWORD,
# BOOTSTRAP_ADMIN_PASSWORD, BOOTSTRAP_ADMIN_TOKEN, BOOTSTRAP_ADMIN_EMAIL.
apiVersion: onepassword.com/v1
kind: OnePasswordItem
metadata:
name: authentik-credentials
namespace: authentik
spec:
itemPath: "vaults/IAmWorkin/items/authentik-credentials"
---
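# Shared media volume for server + worker pods.
# NOTE: the claim is ReadWriteOnce, i.e. mountable from a single node at a time, so the
# server and worker pods must both be scheduled onto the same node. Nothing in this file
# (no podAffinity / nodeSelector) enforces that co-location.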
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: authentik-media
namespace: authentik
spec:
storageClassName: longhorn
accessModes: [ReadWriteOnce]
resources:
requests:
storage: 2Gi
---
# PostgreSQL 16 StatefulSet — Authentik's primary store.
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: authentik-postgres
namespace: authentik
labels:
app: authentik-postgres
argocd.argoproj.io/instance: infra-authentik
spec:
persistentVolumeClaimRetentionPolicy:
whenDeleted: Retain
whenScaled: Retain
podManagementPolicy: OrderedReady
serviceName: authentik-postgres
replicas: 1
revisionHistoryLimit: 10
selector:
matchLabels:
app: authentik-postgres
template:
metadata:
labels:
app: authentik-postgres
spec:
containers:
- name: postgres
image: postgres:16-alpine
ports:
- containerPort: 5432
name: postgres
env:
- name: POSTGRES_USER
value: authentik
- name: POSTGRES_PASSWORD
valueFrom:
secretKeyRef:
name: authentik-credentials
key: POSTGRES_PASSWORD
- name: POSTGRES_DB
value: authentik
- name: POSTGRES_INITDB_ARGS
value: "--encoding=UTF-8 --lc-collate=C --lc-ctype=C"
- name: PGDATA
value: /var/lib/postgresql/data/pgdata
readinessProbe:
exec:
command: ["pg_isready", "-U", "authentik"]
initialDelaySeconds: 5
periodSeconds: 5
livenessProbe:
exec:
command: ["pg_isready", "-U", "authentik"]
initialDelaySeconds: 30
periodSeconds: 30
resources:
requests: { cpu: 100m, memory: 256Mi }
limits: { cpu: 1000m, memory: 1Gi }
volumeMounts:
- name: pgdata
mountPath: /var/lib/postgresql/data
volumeClaimTemplates:
- metadata:
name: pgdata
spec:
storageClassName: longhorn
accessModes: [ReadWriteOnce]
volumeMode: Filesystem
resources:
requests:
storage: 5Gi
---
apiVersion: v1
kind: Service
metadata:
name: authentik-postgres
namespace: authentik
spec:
clusterIP: None
selector:
app: authentik-postgres
ports:
- name: postgres
port: 5432
targetPort: 5432
---
# Redis 7 — session storage + Celery broker. No persistence needed (cache).
apiVersion: apps/v1
kind: Deployment
metadata:
name: authentik-redis
namespace: authentik
labels:
app: authentik-redis
argocd.argoproj.io/instance: infra-authentik
spec:
replicas: 1
strategy:
type: Recreate
selector:
matchLabels:
app: authentik-redis
template:
metadata:
labels:
app: authentik-redis
spec:
containers:
- name: redis
image: redis:7-alpine
args:
- "--save"
- ""
- "--appendonly"
- "no"
- "--requirepass"
- "$(REDIS_PASSWORD)"
env:
- name: REDIS_PASSWORD
valueFrom:
secretKeyRef:
name: authentik-credentials
key: REDIS_PASSWORD
ports:
- containerPort: 6379
name: redis
readinessProbe:
tcpSocket: { port: 6379 }
initialDelaySeconds: 5
periodSeconds: 5
livenessProbe:
tcpSocket: { port: 6379 }
initialDelaySeconds: 30
periodSeconds: 30
resources:
requests: { cpu: 50m, memory: 64Mi }
limits: { cpu: 500m, memory: 256Mi }
---
apiVersion: v1
kind: Service
metadata:
name: authentik-redis
namespace: authentik
spec:
selector:
app: authentik-redis
ports:
- name: redis
port: 6379
targetPort: 6379
---
# Authentik server Deployment — HTTP frontend on :9000.
apiVersion: apps/v1
kind: Deployment
metadata:
name: authentik-server
namespace: authentik
labels:
app: authentik-server
argocd.argoproj.io/instance: infra-authentik
spec:
replicas: 1
strategy:
type: Recreate # shares /media RWO PVC with worker
selector:
matchLabels:
app: authentik-server
template:
metadata:
labels:
app: authentik-server
spec:
securityContext:
# Authentik image runs as uid 1000 "authentik" but the Longhorn PVC mounts
# root:root by default. fsGroup recursively chgrp + chmod g+rwx so the
# non-root container can mkdir /media/public during the tenant_files migration.
fsGroup: 1000
containers:
- name: server
image: ghcr.io/goauthentik/server:2024.12.3
args: ["server"]
ports:
- containerPort: 9000
name: http
- containerPort: 9443
name: https
env:
- name: AUTHENTIK_SECRET_KEY
valueFrom:
secretKeyRef:
name: authentik-credentials
key: AUTHENTIK_SECRET_KEY
- name: AUTHENTIK_REDIS__HOST
value: authentik-redis
- name: AUTHENTIK_REDIS__PASSWORD
valueFrom:
secretKeyRef:
name: authentik-credentials
key: REDIS_PASSWORD
- name: AUTHENTIK_POSTGRESQL__HOST
value: authentik-postgres
- name: AUTHENTIK_POSTGRESQL__NAME
value: authentik
- name: AUTHENTIK_POSTGRESQL__USER
value: authentik
- name: AUTHENTIK_POSTGRESQL__PASSWORD
valueFrom:
secretKeyRef:
name: authentik-credentials
key: POSTGRES_PASSWORD
- name: AUTHENTIK_BOOTSTRAP_PASSWORD
valueFrom:
secretKeyRef:
name: authentik-credentials
key: BOOTSTRAP_ADMIN_PASSWORD
- name: AUTHENTIK_BOOTSTRAP_TOKEN
valueFrom:
secretKeyRef:
name: authentik-credentials
key: BOOTSTRAP_ADMIN_TOKEN
- name: AUTHENTIK_BOOTSTRAP_EMAIL
valueFrom:
secretKeyRef:
name: authentik-credentials
key: BOOTSTRAP_ADMIN_EMAIL
- name: AUTHENTIK_DISABLE_UPDATE_CHECK
value: "true"
- name: AUTHENTIK_ERROR_REPORTING__ENABLED
value: "false"
- name: AUTHENTIK_LOG_LEVEL
value: info
# First-boot Authentik can take 3+ min on the migration phase
# (waiting on DB lock while worker also runs migrations). Initial
# delays are generous so kubelet doesn't kill the pod mid-migration;
# periodSeconds keeps post-startup probing responsive.
readinessProbe:
httpGet:
path: /-/health/ready/
port: 9000
initialDelaySeconds: 60
periodSeconds: 10
timeoutSeconds: 5
failureThreshold: 12
livenessProbe:
httpGet:
path: /-/health/live/
port: 9000
initialDelaySeconds: 300
periodSeconds: 30
timeoutSeconds: 10
failureThreshold: 3
startupProbe:
httpGet:
path: /-/health/live/
port: 9000
initialDelaySeconds: 30
periodSeconds: 15
timeoutSeconds: 10
failureThreshold: 40 # 30s + 40*15s = 10.5 min budget
resources:
requests: { cpu: 150m, memory: 512Mi }
limits: { cpu: 1500m, memory: 1Gi }
volumeMounts:
- name: media
mountPath: /media
volumes:
- name: media
persistentVolumeClaim:
claimName: authentik-media
---
# Authentik worker Deployment — runs Celery background tasks.
apiVersion: apps/v1
kind: Deployment
metadata:
name: authentik-worker
namespace: authentik
labels:
app: authentik-worker
argocd.argoproj.io/instance: infra-authentik
spec:
replicas: 1
strategy:
type: Recreate # shares /media RWO PVC with server
selector:
matchLabels:
app: authentik-worker
template:
metadata:
labels:
app: authentik-worker
spec:
securityContext:
# Same as server pod — non-root uid 1000 needs PVC group write.
fsGroup: 1000
containers:
- name: worker
image: ghcr.io/goauthentik/server:2024.12.3
args: ["worker"]
env:
- name: AUTHENTIK_SECRET_KEY
valueFrom:
secretKeyRef:
name: authentik-credentials
key: AUTHENTIK_SECRET_KEY
- name: AUTHENTIK_REDIS__HOST
value: authentik-redis
- name: AUTHENTIK_REDIS__PASSWORD
valueFrom:
secretKeyRef:
name: authentik-credentials
key: REDIS_PASSWORD
- name: AUTHENTIK_POSTGRESQL__HOST
value: authentik-postgres
- name: AUTHENTIK_POSTGRESQL__NAME
value: authentik
- name: AUTHENTIK_POSTGRESQL__USER
value: authentik
- name: AUTHENTIK_POSTGRESQL__PASSWORD
valueFrom:
secretKeyRef:
name: authentik-credentials
key: POSTGRES_PASSWORD
- name: AUTHENTIK_DISABLE_UPDATE_CHECK
value: "true"
- name: AUTHENTIK_ERROR_REPORTING__ENABLED
value: "false"
- name: AUTHENTIK_LOG_LEVEL
value: info
resources:
requests: { cpu: 100m, memory: 256Mi }
limits: { cpu: 1000m, memory: 768Mi }
volumeMounts:
- name: media
mountPath: /media
volumes:
- name: media
persistentVolumeClaim:
claimName: authentik-media
---
apiVersion: v1
kind: Service
metadata:
name: authentik-server
namespace: authentik
spec:
selector:
app: authentik-server
ports:
- name: http
port: 9000
targetPort: 9000
- name: https
port: 9443
targetPort: 9443
---
# step-ca leaf certificate for id.iamworkin.lan.
# The step-ca container resolves names through pfSense Unbound, so the public A record for
# id.iamworkin.lan MUST exist before this Certificate is applied; otherwise the cert-manager
# HTTP-01 challenge fails without any visible error and backs off for about 2h.
# The A record was added 2026-05-25 via scripts/pfsense-add-id-host.py.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: authentik-tls
namespace: authentik
spec:
secretName: authentik-tls
dnsNames:
- id.iamworkin.lan
issuerRef:
name: step-ca-acme
kind: ClusterIssuer
---
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
name: authentik
namespace: authentik
spec:
entryPoints: [websecure]
routes:
- match: Host(`id.iamworkin.lan`)
kind: Rule
services:
- name: authentik-server
port: 9000
tls:
secretName: authentik-tls

View File

@@ -0,0 +1,33 @@
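# FlowerCore Remote Desktop - per-container resource defaults for session pods
#
# Namespace-level LimitRange for Sprint 44 Phase 1. The single `Container`
# rule below is evaluated per container (not per pod total): it fills in
# requests/limits a container omits and bounds explicit values by min/max.
# This defends the fc-desktop namespace from unbounded container requests
# while the per-tenant advisory FairShareEvaluator lands in
# FlowerCore.RemoteDesktop.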
apiVersion: v1
kind: LimitRange
metadata:
name: fc-desktop-pod-defaults
namespace: fc-desktop
labels:
app.kubernetes.io/name: fc-desktop
app.kubernetes.io/part-of: remotedesktop
app.kubernetes.io/component: capacity-guard
app.kubernetes.io/managed-by: argocd
flowercore.io/owner: infra
annotations:
flowercore.io/phase: sprint-44-cx-9-phase-a
spec:
limits:
- type: Container
default:
cpu: "1.0"
memory: "2Gi"
defaultRequest:
cpu: "500m"
memory: "1Gi"
max:
cpu: "2.0"
memory: "4Gi"
min:
cpu: "100m"
memory: "128Mi"

View File

@@ -0,0 +1,36 @@
# FlowerCore Remote Desktop - namespace ResourceQuota (GitOps-managed)
#
# Adopts the live fc-desktop-session-cap object created during the
# 2026-05-19 prewarm-cascade triage. Sprint 44 Phase 1 keeps the pod,
# CPU, and memory guard unchanged, then adds storage/PVC backstops from
# the fc-desktop CPU expansion substrate.
#
# Two-phase deploy note:
# Phase A: apply this ResourceQuota and limitrange.yaml with the current
# FlowerCore.RemoteDesktop image.
# Phase B: bump the service image only after the RemoteDesktop service
# admission/fair-share code lands in that repo.
apiVersion: v1
kind: ResourceQuota
metadata:
name: fc-desktop-session-cap
namespace: fc-desktop
labels:
app.kubernetes.io/name: fc-desktop
app.kubernetes.io/part-of: remotedesktop
app.kubernetes.io/component: capacity-guard
app.kubernetes.io/managed-by: argocd
flowercore.io/owner: infra
annotations:
flowercore.io/rationale: |
Operator-requested limit 2026-05-19: cluster CPU exhausted by RD
pool prewarm cascade. Preserve count/pods=15 plus requests.cpu=8
and requests.memory=16Gi until capacity expansion lands.
flowercore.io/phase: sprint-44-cx-9-phase-a
spec:
hard:
count/pods: "15"
requests.cpu: "8"
requests.memory: "16Gi"
requests.storage: "500Gi"
persistentvolumeclaims: "30"

View File

@@ -976,10 +976,7 @@ metadata:
flowercore.io/runner-repo: print-web
flowercore.io/github-repo: FlowerCore.Print.Web
spec:
# Sprint 33 morning-routine (2026-05-25): bumped 2 → 3 because help-screenshots
# AAT job holds a runner 30+ min, causing head-of-line blocking on parallel PRs.
# 12 runs in trailing 5d.
replicas: 3
replicas: 2
selector:
matchLabels:
app.kubernetes.io/name: github-runner-print-web
@@ -1777,8 +1774,7 @@ metadata:
flowercore.io/runner-repo: knowledge
flowercore.io/github-repo: FlowerCore.Knowledge
spec:
# Sprint 33 morning-routine (2026-05-25): dropped 2 → 1 — zero CI runs in trailing 14d.
replicas: 1
replicas: 2
selector:
matchLabels:
app.kubernetes.io/name: github-runner-knowledge
@@ -1911,8 +1907,7 @@ metadata:
flowercore.io/runner-repo: llm-bridge
flowercore.io/github-repo: FlowerCore.LlmBridge
spec:
# Sprint 33 morning-routine (2026-05-25): dropped 2 → 1 — zero CI runs in trailing 14d.
replicas: 1
replicas: 2
selector:
matchLabels:
app.kubernetes.io/name: github-runner-llm-bridge
@@ -2045,8 +2040,7 @@ metadata:
flowercore.io/runner-repo: media
flowercore.io/github-repo: FlowerCore.Media
spec:
# Sprint 33 morning-routine (2026-05-25): dropped 2 → 1 — zero CI runs in trailing 14d.
replicas: 1
replicas: 2
selector:
matchLabels:
app.kubernetes.io/name: github-runner-media
@@ -2179,8 +2173,7 @@ metadata:
flowercore.io/runner-repo: presentations
flowercore.io/github-repo: FlowerCore.Presentations
spec:
# Sprint 33 morning-routine (2026-05-25): dropped 2 → 1 — only 6 CI runs in trailing 14d.
replicas: 1
replicas: 2
selector:
matchLabels:
app.kubernetes.io/name: github-runner-presentations
@@ -2446,8 +2439,7 @@ metadata:
flowercore.io/runner-repo: dns
flowercore.io/github-repo: FlowerCore.DNS
spec:
# Sprint 33 morning-routine (2026-05-25): dropped 2 → 1 — zero CI runs in trailing 14d.
replicas: 1
replicas: 2
selector:
matchLabels:
app.kubernetes.io/name: github-runner-dns
@@ -3245,8 +3237,7 @@ metadata:
flowercore.io/runner-repo: intranet-web
flowercore.io/github-repo: FlowerCore.Intranet.Web
spec:
# Sprint 33 morning-routine (2026-05-25): dropped 2 → 1 — zero CI runs in trailing 14d.
replicas: 1
replicas: 2
selector:
matchLabels:
app.kubernetes.io/name: github-runner-intranet-web
@@ -3379,8 +3370,7 @@ metadata:
flowercore.io/runner-repo: provisioning
flowercore.io/github-repo: FlowerCore.Provisioning
spec:
# Sprint 33 morning-routine (2026-05-25): dropped 2 → 1 — only 3 CI runs in trailing 14d.
replicas: 1
replicas: 2
selector:
matchLabels:
app.kubernetes.io/name: github-runner-provisioning
@@ -3513,8 +3503,7 @@ metadata:
flowercore.io/runner-repo: redis
flowercore.io/github-repo: FlowerCore.Redis
spec:
# Sprint 33 morning-routine (2026-05-25): dropped 2 → 1 — only 3 CI runs in trailing 14d.
replicas: 1
replicas: 2
selector:
matchLabels:
app.kubernetes.io/name: github-runner-redis
@@ -3647,8 +3636,7 @@ metadata:
flowercore.io/runner-repo: message-board
flowercore.io/github-repo: FlowerCore.MessageBoard
spec:
# Sprint 33 morning-routine (2026-05-25): dropped 2 → 1 — only 3 CI runs in trailing 14d.
replicas: 1
replicas: 2
selector:
matchLabels:
app.kubernetes.io/name: github-runner-message-board
@@ -3781,8 +3769,7 @@ metadata:
flowercore.io/runner-repo: menu-board
flowercore.io/github-repo: FlowerCore.MenuBoard
spec:
# Sprint 33 morning-routine (2026-05-25): dropped 2 → 1 — only 3 CI runs in trailing 14d.
replicas: 1
replicas: 2
selector:
matchLabels:
app.kubernetes.io/name: github-runner-menu-board

View File

@@ -24,16 +24,7 @@
# (10.0.57.16:5200), public internet 80/443 (excluding RFC1918), and
# fc-signage:5190 for the signage AAT lane.
# - Ingress: Traefik (4444 + 8089 ACME-solver-style), intra-pod,
# telephony / gitea / fc-system / fc-signage / github-runner namespaces
# on 4444.
#
# 2026-05-25: added github-runner ingress on 4444 so CI jobs running in
# self-hosted runner pods (e.g. FlowerCore.Print.Web `help-screenshots`)
# can reach the grid. Without this allow, the session POST to
# `selenium-hub.selenium.svc.cluster.local:4444` was DNAT'd to the hub
# pod IP and then dropped at the Calico ingress hook — Selenium UI showed
# 0/4 sessions while the .NET HTTP client timed out at 60s. Same family
# as `feedback_netpol_dnat_backend_port`, wrong-source-namespace flavor.
# telephony / gitea / fc-system / fc-signage namespaces on 4444.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
@@ -212,13 +203,6 @@ spec:
ports:
- port: 4444
protocol: TCP
- from:
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: github-runner
ports:
- port: 4444
protocol: TCP
podSelector: {}
policyTypes:
- Ingress

View File

@@ -1,412 +0,0 @@
# Selenium Grid 4 — RKE2 deployment
#
# Hub + chrome + firefox + edge browser nodes serving fleet-wide AAT runs from
# the GitHub Actions self-hosted runners. ArgoCD owns this namespace from
# 2026-05-25 (`infra-selenium` Application; previously these resources were
# orphan kubectl-applied since 2026-03-15).
#
# Endpoints:
# - Internal cluster: http://selenium-hub.selenium.svc.cluster.local:4444
# - LAN LoadBalancer (MetalLB): http://10.0.56.208:4444
# - Traefik public: https://selenium.iamworkin.lan
#
# Browser maxSessions:
# - chrome 2 (bumped from 1 on 2026-05-25 morning-routine — AAT-heavy
# Print.Web help-screenshots was the global bottleneck;
# see commit history for ops/runner-replica-rightsize)
# - firefox 1
# - edge 1
#
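# Screenshots + video recordings are intended to end up on NFS. NOTE: this
# manifest itself mounts no NFS volume - the chrome `video` sidecar writes to
# the pod-local `selenium-videos` emptyDir (sizeLimit 5Gi), so any hand-off to
# NFS has to happen outside this file.
# See: CLAUDE.md "Selenium Grid & Visual AAT Testing" + bluejay-infra ADR notes.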
---
apiVersion: v1
kind: Service
metadata:
labels:
app: selenium-hub
app.kubernetes.io/name: selenium-hub
app.kubernetes.io/part-of: selenium-grid
name: selenium-hub
namespace: selenium
spec:
ports:
- name: web
port: 4444
targetPort: 4444
- name: publish
port: 4442
targetPort: 4442
- name: subscribe
port: 4443
targetPort: 4443
selector:
app: selenium-hub
type: ClusterIP
---
apiVersion: v1
kind: Service
metadata:
annotations:
metallb.io/ip-allocated-from-pool: bluejay-pool
metallb.universe.tf/loadBalancerIPs: 10.0.56.208
labels:
app: selenium-hub
component: external-access
name: selenium-hub-external
namespace: selenium
spec:
clusterIP: 10.43.90.147
clusterIPs:
- 10.43.90.147
externalTrafficPolicy: Local
healthCheckNodePort: 32213
ports:
- name: web
nodePort: 32411
port: 4444
targetPort: 4444
- name: publish
nodePort: 32068
port: 4442
targetPort: 4442
- name: subscribe
nodePort: 31000
port: 4443
targetPort: 4443
selector:
app: selenium-hub
type: LoadBalancer
---
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app: selenium-hub
app.kubernetes.io/name: selenium-hub
app.kubernetes.io/part-of: selenium-grid
name: selenium-hub
namespace: selenium
spec:
replicas: 1
selector:
matchLabels:
app: selenium-hub
template:
metadata:
labels:
app: selenium-hub
app.kubernetes.io/name: selenium-hub
app.kubernetes.io/part-of: selenium-grid
spec:
containers:
- env:
- name: SE_NODE_SESSION_TIMEOUT
value: '300'
- name: SE_SESSION_REQUEST_TIMEOUT
value: '300'
- name: SE_SESSION_RETRY_INTERVAL
value: '5'
- name: JAVA_OPTS
value: -Xmx512m
image: selenium/hub:4.27.0
livenessProbe:
httpGet:
path: /wd/hub/status
port: 4444
initialDelaySeconds: 30
periodSeconds: 15
timeoutSeconds: 5
name: selenium-hub
ports:
- containerPort: 4444
name: web
- containerPort: 4442
name: publish
- containerPort: 4443
name: subscribe
readinessProbe:
httpGet:
path: /wd/hub/status
port: 4444
initialDelaySeconds: 10
periodSeconds: 5
timeoutSeconds: 5
resources:
limits:
cpu: 500m
memory: 1Gi
requests:
cpu: 250m
memory: 512Mi
---
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app: selenium-node-chrome
app.kubernetes.io/name: selenium-node-chrome
app.kubernetes.io/part-of: selenium-grid
name: selenium-node-chrome
namespace: selenium
spec:
replicas: 1
selector:
matchLabels:
app: selenium-node-chrome
template:
metadata:
labels:
app: selenium-node-chrome
app.kubernetes.io/name: selenium-node-chrome
app.kubernetes.io/part-of: selenium-grid
spec:
containers:
- env:
- name: SE_EVENT_BUS_HOST
value: selenium-hub
- name: SE_EVENT_BUS_PUBLISH_PORT
value: '4442'
- name: SE_EVENT_BUS_SUBSCRIBE_PORT
value: '4443'
- name: SE_NODE_MAX_SESSIONS
value: '2'
- name: SE_NODE_OVERRIDE_MAX_SESSIONS
value: 'false'
- name: SE_VNC_NO_PASSWORD
value: '1'
- name: SE_SCREEN_WIDTH
value: '1920'
- name: SE_SCREEN_HEIGHT
value: '1080'
- name: SE_NODE_SESSION_TIMEOUT
value: '300'
image: selenium/node-chrome:4.27.0
livenessProbe:
httpGet:
path: /status
port: 5555
initialDelaySeconds: 30
periodSeconds: 15
name: selenium-chrome
ports:
- containerPort: 5555
name: node
readinessProbe:
httpGet:
path: /status
port: 5555
initialDelaySeconds: 15
periodSeconds: 5
resources:
limits:
cpu: '1'
memory: 1Gi
requests:
cpu: 500m
memory: 512Mi
volumeMounts:
- mountPath: /dev/shm
name: dshm
- env:
- name: DISPLAY_CONTAINER_NAME
value: localhost
- name: SE_SCREEN_WIDTH
value: '1920'
- name: SE_SCREEN_HEIGHT
value: '1080'
- name: SE_VIDEO_FILE_NAME
value: auto
- name: SE_VIDEO_UPLOAD_ENABLED
value: 'false'
image: selenium/video:ffmpeg-7.1-20250101
name: video
resources:
limits:
cpu: 500m
memory: 768Mi
requests:
cpu: 250m
memory: 384Mi
volumeMounts:
- mountPath: /videos
name: selenium-videos
volumes:
- emptyDir:
medium: Memory
sizeLimit: 2Gi
name: dshm
- emptyDir:
sizeLimit: 5Gi
name: selenium-videos
---
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app: selenium-node-firefox
app.kubernetes.io/name: selenium-node-firefox
app.kubernetes.io/part-of: selenium-grid
name: selenium-node-firefox
namespace: selenium
spec:
replicas: 1
selector:
matchLabels:
app: selenium-node-firefox
template:
metadata:
labels:
app: selenium-node-firefox
app.kubernetes.io/name: selenium-node-firefox
app.kubernetes.io/part-of: selenium-grid
spec:
containers:
- env:
- name: SE_EVENT_BUS_HOST
value: selenium-hub
- name: SE_EVENT_BUS_PUBLISH_PORT
value: '4442'
- name: SE_EVENT_BUS_SUBSCRIBE_PORT
value: '4443'
- name: SE_NODE_MAX_SESSIONS
value: '1'
- name: SE_NODE_OVERRIDE_MAX_SESSIONS
value: 'true'
- name: SE_VNC_NO_PASSWORD
value: '1'
- name: SE_START_VNC
value: 'false'
- name: SE_SCREEN_WIDTH
value: '1920'
- name: SE_SCREEN_HEIGHT
value: '1080'
- name: SE_NODE_SESSION_TIMEOUT
value: '300'
image: selenium/node-firefox:4.27.0
livenessProbe:
failureThreshold: 5
httpGet:
path: /status
port: 5555
initialDelaySeconds: 30
periodSeconds: 15
timeoutSeconds: 5
name: selenium-firefox
ports:
- containerPort: 5555
name: node
readinessProbe:
failureThreshold: 5
httpGet:
path: /status
port: 5555
initialDelaySeconds: 15
periodSeconds: 5
timeoutSeconds: 5
resources:
limits:
cpu: '1'
memory: 2Gi
requests:
cpu: 500m
memory: 1Gi
volumeMounts:
- mountPath: /dev/shm
name: dshm
volumes:
- emptyDir:
medium: Memory
sizeLimit: 2Gi
name: dshm
---
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app: selenium-node-edge
app.kubernetes.io/name: selenium-node-edge
app.kubernetes.io/part-of: selenium-grid
name: selenium-node-edge
namespace: selenium
spec:
replicas: 1
selector:
matchLabels:
app: selenium-node-edge
template:
metadata:
labels:
app: selenium-node-edge
app.kubernetes.io/name: selenium-node-edge
app.kubernetes.io/part-of: selenium-grid
spec:
containers:
- env:
- name: SE_EVENT_BUS_HOST
value: selenium-hub
- name: SE_EVENT_BUS_PUBLISH_PORT
value: '4442'
- name: SE_EVENT_BUS_SUBSCRIBE_PORT
value: '4443'
- name: SE_NODE_MAX_SESSIONS
value: '1'
- name: SE_NODE_OVERRIDE_MAX_SESSIONS
value: 'true'
- name: SE_VNC_NO_PASSWORD
value: '1'
- name: SE_SCREEN_WIDTH
value: '1920'
- name: SE_SCREEN_HEIGHT
value: '1080'
- name: SE_NODE_SESSION_TIMEOUT
value: '300'
image: selenium/node-edge:4.27.0
livenessProbe:
httpGet:
path: /status
port: 5555
initialDelaySeconds: 30
periodSeconds: 15
name: selenium-edge
ports:
- containerPort: 5555
name: node
readinessProbe:
httpGet:
path: /status
port: 5555
initialDelaySeconds: 15
periodSeconds: 5
resources:
limits:
cpu: '1'
memory: 1Gi
requests:
cpu: 500m
memory: 512Mi
volumeMounts:
- mountPath: /dev/shm
name: dshm
volumes:
- emptyDir:
medium: Memory
sizeLimit: 2Gi
name: dshm
---
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
name: selenium-hub
namespace: selenium
spec:
entryPoints:
- websecure
routes:
- kind: Rule
match: Host(`selenium.iamworkin.lan`)
services:
- name: selenium-hub
port: 4444
tls:
secretName: selenium-tls

View File

@@ -0,0 +1,285 @@
using FluentAssertions;
using YamlDotNet.RepresentationModel;
using Xunit;
namespace BluejayInfraLint.Tests;
/// <summary>
/// Lint tests for the manifests whose inventory path starts with <c>fc-desktop/</c>.
/// They pin the namespace capacity guards (one ResourceQuota, one LimitRange), the
/// metadata both guards must carry, and the set of resource kinds this repository is
/// allowed to own for fc-desktop.
/// </summary>
/// <remarks>
/// Expectations shared by the ResourceQuota and the LimitRange (labels, Argo CD
/// annotations, phase annotation) live in private helpers so the two guards cannot
/// drift apart. Test method names are kept stable because they are the externally
/// visible test identifiers.
/// </remarks>
[Trait("Category", "Unit")]
public sealed class FcDesktopCapacityPolicyTests
{
    private const string PhaseAnnotation = "flowercore.io/phase";
    private const string ExpectedPhase = "sprint-44-cx-9-phase-a";

    private static readonly ManifestInventory Inventory = ManifestInventory.Load();

    // Labels every capacity-guard object must carry (a subset check: extra labels are allowed).
    private static readonly Dictionary<string, string> ExpectedTraceableLabels = new Dictionary<string, string>
    {
        ["app.kubernetes.io/name"] = "fc-desktop",
        ["app.kubernetes.io/part-of"] = "remotedesktop",
        ["app.kubernetes.io/component"] = "capacity-guard",
        ["app.kubernetes.io/managed-by"] = "argocd",
        ["flowercore.io/owner"] = "infra",
    };

    /// <summary>The <c>apps/fc-desktop</c> directory must exist under the inventory root.</summary>
    [Fact]
    public void FcDesktop_AppDirectoryMustExist()
    {
        var appDirectory = Path.Combine(Inventory.BluejayRoot, "apps", "fc-desktop");

        Directory.Exists(appDirectory).Should().BeTrue();
    }

    /// <summary>Exactly one ResourceQuota document may be declared for fc-desktop.</summary>
    [Fact]
    public void FcDesktop_MustHaveExactlyOneResourceQuota()
    {
        FcDesktopDocuments()
            .Where(document => document.Kind == "ResourceQuota")
            .Should()
            .ContainSingle();
    }

    /// <summary>The quota must keep the file path, name and namespace pinned below.</summary>
    [Fact]
    public void FcDesktop_ResourceQuotaMustAdoptLiveSessionCapObject()
    {
        var quota = ResourceQuota();

        quota.RelativePath.Should().Be("fc-desktop/resourcequota.yaml");
        quota.Name.Should().Be("fc-desktop-session-cap");
        quota.Namespace.Should().Be("fc-desktop");
    }

    /// <summary>Each <c>spec.hard</c> entry must carry exactly the expected scalar text.</summary>
    [Theory]
    [InlineData("count/pods", "15")]
    [InlineData("requests.cpu", "8")]
    [InlineData("requests.memory", "16Gi")]
    [InlineData("requests.storage", "500Gi")]
    [InlineData("persistentvolumeclaims", "30")]
    public void FcDesktop_ResourceQuotaMustDeclarePhaseOneHardLimits(string key, string value)
    {
        ResourceQuota().Scalar("spec", "hard", key).Should().Be(value);
    }

    /// <summary>The quota must carry every label in <see cref="ExpectedTraceableLabels"/>.</summary>
    [Fact]
    public void FcDesktop_ResourceQuotaMustCarryTraceableLabels()
    {
        AssertCarriesTraceableLabels(ResourceQuota());
    }

    /// <summary>
    /// The compute cap must be expressed with the <c>requests.*</c> keys; the bare
    /// <c>cpu</c>/<c>memory</c> keys must not appear in <c>spec.hard</c>.
    /// </summary>
    [Fact]
    public void FcDesktop_ResourceQuotaMustUseRequestsKeysForComputeCap()
    {
        var hardKeys = HardLimitKeys(ResourceQuota());

        hardKeys.Should().Contain(new[] { "requests.cpu", "requests.memory" });
        hardKeys.Should().NotContain(new[] { "cpu", "memory" });
    }

    /// <summary>The quota must not be an Argo CD hook nor request Force/Replace syncs.</summary>
    [Fact]
    public void FcDesktop_ResourceQuotaMustAvoidDestructiveArgoAnnotations()
    {
        AssertAvoidsDestructiveArgoAnnotations(ResourceQuota());
    }

    /// <summary>The quota must be annotated with the expected phase marker.</summary>
    [Fact]
    public void FcDesktop_ResourceQuotaMustRecordPhaseAInfraOnlyScope()
    {
        AssertRecordsExpectedPhase(ResourceQuota());
    }

    /// <summary>Exactly one LimitRange document may be declared for fc-desktop.</summary>
    [Fact]
    public void FcDesktop_MustHaveExactlyOneLimitRange()
    {
        FcDesktopDocuments()
            .Where(document => document.Kind == "LimitRange")
            .Should()
            .ContainSingle();
    }

    /// <summary>The LimitRange must keep the file path, name and namespace pinned below.</summary>
    [Fact]
    public void FcDesktop_LimitRangeMustLiveBesideResourceQuota()
    {
        var limitRange = LimitRange();

        limitRange.RelativePath.Should().Be("fc-desktop/limitrange.yaml");
        limitRange.Name.Should().Be("fc-desktop-pod-defaults");
        limitRange.Namespace.Should().Be("fc-desktop");
    }

    /// <summary><c>spec.limits</c> must hold exactly one rule, of type <c>Container</c>.</summary>
    [Fact]
    public void FcDesktop_LimitRangeMustHaveSingleContainerRule()
    {
        // Assert the cardinality first and reuse the asserted element. Fetching the rule
        // through LimitRangeRule() up front would throw a bare InvalidOperationException
        // from Single() before the assertion could report the violation.
        var rule = LimitRange()
            .MappingSequence("spec", "limits")
            .Should()
            .ContainSingle()
            .Subject;

        ManifestNodeExtensions.Scalar(rule, "type").Should().Be("Container");
    }

    /// <summary>
    /// Each section/key of the single LimitRange rule must carry exactly the expected
    /// scalar text (the comparison is textual, so "1.0" and "1" are different values).
    /// </summary>
    [Theory]
    [InlineData("default", "cpu", "1.0")]
    [InlineData("default", "memory", "2Gi")]
    [InlineData("defaultRequest", "cpu", "500m")]
    [InlineData("defaultRequest", "memory", "1Gi")]
    [InlineData("max", "cpu", "2.0")]
    [InlineData("max", "memory", "4Gi")]
    [InlineData("min", "cpu", "100m")]
    [InlineData("min", "memory", "128Mi")]
    public void FcDesktop_LimitRangeMustDeclarePerPodShape(string section, string key, string value)
    {
        ManifestNodeExtensions.Scalar(LimitRangeRule(), section, key).Should().Be(value);
    }

    /// <summary>The LimitRange must carry every label in <see cref="ExpectedTraceableLabels"/>.</summary>
    [Fact]
    public void FcDesktop_LimitRangeMustCarryTraceableLabels()
    {
        AssertCarriesTraceableLabels(LimitRange());
    }

    /// <summary>The LimitRange must not be an Argo CD hook nor request Force/Replace syncs.</summary>
    [Fact]
    public void FcDesktop_LimitRangeMustAvoidDestructiveArgoAnnotations()
    {
        AssertAvoidsDestructiveArgoAnnotations(LimitRange());
    }

    /// <summary>The LimitRange must be annotated with the expected phase marker.</summary>
    [Fact]
    public void FcDesktop_LimitRangeMustRecordPhaseAInfraOnlyScope()
    {
        AssertRecordsExpectedPhase(LimitRange());
    }

    /// <summary>No Deployment or Service may be declared under fc-desktop.</summary>
    [Fact]
    public void FcDesktop_BluejayInfraMustNotOwnDeploymentOrService()
    {
        FcDesktopDocuments()
            .Select(document => document.Kind)
            .Should()
            .NotContain(new[] { "Deployment", "Service" });
    }

    /// <summary>Every fc-desktop document must be one of the allow-listed kinds.</summary>
    [Fact]
    public void FcDesktop_BluejayInfraMustOnlyOwnInfraResourceKinds()
    {
        var allowedKinds = new HashSet<string>(StringComparer.Ordinal)
        {
            "Certificate",
            "IngressRoute",
            "NetworkPolicy",
            "ResourceQuota",
            "LimitRange",
        };

        FcDesktopDocuments()
            .Select(document => document.Kind)
            .Should()
            .OnlyContain(kind => allowedKinds.Contains(kind));
    }

    /// <summary>The NetworkPolicy names must be exactly the four listed below.</summary>
    [Fact]
    public void FcDesktop_NetworkPolicySetMustRemainPresent()
    {
        FcDesktopDocuments()
            .Where(document => document.Kind == "NetworkPolicy")
            .Select(document => document.Name)
            .Should()
            .BeEquivalentTo(
                "desktop-isolation",
                "fc-desktop-default-deny",
                "remotedesktop-web-isolation",
                "cm-acme-http-solver-allow");
    }

    /// <summary>The web Certificate and IngressRoute must both be declared.</summary>
    [Fact]
    public void FcDesktop_TlsIngressMustRemainOwnedByInfra()
    {
        FcDesktopDocuments()
            .Should()
            .Contain(document => document.Kind == "Certificate" && document.Name == "remotedesktop-web-tls")
            .And
            .Contain(document => document.Kind == "IngressRoute" && document.Name == "remotedesktop-web");
    }

    // ---- shared expectations -------------------------------------------------------

    /// <summary>Asserts that <paramref name="document"/> carries all expected traceability labels.</summary>
    private static void AssertCarriesTraceableLabels(ManifestDocument document)
    {
        Labels(document).Should().Contain(ExpectedTraceableLabels);
    }

    /// <summary>
    /// Asserts that <paramref name="document"/> has no Argo CD hook annotations and that its
    /// sync-options annotation (treated as empty when absent) requests neither Force nor Replace.
    /// </summary>
    private static void AssertAvoidsDestructiveArgoAnnotations(ManifestDocument document)
    {
        document.Scalar("metadata", "annotations", "argocd.argoproj.io/hook").Should().BeNull();
        document.Scalar("metadata", "annotations", "argocd.argoproj.io/hook-delete-policy").Should().BeNull();

        var syncOptions = document.Scalar("metadata", "annotations", "argocd.argoproj.io/sync-options") ?? string.Empty;
        syncOptions.Should().NotContain("Force=true");
        syncOptions.Should().NotContain("Replace=true");
    }

    /// <summary>Asserts that <paramref name="document"/> is annotated with the expected phase.</summary>
    private static void AssertRecordsExpectedPhase(ManifestDocument document)
    {
        document.Scalar("metadata", "annotations", PhaseAnnotation)
            .Should()
            .Be(ExpectedPhase);
    }

    // ---- manifest lookups ----------------------------------------------------------

    /// <summary>Returns every inventory document whose relative path starts with <c>fc-desktop/</c>.</summary>
    private static IReadOnlyList<ManifestDocument> FcDesktopDocuments()
    {
        return Inventory.Documents
            .Where(document => document.RelativePath.StartsWith("fc-desktop/", StringComparison.Ordinal))
            .ToList();
    }

    /// <summary>Returns the only ResourceQuota document.</summary>
    /// <exception cref="InvalidOperationException">There is not exactly one ResourceQuota.</exception>
    private static ManifestDocument ResourceQuota()
    {
        return FcDesktopDocuments()
            .Single(document => document.Kind == "ResourceQuota");
    }

    /// <summary>Returns the only LimitRange document.</summary>
    /// <exception cref="InvalidOperationException">There is not exactly one LimitRange.</exception>
    private static ManifestDocument LimitRange()
    {
        return FcDesktopDocuments()
            .Single(document => document.Kind == "LimitRange");
    }

    /// <summary>Returns the only entry of the LimitRange's <c>spec.limits</c> sequence.</summary>
    /// <exception cref="InvalidOperationException">The sequence does not hold exactly one entry.</exception>
    private static YamlMappingNode LimitRangeRule()
    {
        return LimitRange()
            .MappingSequence("spec", "limits")
            .Single();
    }

    /// <summary>Collects the non-blank scalar keys of <c>spec.hard</c> into an ordinal set.</summary>
    /// <exception cref="InvalidOperationException">The document has no <c>spec.hard</c> mapping.</exception>
    private static IReadOnlySet<string> HardLimitKeys(ManifestDocument document)
    {
        var hard = ManifestNodeExtensions.Mapping(document.Root, "spec", "hard")
            ?? throw new InvalidOperationException($"{document.Descriptor} is missing spec.hard.");

        return hard.Children.Keys
            .OfType<YamlScalarNode>()
            .Select(key => key.Value)
            .Where(value => !string.IsNullOrWhiteSpace(value))
            .Cast<string>()
            .ToHashSet(StringComparer.Ordinal);
    }

    /// <summary>
    /// Reads <c>metadata.labels</c> into an ordinal dictionary. Entries whose key or value is
    /// not a scalar node are skipped; null scalar text is stored as the empty string.
    /// </summary>
    /// <exception cref="InvalidOperationException">The document has no <c>metadata.labels</c> mapping.</exception>
    private static IReadOnlyDictionary<string, string> Labels(ManifestDocument document)
    {
        var labels = ManifestNodeExtensions.Mapping(document.Root, "metadata", "labels")
            ?? throw new InvalidOperationException($"{document.Descriptor} is missing metadata.labels.");

        return labels.Children
            .Where(entry => entry.Key is YamlScalarNode && entry.Value is YamlScalarNode)
            .ToDictionary(
                entry => ((YamlScalarNode)entry.Key).Value ?? string.Empty,
                entry => ((YamlScalarNode)entry.Value).Value ?? string.Empty,
                StringComparer.Ordinal);
    }
}

View File

@@ -234,7 +234,7 @@ public sealed class FleetManifestLintTests
{
deployments.Should().ContainKey(expectedRunner.Key);
var container = deployments[expectedRunner.Key].ContainerMappings().Should().ContainSingle().Subject;
var container = RunnerContainer(deployments[expectedRunner.Key]);
EnvValue(container, "REPO_URL").Should().Be(expectedRunner.Value);
EnvValue(container, "EPHEMERAL").Should().Be("true");
EnvValue(container, "LABELS").Should().Be("self-hosted,linux,fc-build-linux");
@@ -250,7 +250,7 @@ public sealed class FleetManifestLintTests
{
foreach (var deployment in GitHubRunnerDeployments().Values)
{
var container = deployment.ContainerMappings().Should().ContainSingle().Subject;
var container = RunnerContainer(deployment);
foreach (var expectedEnv in WritableRunnerEnv)
{
@@ -430,7 +430,6 @@ public sealed class FleetManifestLintTests
var expectedFiles = new[]
{
"1password-item.yaml",
"argocd-application.yaml",
"certificate-web.yaml",
"clusterrole-operator.yaml",
"clusterrolebinding-operator.yaml",
@@ -586,17 +585,15 @@ public sealed class FleetManifestLintTests
}
[Fact]
public void FcDeviceManagement_ArgocdApplicationMustMatchApplicationSetDiscoveryConventions()
public void FcDeviceManagement_MustRelyOnApplicationSetDiscovery()
{
var application = FcDeviceManagementDocuments()
.Single(document => document.Kind == "Application" && document.Name == "infra-fc-devicemgmt");
application.Namespace.Should().Be("argocd");
application.Scalar("spec", "source", "repoURL")
FcDeviceManagementDocuments()
.Should()
.Be("http://gitea-clusterip.gitea.svc.cluster.local:3000/bluejay/bluejay-infra.git");
application.Scalar("spec", "source", "path").Should().Be("apps/fc-devicemgmt");
application.Scalar("spec", "destination", "namespace").Should().Be("fc-devicemgmt");
.NotContain(document => document.Kind == "Application", "the root ApplicationSet owns apps/fc-devicemgmt discovery");
FcDeviceManagementDocuments()
.Should()
.Contain(document => document.Kind == "Namespace" && document.Name == "fc-devicemgmt");
}
private static IEnumerable<string> ProbeViolations(
@@ -631,6 +628,12 @@ public sealed class FleetManifestLintTests
.ToDictionary(document => document.Name, StringComparer.Ordinal);
}
/// <summary>
/// Picks the container named exactly "runner" out of the deployment's container mappings.
/// </summary>
/// <param name="deployment">Deployment manifest whose containers are searched.</param>
/// <returns>The single container mapping whose <c>name</c> scalar is "runner".</returns>
/// <exception cref="InvalidOperationException">
/// Zero or more than one container is named "runner".
/// </exception>
private static YamlMappingNode RunnerContainer(ManifestDocument deployment) =>
    // A constant string pattern compares ordinally, like the explicit Ordinal comparison it replaces.
    deployment.ContainerMappings()
        .Single(candidate => ManifestNodeExtensions.Scalar(candidate, "name") is "runner");
private static int ReplicaCount(ManifestDocument document)
{
return int.TryParse(document.Scalar("spec", "replicas"), out var replicas) ? replicas : 1;