Compare commits
21 Commits
claude/ci1
...
211ecbf294
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
211ecbf294 | ||
|
|
f298339152 | ||
|
|
6e7d88db49 | ||
|
|
5ae50bd491 | ||
|
|
653d4472f5 | ||
|
|
eb8693e1ce | ||
|
|
667777a653 | ||
|
|
84c9feb893 | ||
|
|
427dbfcef2 | ||
|
|
b651a4e2d0 | ||
|
|
b998f50f48 | ||
|
|
8fd9ae1cd3 | ||
|
|
fc2aca0e9e | ||
|
|
ba18c52130 | ||
|
|
9f6dc1a9d5 | ||
|
|
0bf47dfa33 | ||
|
|
87a7d7c70a | ||
|
|
1c4145a581 | ||
|
|
c50a403f74 | ||
|
|
fb7bd10528 | ||
|
|
6c21d14a98 |
26
apps/fc-devicemgmt/1password-item.yaml
Normal file
26
apps/fc-devicemgmt/1password-item.yaml
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
# Runtime secrets for FlowerCore.DeviceManagement.
|
||||||
|
#
|
||||||
|
# The 1Password operator syncs this OnePasswordItem into a Kubernetes Secret with the
|
||||||
|
# same name. Expected fields:
|
||||||
|
# DB-Password
|
||||||
|
# mtls-ca.pem
|
||||||
|
# mtls-client.crt
|
||||||
|
# mtls-client.key
|
||||||
|
# mtls-chain.pem
|
||||||
|
#
|
||||||
|
# Do not add literal secret values to this repo. Runtime pods consume the
|
||||||
|
# synced Secret through env vars and read-only mounts.
|
||||||
|
apiVersion: onepassword.com/v1
|
||||||
|
kind: OnePasswordItem
|
||||||
|
metadata:
|
||||||
|
name: fc-devicemgmt-runtime
|
||||||
|
namespace: fc-devicemgmt
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: fc-devicemgmt
|
||||||
|
app.kubernetes.io/component: secrets
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
spec:
|
||||||
|
itemPath: "vaults/IAmWorkin/items/FlowerCore DeviceManagement Runtime"
|
||||||
33
apps/fc-devicemgmt/argocd-application.yaml
Normal file
33
apps/fc-devicemgmt/argocd-application.yaml
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
# Explicit ArgoCD Application shape for bootstrap/review.
|
||||||
|
#
|
||||||
|
# The live bluejay-infra ApplicationSet already discovers apps/* directories
|
||||||
|
# and creates this same Application name (`infra-fc-devicemgmt`) automatically.
|
||||||
|
# Keep repoURL on the internal Gitea ClusterIP URL; ArgoCD does not trust the
|
||||||
|
# external step-ca HTTPS endpoint.
|
||||||
|
apiVersion: argoproj.io/v1alpha1
|
||||||
|
kind: Application
|
||||||
|
metadata:
|
||||||
|
name: infra-fc-devicemgmt
|
||||||
|
namespace: argocd
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: fc-devicemgmt
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
spec:
|
||||||
|
project: default
|
||||||
|
source:
|
||||||
|
repoURL: http://gitea-clusterip.gitea.svc.cluster.local:3000/bluejay/bluejay-infra.git
|
||||||
|
targetRevision: main
|
||||||
|
path: apps/fc-devicemgmt
|
||||||
|
destination:
|
||||||
|
server: https://kubernetes.default.svc
|
||||||
|
namespace: fc-devicemgmt
|
||||||
|
syncPolicy:
|
||||||
|
automated:
|
||||||
|
prune: true
|
||||||
|
selfHeal: true
|
||||||
|
syncOptions:
|
||||||
|
- CreateNamespace=true
|
||||||
|
- ServerSideApply=true
|
||||||
30
apps/fc-devicemgmt/certificate-web.yaml
Normal file
30
apps/fc-devicemgmt/certificate-web.yaml
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
# Certificate for devices.iamworkin.lan.
|
||||||
|
#
|
||||||
|
# Preflight gate: FlowerCore.DNS / pfSense must contain an explicit A record:
|
||||||
|
# devices.iamworkin.lan -> 10.0.56.200
|
||||||
|
# before this Certificate is synced. step-ca ACME cannot see the CoreDNS
|
||||||
|
# wildcard, so missing pfSense DNS produces cert-manager HTTP-01 backoff
|
||||||
|
# (feedback_pfsense_dns_required_for_acme).
|
||||||
|
apiVersion: cert-manager.io/v1
|
||||||
|
kind: Certificate
|
||||||
|
metadata:
|
||||||
|
name: fc-devicemgmt-web-tls
|
||||||
|
namespace: fc-devicemgmt
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: fc-devicemgmt-web
|
||||||
|
app.kubernetes.io/component: web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
annotations:
|
||||||
|
flowercore.io/dns-preflight: "devices.iamworkin.lan must resolve to 10.0.56.200 before ACME sync"
|
||||||
|
spec:
|
||||||
|
secretName: fc-devicemgmt-web-tls
|
||||||
|
issuerRef:
|
||||||
|
name: step-ca-acme
|
||||||
|
kind: ClusterIssuer
|
||||||
|
dnsNames:
|
||||||
|
- devices.iamworkin.lan
|
||||||
|
duration: 720h
|
||||||
|
renewBefore: 240h
|
||||||
81
apps/fc-devicemgmt/clusterrole-operator.yaml
Normal file
81
apps/fc-devicemgmt/clusterrole-operator.yaml
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: ClusterRole
|
||||||
|
metadata:
|
||||||
|
name: fc-devicemgmt-operator
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: fc-devicemgmt-operator
|
||||||
|
app.kubernetes.io/component: operator
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
rules:
|
||||||
|
- apiGroups:
|
||||||
|
- devices.flowercore.io
|
||||||
|
resources:
|
||||||
|
- '*'
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- list
|
||||||
|
- watch
|
||||||
|
- create
|
||||||
|
- update
|
||||||
|
- patch
|
||||||
|
- delete
|
||||||
|
- apiGroups:
|
||||||
|
- devices.flowercore.io
|
||||||
|
resources:
|
||||||
|
- devices/status
|
||||||
|
- devices/finalizers
|
||||||
|
- devicegroups/status
|
||||||
|
- devicegroups/finalizers
|
||||||
|
- devicepolicies/status
|
||||||
|
- devicepolicies/finalizers
|
||||||
|
- remotecommands/status
|
||||||
|
- remotecommands/finalizers
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- update
|
||||||
|
- patch
|
||||||
|
- apiGroups:
|
||||||
|
- apps
|
||||||
|
resources:
|
||||||
|
- deployments
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- apiGroups:
|
||||||
|
- ""
|
||||||
|
resources:
|
||||||
|
- pods
|
||||||
|
- services
|
||||||
|
- configmaps
|
||||||
|
- secrets
|
||||||
|
- events
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- list
|
||||||
|
- watch
|
||||||
|
- create
|
||||||
|
- update
|
||||||
|
- patch
|
||||||
|
- delete
|
||||||
|
- apiGroups:
|
||||||
|
- batch
|
||||||
|
resources:
|
||||||
|
- jobs
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- list
|
||||||
|
- watch
|
||||||
|
- create
|
||||||
|
- update
|
||||||
|
- patch
|
||||||
|
- delete
|
||||||
|
- apiGroups:
|
||||||
|
- networking.k8s.io
|
||||||
|
resources:
|
||||||
|
- networkpolicies
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- list
|
||||||
|
- watch
|
||||||
19
apps/fc-devicemgmt/clusterrolebinding-operator.yaml
Normal file
19
apps/fc-devicemgmt/clusterrolebinding-operator.yaml
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: ClusterRoleBinding
|
||||||
|
metadata:
|
||||||
|
name: fc-devicemgmt-operator
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: fc-devicemgmt-operator
|
||||||
|
app.kubernetes.io/component: operator
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
roleRef:
|
||||||
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
kind: ClusterRole
|
||||||
|
name: fc-devicemgmt-operator
|
||||||
|
subjects:
|
||||||
|
- kind: ServiceAccount
|
||||||
|
name: fc-devicemgmt-operator
|
||||||
|
namespace: fc-devicemgmt
|
||||||
109
apps/fc-devicemgmt/deployment-operator.yaml
Normal file
109
apps/fc-devicemgmt/deployment-operator.yaml
Normal file
@@ -0,0 +1,109 @@
|
|||||||
|
# FlowerCore.DeviceManagement Operator.
|
||||||
|
#
|
||||||
|
# KubeOps controller for devices.flowercore.io resources. Operator-created
|
||||||
|
# children must set OwnerReferences + traceability labels/annotations per
|
||||||
|
# k8s-pod-ownership-and-traceability-standard.md. The fc-devicemgmt-operator ClusterRole (clusterrole-operator.yaml) grants
|
||||||
|
# apps/deployments/get so the process can resolve its own Deployment UID.
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: fc-devicemgmt-operator
|
||||||
|
namespace: fc-devicemgmt
|
||||||
|
labels:
|
||||||
|
app: fc-devicemgmt-operator
|
||||||
|
app.kubernetes.io/name: fc-devicemgmt-operator
|
||||||
|
app.kubernetes.io/component: operator
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
annotations:
|
||||||
|
flowercore.io/traceability-standard: k8s-pod-ownership-and-traceability-standard
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
revisionHistoryLimit: 3
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: fc-devicemgmt-operator
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: fc-devicemgmt-operator
|
||||||
|
app.kubernetes.io/name: fc-devicemgmt-operator
|
||||||
|
app.kubernetes.io/component: operator
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
annotations:
|
||||||
|
prometheus.io/scrape: "true"
|
||||||
|
prometheus.io/port: "8080"
|
||||||
|
prometheus.io/path: "/metrics"
|
||||||
|
flowercore.io/audit-trace-id: "runtime-activity-trace"
|
||||||
|
spec:
|
||||||
|
serviceAccountName: fc-devicemgmt-operator
|
||||||
|
securityContext:
|
||||||
|
fsGroup: 1654
|
||||||
|
fsGroupChangePolicy: OnRootMismatch
|
||||||
|
containers:
|
||||||
|
- name: operator
|
||||||
|
image: localhost/fc-devicemgmt-operator:v20260512-cx5
|
||||||
|
imagePullPolicy: Never
|
||||||
|
ports:
|
||||||
|
- name: metrics
|
||||||
|
containerPort: 8080
|
||||||
|
env:
|
||||||
|
- name: ASPNETCORE_ENVIRONMENT
|
||||||
|
value: "Production"
|
||||||
|
- name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
|
||||||
|
value: "false"
|
||||||
|
- name: POD_NAME
|
||||||
|
valueFrom:
|
||||||
|
fieldRef:
|
||||||
|
fieldPath: metadata.name
|
||||||
|
- name: POD_NAMESPACE
|
||||||
|
valueFrom:
|
||||||
|
fieldRef:
|
||||||
|
fieldPath: metadata.namespace
|
||||||
|
- name: FLOWERCORE_KUBERNETES_OWNER_DEPLOYMENT
|
||||||
|
value: "fc-devicemgmt-operator"
|
||||||
|
- name: FlowerCore__Service__Name
|
||||||
|
value: "FlowerCore.DeviceManagement.Operator"
|
||||||
|
- name: FlowerCore__DeviceManagement__DefaultTenantId
|
||||||
|
value: "system"
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 50m
|
||||||
|
memory: 128Mi
|
||||||
|
limits:
|
||||||
|
cpu: 500m
|
||||||
|
memory: 512Mi
|
||||||
|
readinessProbe:
|
||||||
|
tcpSocket:
|
||||||
|
port: 8080
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 10
|
||||||
|
livenessProbe:
|
||||||
|
tcpSocket:
|
||||||
|
port: 8080
|
||||||
|
initialDelaySeconds: 20
|
||||||
|
periodSeconds: 30
|
||||||
|
securityContext:
|
||||||
|
runAsNonRoot: true
|
||||||
|
runAsUser: 1654
|
||||||
|
runAsGroup: 1654
|
||||||
|
allowPrivilegeEscalation: false
|
||||||
|
readOnlyRootFilesystem: true
|
||||||
|
capabilities:
|
||||||
|
drop:
|
||||||
|
- ALL
|
||||||
|
volumeMounts:
|
||||||
|
- name: tmp
|
||||||
|
mountPath: /tmp
|
||||||
|
- name: logs
|
||||||
|
mountPath: /app/logs
|
||||||
|
volumes:
|
||||||
|
- name: tmp
|
||||||
|
emptyDir: {}
|
||||||
|
- name: logs
|
||||||
|
emptyDir: {}
|
||||||
135
apps/fc-devicemgmt/deployment-web.yaml
Normal file
135
apps/fc-devicemgmt/deployment-web.yaml
Normal file
@@ -0,0 +1,135 @@
|
|||||||
|
# FlowerCore.DeviceManagement Web.
|
||||||
|
#
|
||||||
|
# Source repo is expected to ship FlowerCore.DeviceManagement.Web in a later
|
||||||
|
# Sprint 9+ lane. This manifest is static-valid without requiring the image to
|
||||||
|
# exist yet; import localhost/fc-devicemgmt-web:<tag> to all schedulable RKE2
|
||||||
|
# nodes before letting ArgoCD sync a live rollout.
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: fc-devicemgmt-web
|
||||||
|
namespace: fc-devicemgmt
|
||||||
|
labels:
|
||||||
|
app: fc-devicemgmt-web
|
||||||
|
app.kubernetes.io/name: fc-devicemgmt-web
|
||||||
|
app.kubernetes.io/component: web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
annotations:
|
||||||
|
flowercore.io/traceability-standard: k8s-pod-ownership-and-traceability-standard
|
||||||
|
spec:
|
||||||
|
replicas: 2
|
||||||
|
revisionHistoryLimit: 3
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: fc-devicemgmt-web
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: fc-devicemgmt-web
|
||||||
|
app.kubernetes.io/name: fc-devicemgmt-web
|
||||||
|
app.kubernetes.io/component: web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
annotations:
|
||||||
|
prometheus.io/scrape: "true"
|
||||||
|
prometheus.io/port: "8080"
|
||||||
|
prometheus.io/path: "/metrics"
|
||||||
|
flowercore.io/audit-trace-id: "runtime-activity-trace"
|
||||||
|
spec:
|
||||||
|
securityContext:
|
||||||
|
fsGroup: 1654
|
||||||
|
fsGroupChangePolicy: OnRootMismatch
|
||||||
|
containers:
|
||||||
|
- name: web
|
||||||
|
image: localhost/fc-devicemgmt-web:v20260512-cx5
|
||||||
|
imagePullPolicy: Never
|
||||||
|
ports:
|
||||||
|
- name: http
|
||||||
|
containerPort: 8080
|
||||||
|
env:
|
||||||
|
- name: ASPNETCORE_URLS
|
||||||
|
value: "http://+:8080"
|
||||||
|
- name: ASPNETCORE_ENVIRONMENT
|
||||||
|
value: "Production"
|
||||||
|
- name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
|
||||||
|
value: "false"
|
||||||
|
- name: FlowerCore__Service__Name
|
||||||
|
value: "FlowerCore.DeviceManagement.Web"
|
||||||
|
- name: FlowerCore__DeviceManagement__DefaultTenantId
|
||||||
|
value: "system"
|
||||||
|
- name: FlowerCore__Database__Provider
|
||||||
|
value: "MySql"
|
||||||
|
- name: FlowerCore__Database__Host
|
||||||
|
value: "mysql.fc-mysql.svc"
|
||||||
|
- name: FlowerCore__Database__Database
|
||||||
|
value: "flowercore_devicemgmt"
|
||||||
|
- name: FlowerCore__Database__User
|
||||||
|
value: "fc_devicemgmt"
|
||||||
|
- name: FlowerCore__Database__Password
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: fc-devicemgmt-runtime
|
||||||
|
key: DB-Password
|
||||||
|
- name: FlowerCore__DeviceManagement__AgentMtls__CaPath
|
||||||
|
value: "/secrets/devicemgmt-mtls/mtls-ca.pem"
|
||||||
|
- name: FlowerCore__DeviceManagement__AgentMtls__ClientCertificatePath
|
||||||
|
value: "/secrets/devicemgmt-mtls/mtls-client.crt"
|
||||||
|
- name: FlowerCore__DeviceManagement__AgentMtls__ClientKeyPath
|
||||||
|
value: "/secrets/devicemgmt-mtls/mtls-client.key"
|
||||||
|
- name: FlowerCore__EventBus__Redis__Configuration
|
||||||
|
value: "redis.fc-redis.svc:6379"
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 100m
|
||||||
|
memory: 256Mi
|
||||||
|
limits:
|
||||||
|
cpu: 1000m
|
||||||
|
memory: 768Mi
|
||||||
|
startupProbe:
|
||||||
|
tcpSocket:
|
||||||
|
port: 8080
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 5
|
||||||
|
failureThreshold: 30
|
||||||
|
readinessProbe:
|
||||||
|
tcpSocket:
|
||||||
|
port: 8080
|
||||||
|
periodSeconds: 10
|
||||||
|
failureThreshold: 3
|
||||||
|
livenessProbe:
|
||||||
|
tcpSocket:
|
||||||
|
port: 8080
|
||||||
|
initialDelaySeconds: 30
|
||||||
|
periodSeconds: 30
|
||||||
|
failureThreshold: 3
|
||||||
|
securityContext:
|
||||||
|
runAsNonRoot: true
|
||||||
|
runAsUser: 1654
|
||||||
|
runAsGroup: 1654
|
||||||
|
allowPrivilegeEscalation: false
|
||||||
|
readOnlyRootFilesystem: true
|
||||||
|
capabilities:
|
||||||
|
drop:
|
||||||
|
- ALL
|
||||||
|
volumeMounts:
|
||||||
|
- name: tmp
|
||||||
|
mountPath: /tmp
|
||||||
|
- name: logs
|
||||||
|
mountPath: /app/logs
|
||||||
|
- name: devicemgmt-mtls
|
||||||
|
mountPath: /secrets/devicemgmt-mtls
|
||||||
|
readOnly: true
|
||||||
|
volumes:
|
||||||
|
- name: tmp
|
||||||
|
emptyDir: {}
|
||||||
|
- name: logs
|
||||||
|
emptyDir: {}
|
||||||
|
- name: devicemgmt-mtls
|
||||||
|
secret:
|
||||||
|
secretName: fc-devicemgmt-runtime
|
||||||
|
defaultMode: 0400
|
||||||
55
apps/fc-devicemgmt/ingressroute-web.yaml
Normal file
55
apps/fc-devicemgmt/ingressroute-web.yaml
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
# LAN ingress for FlowerCore.DeviceManagement Web.
|
||||||
|
#
|
||||||
|
# RKE2 Traefik has no built-in ACME resolver configured. Keep TLS certificate
|
||||||
|
# ownership in cert-manager Certificate/fc-devicemgmt-web-tls.
|
||||||
|
apiVersion: traefik.io/v1alpha1
|
||||||
|
kind: IngressRoute
|
||||||
|
metadata:
|
||||||
|
name: fc-devicemgmt-web
|
||||||
|
namespace: fc-devicemgmt
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: fc-devicemgmt-web
|
||||||
|
app.kubernetes.io/component: web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
spec:
|
||||||
|
entryPoints:
|
||||||
|
- websecure
|
||||||
|
routes:
|
||||||
|
- match: Host(`devices.iamworkin.lan`)
|
||||||
|
kind: Rule
|
||||||
|
services:
|
||||||
|
- name: fc-devicemgmt-web
|
||||||
|
port: 80
|
||||||
|
tls:
|
||||||
|
secretName: fc-devicemgmt-web-tls
|
||||||
|
|
||||||
|
# Future public agent/update host gate (OFF by default):
|
||||||
|
#
|
||||||
|
# Do not enable `update.flowercore.io` here until Authentik OIDC Q-OIDC-1
|
||||||
|
# resolves the public-device-management auth model and route ownership with
|
||||||
|
# UpdateCenter. When enabled, use a separate public IngressRoute with an
|
||||||
|
# explicit Method allowlist, public-host auth middleware, and public TLS
|
||||||
|
# certificate strategy. Leaving this as comments keeps ArgoCD from stealing
|
||||||
|
# live UpdateCenter traffic.
|
||||||
|
#
|
||||||
|
# apiVersion: traefik.io/v1alpha1
|
||||||
|
# kind: IngressRoute
|
||||||
|
# metadata:
|
||||||
|
# name: fc-devicemgmt-web-public
|
||||||
|
# namespace: fc-devicemgmt
|
||||||
|
# annotations:
|
||||||
|
# flowercore.io/public-host-gate: "disabled-until-Q-OIDC-1"
|
||||||
|
# spec:
|
||||||
|
# entryPoints:
|
||||||
|
# - websecure
|
||||||
|
# routes:
|
||||||
|
# - match: Host(`update.flowercore.io`) && (Method(`GET`) || Method(`HEAD`) || Method(`POST`) || Method(`OPTIONS`))
|
||||||
|
# kind: Rule
|
||||||
|
# services:
|
||||||
|
# - name: fc-devicemgmt-web
|
||||||
|
# port: 80
|
||||||
|
# tls:
|
||||||
|
# secretName: fc-devicemgmt-public-tls
|
||||||
13
apps/fc-devicemgmt/namespace.yaml
Normal file
13
apps/fc-devicemgmt/namespace.yaml
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
# FlowerCore.DeviceManagement namespace.
|
||||||
|
#
|
||||||
|
# ArgoCD discovers this directory as Application `infra-fc-devicemgmt`.
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Namespace
|
||||||
|
metadata:
|
||||||
|
name: fc-devicemgmt
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: fc-devicemgmt
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
224
apps/fc-devicemgmt/network-policy.yaml
Normal file
224
apps/fc-devicemgmt/network-policy.yaml
Normal file
@@ -0,0 +1,224 @@
|
|||||||
|
# FlowerCore.DeviceManagement NetworkPolicies.
|
||||||
|
#
|
||||||
|
# NetworkPolicies belong in bluejay-infra so ArgoCD owns rebuild state.
|
||||||
|
# Rules include Traefik post-DNAT backend ports per
|
||||||
|
# feedback_netpol_dnat_backend_port and Synology NFS egress for the requested
|
||||||
|
# cold-tier / future artifact path.
|
||||||
|
---
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: NetworkPolicy
|
||||||
|
metadata:
|
||||||
|
name: fc-devicemgmt-web-isolation
|
||||||
|
namespace: fc-devicemgmt
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: fc-devicemgmt-web
|
||||||
|
app.kubernetes.io/component: web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
spec:
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: fc-devicemgmt-web
|
||||||
|
policyTypes:
|
||||||
|
- Ingress
|
||||||
|
- Egress
|
||||||
|
ingress:
|
||||||
|
# LAN edge: only cluster Traefik should reach the Web pod for
|
||||||
|
# devices.iamworkin.lan.
|
||||||
|
- from:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: traefik-system
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: traefik
|
||||||
|
ports:
|
||||||
|
- port: 8080
|
||||||
|
protocol: TCP
|
||||||
|
# Direct LAN diagnostics are allowed only from FlowerCore LAN/VPN ranges.
|
||||||
|
- from:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.56.0/24
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.57.0/24
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.58.0/24
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.68.0/27
|
||||||
|
ports:
|
||||||
|
- port: 8080
|
||||||
|
protocol: TCP
|
||||||
|
egress:
|
||||||
|
# CoreDNS.
|
||||||
|
- to:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: kube-system
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
k8s-app: kube-dns
|
||||||
|
ports:
|
||||||
|
- port: 53
|
||||||
|
protocol: UDP
|
||||||
|
- port: 53
|
||||||
|
protocol: TCP
|
||||||
|
# Database namespace.
|
||||||
|
- to:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: fc-mysql
|
||||||
|
ports:
|
||||||
|
- port: 3306
|
||||||
|
protocol: TCP
|
||||||
|
# Redis backplane for multi-replica SignalR / live-status fan-out.
|
||||||
|
- to:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: fc-redis
|
||||||
|
ports:
|
||||||
|
- port: 6379
|
||||||
|
protocol: TCP
|
||||||
|
# Traefik VIP / in-cluster Traefik for self-callbacks and public URL
|
||||||
|
# generation tests. Include post-DNAT backend ports 8443 + 8080.
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.56.200/32
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: traefik-system
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: traefik
|
||||||
|
ports:
|
||||||
|
- port: 80
|
||||||
|
protocol: TCP
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8080
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8443
|
||||||
|
protocol: TCP
|
||||||
|
# Agent egress: LAN/VPN devices may run DM Agent in Generic, Kiosk, Pi,
|
||||||
|
# ThinClient, or Server mode. Keep this private-range only.
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.56.0/24
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.57.0/24
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.58.0/24
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.68.0/27
|
||||||
|
ports:
|
||||||
|
- port: 80
|
||||||
|
protocol: TCP
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8080
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5000
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5001
|
||||||
|
protocol: TCP
|
||||||
|
# Synology NFS cold-tier / artifact mount allowance.
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.58.3/32
|
||||||
|
ports:
|
||||||
|
- port: 2049
|
||||||
|
protocol: TCP
|
||||||
|
- port: 2049
|
||||||
|
protocol: UDP
|
||||||
|
- port: 111
|
||||||
|
protocol: TCP
|
||||||
|
- port: 111
|
||||||
|
protocol: UDP
|
||||||
|
---
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: NetworkPolicy
|
||||||
|
metadata:
|
||||||
|
name: fc-devicemgmt-operator-isolation
|
||||||
|
namespace: fc-devicemgmt
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: fc-devicemgmt-operator
|
||||||
|
app.kubernetes.io/component: operator
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
spec:
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: fc-devicemgmt-operator
|
||||||
|
policyTypes:
|
||||||
|
- Ingress
|
||||||
|
- Egress
|
||||||
|
ingress:
|
||||||
|
- from:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: monitoring
|
||||||
|
ports:
|
||||||
|
- port: 8080
|
||||||
|
protocol: TCP
|
||||||
|
egress:
|
||||||
|
# CoreDNS.
|
||||||
|
- to:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: kube-system
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
k8s-app: kube-dns
|
||||||
|
ports:
|
||||||
|
- port: 53
|
||||||
|
protocol: UDP
|
||||||
|
- port: 53
|
||||||
|
protocol: TCP
|
||||||
|
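# Kubernetes API for KubeOps reconciliation and Deployment UID lookup.
# NOTE: the empty `to` list matches every destination, so this rule permits egress to any host on TCP 443/6443, not only the API server.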
|
||||||
|
- to: []
|
||||||
|
ports:
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 6443
|
||||||
|
protocol: TCP
|
||||||
|
# Agent egress for operator-initiated probes / fallback command dispatch.
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.56.0/24
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.57.0/24
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.58.0/24
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.68.0/27
|
||||||
|
ports:
|
||||||
|
- port: 80
|
||||||
|
protocol: TCP
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8080
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5000
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5001
|
||||||
|
protocol: TCP
|
||||||
|
# Synology NFS allowance for future cold-tier/audit archival jobs.
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.58.3/32
|
||||||
|
ports:
|
||||||
|
- port: 2049
|
||||||
|
protocol: TCP
|
||||||
|
- port: 2049
|
||||||
|
protocol: UDP
|
||||||
|
- port: 111
|
||||||
|
protocol: TCP
|
||||||
|
- port: 111
|
||||||
|
protocol: UDP
|
||||||
22
apps/fc-devicemgmt/service-web.yaml
Normal file
22
apps/fc-devicemgmt/service-web.yaml
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: fc-devicemgmt-web
|
||||||
|
namespace: fc-devicemgmt
|
||||||
|
labels:
|
||||||
|
app: fc-devicemgmt-web
|
||||||
|
app.kubernetes.io/name: fc-devicemgmt-web
|
||||||
|
app.kubernetes.io/component: web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: fc-devicemgmt-web
|
||||||
|
ports:
|
||||||
|
- name: http
|
||||||
|
port: 80
|
||||||
|
targetPort: 8080
|
||||||
|
protocol: TCP
|
||||||
12
apps/fc-devicemgmt/serviceaccount-operator.yaml
Normal file
12
apps/fc-devicemgmt/serviceaccount-operator.yaml
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: ServiceAccount
|
||||||
|
metadata:
|
||||||
|
name: fc-devicemgmt-operator
|
||||||
|
namespace: fc-devicemgmt
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: fc-devicemgmt-operator
|
||||||
|
app.kubernetes.io/component: operator
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
171
apps/fc-redis/fc-redis.yaml
Normal file
171
apps/fc-redis/fc-redis.yaml
Normal file
@@ -0,0 +1,171 @@
|
|||||||
|
# fc-redis — SignalR backplane for cross-product event bus
|
||||||
|
#
|
||||||
|
# Lands per Q-SO-1 resolution (2026-05-11 PM): SignalR backplane in Phase A,
|
||||||
|
# not Phase C as originally drafted. Operator directive: "Redis can be
|
||||||
|
# deployed just fine as it's another FlowerCore technology we'll want to
|
||||||
|
# manage."
|
||||||
|
#
|
||||||
|
# Phase A scope (this file):
|
||||||
|
# - Single Redis 7.x Alpine pod
|
||||||
|
# - 1Gi Longhorn RWO PVC for AOF persistence
|
||||||
|
# - ClusterIP Service at `redis.fc-redis.svc.cluster.local:6379`
|
||||||
|
# - No AUTH (in-cluster only; not exposed externally)
|
||||||
|
# - No IngressRoute (backplane is server-to-server only)
|
||||||
|
#
|
||||||
|
# Consumers (Phase A IMPL across FC services):
|
||||||
|
# - FlowerCore.Signage.Web (OpsConsoleHub)
|
||||||
|
# - FlowerCore.Scoreboard.Web (ScoreboardHub)
|
||||||
|
# - FlowerCore.SignalControl.Web
|
||||||
|
# - FlowerCore.DMS.Web
|
||||||
|
# - Any other product joining the cross-product event bus
|
||||||
|
#
|
||||||
|
# Each consumer adds:
|
||||||
|
# services.AddSignalR()
|
||||||
|
# .AddStackExchangeRedis(
|
||||||
|
# "redis.fc-redis.svc.cluster.local:6379",
|
||||||
|
# opts => opts.Configuration.ChannelPrefix =
|
||||||
|
# StackExchange.Redis.RedisChannel.Literal("fc-opsconsole"));
|
||||||
|
#
|
||||||
|
# Phase B / C follow-ons (out of scope here):
|
||||||
|
# - Redis Sentinel for HA (3-node)
|
||||||
|
# - AUTH password from 1Password Connect (rotate via /rotate-password)
|
||||||
|
# - redis_exporter sidecar for Prometheus scrape
|
||||||
|
# - Network policies restricting which namespaces can dial 6379
|
||||||
|
#
|
||||||
|
# Design: docs/signage/operations-console-phase-2-design.md §3.5
|
||||||
|
# Decision: Q-SO-1 (RESOLVED 2026-05-11 PM)
|
||||||
|
# Memory: feedback_blooming_ui_pattern_no_iframes
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Namespace
|
||||||
|
metadata:
|
||||||
|
name: fc-redis
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: fc-redis-data
|
||||||
|
namespace: fc-redis
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteOnce
|
||||||
|
storageClassName: longhorn
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 1Gi
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: fc-redis-config
|
||||||
|
namespace: fc-redis
|
||||||
|
data:
|
||||||
|
redis.conf: |
|
||||||
|
# Phase A — minimal config; no AUTH, no replication.
|
||||||
|
bind 0.0.0.0
|
||||||
|
protected-mode no
|
||||||
|
port 6379
|
||||||
|
tcp-backlog 511
|
||||||
|
timeout 0
|
||||||
|
tcp-keepalive 300
|
||||||
|
|
||||||
|
# Persistence: AOF (fsync every second is the standard SignalR-backplane
|
||||||
|
# durability sweet spot — the backplane only needs to survive Redis
|
||||||
|
# restarts, not absolute zero loss).
|
||||||
|
appendonly yes
|
||||||
|
appendfsync everysec
|
||||||
|
auto-aof-rewrite-percentage 100
|
||||||
|
auto-aof-rewrite-min-size 64mb
|
||||||
|
|
||||||
|
# Reasonable defaults — let Redis pick most things.
|
||||||
|
maxmemory-policy allkeys-lru
|
||||||
|
maxmemory 256mb
|
||||||
|
|
||||||
|
# Logging
|
||||||
|
loglevel notice
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: fc-redis
|
||||||
|
namespace: fc-redis
|
||||||
|
labels:
|
||||||
|
app: fc-redis
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
strategy:
|
||||||
|
type: Recreate # RWO PVC; do not do rolling update
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: fc-redis
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: fc-redis
|
||||||
|
spec:
|
||||||
|
securityContext:
|
||||||
|
runAsNonRoot: true
|
||||||
|
runAsUser: 999 # redis:7-alpine default uid
|
||||||
|
runAsGroup: 999
|
||||||
|
fsGroup: 999
|
||||||
|
containers:
|
||||||
|
- name: redis
|
||||||
|
image: redis:7-alpine
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
command: ["redis-server", "/etc/redis/redis.conf"]
|
||||||
|
ports:
|
||||||
|
- name: redis
|
||||||
|
containerPort: 6379
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: "50m"
|
||||||
|
memory: "128Mi"
|
||||||
|
limits:
|
||||||
|
cpu: "500m"
|
||||||
|
memory: "384Mi"
|
||||||
|
volumeMounts:
|
||||||
|
- name: data
|
||||||
|
mountPath: /data
|
||||||
|
- name: config
|
||||||
|
mountPath: /etc/redis
|
||||||
|
readOnly: true
|
||||||
|
livenessProbe:
|
||||||
|
tcpSocket:
|
||||||
|
port: 6379
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 10
|
||||||
|
readinessProbe:
|
||||||
|
exec:
|
||||||
|
command: ["redis-cli", "ping"]
|
||||||
|
initialDelaySeconds: 2
|
||||||
|
periodSeconds: 5
|
||||||
|
securityContext:
|
||||||
|
allowPrivilegeEscalation: false
|
||||||
|
readOnlyRootFilesystem: true
|
||||||
|
capabilities:
|
||||||
|
drop: [ALL]
|
||||||
|
volumes:
|
||||||
|
- name: data
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: fc-redis-data
|
||||||
|
- name: config
|
||||||
|
configMap:
|
||||||
|
name: fc-redis-config
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: redis
|
||||||
|
namespace: fc-redis
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: fc-redis
|
||||||
|
ports:
|
||||||
|
- name: redis
|
||||||
|
port: 6379
|
||||||
|
targetPort: 6379
|
||||||
|
protocol: TCP
|
||||||
@@ -58,7 +58,7 @@ spec:
|
|||||||
nodeName: rke2-server
|
nodeName: rke2-server
|
||||||
containers:
|
containers:
|
||||||
- name: web
|
- name: web
|
||||||
image: localhost/fc-updater-web:v20260507-public-privacy
|
image: localhost/fc-updater-web:v20260509-4162dca-authgate
|
||||||
imagePullPolicy: Never
|
imagePullPolicy: Never
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 8080
|
- containerPort: 8080
|
||||||
|
|||||||
@@ -466,11 +466,11 @@ spec:
|
|||||||
itemPath: vaults/IAmWorkin/items/Guacamole JSON Auth
|
itemPath: vaults/IAmWorkin/items/Guacamole JSON Auth
|
||||||
---
|
---
|
||||||
---
|
---
|
||||||
# 1Password-backed credentials for Mac mini VNC access (Phase 1 — 2026-04-28)
|
# 1Password-backed credentials for Mac mini VNC access (Phase 1 — 2026-04-28)
|
||||||
# The operator mints Secret 'macmini-vnc-creds' with keys: username, password, VNC Password
|
# The operator mints Secret 'macmini-vnc-creds' with keys: username, password, VNC Password
|
||||||
# Note: '1Password' field label 'VNC Password' -> K8s Secret key 'VNC Password' (space retained)
|
# Note: '1Password' field label 'VNC Password' -> K8s Secret key 'VNC Password' (space retained)
|
||||||
# Guacamole VNC connection password is sourced from the 'VNC Password' field.
|
# Guacamole VNC connection password is sourced from the 'VNC Password' field.
|
||||||
# Actual IP is 10.0.56.115 (INFRA VLAN) — the 1P item 'IP' field is kept as backup reference.
|
# Actual IP is 10.0.56.115 (INFRA VLAN) — the 1P item 'IP' field is kept as backup reference.
|
||||||
apiVersion: onepassword.com/v1
|
apiVersion: onepassword.com/v1
|
||||||
kind: OnePasswordItem
|
kind: OnePasswordItem
|
||||||
metadata:
|
metadata:
|
||||||
@@ -481,6 +481,7 @@ metadata:
|
|||||||
app.kubernetes.io/part-of: flowercore
|
app.kubernetes.io/part-of: flowercore
|
||||||
spec:
|
spec:
|
||||||
itemPath: vaults/IAmWorkin/items/Mac Mini
|
itemPath: vaults/IAmWorkin/items/Mac Mini
|
||||||
|
---
|
||||||
# Blue Jay Branding Extension (CSS + translations)
|
# Blue Jay Branding Extension (CSS + translations)
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: ConfigMap
|
kind: ConfigMap
|
||||||
|
|||||||
@@ -6,14 +6,29 @@
|
|||||||
# `bluejay-ws-sandbox-1` runner placeholder. Andrew explicitly does NOT want
|
# `bluejay-ws-sandbox-1` runner placeholder. Andrew explicitly does NOT want
|
||||||
# BLUEJAY-WS registered as a runner (workstation has personal/operator state).
|
# BLUEJAY-WS registered as a runner (workstation has personal/operator state).
|
||||||
#
|
#
|
||||||
# Status (2026-05-08): STAGED ONLY — DO NOT APPLY without operator review.
|
# Storage layout (2026-05-08):
|
||||||
# See docs/infrastructure/windows-server-build-runner-plan.md "Phase 1 readiness gate".
|
# * ISO is now sourced from Synology NFS (Path B) — see
|
||||||
|
# win2025-iso-nfs-pv.yaml. The Longhorn Filesystem PVC
|
||||||
|
# `windows-server-2025-iso` below is RETAINED but UNUSED so the prior
|
||||||
|
# CDI upload state is preserved as a fallback (and so ArgoCD doesn't
|
||||||
|
# prune it on this commit). It can be deleted in a follow-up commit
|
||||||
|
# after the NFS path is proven on a successful Windows install.
|
||||||
#
|
#
|
||||||
# Prerequisites that MUST be satisfied first:
|
# Status (2026-05-08): LIVE — Phase 1 prereqs satisfied:
|
||||||
# 1. Windows Server 2025 ISO populated into the `windows-server-2025-iso` PVC
|
# * Multus CNI v4.2.2 thick-plugin DaemonSet running on all 3 RKE2 nodes
|
||||||
# (operator interactive step — Microsoft Evaluation Center download).
|
# (apps/multus/multus.yaml; ApplicationSet `infra-multus` Synced/Healthy)
|
||||||
# 2. Either Multus + PROD VLAN NAD (preferred) OR pod-network only (this YAML).
|
# * CDI v1.65.0 operator + CR Deployed (apps/cdi/; ApplicationSet
|
||||||
# 3. KubeVirt CR feature gates: none required for non-persistent vTPM.
|
# `infra-cdi` Synced/Healthy; uploadproxy reachable via kubectl port-forward)
|
||||||
|
# * Windows Server 2025 ISO uploaded via CDI virtctl image-upload to
|
||||||
|
# PVC windows-server-2025-iso (7.7 GiB → 10Gi PVC, Bound, Upload Complete)
|
||||||
|
# * Local Administrator password generated, stored in 1Password vault
|
||||||
|
# IAmWorkin (qaphopopkryhbg353ukzhhuqoq) item id h3ix4mgfk65gmkcmvh6ly3d3hu
|
||||||
|
# * NetworkAttachmentDefinition prod-vlan57 registered (apps/kubevirt-vms/
|
||||||
|
# prod-vlan57-nad.yaml). VM still uses pod-network masquerade until Phase 1.5
|
||||||
|
# host bridge work lands (Puppet br-prod + enp86s0.57); switching is a
|
||||||
|
# one-line YAML edit + git push.
|
||||||
|
#
|
||||||
|
# See docs/infrastructure/windows-server-build-runner-plan.md "Phase 1 readiness gate".
|
||||||
#
|
#
|
||||||
# Network choice in this draft: **pod-network fallback** (Calico default).
|
# Network choice in this draft: **pod-network fallback** (Calico default).
|
||||||
# Outbound-only is fine for the Updater Sandbox E2E runner workload (the runner
|
# Outbound-only is fine for the Updater Sandbox E2E runner workload (the runner
|
||||||
@@ -42,21 +57,49 @@ metadata:
|
|||||||
pod-security.kubernetes.io/enforce: privileged
|
pod-security.kubernetes.io/enforce: privileged
|
||||||
|
|
||||||
---
|
---
|
||||||
# ISO PVC — operator must populate this before applying the VM manifest.
|
# ISO PVC — populated via CDI virtctl image-upload (CDI is now installed).
|
||||||
# Population paths (see plan doc "Phase 1 readiness gate", section 2):
|
#
|
||||||
# Path A — manual upload via helper pod + kubectl cp
|
# **Volume mode (2026-05-08 status):** Filesystem-mode PVC. A migration to
|
||||||
# Path B — install CDI, then DataVolume HTTP import
|
# `volumeMode: Block` via DataVolume was attempted to address an OVMF SATA
|
||||||
|
# CDROM read timeout, but CDI v1.65.0's upload-target pod runs as uid 107
|
||||||
|
# with `capabilities.drop: [ALL]` and cannot open the underlying block
|
||||||
|
# device (`blockdev: cannot open /dev/cdi-block-volume: Permission denied`).
|
||||||
|
# Reverted to Filesystem PVC pending one of:
|
||||||
|
# - CDI deployment override granting CAP_SYS_RAWIO to upload pod
|
||||||
|
# - Pre-populated PVC via privileged init pod that dd's the ISO directly
|
||||||
|
# - Migration to a different storage class that exposes block devices
|
||||||
|
# differently (e.g. iSCSI, where Longhorn's CSI mount path may behave
|
||||||
|
# differently)
|
||||||
|
#
|
||||||
|
# Population workflow (this PVC, Filesystem mode):
|
||||||
|
# 1. virtctl --kubeconfig $env:USERPROFILE\.kube\rke2.yaml image-upload pvc \
|
||||||
|
# windows-server-2025-iso -n kubevirt-vms \
|
||||||
|
# --image-path "$env:USERPROFILE\Downloads\en-us_windows_server_2025_updated_march_2026_x64_dvd_8e06425a.iso" \
|
||||||
|
# --size 10Gi --storage-class longhorn --access-mode ReadWriteOnce \
|
||||||
|
# --uploadproxy-url https://localhost:8443 --insecure
|
||||||
|
# (--uploadproxy-url uses port-forward in practice: `kubectl port-forward
|
||||||
|
# -n cdi service/cdi-uploadproxy 8443:443 &` first.)
|
||||||
|
#
|
||||||
|
# **Open boot issue:** even with the ISO at bootOrder:1, OVMF console showed:
|
||||||
|
# BdsDxe: starting Boot0001 "UEFI QEMU DVD-ROM QM00001 " from ... Sata(...)
|
||||||
|
# BdsDxe: failed to start Boot0001 ... Time out
|
||||||
|
# Diagnosis confirmed PVC content IS a valid bootable ISO9660 image — the
|
||||||
|
# timeout is in OVMF reading from the SATA-CDROM-backed-by-filesystem-PVC.
|
||||||
|
# Block mode would likely fix it; see CDI permission issue above.
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: PersistentVolumeClaim
|
kind: PersistentVolumeClaim
|
||||||
metadata:
|
metadata:
|
||||||
name: windows-server-2025-iso
|
name: windows-server-2025-iso
|
||||||
namespace: kubevirt-vms
|
namespace: kubevirt-vms
|
||||||
|
labels:
|
||||||
|
app: ci-runner
|
||||||
|
flowercore.io/managed-by: bluejay-infra
|
||||||
spec:
|
spec:
|
||||||
accessModes:
|
accessModes:
|
||||||
- ReadWriteOnce # Bump to ReadOnlyMany after population for multi-VM use
|
- ReadWriteOnce # Bump to ReadOnlyMany after population for multi-VM use
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
storage: 6Gi
|
storage: 10Gi # Server 2025 ISO is 7.7GB; 10Gi for headroom
|
||||||
storageClassName: longhorn
|
storageClassName: longhorn
|
||||||
|
|
||||||
---
|
---
|
||||||
@@ -220,10 +263,16 @@ data:
|
|||||||
</OOBE>
|
</OOBE>
|
||||||
<UserAccounts>
|
<UserAccounts>
|
||||||
<AdministratorPassword>
|
<AdministratorPassword>
|
||||||
<!-- IMPORTANT: replace the Value below with a real password BEFORE applying.
|
<!-- Real password is in 1Password — vault qaphopopkryhbg353ukzhhuqoq,
|
||||||
Generate via: $pw = "YourPasswordHere" + "AdministratorPassword";
|
item id h3ix4mgfk65gmkcmvh6ly3d3hu, title:
|
||||||
[Convert]::ToBase64String([Text.Encoding]::Unicode.GetBytes($pw)) -->
|
"ci1 Administrator (Windows Server 2025 KubeVirt VM)".
|
||||||
<Value>UABMAEEAQwBFAEgATwBMAEQARQBSAEEAZABtAGkAbgBpAHMAdAByAGEAdABvAHIAUABhAHMAcwB3AG8AcgBkAA==</Value>
|
Field "autounattend AdministratorPassword Value (UTF-16-LE base64)"
|
||||||
|
matches the Value below.
|
||||||
|
To rotate: regenerate, recompute base64
|
||||||
|
$combined = $pw + "AdministratorPassword"
|
||||||
|
[Convert]::ToBase64String([Text.Encoding]::Unicode.GetBytes($combined))
|
||||||
|
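then update both 1P item AND this Value field, recreate VM. NOTE(review): base64 is an encoding, not encryption; the password is recoverable from this repo by anyone with read access, so treat it as exposed, rotate it, and consider injecting it from the operator-synced Secret instead of committing the Value. -->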
|
||||||
|
<Value>bAA3AGsANABOAHcAcgBMAG4AeQBTAHUAYgBBAHQAaQBzAFUAcAB6AEMAWQAhADkAYQBCAEEAZABtAGkAbgBpAHMAdAByAGEAdABvAHIAUABhAHMAcwB3AG8AcgBkAA==</Value>
|
||||||
<PlainText>false</PlainText>
|
<PlainText>false</PlainText>
|
||||||
</AdministratorPassword>
|
</AdministratorPassword>
|
||||||
</UserAccounts>
|
</UserAccounts>
|
||||||
@@ -260,7 +309,33 @@ metadata:
|
|||||||
role: github-actions-runner
|
role: github-actions-runner
|
||||||
flowercore.io/managed-by: bluejay-infra
|
flowercore.io/managed-by: bluejay-infra
|
||||||
spec:
|
spec:
|
||||||
running: false # Set to true after operator approves + ISO loaded
|
# `running: true` is deprecated in favor of `runStrategy`. They are mutually
|
||||||
|
# exclusive — KubeVirt's validating webhook rejects any VM that sets both:
|
||||||
|
# admission webhook "virtualmachine-validator.kubevirt.io" denied the request:
|
||||||
|
# Running and RunStrategy are mutually exclusive.
|
||||||
|
# `Always` keeps a VMI running and restarts it if it crashes/exits — same
|
||||||
|
# semantics as the old `running: true`.
|
||||||
|
#
|
||||||
|
# **2026-05-08 status: VM cannot start due to a stale QEMU flock on the
|
||||||
|
# rootdisk PVC** (qemu reports `Failed to get "write" lock` on
|
||||||
|
# `/var/run/kubevirt-private/vmi-disks/rootdisk/disk.img`). The flock was
|
||||||
|
# left by a previous QEMU process during a force-deleted launcher pod
|
||||||
|
# cycle. Recovery requires either (a) a Longhorn engine restart on
|
||||||
|
# rke2-agent2, (b) a Longhorn volume detach via the longhorn-manager API
|
||||||
|
# (kubectl patch on `volume.longhorn.io/<pvc-name>` does not work — the
|
||||||
|
# spec.nodeID is reconciled back), or (c) a node reboot of rke2-agent2.
|
||||||
|
#
|
||||||
|
# **Confirmed working:** the bootOrder swap (windows-iso=1, rootdisk=2)
|
||||||
|
# and the runStrategy migration (above). The ISO PVC was successfully
|
||||||
|
# repopulated via virtctl image-upload pvc on the Filesystem-mode PVC.
|
||||||
|
#
|
||||||
|
# **Open: SATA CDROM read timeout** — even with bootOrder=1, OVMF reported
|
||||||
|
# `BdsDxe: failed to start Boot0001 ... Time out` reading the SATA CDROM
|
||||||
|
# backed by the Filesystem-mode PVC. A switch to Block-mode DataVolume
|
||||||
|
# was attempted but blocked by a CDI v1.65.0 upload-pod permission issue
|
||||||
|
# (capability drop prevents writing to the underlying block device).
|
||||||
|
# See header docstring on the ISO PVC.
|
||||||
|
runStrategy: Always # LIVE — ISO uploaded 2026-05-08, password in 1P
|
||||||
template:
|
template:
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
@@ -302,18 +377,60 @@ spec:
|
|||||||
firmware:
|
firmware:
|
||||||
bootloader:
|
bootloader:
|
||||||
efi:
|
efi:
|
||||||
secureBoot: true
|
# 2026-05-08: SecureBoot=false during initial install. With SecureBoot
|
||||||
|
# enabled, OVMF's BdsDxe times out reading Boot0001 from the SCSI
|
||||||
|
# CDROM ("BdsDxe: failed to start Boot0001 ... Time out") before the
|
||||||
|
# EFI bootloader signature can verify against the OVMF VARS trust DB.
|
||||||
|
# KubeVirt's `/usr/share/OVMF/OVMF_VARS.secboot.fd` template doesn't
|
||||||
|
# appear to include the Microsoft KEK/DB by default, so signed
|
||||||
|
# Windows EFI bootloaders fail validation. Disabling SecureBoot lets
|
||||||
|
# OVMF skip the chain check and boot directly. This is acceptable for
|
||||||
|
# a CI runner — TPM 2.0 is still emulated (`tpm: {}` below) so
|
||||||
|
# BitLocker / Hyper-V / WSL still work.
|
||||||
|
# When the operator wants SecureBoot back, the path is:
|
||||||
|
# 1. Custom-build OVMF_VARS.fd with Microsoft KEK/DB enrolled
|
||||||
|
# 2. Mount it into the VM via firmware.bootloader.efi.persistent
|
||||||
|
# 3. Set secureBoot: true again
|
||||||
|
# Tracked separately from the install unblock.
|
||||||
|
secureBoot: false
|
||||||
devices:
|
devices:
|
||||||
tpm: {} # Non-persistent vTPM — sufficient for runner; no BitLocker
|
tpm: {} # Non-persistent vTPM — sufficient for runner; no BitLocker
|
||||||
disks:
|
disks:
|
||||||
- name: rootdisk
|
# bootOrder: ISO must be 1 for first-boot install (the rootdisk has no
|
||||||
|
# EFI bootloader yet). After Windows installs, it writes its own UEFI
|
||||||
|
# Boot#### entries pointing at the rootdisk's EFI partition; UEFI then
|
||||||
|
# boots from rootdisk going forward and the ISO at bootOrder:2 acts as
|
||||||
|
# a fallback for re-install scenarios.
|
||||||
|
#
|
||||||
|
# Original (broken) order had rootdisk=1, windows-iso=2 — UEFI tried
|
||||||
|
# the empty virtio disk first, got nothing, fell back to the SATA
|
||||||
|
# CDROM at Boot0001 with a short timeout, and timed out before the
|
||||||
|
# CDROM enumerated. Console showed:
|
||||||
|
# BdsDxe: failed to start Boot0001 ... Time out
|
||||||
|
# BdsDxe: No bootable option or device was found.
|
||||||
|
# Confirmed via debug pod: PVC content IS a real bootable ISO9660
|
||||||
|
# (file: "ISO 9660 CD-ROM filesystem data ... (bootable)"), so the
|
||||||
|
# only bug was boot priority.
|
||||||
|
# 2026-05-08 PM: cdrom bus SCSI + containerDisk delivery. This
|
||||||
|
# combination boots qemu cleanly and reaches OVMF, but OVMF
|
||||||
|
# BdsDxe still hits "starting Boot0001 ... Time out" on the
|
||||||
|
# cdrom — see HANDOFF.md / CODEX-STATUS.md "OPEN — ci1" for the
|
||||||
|
# full diagnostic chain. virtio-blk disk swap was attempted as a
|
||||||
|
# workaround but introduced a separate QEMU rootdisk flock issue
|
||||||
|
# without fixing the underlying OVMF cdrom problem; reverted.
|
||||||
|
# Operator decision needed for next architectural step (OVMF
|
||||||
|
# custom build with extended timeout, KubeVirt version bump,
|
||||||
|
# Hyper-V/VirtualBox-and-export, or BIOS legacy boot). The
|
||||||
|
# containerDisk distribution pipeline (build/save/scp/ctr import)
|
||||||
|
# is proven and ready to reuse for any of those.
|
||||||
|
- name: windows-iso
|
||||||
bootOrder: 1
|
bootOrder: 1
|
||||||
|
cdrom:
|
||||||
|
bus: scsi
|
||||||
|
- name: rootdisk
|
||||||
|
bootOrder: 2
|
||||||
disk:
|
disk:
|
||||||
bus: virtio
|
bus: virtio
|
||||||
- name: windows-iso
|
|
||||||
bootOrder: 2
|
|
||||||
cdrom:
|
|
||||||
bus: sata
|
|
||||||
- name: virtio-drivers
|
- name: virtio-drivers
|
||||||
cdrom:
|
cdrom:
|
||||||
bus: sata
|
bus: sata
|
||||||
@@ -340,11 +457,50 @@ spec:
|
|||||||
persistentVolumeClaim:
|
persistentVolumeClaim:
|
||||||
claimName: ci1-rootdisk
|
claimName: ci1-rootdisk
|
||||||
- name: windows-iso
|
- name: windows-iso
|
||||||
persistentVolumeClaim:
|
# 2026-05-08 PM (Path C, CONTAINERDISK): the ISO is now packaged as
|
||||||
claimName: windows-server-2025-iso
|
# a KubeVirt containerDisk OCI image baked from
|
||||||
|
# `FROM scratch ; ADD --chown=107:107 disk.img /disk/disk.img`.
|
||||||
|
# The qemu user (uid 107) reads the ISO directly from a tmpfs view
|
||||||
|
# of the OCI layer, bypassing both:
|
||||||
|
# - Synology NFS export ACL (Path B failed: uid 107 denied at
|
||||||
|
# directory level even with mode 0777, see memory
|
||||||
|
# feedback_synology_iso_export_root_only_uid_107_denied)
|
||||||
|
# - OVMF cdrom read-window timeout (Path A and Path B's SCSI
|
||||||
|
# retry both hit `BdsDxe: failed to start Boot0001 ... Time out`
|
||||||
|
# when the cdrom was backed by a PVC the storage controller
|
||||||
|
# couldn't satisfy reads from fast enough).
|
||||||
|
#
|
||||||
|
# Image build (one-time, per ISO version):
|
||||||
|
# 1. Copy ISO to disk.img, write Dockerfile
|
||||||
|
# 2. podman build --tag localhost/win-server-2025:1.0 . (on noc1)
|
||||||
|
# 3. podman save -o win-server-2025-1.0.tar localhost/win-server-2025:1.0
|
||||||
|
# 4. SCP tar to all 3 RKE2 nodes (rke2-server, rke2-agent1, rke2-agent2)
|
||||||
|
# 5. sudo /var/lib/rancher/rke2/bin/ctr -a /run/k3s/containerd/containerd.sock \
|
||||||
|
# -n k8s.io images import /tmp/win-server-2025-1.0.tar
|
||||||
|
# Standard FC pattern per `feedback_rke2_localhost_imagepullpolicy`.
|
||||||
|
#
|
||||||
|
# When a new Windows ISO version ships, bump the tag (1.1, 1.2, ...),
|
||||||
|
# rebuild + redistribute, and update the image: line below in a new
|
||||||
|
# commit. KubeVirt picks up the new image via a VM restart.
|
||||||
|
#
|
||||||
|
# The legacy NFS PVC + PV (apps/kubevirt-vms/win2025-iso-nfs-pv.yaml)
|
||||||
|
# and CDI Longhorn PVC (`windows-server-2025-iso`) are RETAINED for
|
||||||
|
# this commit so the prior states are recoverable. Once the
|
||||||
|
# containerDisk path proves on a successful Windows install, both
|
||||||
|
# legacy artifacts can be pruned in a follow-up commit.
|
||||||
|
containerDisk:
|
||||||
|
image: localhost/win-server-2025:1.0
|
||||||
|
imagePullPolicy: Never
|
||||||
- name: virtio-drivers
|
- name: virtio-drivers
|
||||||
containerDisk:
|
containerDisk:
|
||||||
image: quay.io/kubevirt/virtio-container-disk
|
# Pinned to v1.8.2 (latest stable as of 2026-05-08).
|
||||||
|
# The :latest tag uses Docker manifest v1 schema which containerd
|
||||||
|
# 2.1 (RKE2 v1.34.5) refuses to pull with:
|
||||||
|
# "media type application/vnd.docker.distribution.manifest.v1+prettyjws
|
||||||
|
# is no longer supported since containerd v2.1"
|
||||||
|
# v1.8.2 is rebuilt with manifest v2/OCI and works on containerd 2.1.
|
||||||
|
# Bump available: https://quay.io/repository/kubevirt/virtio-container-disk?tab=tags
|
||||||
|
image: quay.io/kubevirt/virtio-container-disk:v1.8.2
|
||||||
- name: sysprep
|
- name: sysprep
|
||||||
sysprep:
|
sysprep:
|
||||||
configMap:
|
configMap:
|
||||||
|
|||||||
99
apps/kubevirt-vms/win2025-iso-nfs-pv.yaml
Normal file
99
apps/kubevirt-vms/win2025-iso-nfs-pv.yaml
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
# =============================================================================
|
||||||
|
# Windows Server 2025 ISO — Static NFS PV (Path B for SATA-CDROM timeout)
|
||||||
|
# =============================================================================
|
||||||
|
# Purpose: Mount the ISO from Synology NAS via NFS instead of from a Longhorn-
|
||||||
|
# backed Filesystem PVC.
|
||||||
|
#
|
||||||
|
# Why: SATA-CDROM emulation reading from a Longhorn-backed Filesystem PVC is
|
||||||
|
# too slow for OVMF's boot read window — the DVD-ROM enumeration times out
|
||||||
|
# before the bootloader can be read. Symptom on the serial console:
|
||||||
|
# BdsDxe: failed to start Boot0001 "UEFI QEMU DVD-ROM QM00001 " from ...
|
||||||
|
# BdsDxe: failed to start Boot0001 ... Time out
|
||||||
|
# BdsDxe: No bootable option or device was found
|
||||||
|
# Diagnosis confirmed the ISO content is a perfectly valid bootable ISO9660
|
||||||
|
# image — the bug is in the timing path between OVMF and Longhorn-backed
|
||||||
|
# storage, not in the ISO itself.
|
||||||
|
#
|
||||||
|
# Block-mode PVC was tried (`volumeMode: Block` via DataVolume) and would
|
||||||
|
# likely fix the timing, but CDI v1.65.0's upload-target pod cannot open the
|
||||||
|
# block device due to runAsUser:107 + capabilities.drop:[ALL] and we got:
|
||||||
|
# blockdev: cannot open /dev/cdi-block-volume: Permission denied
|
||||||
|
#
|
||||||
|
# NFS-mounted ISO bypasses both issues: no Longhorn slowness, no CDI upload
|
||||||
|
# pod permission concerns. The ISO is read directly from the NAS over a
|
||||||
|
# native NFSv4.1 mount that QEMU's SATA emulator can read at full LAN speed.
|
||||||
|
#
|
||||||
|
# Layout on Synology:
|
||||||
|
# /volume1/ISOs/ (existing export, RKE2 ACL)
|
||||||
|
# en-us_windows_server_2025_updated_march_2026_x64_dvd_8e06425a.iso
|
||||||
|
# win2025-iso-disk/ (new subdir, 2026-05-08)
|
||||||
|
# disk.img -> hardlink to ../en-us_windows_server_2025_..._8e06425a.iso
|
||||||
|
#
|
||||||
|
# KubeVirt's launcher pod expects a PVC mounted at
|
||||||
|
# /var/run/kubevirt-private/vmi-disks/<diskName>/disk.img — by mounting the
|
||||||
|
# `win2025-iso-disk/` subdir as the NFS PV root, `disk.img` lives at the PV's
|
||||||
|
# root and KubeVirt's CDROM emulator finds it without any path manipulation.
|
||||||
|
#
|
||||||
|
# A symlink would NOT work for sub-path NFS mounts (the relative target
|
||||||
|
# `../...iso` falls outside the sub-mount root). A hardlink works because it
|
||||||
|
# references the same inode regardless of mount point.
|
||||||
|
#
|
||||||
|
# Memory references:
|
||||||
|
# - feedback_synology_nfs_volume1_kubernetes_export_scoped (Synology export
|
||||||
|
# scoping pattern — but /volume1/ISOs export, unlike /volume1/kubernetes,
|
||||||
|
# does support sub-path mounts because Synology NFS is configured with
|
||||||
|
# pseudo-fs in NFSv4.1)
|
||||||
|
# - feedback_kubevirt_iso_first_install_bootorder_and_runstrategy (boot
|
||||||
|
# order / runStrategy gotchas, separate from the storage timing issue)
|
||||||
|
#
|
||||||
|
# Validation (2026-05-08, from rke2-server / rke2-agent1 / rke2-agent2):
|
||||||
|
# mount -t nfs -o nfsvers=4.1,ro 10.0.58.3:/volume1/ISOs/win2025-iso-disk /tmp/m
|
||||||
|
# file /tmp/m/disk.img
|
||||||
|
# -> ISO 9660 CD-ROM filesystem data 'SSS_X64FRE_EN-US_DV9' (bootable)
|
||||||
|
# All 3 RKE2 nodes can mount and read.
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolume
|
||||||
|
metadata:
|
||||||
|
name: windows-server-2025-iso-nfs
|
||||||
|
labels:
|
||||||
|
flowercore.io/iso: windows-server-2025
|
||||||
|
flowercore.io/managed-by: bluejay-infra
|
||||||
|
spec:
|
||||||
|
capacity:
|
||||||
|
storage: 8Gi
|
||||||
|
accessModes:
|
||||||
|
- ReadOnlyMany
|
||||||
|
volumeMode: Filesystem
|
||||||
|
persistentVolumeReclaimPolicy: Retain
|
||||||
|
storageClassName: "" # static, no provisioner
|
||||||
|
mountOptions:
|
||||||
|
- nfsvers=4.1
|
||||||
|
- ro
|
||||||
|
- hard
|
||||||
|
- timeo=600
|
||||||
|
- retrans=3
|
||||||
|
nfs:
|
||||||
|
server: 10.0.58.3 # BlueJayNAS Synology DS1621+ on HOME VLAN 58
|
||||||
|
path: /volume1/ISOs/win2025-iso-disk
|
||||||
|
readOnly: true
|
||||||
|
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: windows-server-2025-iso-nfs
|
||||||
|
namespace: kubevirt-vms
|
||||||
|
labels:
|
||||||
|
app: ci-runner
|
||||||
|
flowercore.io/managed-by: bluejay-infra
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadOnlyMany
|
||||||
|
volumeMode: Filesystem
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 8Gi
|
||||||
|
storageClassName: ""
|
||||||
|
volumeName: windows-server-2025-iso-nfs
|
||||||
@@ -974,6 +974,39 @@ data:
|
|||||||
summary: "Deployment {{ $labels.namespace }}/{{ $labels.deployment }} replica mismatch"
|
summary: "Deployment {{ $labels.namespace }}/{{ $labels.deployment }} replica mismatch"
|
||||||
description: "Spec wants {{ $labels.spec_replicas }} but only {{ $value }} available. Likely a rollout stuck on probe failure, scheduling, or PVC."
|
description: "Spec wants {{ $labels.spec_replicas }} but only {{ $value }} available. Likely a rollout stuck on probe failure, scheduling, or PVC."
|
||||||
|
|
||||||
|
# Q-MR-3 (2026-05-11): multus memory pressure — catches the next OOM
|
||||||
|
# cascade BEFORE multus is OOM-killed cluster-wide. The 2026-05-10
|
||||||
|
# outage (21h) hit because no alert fired on the rising multus working
|
||||||
|
# set — only downstream blackbox / Traefik / service alerts. With
|
||||||
|
# 1Gi limit (bluejay-infra@eb8693e), 80% = ~800MiB; steady-state
|
||||||
|
# runs ~150-250MiB so this only fires when an avalanche starts.
|
||||||
|
- alert: MultusMemoryPressure
|
||||||
|
expr: |
|
||||||
|
container_memory_working_set_bytes{container="kube-multus"}
|
||||||
|
/ container_spec_memory_limit_bytes{container="kube-multus"} > 0.8
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
alert_channel: thermal_print
|
||||||
|
annotations:
|
||||||
|
summary: "kube-multus memory >80% of limit on {{ $labels.node }} for 5m"
|
||||||
|
description: "kube-multus working set is {{ $value | humanizePercentage }} of its memory limit on node {{ $labels.node }}. If this keeps climbing, multus will OOM and all new pod networking will halt cluster-wide (precedent: 2026-05-10 outage)."
|
||||||
|
|
||||||
|
# Q-MR-3 (2026-05-11): namespace pending-pod backlog — catches the
|
||||||
|
# operator-leak avalanche pattern BEFORE it cascades into a multus
|
||||||
|
# CNI OOM. Any FC operator (RemoteDesktop / Distribution / WorldBuilder)
|
||||||
|
# emitting pods without ownerReferences will accumulate them when
|
||||||
|
# the operator crashes. >25 pending pods in any namespace for 30m
|
||||||
|
# is the signal to investigate the reconciler.
|
||||||
|
- alert: NamespacePendingPodBacklog
|
||||||
|
expr: sum by (namespace) (kube_pod_status_phase{phase="Pending"}) > 25
|
||||||
|
for: 30m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: "Namespace {{ $labels.namespace }} has {{ $value }} Pending pods for 30m"
|
||||||
|
description: "Pending pod count in {{ $labels.namespace }} exceeds 25 sustained for 30m. Likely operator-leak avalanche pattern — children emitted without ownerReferences. Risk of multus CNI OOM cascade."
|
||||||
|
|
||||||
# Longhorn storage health alerts. Required: longhorn scrape job
|
# Longhorn storage health alerts. Required: longhorn scrape job
|
||||||
# (added 2026-04-26 — see scrape_configs above). The K8s events
|
# (added 2026-04-26 — see scrape_configs above). The K8s events
|
||||||
# for "snapshot becomes not ready to use" are transient lifecycle
|
# for "snapshot becomes not ready to use" are transient lifecycle
|
||||||
|
|||||||
@@ -188,13 +188,24 @@ spec:
|
|||||||
- name: kube-multus
|
- name: kube-multus
|
||||||
image: ghcr.io/k8snetworkplumbingwg/multus-cni:snapshot-thick
|
image: ghcr.io/k8snetworkplumbingwg/multus-cni:snapshot-thick
|
||||||
command: [ "/usr/src/multus-cni/bin/multus-daemon" ]
|
command: [ "/usr/src/multus-cni/bin/multus-daemon" ]
|
||||||
|
# 2026-05-11: upstream default of 50Mi memory limit OOM-cascades when
|
||||||
|
# an operator-owned namespace accumulates >100 pending pods retrying
|
||||||
|
# CNI ADD. RemoteDesktop emitted 219 orphan rd-browser-only pods
|
||||||
|
# (missing OwnerReferences), kubelet's CNI ADD avalanche pushed multus
|
||||||
|
# over 50Mi, OOMKilled, restarted with even bigger backlog → loop.
|
||||||
|
# 21h cluster outage. See FlowerCore.Notes:
|
||||||
|
# feedback_multus_50mi_limit_oom_orphan_pod_avalanche.md
|
||||||
|
# 1Gi limit / 512Mi request comfortably handles a 200+ pod CNI
|
||||||
|
# catchup burst on 64GB nodes (nodes are <25% used in steady-state).
|
||||||
|
# Drop back toward 256Mi only after MultusMemoryPressure alert
|
||||||
|
# proves steady-state working set sits well below 200Mi.
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: "100m"
|
cpu: "100m"
|
||||||
memory: "50Mi"
|
memory: "512Mi"
|
||||||
limits:
|
limits:
|
||||||
cpu: "100m"
|
cpu: "100m"
|
||||||
memory: "50Mi"
|
memory: "1Gi"
|
||||||
securityContext:
|
securityContext:
|
||||||
privileged: true
|
privileged: true
|
||||||
terminationMessagePolicy: FallbackToLogsOnError
|
terminationMessagePolicy: FallbackToLogsOnError
|
||||||
|
|||||||
@@ -127,10 +127,13 @@ spec:
|
|||||||
initContainers:
|
initContainers:
|
||||||
- name: fix-data-perms
|
- name: fix-data-perms
|
||||||
image: busybox:latest
|
image: busybox:latest
|
||||||
# Also chown /shared-tts (hostPath /tmp/tts-audio) so the non-root
|
# Must run as root to chown the hostPath /tmp/tts-audio that may be
|
||||||
# app user (uid 1654) can write Piper .sln16 files that Asterisk
|
# root-owned after node reboot. Pod-level runAsNonRoot:true would
|
||||||
# reads at /var/lib/asterisk/sounds/tts. World-readable (755) is
|
# otherwise inherit and chown would fail with EPERM (see Notes memory
|
||||||
# fine — Asterisk runs as a different uid in the other pod.
|
# feedback_hostpath_initcontainer_chown_perms).
|
||||||
|
securityContext:
|
||||||
|
runAsUser: 0
|
||||||
|
runAsNonRoot: false
|
||||||
command: ["sh", "-c", "chown -R 1654:1654 /data && chown 1654:1654 /shared-tts && chmod 0755 /shared-tts"]
|
command: ["sh", "-c", "chown -R 1654:1654 /data && chown 1654:1654 /shared-tts && chmod 0755 /shared-tts"]
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
- name: telephony-data
|
- name: telephony-data
|
||||||
|
|||||||
@@ -291,6 +291,184 @@ public sealed class FleetManifestLintTests
|
|||||||
violations.Should().BeEmpty();
|
violations.Should().BeEmpty();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Verifies that apps/fc-devicemgmt exists, that its *.yaml files are exactly
/// the expected manifest set, and that every expected file contributes at least
/// one document to the fc-devicemgmt slice of the inventory.
/// </summary>
[Fact]
public void FcDeviceManagement_MustShipExpectedManifestSet()
{
    string[] expectedFiles =
    {
        "1password-item.yaml",
        "argocd-application.yaml",
        "certificate-web.yaml",
        "clusterrole-operator.yaml",
        "clusterrolebinding-operator.yaml",
        "deployment-operator.yaml",
        "deployment-web.yaml",
        "ingressroute-web.yaml",
        "namespace.yaml",
        "network-policy.yaml",
        "service-web.yaml",
        "serviceaccount-operator.yaml",
    };

    var appRoot = Path.Combine(Inventory.BluejayRoot, "apps", "fc-devicemgmt");
    Directory.Exists(appRoot).Should().BeTrue("Sprint 8 Cx-5 owns apps/fc-devicemgmt.");

    // The directory listing must match the expected set exactly: no missing
    // and no unexpected manifests.
    var yamlFileNames = Directory
        .GetFiles(appRoot, "*.yaml")
        .Select(path => Path.GetFileName(path));
    yamlFileNames.Should().BeEquivalentTo(expectedFiles);

    // Each expected file must also show up as a document in the inventory.
    foreach (var expectedFile in expectedFiles)
    {
        FcDeviceManagementDocuments()
            .Should()
            .Contain(document => document.RelativePath == $"fc-devicemgmt/{expectedFile}");
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Verifies that every fc-devicemgmt document carries the required traceability
/// labels under metadata.labels, and that every Deployment repeats them on its
/// pod template and sets the flowercore.io/audit-trace-id pod annotation.
/// </summary>
[Fact]
public void FcDeviceManagement_ObjectsMustCarryStandardTraceabilityLabels()
{
    string[] requiredLabels =
    {
        "app.kubernetes.io/name",
        "app.kubernetes.io/part-of",
        "app.kubernetes.io/managed-by",
        "flowercore.io/tenant-id",
        "flowercore.io/created-by",
    };

    var violations = new List<string>();

    // 1) Object-level labels on every document.
    foreach (var document in FcDeviceManagementDocuments())
    {
        foreach (var label in requiredLabels)
        {
            if (string.IsNullOrWhiteSpace(document.Scalar("metadata", "labels", label)))
            {
                violations.Add($"{document.Descriptor} is missing metadata.labels['{label}'].");
            }
        }
    }

    // 2) Pod-template labels on Deployments.
    foreach (var document in FcDeviceManagementDocuments())
    {
        if (document.Kind != "Deployment")
        {
            continue;
        }

        foreach (var label in requiredLabels)
        {
            if (string.IsNullOrWhiteSpace(document.Scalar("spec", "template", "metadata", "labels", label)))
            {
                violations.Add($"{document.Descriptor} pod template is missing metadata.labels['{label}'].");
            }
        }
    }

    // 3) Audit trace annotation on Deployment pod templates.
    foreach (var document in FcDeviceManagementDocuments())
    {
        if (document.Kind != "Deployment")
        {
            continue;
        }

        if (string.IsNullOrWhiteSpace(document.Scalar("spec", "template", "metadata", "annotations", "flowercore.io/audit-trace-id")))
        {
            violations.Add($"{document.Descriptor} pod template is missing flowercore.io/audit-trace-id.");
        }
    }

    violations.Should().BeEmpty();
}
|
||||||
|
|
||||||
|
/// <summary>
/// Verifies the ingress/TLS shape of fc-devicemgmt:
/// the manifests never mention a Traefik <c>certResolver</c>; the text mentions
/// <c>update.flowercore.io</c> together with the <c>disabled-until-Q-OIDC-1</c>
/// marker; IngressRoute matches include <c>devices.iamworkin.lan</c> and never
/// <c>update.flowercore.io</c>; and the <c>fc-devicemgmt-web-tls</c> Certificate
/// is issued by the <c>step-ca-acme</c> ClusterIssuer for exactly one DNS name,
/// <c>devices.iamworkin.lan</c>.
/// </summary>
[Fact]
public void FcDeviceManagement_IngressMustUseCertManagerAndKeepPublicHostDisabled()
{
    var appText = string.Join(
        Environment.NewLine,
        Directory.GetFiles(Path.Combine(Inventory.BluejayRoot, "apps", "fc-devicemgmt"), "*.yaml")
            .Select(File.ReadAllText));

    appText.Should().NotContain("certResolver");
    appText.Should().Contain("update.flowercore.io");
    appText.Should().Contain("disabled-until-Q-OIDC-1");

    FcDeviceManagementDocuments()
        .Where(document => document.Kind == "IngressRoute")
        .SelectMany(document => document.MappingSequence("spec", "routes"))
        .Select(route => ManifestNodeExtensions.Scalar(route, "match") ?? string.Empty)
        .Should()
        .Contain(match => match.Contains("Host(`devices.iamworkin.lan`)", StringComparison.Ordinal))
        .And.NotContain(match => match.Contains("Host(`update.flowercore.io`)", StringComparison.Ordinal));

    var certificate = FcDeviceManagementDocuments()
        .Single(document => document.Kind == "Certificate" && document.Name == "fc-devicemgmt-web-tls");

    certificate.Scalar("spec", "issuerRef", "name").Should().Be("step-ca-acme");
    certificate.Scalar("spec", "issuerRef", "kind").Should().Be("ClusterIssuer");

    // ContainSingle(string) binds to the "because" overload, so passing the host
    // name there only asserts "exactly one element" and uses the host as the
    // failure reason. Assert the element's value explicitly via Which.
    ManifestNodeExtensions.ScalarSequence(certificate.Root, "spec", "dnsNames")
        .Should()
        .ContainSingle()
        .Which.Should().Be("devices.iamworkin.lan");
}
|
||||||
|
|
||||||
|
/// <summary>
/// Verifies that the fc-devicemgmt-operator ClusterRole mentions the
/// devices.flowercore.io API group, a wildcard, the deployments resource and
/// the get verb, and that the operator Deployment references both the
/// FLOWERCORE_KUBERNETES_OWNER_DEPLOYMENT variable and its own name.
/// </summary>
[Fact]
public void FcDeviceManagement_OperatorRbacMustCoverDevicesAndOwnerLookup()
{
    var documents = FcDeviceManagementDocuments();

    // Scalar-level check only: this confirms the values appear somewhere in the
    // ClusterRole, not which rule they belong to.
    var clusterRole = documents.Single(
        document => document.Kind == "ClusterRole" && document.Name == "fc-devicemgmt-operator");
    var allScalars = clusterRole.AllScalars().ToList();

    allScalars.Should().Contain("devices.flowercore.io")
        .And.Contain("*")
        .And.Contain("deployments")
        .And.Contain("get");

    var operatorDeployment = FcDeviceManagementDocuments().Single(
        document => document.Kind == "Deployment" && document.Name == "fc-devicemgmt-operator");

    operatorDeployment.AllScalars().Should().Contain("FLOWERCORE_KUBERNETES_OWNER_DEPLOYMENT");
    operatorDeployment.AllScalars().Should().Contain("fc-devicemgmt-operator");
}
|
||||||
|
|
||||||
|
/// <summary>
/// Verifies that runtime secrets come from the fc-devicemgmt-runtime
/// OnePasswordItem: the item points at the expected vault path, no Secret
/// document is shipped, the manifests reference the synced secret through
/// secretKeyRef / secretName, and no inline secret markers appear in the text.
/// </summary>
[Fact]
public void FcDeviceManagement_RuntimeSecretsMustUseOnePasswordItemPattern()
{
    var item = FcDeviceManagementDocuments().Single(
        document => document.Kind == "OnePasswordItem" && document.Name == "fc-devicemgmt-runtime");

    item.Scalar("spec", "itemPath")
        .Should()
        .Be("vaults/IAmWorkin/items/FlowerCore DeviceManagement Runtime");

    // Raw text of every manifest, joined so substring checks span all files.
    var appRoot = Path.Combine(Inventory.BluejayRoot, "apps", "fc-devicemgmt");
    var manifestContents = Directory.GetFiles(appRoot, "*.yaml").Select(File.ReadAllText);
    var appText = string.Join(Environment.NewLine, manifestContents);

    FcDeviceManagementDocuments().Should().NotContain(document => document.Kind == "Secret");

    appText.Should().Contain("secretKeyRef:")
        .And.Contain("secretName: fc-devicemgmt-runtime")
        .And.NotContain("stringData:")
        .And.NotContain("from-literal")
        .And.NotContain("tls.key:");
}
|
||||||
|
|
||||||
|
/// <summary>
/// Verifies that fc-devicemgmt ships exactly two NetworkPolicy documents, that
/// together they mention the expected CIDRs and egress ports, and that exactly
/// one policy references 10.0.56.200 and allows egress on 80/443/8080/8443.
/// </summary>
[Fact]
public void FcDeviceManagement_NetworkPoliciesMustAllowLanAgentsSynologyAndDnatPorts()
{
    var policies = FcDeviceManagementDocuments()
        .Where(document => document.Kind == "NetworkPolicy")
        .ToList();

    policies.Should().HaveCount(2);

    // CIDRs may live in either policy, so check the union of their scalars.
    var combinedScalars = policies.SelectMany(policy => policy.AllScalars()).ToList();
    combinedScalars.Should().Contain("10.0.56.0/24")
        .And.Contain("10.0.57.0/24")
        .And.Contain("10.0.58.0/24")
        .And.Contain("10.0.68.0/27")
        .And.Contain("10.0.58.3/32");

    var combinedEgressPorts = policies
        .SelectMany(policy => policy.EgressPorts())
        .ToHashSet(StringComparer.Ordinal);
    combinedEgressPorts.Should().Contain(new[] { "80", "443", "8080", "8443", "2049", "111" });

    // Exactly one policy may reference 10.0.56.200, and that policy must itself
    // carry the four HTTP(S) egress ports.
    var traefikVipPolicies = policies
        .Where(policy => policy.AllScalars().Any(value => value.Contains("10.0.56.200", StringComparison.Ordinal)))
        .ToList();

    traefikVipPolicies.Should().ContainSingle()
        .Which.EgressPorts().Should().Contain(new[] { "80", "443", "8080", "8443" });
}
|
||||||
|
|
||||||
|
/// <summary>
/// Verifies the explicit ArgoCD Application for fc-devicemgmt: it is named
/// infra-fc-devicemgmt, lives in the argocd namespace, sources
/// apps/fc-devicemgmt from the in-cluster Gitea repository, and deploys into
/// the fc-devicemgmt namespace.
/// </summary>
[Fact]
public void FcDeviceManagement_ArgocdApplicationMustMatchApplicationSetDiscoveryConventions()
{
    const string expectedRepoUrl = "http://gitea-clusterip.gitea.svc.cluster.local:3000/bluejay/bluejay-infra.git";
    const string expectedSourcePath = "apps/fc-devicemgmt";
    const string expectedDestinationNamespace = "fc-devicemgmt";

    var application = FcDeviceManagementDocuments().Single(
        document => document.Kind == "Application" && document.Name == "infra-fc-devicemgmt");

    application.Namespace.Should().Be("argocd");
    application.Scalar("spec", "source", "repoURL").Should().Be(expectedRepoUrl);
    application.Scalar("spec", "source", "path").Should().Be(expectedSourcePath);
    application.Scalar("spec", "destination", "namespace").Should().Be(expectedDestinationNamespace);
}
|
||||||
|
|
||||||
private static IEnumerable<string> ProbeViolations(
|
private static IEnumerable<string> ProbeViolations(
|
||||||
ManifestDocument document,
|
ManifestDocument document,
|
||||||
YamlMappingNode container,
|
YamlMappingNode container,
|
||||||
@@ -314,6 +492,13 @@ public sealed class FleetManifestLintTests
|
|||||||
$"{document.Descriptor} container '{containerName}' still uses {probeKey}.httpGet on /health.",
|
$"{document.Descriptor} container '{containerName}' still uses {probeKey}.httpGet on /health.",
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Returns the inventory documents whose relative path starts with
/// "fc-devicemgmt/" (ordinal comparison), materialized into a new list.
/// </summary>
private static IReadOnlyList<ManifestDocument> FcDeviceManagementDocuments()
{
    var fcDeviceManagementDocuments =
        from document in Inventory.Documents
        where document.RelativePath.StartsWith("fc-devicemgmt/", StringComparison.Ordinal)
        select document;

    return fcDeviceManagementDocuments.ToList();
}
|
||||||
}
|
}
|
||||||
|
|
||||||
internal sealed class ManifestInventory
|
internal sealed class ManifestInventory
|
||||||
|
|||||||
Reference in New Issue
Block a user