Compare commits
44 Commits
claude/k8s
...
claude/ci1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b998f50f48 | ||
|
|
8fd9ae1cd3 | ||
|
|
fc2aca0e9e | ||
|
|
ba18c52130 | ||
|
|
9f6dc1a9d5 | ||
|
|
0bf47dfa33 | ||
|
|
87a7d7c70a | ||
|
|
1c4145a581 | ||
|
|
c50a403f74 | ||
|
|
fb7bd10528 | ||
|
|
6c21d14a98 | ||
|
|
b3529f8e96 | ||
|
|
00c11b4eaa | ||
|
|
04881f46f0 | ||
|
|
c0038e4859 | ||
|
|
dee48831c6 | ||
|
|
0f1dc5f871 | ||
|
|
11c5f6e6cc | ||
|
|
d637fe9b30 | ||
|
|
5bfe41beca | ||
|
|
df22774674 | ||
|
|
c4065b15a3 | ||
|
|
a4aa612373 | ||
|
|
c2eb37dee9 | ||
|
|
bf6f542569 | ||
|
|
e150b2102f | ||
|
|
33a765b0bc | ||
|
|
5484ed7db6 | ||
|
|
2aa84349ea | ||
|
|
851f8e673b | ||
|
|
f78f8c8192 | ||
|
|
9b255fefc1 | ||
|
|
6a89a76e39 | ||
|
|
2489464d4f | ||
|
|
4b777b16ac | ||
|
|
8c60e3a4d3 | ||
|
|
df02b4c3c3 | ||
|
|
c0dceafffd | ||
|
|
490db8f9e6 | ||
|
|
1926bdaf3b | ||
|
|
ca8d062826 | ||
|
|
1889462fc4 | ||
|
|
523ba61232 | ||
|
|
53f67c8713 |
69
apps/cdi/README.md
Normal file
69
apps/cdi/README.md
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
# CDI — Containerized Data Importer
|
||||||
|
|
||||||
|
KubeVirt's `containerized-data-importer` for populating PVCs from external
|
||||||
|
sources (HTTP, HTTPS, container registry, S3, virtctl upload). Required to
|
||||||
|
import the Windows Server 2025 ISO into the `windows-server-2025-iso` PVC
|
||||||
|
that `apps/kubevirt-vms/ci1.yaml` mounts as a CDROM.
|
||||||
|
|
||||||
|
## Files
|
||||||
|
|
||||||
|
| File | Source | Purpose |
|
||||||
|
| ----------------- | ----------------------------------------------------------------------------------------------------------------- | -------------------------------------------------- |
|
||||||
|
| `cdi-operator.yaml` | [`v1.65.0`](https://github.com/kubevirt/containerized-data-importer/releases/tag/v1.65.0) — verbatim copy | Installs operator + CRDs (5779 lines, large) |
|
||||||
|
| `cdi-cr.yaml` | [`v1.65.0`](https://github.com/kubevirt/containerized-data-importer/releases/tag/v1.65.0) — annotated + commented | Tells operator to deploy CDI components |
|
||||||
|
|
||||||
|
`cdi-operator.yaml` is **vendored verbatim** from the upstream release for
|
||||||
|
air-gap reproducibility (no internet fetch at deploy time, ArgoCD prune
|
||||||
|
contracts hold). To bump versions:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CDI_VER=v1.66.0 # for example
|
||||||
|
curl -sL "https://github.com/kubevirt/containerized-data-importer/releases/download/${CDI_VER}/cdi-operator.yaml" \
|
||||||
|
-o apps/cdi/cdi-operator.yaml
|
||||||
|
curl -sL "https://github.com/kubevirt/containerized-data-importer/releases/download/${CDI_VER}/cdi-cr.yaml" \
|
||||||
|
-o /tmp/cdi-cr-new.yaml # then re-apply project header diff
|
||||||
|
git diff apps/cdi/ # review
|
||||||
|
git commit -am "cdi: bump to ${CDI_VER}" && git push
|
||||||
|
```
|
||||||
|
|
||||||
|
## Verify after deploy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
kubectl -n cdi get pods # operator + apiserver + deployment + uploadproxy
|
||||||
|
kubectl get cdis cdi -o jsonpath='{.status.phase}' # "Deployed"
|
||||||
|
kubectl get crd | grep cdi.kubevirt.io
|
||||||
|
# Expected CRDs: datavolumes.cdi.kubevirt.io, cdiconfigs.cdi.kubevirt.io,
|
||||||
|
# storageprofiles.cdi.kubevirt.io, dataimportcrons.cdi.kubevirt.io,
|
||||||
|
# datasources.cdi.kubevirt.io, objecttransfers.cdi.kubevirt.io
|
||||||
|
```
|
||||||
|
|
||||||
|
## Use after install
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# Example DataVolume that imports from HTTP
|
||||||
|
apiVersion: cdi.kubevirt.io/v1beta1
|
||||||
|
kind: DataVolume
|
||||||
|
metadata:
|
||||||
|
name: my-iso
|
||||||
|
spec:
|
||||||
|
source:
|
||||||
|
http:
|
||||||
|
url: "https://server/path/to.iso"
|
||||||
|
pvc:
|
||||||
|
accessModes: [ReadWriteOnce]
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 10Gi
|
||||||
|
storageClassName: longhorn
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Or upload from local disk via virtctl
|
||||||
|
virtctl image-upload pvc my-iso \
|
||||||
|
--image-path ./my.iso \
|
||||||
|
--size 10Gi \
|
||||||
|
--storage-class longhorn \
|
||||||
|
--access-mode ReadWriteOnce \
|
||||||
|
--uploadproxy-url https://cdi-uploadproxy.cdi.svc:443 \
|
||||||
|
--insecure
|
||||||
|
```
|
||||||
36
apps/cdi/cdi-cr.yaml
Normal file
36
apps/cdi/cdi-cr.yaml
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
# =============================================================================
|
||||||
|
# CDI CR — Tells the CDI operator to install CDI components into the cluster.
|
||||||
|
# =============================================================================
|
||||||
|
# After cdi-operator.yaml is applied, the operator watches for THIS resource
|
||||||
|
# (CDI named "cdi"). When found, it deploys cdi-apiserver, cdi-deployment,
|
||||||
|
# cdi-uploadproxy, cdi-cronjob, and the importer/uploadserver/cloner pods.
|
||||||
|
#
|
||||||
|
# Configuration:
|
||||||
|
# - HonorWaitForFirstConsumer: PVCs created by DataVolumes wait for first
|
||||||
|
# pod to schedule before binding (lets storage class pick best node).
|
||||||
|
# - WebhookPvcRendering: enables CDI's mutating webhook that renders (fills in) PVC spec fields from the matching StorageProfile.
|
||||||
|
# - imagePullPolicy IfNotPresent: re-pull only on tag rotation.
|
||||||
|
# - nodeSelector linux: pin to Linux nodes (no Windows worker support).
|
||||||
|
#
|
||||||
|
# Andrew may want to add a `uploadProxyURLOverride` later to expose the
|
||||||
|
# uploadproxy via Traefik IngressRoute for `virtctl image-upload` from
|
||||||
|
# BLUEJAY-WS without `kubectl port-forward`. Phase 2 enhancement.
|
||||||
|
# =============================================================================
|
||||||
|
apiVersion: cdi.kubevirt.io/v1beta1
|
||||||
|
kind: CDI
|
||||||
|
metadata:
|
||||||
|
name: cdi
|
||||||
|
annotations:
|
||||||
|
bluejay.iamworkin.lan/source: "kubevirt/containerized-data-importer v1.65.0"
|
||||||
|
spec:
|
||||||
|
config:
|
||||||
|
featureGates:
|
||||||
|
- HonorWaitForFirstConsumer
|
||||||
|
- WebhookPvcRendering
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
infra:
|
||||||
|
nodeSelector:
|
||||||
|
kubernetes.io/os: linux
|
||||||
|
workload:
|
||||||
|
nodeSelector:
|
||||||
|
kubernetes.io/os: linux
|
||||||
5779
apps/cdi/cdi-operator.yaml
Normal file
5779
apps/cdi/cdi-operator.yaml
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,18 @@
|
|||||||
# FlowerCore Remote Desktop — TLS + Ingress
|
# FlowerCore Remote Desktop — TLS + Ingress
|
||||||
# Deployment and Service managed by deploy script (not ArgoCD)
|
#
|
||||||
|
# Source-of-truth split:
|
||||||
|
# - bluejay-infra OWNS: Certificate, IngressRoute, all NetworkPolicies
|
||||||
|
# (see network-policies.yaml in this directory).
|
||||||
|
# - FlowerCore.RemoteDesktop scripts/deploy-web.sh OWNS: Deployment +
|
||||||
|
# Service. Reason: image refs like `localhost/fc-desktop:linux-xfce`
|
||||||
|
# only exist on each node's containerd after a manual import, so a
|
||||||
|
# Deployment manifest in bluejay-infra would race the image-import
|
||||||
|
# step and crash-loop.
|
||||||
|
#
|
||||||
|
# NetworkPolicies moved into bluejay-infra 2026-05-07 — previously they
|
||||||
|
# were applied via the deploy script's kubectl apply calls, which broke
|
||||||
|
# cluster-rebuild repeatability. See
|
||||||
|
# feedback_networkpolicies_belong_in_bluejay_infra.md.
|
||||||
---
|
---
|
||||||
apiVersion: cert-manager.io/v1
|
apiVersion: cert-manager.io/v1
|
||||||
kind: Certificate
|
kind: Certificate
|
||||||
|
|||||||
332
apps/fc-desktop/network-policies.yaml
Normal file
332
apps/fc-desktop/network-policies.yaml
Normal file
@@ -0,0 +1,332 @@
|
|||||||
|
# FlowerCore Remote Desktop — NetworkPolicies (GitOps-managed)
|
||||||
|
#
|
||||||
|
# Moved into bluejay-infra 2026-05-07 as part of the regroup audit. These
|
||||||
|
# four policies were previously applied via FlowerCore.RemoteDesktop's
|
||||||
|
# scripts/deploy-web.sh `kubectl apply` calls, which meant a fresh cluster
|
||||||
|
# rebuild from bluejay-infra alone would miss them — Browser Lab session
|
||||||
|
# isolation, control-plane allow-list, and HTTP-01 cert renewal would all
|
||||||
|
# silently fail to come up.
|
||||||
|
#
|
||||||
|
# Source-of-truth contract:
|
||||||
|
# - bluejay-infra OWNS all NetworkPolicy + Certificate + IngressRoute
|
||||||
|
# resources for fc-desktop.
|
||||||
|
# - FlowerCore.RemoteDesktop's scripts/deploy-web.sh continues to own
|
||||||
|
# the Deployment + Service apply (because the image ref
|
||||||
|
# `localhost/fc-desktop:linux-xfce` only exists on each node's
|
||||||
|
# containerd after a manual import — it can't be pulled from a
|
||||||
|
# registry, so a Deployment manifest in bluejay-infra would race the
|
||||||
|
# image-import step and crash-loop).
|
||||||
|
---
|
||||||
|
# 1) desktop-isolation — Browser Lab session pods.
|
||||||
|
#
|
||||||
|
# Locks down pods labeled `app.kubernetes.io/name=remote-desktop` (every
|
||||||
|
# session pod regardless of template). Allows guacd ingress for the VNC/RDP
|
||||||
|
# display lane and remotedesktop-web's pre-handoff probing. Egress: NFS to
|
||||||
|
# Synology, DNS, Traefik (cluster + LB VIP), Intranet (Browser Lab home).
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: NetworkPolicy
|
||||||
|
metadata:
|
||||||
|
name: desktop-isolation
|
||||||
|
namespace: fc-desktop
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/part-of: remotedesktop
|
||||||
|
app.kubernetes.io/component: isolation
|
||||||
|
spec:
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: remote-desktop
|
||||||
|
policyTypes:
|
||||||
|
- Ingress
|
||||||
|
- Egress
|
||||||
|
ingress:
|
||||||
|
- from:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: guacamole
|
||||||
|
ports:
|
||||||
|
- port: 3000
|
||||||
|
protocol: TCP
|
||||||
|
- port: 3001
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5901
|
||||||
|
protocol: TCP
|
||||||
|
- port: 3389
|
||||||
|
protocol: TCP
|
||||||
|
- from:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: fc-desktop
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: remotedesktop-web
|
||||||
|
ports:
|
||||||
|
- port: 3000
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5901
|
||||||
|
protocol: TCP
|
||||||
|
egress:
|
||||||
|
# NFS to Synology
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.58.3/32
|
||||||
|
ports:
|
||||||
|
- port: 2049
|
||||||
|
protocol: TCP
|
||||||
|
- port: 2049
|
||||||
|
protocol: UDP
|
||||||
|
- port: 111
|
||||||
|
protocol: TCP
|
||||||
|
- port: 111
|
||||||
|
protocol: UDP
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.58.3/32
|
||||||
|
ports:
|
||||||
|
- port: 445
|
||||||
|
protocol: TCP
|
||||||
|
- to: []
|
||||||
|
ports:
|
||||||
|
- port: 53
|
||||||
|
protocol: UDP
|
||||||
|
- port: 53
|
||||||
|
protocol: TCP
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.56.200/32
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.43.33.87/32
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: traefik-system
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: traefik
|
||||||
|
ports:
|
||||||
|
- port: 80
|
||||||
|
protocol: TCP
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8000
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8443
|
||||||
|
protocol: TCP
|
||||||
|
- to:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: intranet
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: intranet-web
|
||||||
|
ports:
|
||||||
|
- port: 5300
|
||||||
|
protocol: TCP
|
||||||
|
---
|
||||||
|
# 2) fc-desktop-default-deny — namespace-wide catch-all.
|
||||||
|
#
|
||||||
|
# Selects every pod EXCEPT remotedesktop-web (the public-surface control
|
||||||
|
# plane) and applies default-deny semantics for both Ingress and Egress.
|
||||||
|
# Closes the gap where session pods land WITHOUT the desktop-isolation
|
||||||
|
# policy's `app.kubernetes.io/name=remote-desktop` label, plus prevents
|
||||||
|
# arbitrary debug sidecars / kubectl debug images from getting cluster
|
||||||
|
# access.
|
||||||
|
#
|
||||||
|
# CRITICAL: also catches transient cm-acme-http-solver pods (that's the
|
||||||
|
# bug this whole regroup chased). The cm-acme-http-solver-allow policy
|
||||||
|
# below is the explicit carve-out.
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: NetworkPolicy
|
||||||
|
metadata:
|
||||||
|
name: fc-desktop-default-deny
|
||||||
|
namespace: fc-desktop
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/part-of: remotedesktop
|
||||||
|
app.kubernetes.io/component: isolation
|
||||||
|
spec:
|
||||||
|
podSelector:
|
||||||
|
matchExpressions:
|
||||||
|
- key: app.kubernetes.io/name
|
||||||
|
operator: NotIn
|
||||||
|
values:
|
||||||
|
- remotedesktop-web
|
||||||
|
policyTypes:
|
||||||
|
- Ingress
|
||||||
|
- Egress
|
||||||
|
---
|
||||||
|
# 3) remotedesktop-web-isolation — control plane explicit allow-list.
|
||||||
|
#
|
||||||
|
# remotedesktop-web is the only pod label the default-deny excludes, so
|
||||||
|
# without this policy the control plane would have wide-open Ingress AND
|
||||||
|
# Egress. This re-introduces a tight allow-list:
|
||||||
|
# - Ingress: Traefik only on TCP/8080
|
||||||
|
# - Egress: CoreDNS, K8s API, Guacamole admin, NFS, Intranet,
|
||||||
|
# Traefik (cluster + LB), and the fc-desktop namespace itself
|
||||||
|
# (for session pod readiness probing).
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: NetworkPolicy
|
||||||
|
metadata:
|
||||||
|
name: remotedesktop-web-isolation
|
||||||
|
namespace: fc-desktop
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/part-of: remotedesktop
|
||||||
|
app.kubernetes.io/component: isolation
|
||||||
|
spec:
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: remotedesktop-web
|
||||||
|
policyTypes:
|
||||||
|
- Ingress
|
||||||
|
- Egress
|
||||||
|
ingress:
|
||||||
|
- from:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: traefik-system
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: traefik
|
||||||
|
ports:
|
||||||
|
- port: 8080
|
||||||
|
protocol: TCP
|
||||||
|
egress:
|
||||||
|
# CoreDNS
|
||||||
|
- to:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: kube-system
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
k8s-app: kube-dns
|
||||||
|
ports:
|
||||||
|
- port: 53
|
||||||
|
protocol: UDP
|
||||||
|
- port: 53
|
||||||
|
protocol: TCP
|
||||||
|
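# K8s API server. NOTE: `to: []` matches every destination, so this rule allows TCP/443 and TCP/6443 egress to any address, not only the API server.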
|
||||||
|
- to: []
|
||||||
|
ports:
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 6443
|
||||||
|
protocol: TCP
|
||||||
|
# Guacamole admin
|
||||||
|
- to:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: guacamole
|
||||||
|
ports:
|
||||||
|
- port: 8080
|
||||||
|
protocol: TCP
|
||||||
|
# NFS to Synology
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.58.3/32
|
||||||
|
ports:
|
||||||
|
- port: 2049
|
||||||
|
protocol: TCP
|
||||||
|
- port: 2049
|
||||||
|
protocol: UDP
|
||||||
|
- port: 111
|
||||||
|
protocol: TCP
|
||||||
|
- port: 111
|
||||||
|
protocol: UDP
|
||||||
|
# Intranet web
|
||||||
|
- to:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: intranet
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: intranet-web
|
||||||
|
ports:
|
||||||
|
- port: 5300
|
||||||
|
protocol: TCP
|
||||||
|
# Cluster Traefik pods (in-cluster service resolution + Guacamole
|
||||||
|
# routing handoff where web app builds URLs against the public host
|
||||||
|
# but resolves internally).
|
||||||
|
- to:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: traefik-system
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: traefik
|
||||||
|
ports:
|
||||||
|
- port: 80
|
||||||
|
protocol: TCP
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8080
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8443
|
||||||
|
protocol: TCP
|
||||||
|
# fc-desktop namespace — session pod probing during browser-access
|
||||||
|
# readiness checks.
|
||||||
|
- to:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: fc-desktop
|
||||||
|
ports:
|
||||||
|
- port: 3000
|
||||||
|
protocol: TCP
|
||||||
|
- port: 3001
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5901
|
||||||
|
protocol: TCP
|
||||||
|
- port: 3389
|
||||||
|
protocol: TCP
|
||||||
|
---
|
||||||
|
# 4) cm-acme-http-solver-allow — cert-manager HTTP-01 carve-out.
|
||||||
|
#
|
||||||
|
# Without this, fc-desktop-default-deny catches the transient solver pods
|
||||||
|
# cert-manager creates for each renewal (they don't carry the
|
||||||
|
# remotedesktop-web label). Caused 8-day silent renewal failure on
|
||||||
|
# desktop.iamworkin.lan in 2026-04-28..2026-05-07 (see
|
||||||
|
# feedback_certmanager_renewal_stuck_when_solver_blocked_by_namespace_default_deny.md).
|
||||||
|
#
|
||||||
|
# Authorizes:
|
||||||
|
# - Ingress on TCP/8089 from cluster Traefik (which proxies the external
|
||||||
|
# HTTP-01 GET on port 80 through to the solver).
|
||||||
|
# - Egress for cluster DNS (defensive — newer cert-manager probes from
|
||||||
|
# inside the solver too).
|
||||||
|
#
|
||||||
|
# The `acme.cert-manager.io/http01-solver=true` label is set by
|
||||||
|
# cert-manager itself on every solver pod automatically.
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: NetworkPolicy
|
||||||
|
metadata:
|
||||||
|
name: cm-acme-http-solver-allow
|
||||||
|
namespace: fc-desktop
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/part-of: remotedesktop
|
||||||
|
app.kubernetes.io/component: cert-renewal
|
||||||
|
spec:
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
acme.cert-manager.io/http01-solver: "true"
|
||||||
|
policyTypes:
|
||||||
|
- Ingress
|
||||||
|
- Egress
|
||||||
|
ingress:
|
||||||
|
- from:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: traefik-system
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: traefik
|
||||||
|
ports:
|
||||||
|
- port: 8089
|
||||||
|
protocol: TCP
|
||||||
|
egress:
|
||||||
|
- to:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: kube-system
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
k8s-app: kube-dns
|
||||||
|
ports:
|
||||||
|
- port: 53
|
||||||
|
protocol: UDP
|
||||||
|
- port: 53
|
||||||
|
protocol: TCP
|
||||||
@@ -118,7 +118,7 @@ spec:
|
|||||||
# dotnet.exe publish -c Release -o deploy/app \
|
# dotnet.exe publish -c Release -o deploy/app \
|
||||||
# src/FlowerCore.Distribution.Web/FlowerCore.Distribution.Web.csproj
|
# src/FlowerCore.Distribution.Web/FlowerCore.Distribution.Web.csproj
|
||||||
# podman build -t localhost/fc-distribution:v<tag> -f deploy/Dockerfile.deploy deploy
|
# podman build -t localhost/fc-distribution:v<tag> -f deploy/Dockerfile.deploy deploy
|
||||||
image: localhost/fc-distribution:v202604240010
|
image: localhost/fc-distribution:v202605061948
|
||||||
imagePullPolicy: Never
|
imagePullPolicy: Never
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 8080
|
- containerPort: 8080
|
||||||
@@ -151,6 +151,10 @@ spec:
|
|||||||
value: "/signing/aistation-field/chain.pem"
|
value: "/signing/aistation-field/chain.pem"
|
||||||
- name: FlowerCore__Distribution__Signing__EditionCerts__aistation-field__KeyPath
|
- name: FlowerCore__Distribution__Signing__EditionCerts__aistation-field__KeyPath
|
||||||
value: "/signing/aistation-field/private-key.pem"
|
value: "/signing/aistation-field/private-key.pem"
|
||||||
|
# Public distribution host is GET/HEAD-only at Traefik; this
|
||||||
|
# entitlement list controls which editions are readable there.
|
||||||
|
- name: FlowerCore__Distribution__EntitlementPublic__PublicEditions__0
|
||||||
|
value: "*"
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 100m
|
cpu: 100m
|
||||||
@@ -262,8 +266,12 @@ spec:
|
|||||||
kind: ClusterIssuer
|
kind: ClusterIssuer
|
||||||
dnsNames:
|
dnsNames:
|
||||||
- dist.iamworkin.lan
|
- dist.iamworkin.lan
|
||||||
duration: 2160h # 90d
|
# step-ca ACME caps lifetime at 30d; the requested 90d was silently capped, which
|
||||||
renewBefore: 720h # 30d
|
# made renewBefore=cert-lifetime → perpetual renewal loop (10880+ CRs
|
||||||
|
# in 18h on 2026-05-07). Match working 720h/240h pattern from other
|
||||||
|
# FC services.
|
||||||
|
duration: 720h # 30d (step-ca cap)
|
||||||
|
renewBefore: 240h # 10d
|
||||||
---
|
---
|
||||||
apiVersion: traefik.io/v1alpha1
|
apiVersion: traefik.io/v1alpha1
|
||||||
kind: IngressRoute
|
kind: IngressRoute
|
||||||
|
|||||||
@@ -30,6 +30,7 @@ import logging
|
|||||||
import re
|
import re
|
||||||
import shlex
|
import shlex
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import unicodedata
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from fastapi import FastAPI, HTTPException
|
from fastapi import FastAPI, HTTPException
|
||||||
@@ -60,6 +61,189 @@ class TtsRequest(BaseModel):
|
|||||||
volume: int = 100 # 0-200
|
volume: int = 100 # 0-200
|
||||||
|
|
||||||
|
|
||||||
|
HEBREW_CHAR_RE = re.compile(r"[\u0590-\u05FF]")
|
||||||
|
HEBREW_WORD_RE = re.compile(r"[\u0590-\u05FF]+")
|
||||||
|
|
||||||
|
# eSpeak-NG's Hebrew voice can spell unpointed Hebrew as Unicode character
|
||||||
|
# names on some builds. For source-text study reads, prefer a stable
|
||||||
|
# scholarly transliteration so words sound like words even without niqqud.
|
||||||
|
HEBREW_WORD_TRANSLITERATIONS = {
|
||||||
|
"אב": "av",
|
||||||
|
"אבא": "abba",
|
||||||
|
"אברהם": "Avraham",
|
||||||
|
"אדמה": "adamah",
|
||||||
|
"אדני": "Adonai",
|
||||||
|
"אדם": "adam",
|
||||||
|
"אור": "or",
|
||||||
|
"אלהים": "Elohim",
|
||||||
|
"אלוהים": "Elohim",
|
||||||
|
"אמן": "amen",
|
||||||
|
"אם": "em",
|
||||||
|
"אמת": "emet",
|
||||||
|
"ארץ": "eretz",
|
||||||
|
"אש": "esh",
|
||||||
|
"את": "et",
|
||||||
|
"בית": "beit",
|
||||||
|
"בן": "ben",
|
||||||
|
"ברא": "bara",
|
||||||
|
"בראשית": "bereshit",
|
||||||
|
"ברית": "berit",
|
||||||
|
"ברוך": "barukh",
|
||||||
|
"בת": "bat",
|
||||||
|
"גוי": "goy",
|
||||||
|
"גוים": "goyim",
|
||||||
|
"גויים": "goyim",
|
||||||
|
"דבר": "davar",
|
||||||
|
"דברים": "devarim",
|
||||||
|
"דוד": "David",
|
||||||
|
"הלל": "hallel",
|
||||||
|
"הארץ": "ha-aretz",
|
||||||
|
"הברית": "ha-berit",
|
||||||
|
"החדשה": "ha-chadashah",
|
||||||
|
"השמים": "ha-shamayim",
|
||||||
|
"השמיים": "ha-shamayim",
|
||||||
|
"ויאמר": "vayomer",
|
||||||
|
"יהוה": "Adonai",
|
||||||
|
"יוסף": "Yosef",
|
||||||
|
"יוחנן": "Yochanan",
|
||||||
|
"ישראל": "Yisrael",
|
||||||
|
"ישוע": "Yeshua",
|
||||||
|
"יצחק": "Yitzchak",
|
||||||
|
"יעקב": "Yaakov",
|
||||||
|
"ירושלים": "Yerushalayim",
|
||||||
|
"כהן": "kohen",
|
||||||
|
"כהנים": "kohanim",
|
||||||
|
"מים": "mayim",
|
||||||
|
"מות": "mavet",
|
||||||
|
"מושיע": "moshia",
|
||||||
|
"מלך": "melekh",
|
||||||
|
"מלכות": "malkhut",
|
||||||
|
"מרים": "Miriam",
|
||||||
|
"משה": "Moshe",
|
||||||
|
"משיח": "Mashiach",
|
||||||
|
"נביא": "navi",
|
||||||
|
"נביאים": "neviim",
|
||||||
|
"עם": "am",
|
||||||
|
"עולם": "olam",
|
||||||
|
"צדק": "tzedek",
|
||||||
|
"קדוש": "qadosh",
|
||||||
|
"קדושים": "qedoshim",
|
||||||
|
"קול": "qol",
|
||||||
|
"רוח": "ruach",
|
||||||
|
"שאול": "Shaul",
|
||||||
|
"שמים": "shamayim",
|
||||||
|
"שמיים": "shamayim",
|
||||||
|
"שמעון": "Shimon",
|
||||||
|
"שלום": "Shalom",
|
||||||
|
"תורה": "torah",
|
||||||
|
"חכמה": "chokhmah",
|
||||||
|
"חסד": "chesed",
|
||||||
|
"חיים": "chayim",
|
||||||
|
"חושך": "choshekh",
|
||||||
|
}
|
||||||
|
|
||||||
|
HEBREW_LETTERS = {
|
||||||
|
"א": "a",
|
||||||
|
"ב": "b",
|
||||||
|
"ג": "g",
|
||||||
|
"ד": "d",
|
||||||
|
"ה": "h",
|
||||||
|
"ו": "v",
|
||||||
|
"ז": "z",
|
||||||
|
"ח": "kh",
|
||||||
|
"ט": "t",
|
||||||
|
"י": "y",
|
||||||
|
"כ": "kh",
|
||||||
|
"ך": "kh",
|
||||||
|
"ל": "l",
|
||||||
|
"מ": "m",
|
||||||
|
"ם": "m",
|
||||||
|
"נ": "n",
|
||||||
|
"ן": "n",
|
||||||
|
"ס": "s",
|
||||||
|
"ע": "a",
|
||||||
|
"פ": "p",
|
||||||
|
"ף": "f",
|
||||||
|
"צ": "ts",
|
||||||
|
"ץ": "ts",
|
||||||
|
"ק": "q",
|
||||||
|
"ר": "r",
|
||||||
|
"ש": "sh",
|
||||||
|
"ת": "t",
|
||||||
|
}
|
||||||
|
|
||||||
|
HEBREW_VOWELISH = {"a", "e", "i", "o", "u"}
|
||||||
|
|
||||||
|
|
||||||
|
def _strip_hebrew_marks(value: str) -> str:
|
||||||
|
decomposed = unicodedata.normalize("NFD", value)
|
||||||
|
return "".join(
|
||||||
|
ch for ch in decomposed
|
||||||
|
if unicodedata.category(ch) != "Mn" and ch not in {"׳", "״", "־"}
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _fallback_hebrew_transliteration(word: str) -> str:
    """Spell out a Hebrew word letter by letter when no whole-word entry exists.

    Each character is mapped through ``HEBREW_LETTERS``; characters without an
    entry are skipped. Three positional overrides apply: a word-final he
    becomes "ah", and a non-initial yod or vav becomes "i" or "o". An "a" is
    then inserted between two adjacent sounds whenever the first does not end
    in, and the second does not start with, one of ``HEBREW_VOWELISH``.

    Args:
        word: Hebrew word, expected to be already stripped of marks.

    Returns:
        The Latin-letter approximation, or ``word`` unchanged when none of its
        characters is a known Hebrew letter.
    """
    last_position = len(word) - 1
    sounds: list[str] = []
    for position, letter in enumerate(word):
        base = HEBREW_LETTERS.get(letter)
        if base is None:
            # Not a letter we know how to voice; drop it.
            continue
        if letter == "ה" and position == last_position:
            sounds.append("ah")
        elif position > 0 and letter == "י":
            sounds.append("i")
        elif position > 0 and letter == "ו":
            sounds.append("o")
        else:
            sounds.append(base)

    if not sounds:
        return word

    pieces: list[str] = []
    # Pair every sound with its successor; the last one is paired with "".
    for current, following in zip(sounds, sounds[1:] + [""]):
        pieces.append(current)
        if not following:
            continue
        if current[-1:] in HEBREW_VOWELISH or following[:1] in HEBREW_VOWELISH:
            continue
        # Consonant meets consonant: add a filler vowel between them.
        pieces.append("a")
    return "".join(pieces)
|
||||||
|
|
||||||
|
|
||||||
|
def _transliterate_bare_hebrew_word(normalized: str) -> str:
    """Transliterate one mark-free Hebrew word (no maqaf, no niqqud)."""
    direct = HEBREW_WORD_TRANSLITERATIONS.get(normalized)
    if direct:
        return direct

    # Retry without a leading vav, spoken as the prefix "ve-".
    if normalized.startswith("ו") and len(normalized) > 1:
        rest = HEBREW_WORD_TRANSLITERATIONS.get(normalized[1:])
        if rest:
            return f"ve-{rest}"

    # Retry without a leading he, spoken as the prefix "ha-".
    if normalized.startswith("ה") and len(normalized) > 1:
        rest = HEBREW_WORD_TRANSLITERATIONS.get(normalized[1:])
        if rest:
            return f"ha-{rest}"

    return _fallback_hebrew_transliteration(normalized)


def _transliterate_hebrew_word(match: re.Match[str]) -> str:
    """Regex substitution callback: turn one Hebrew run into Latin letters.

    Lookup order for each word is: whole-word table, table entry after
    removing a leading vav ("ve-") or he ("ha-"), then the letter-by-letter
    fallback.

    The matched run is first split on maqaf (U+05BE). The Hebrew word regex
    covers the whole U+0590..U+05FF range, so maqaf-joined words arrive as a
    single match. Transliterating the fused string would bypass the
    whole-word table for every part, so each part is handled on its own and
    the results are joined with "-".

    Args:
        match: A match whose group 0 is a run of Hebrew-block characters.

    Returns:
        The transliteration, or the original matched text when the run
        contains no letters (only marks or punctuation).
    """
    original = match.group(0)

    spoken_parts: list[str] = []
    for part in original.split("\u05BE"):
        normalized = _strip_hebrew_marks(part)
        if normalized:
            spoken_parts.append(_transliterate_bare_hebrew_word(normalized))

    if not spoken_parts:
        return original
    return "-".join(spoken_parts)
|
||||||
|
|
||||||
|
|
||||||
|
def _prepare_synthesis_input(text: str, language: str, voice: str) -> tuple[str, str]:
    """Choose the text and voice that are handed to the synthesizer.

    When ``language`` starts with "he" (case-insensitive) and ``text``
    contains at least one Hebrew-block character, every Hebrew run is
    replaced by its transliteration and the voice is switched to "en-us".
    In every other case the inputs pass through untouched.

    Args:
        text: Text requested for synthesis.
        language: Requested language code.
        voice: Voice already resolved for the request.

    Returns:
        A ``(spoken_text, synth_voice)`` pair.
    """
    wants_hebrew = language.lower().startswith("he")
    if not wants_hebrew or HEBREW_CHAR_RE.search(text) is None:
        return text, voice
    return HEBREW_WORD_RE.sub(_transliterate_hebrew_word, text), "en-us"
|
||||||
|
|
||||||
|
|
||||||
def _resolve_voice(req: TtsRequest) -> str:
|
def _resolve_voice(req: TtsRequest) -> str:
|
||||||
if req.voice:
|
if req.voice:
|
||||||
return req.voice.strip()
|
return req.voice.strip()
|
||||||
@@ -115,14 +299,15 @@ def tts(req: TtsRequest) -> Response:
|
|||||||
raise HTTPException(status_code=400, detail="text is required")
|
raise HTTPException(status_code=400, detail="text is required")
|
||||||
|
|
||||||
voice = _resolve_voice(req)
|
voice = _resolve_voice(req)
|
||||||
|
spoken_text, synth_voice = _prepare_synthesis_input(req.text, req.language, voice)
|
||||||
args = [
|
args = [
|
||||||
"--stdout",
|
"--stdout",
|
||||||
"-v", voice,
|
"-v", synth_voice,
|
||||||
"-s", str(max(80, min(450, req.rate))),
|
"-s", str(max(80, min(450, req.rate))),
|
||||||
"-p", str(max(0, min(99, req.pitch))),
|
"-p", str(max(0, min(99, req.pitch))),
|
||||||
"-a", str(max(0, min(200, req.volume))),
|
"-a", str(max(0, min(200, req.volume))),
|
||||||
]
|
]
|
||||||
wav = _run_espeak(args, req.text.encode("utf-8"))
|
wav = _run_espeak(args, spoken_text.encode("utf-8"))
|
||||||
if not wav:
|
if not wav:
|
||||||
raise HTTPException(status_code=500, detail="espeak-ng returned empty stdout")
|
raise HTTPException(status_code=500, detail="espeak-ng returned empty stdout")
|
||||||
return Response(content=wav, media_type="audio/wav")
|
return Response(content=wav, media_type="audio/wav")
|
||||||
@@ -153,9 +338,9 @@ def tts(req: TtsRequest) -> Response:
|
|||||||
PHONEME_DURATION_RE = re.compile(r"^\s*\S+\s+(\d+)\s+", re.MULTILINE)
|
PHONEME_DURATION_RE = re.compile(r"^\s*\S+\s+(\d+)\s+", re.MULTILINE)
|
||||||
|
|
||||||
|
|
||||||
def _estimate_total_ms(req: TtsRequest, voice: str) -> int:
|
def _estimate_total_ms(req: TtsRequest, voice: str, spoken_text: str) -> int:
|
||||||
args = ["--pho", "--quiet", "-v", voice, "-s", str(req.rate)]
|
args = ["--pho", "--quiet", "-v", voice, "-s", str(req.rate)]
|
||||||
out = _run_espeak(args, req.text.encode("utf-8"))
|
out = _run_espeak(args, spoken_text.encode("utf-8"))
|
||||||
text = out.decode("utf-8", errors="replace")
|
text = out.decode("utf-8", errors="replace")
|
||||||
total = 0
|
total = 0
|
||||||
for match in PHONEME_DURATION_RE.finditer(text):
|
for match in PHONEME_DURATION_RE.finditer(text):
|
||||||
@@ -175,7 +360,8 @@ def timings(req: TtsRequest):
|
|||||||
if not req.text.strip():
|
if not req.text.strip():
|
||||||
raise HTTPException(status_code=400, detail="text is required")
|
raise HTTPException(status_code=400, detail="text is required")
|
||||||
voice = _resolve_voice(req)
|
voice = _resolve_voice(req)
|
||||||
total_ms = _estimate_total_ms(req, voice)
|
spoken_text, synth_voice = _prepare_synthesis_input(req.text, req.language, voice)
|
||||||
|
total_ms = _estimate_total_ms(req, synth_voice, spoken_text)
|
||||||
|
|
||||||
# Distribute total_ms across whitespace-split words proportional to
|
# Distribute total_ms across whitespace-split words proportional to
|
||||||
# character count. Punctuation-only tokens are folded into the previous
|
# character count. Punctuation-only tokens are folded into the previous
|
||||||
@@ -204,7 +390,7 @@ def timings(req: TtsRequest):
|
|||||||
{
|
{
|
||||||
"text": req.text,
|
"text": req.text,
|
||||||
"language": req.language,
|
"language": req.language,
|
||||||
"voice": voice,
|
"voice": synth_voice,
|
||||||
"words": out_words,
|
"words": out_words,
|
||||||
"durationMs": total_ms,
|
"durationMs": total_ms,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -359,7 +359,7 @@ spec:
|
|||||||
runAsUser: 1654
|
runAsUser: 1654
|
||||||
containers:
|
containers:
|
||||||
- name: biblical-tts
|
- name: biblical-tts
|
||||||
image: localhost/fc-biblical-tts:v1
|
image: localhost/fc-biblical-tts:v20260506-hebrew-translit
|
||||||
imagePullPolicy: Never
|
imagePullPolicy: Never
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 10402
|
- containerPort: 10402
|
||||||
@@ -532,7 +532,7 @@ spec:
|
|||||||
fsGroupChangePolicy: OnRootMismatch
|
fsGroupChangePolicy: OnRootMismatch
|
||||||
containers:
|
containers:
|
||||||
- name: web
|
- name: web
|
||||||
image: localhost/fc-ttsreader-web:v202604301236-b6ca2d5
|
image: localhost/fc-ttsreader-web:v20260506-phase6
|
||||||
imagePullPolicy: Never
|
imagePullPolicy: Never
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 5217
|
- containerPort: 5217
|
||||||
@@ -568,6 +568,14 @@ spec:
|
|||||||
value: "http://ttsreader-kokoro.fc-ttsreader.svc.cluster.local.:8880"
|
value: "http://ttsreader-kokoro.fc-ttsreader.svc.cluster.local.:8880"
|
||||||
- name: TtsReader__Kokoro__TimeoutSeconds
|
- name: TtsReader__Kokoro__TimeoutSeconds
|
||||||
value: "120"
|
value: "120"
|
||||||
|
- name: FlowerCore__Tts__BiblicalTts__Enabled
|
||||||
|
value: "true"
|
||||||
|
- name: FlowerCore__Tts__BiblicalTts__BaseUrl
|
||||||
|
value: "http://ttsreader-biblical.fc-ttsreader.svc.cluster.local.:10402"
|
||||||
|
- name: FlowerCore__Tts__BiblicalTts__TimeoutSeconds
|
||||||
|
value: "60"
|
||||||
|
- name: FlowerCore__Tts__BiblicalTts__DefaultLanguage
|
||||||
|
value: "grc"
|
||||||
- name: Speech__Alignment__Enabled
|
- name: Speech__Alignment__Enabled
|
||||||
# Cluster-native faster-whisper (Lane F, 2026-04-25). The
|
# Cluster-native faster-whisper (Lane F, 2026-04-25). The
|
||||||
# ttsreader-align deployment in this manifest wraps
|
# ttsreader-align deployment in this manifest wraps
|
||||||
@@ -603,6 +611,8 @@ spec:
|
|||||||
# the writable PVC mount.
|
# the writable PVC mount.
|
||||||
- name: TtsReader__Preview__CacheDirectory
|
- name: TtsReader__Preview__CacheDirectory
|
||||||
value: "/data/voice-previews"
|
value: "/data/voice-previews"
|
||||||
|
- name: TtsReader__VoiceLibrary__ReferenceClip__Directory
|
||||||
|
value: "/data/voice-reference-clips"
|
||||||
# Sprint E XXL Phase 4γ — content-addressed CDN bundle dir for
|
# Sprint E XXL Phase 4γ — content-addressed CDN bundle dir for
|
||||||
# POST /api/v1/render. Default "wwwroot/cdn" resolves under the
|
# POST /api/v1/render. Default "wwwroot/cdn" resolves under the
|
||||||
# read-only app filesystem, so pin to the writable PVC mount
|
# read-only app filesystem, so pin to the writable PVC mount
|
||||||
|
|||||||
47
apps/fc-updater/README.md
Normal file
47
apps/fc-updater/README.md
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
# fc-updater — Update Center GitOps adoption
|
||||||
|
|
||||||
|
**Status:** adopted into `bluejay-infra` on 2026-05-06. The live ArgoCD
|
||||||
|
Application is `infra-fc-updater`, generated by the `bluejay-infra`
|
||||||
|
ApplicationSet with automated sync, `prune: true`, and `selfHeal: true`.
|
||||||
|
|
||||||
|
## Managed manifest set
|
||||||
|
|
||||||
|
`apps/fc-updater/fc-updater.yaml` manages:
|
||||||
|
|
||||||
|
- `Namespace/fc-updater`
|
||||||
|
- `PersistentVolumeClaim/updatecenter-data`
|
||||||
|
- `Deployment/updatecenter-web`
|
||||||
|
- `Service/updatecenter-web`
|
||||||
|
- `Certificate/updatecenter-web-tls`
|
||||||
|
- `Certificate/updatecenter-web-internal-tls`
|
||||||
|
- `IngressRoute/updatecenter-web`
|
||||||
|
- `IngressRoute/updatecenter-web-internal`
|
||||||
|
- `IngressRoute/updatecenter-web-public`
|
||||||
|
|
||||||
|
The Deployment intentionally sets `revisionHistoryLimit: 3` and
|
||||||
|
`strategy.type: Recreate`. The service is singleton + SQLite/local bundle
|
||||||
|
storage on `PersistentVolumeClaim/updatecenter-data`, pinned to
|
||||||
|
`rke2-server`.
|
||||||
|
|
||||||
|
## Runtime dependencies intentionally not stored here
|
||||||
|
|
||||||
|
These live Secrets are pre-existing runtime material and are not committed to
|
||||||
|
Git:
|
||||||
|
|
||||||
|
- `updater-bootstrap-auth`
|
||||||
|
- `updater-signing`
|
||||||
|
- `updater-webhooks`
|
||||||
|
- `cf-origin-flowercore-io`
|
||||||
|
|
||||||
|
Rotate the Cloudflare Origin Certificate through
|
||||||
|
`FlowerCore.Notes/docs/standards/code-signing-rotation-runbook.md`; the
|
||||||
|
shared origin cert must exist in every namespace that serves a
|
||||||
|
`*.flowercore.io` public IngressRoute.
|
||||||
|
|
||||||
|
## Verification
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
kubectl.exe --kubeconfig C:\Users\AndrewStoltz\.kube\rke2.yaml -n argocd get application infra-fc-updater
|
||||||
|
kubectl.exe --kubeconfig C:\Users\AndrewStoltz\.kube\rke2.yaml -n fc-updater get deploy,svc,ingressroute,certificate,pvc
|
||||||
|
curl.exe -sk https://update.flowercore.io/api/v1/manifests/_schema
|
||||||
|
```
|
||||||
269
apps/fc-updater/fc-updater.yaml
Normal file
269
apps/fc-updater/fc-updater.yaml
Normal file
@@ -0,0 +1,269 @@
|
|||||||
|
# FlowerCore Update Center
|
||||||
|
# GitOps adoption of the live fc-updater namespace after PUB-1/PUB-3.
|
||||||
|
# Runtime credentials remain in existing K8s Secrets; do not store them here.
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Namespace
|
||||||
|
metadata:
|
||||||
|
name: fc-updater
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: updatecenter-data
|
||||||
|
namespace: fc-updater
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: updatecenter-web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteOnce
|
||||||
|
storageClassName: longhorn
|
||||||
|
volumeMode: Filesystem
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
# Sized for fleet bundle storage (LocalFsBundleStore.MaxTotalBytes
|
||||||
|
# soft cap at 25 GiB per project_uc_remaining_4_apps_signed_2026_05_06).
|
||||||
|
# Mike Bundle alone is ~5.1 GiB; cluster live capacity is already
|
||||||
|
# 20 GiB after a manual expand. PVCs cannot shrink, so git must track
|
||||||
|
# at least the live size to avoid the OutOfSync loop.
|
||||||
|
storage: 25Gi
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: updatecenter-web
|
||||||
|
namespace: fc-updater
|
||||||
|
labels:
|
||||||
|
app: updatecenter-web
|
||||||
|
app.kubernetes.io/name: updatecenter-web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
revisionHistoryLimit: 3
|
||||||
|
strategy:
|
||||||
|
# SQLite + local bundle storage live on a single RWO PVC. Recreate avoids
|
||||||
|
# two pods overlapping the same write path during future image bumps.
|
||||||
|
type: Recreate
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: updatecenter-web
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: updatecenter-web
|
||||||
|
spec:
|
||||||
|
nodeName: rke2-server
|
||||||
|
containers:
|
||||||
|
- name: web
|
||||||
|
image: localhost/fc-updater-web:v20260508-pub3-deepening-2bdf108
|
||||||
|
imagePullPolicy: Never
|
||||||
|
ports:
|
||||||
|
- containerPort: 8080
|
||||||
|
name: http
|
||||||
|
env:
|
||||||
|
- name: ASPNETCORE_URLS
|
||||||
|
value: http://+:8080
|
||||||
|
- name: FlowerCore__Updater__Database__Provider
|
||||||
|
value: sqlite
|
||||||
|
- name: FlowerCore__Updater__Database__ConnectionString
|
||||||
|
value: Data Source=/data/updatecenter.db
|
||||||
|
- name: FlowerCore__Updater__BundleStorage__LocalFs__RootDirectory
|
||||||
|
value: /data/bundles
|
||||||
|
- name: FlowerCore__Updater__PublicShares__RequirePublicVisibilityOnPublicHosts
|
||||||
|
value: "true"
|
||||||
|
- name: FlowerCore__Updater__PublicShares__Links__0__Code
|
||||||
|
value: 8f3c2a9e7d41
|
||||||
|
- name: FlowerCore__Updater__PublicShares__Links__0__AppId
|
||||||
|
value: flowercore.faith-ai-mike
|
||||||
|
- name: FlowerCore__Updater__PublicShares__Links__0__Channel
|
||||||
|
value: stable
|
||||||
|
- name: FlowerCore__Updater__PublicShares__Links__0__RuntimeId
|
||||||
|
value: win-x64
|
||||||
|
- name: FlowerCore__Updater__PublicShares__Links__0__DisplayName
|
||||||
|
value: Faith AI Mike Edition
|
||||||
|
- name: FlowerCore__Updater__PublicShares__Links__0__Headline
|
||||||
|
value: Faith AI Mike Edition
|
||||||
|
- name: FlowerCore__Updater__PublicShares__Links__0__Description
|
||||||
|
value: Private release link for Mike's Faith AI bundle.
|
||||||
|
- name: FlowerCore__Updater__Auth__Bootstrap__Enabled
|
||||||
|
value: "true"
|
||||||
|
- name: FlowerCore__Updater__Auth__Bootstrap__Username
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: updater-bootstrap-auth
|
||||||
|
key: username
|
||||||
|
- name: FlowerCore__Updater__Auth__Bootstrap__Password
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: updater-bootstrap-auth
|
||||||
|
key: password
|
||||||
|
- name: FlowerCore__Updater__Auth__Bootstrap__SigningKey
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: updater-bootstrap-auth
|
||||||
|
key: signing-key
|
||||||
|
- name: FlowerCore__Updater__Signing__AutoSignOnPublish
|
||||||
|
value: "true"
|
||||||
|
- name: FlowerCore__Updater__Signing__RequireSignatureOnPublish
|
||||||
|
value: "true"
|
||||||
|
- name: FlowerCore__Updater__Signing__PfxBase64
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: updater-signing
|
||||||
|
key: pfx-base64
|
||||||
|
- name: FlowerCore__Updater__Signing__PfxPassword
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: updater-signing
|
||||||
|
key: pfx-password
|
||||||
|
- name: FlowerCore__Updater__Signing__OpItemReference
|
||||||
|
value: op://FlowerCore/step-ca-codesign
|
||||||
|
- name: FlowerCore__Updater__Signing__TrustAnchorPath
|
||||||
|
value: /etc/flowercore-updater/signing/root-ca.pem
|
||||||
|
- name: FlowerCore__Updater__GitHub__Token
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: updater-webhooks
|
||||||
|
key: github-token
|
||||||
|
- name: FlowerCore__Updater__GitHub__WebhookSecret
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: updater-webhooks
|
||||||
|
key: github-webhook-secret
|
||||||
|
- name: FlowerCore__Updater__Gitea__Token
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: updater-webhooks
|
||||||
|
key: gitea-token
|
||||||
|
- name: FlowerCore__Updater__Gitea__WebhookSecret
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: updater-webhooks
|
||||||
|
key: gitea-webhook-secret
|
||||||
|
readinessProbe:
|
||||||
|
tcpSocket:
|
||||||
|
port: http
|
||||||
|
initialDelaySeconds: 10
|
||||||
|
periodSeconds: 15
|
||||||
|
livenessProbe:
|
||||||
|
tcpSocket:
|
||||||
|
port: http
|
||||||
|
initialDelaySeconds: 30
|
||||||
|
periodSeconds: 30
|
||||||
|
volumeMounts:
|
||||||
|
- name: data
|
||||||
|
mountPath: /data
|
||||||
|
- name: signing
|
||||||
|
mountPath: /etc/flowercore-updater/signing
|
||||||
|
readOnly: true
|
||||||
|
volumes:
|
||||||
|
- name: data
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: updatecenter-data
|
||||||
|
- name: signing
|
||||||
|
secret:
|
||||||
|
secretName: updater-signing
|
||||||
|
items:
|
||||||
|
- key: root-ca.pem
|
||||||
|
path: root-ca.pem
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: updatecenter-web
|
||||||
|
namespace: fc-updater
|
||||||
|
labels:
|
||||||
|
app: updatecenter-web
|
||||||
|
app.kubernetes.io/name: updatecenter-web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: updatecenter-web
|
||||||
|
ports:
|
||||||
|
- name: http
|
||||||
|
port: 8080
|
||||||
|
targetPort: http
|
||||||
|
---
|
||||||
|
apiVersion: cert-manager.io/v1
|
||||||
|
kind: Certificate
|
||||||
|
metadata:
|
||||||
|
name: updatecenter-web-tls
|
||||||
|
namespace: fc-updater
|
||||||
|
spec:
|
||||||
|
secretName: updatecenter-web-tls
|
||||||
|
issuerRef:
|
||||||
|
name: step-ca-acme
|
||||||
|
kind: ClusterIssuer
|
||||||
|
dnsNames:
|
||||||
|
- updatecenter.iamworkin.lan
|
||||||
|
- updates.iamworkin.lan
|
||||||
|
---
|
||||||
|
apiVersion: cert-manager.io/v1
|
||||||
|
kind: Certificate
|
||||||
|
metadata:
|
||||||
|
name: updatecenter-web-internal-tls
|
||||||
|
namespace: fc-updater
|
||||||
|
spec:
|
||||||
|
secretName: updatecenter-web-internal-tls
|
||||||
|
issuerRef:
|
||||||
|
name: step-ca-acme
|
||||||
|
kind: ClusterIssuer
|
||||||
|
dnsNames:
|
||||||
|
- updatecenter-internal.iamworkin.lan
|
||||||
|
---
|
||||||
|
apiVersion: traefik.io/v1alpha1
|
||||||
|
kind: IngressRoute
|
||||||
|
metadata:
|
||||||
|
name: updatecenter-web
|
||||||
|
namespace: fc-updater
|
||||||
|
spec:
|
||||||
|
entryPoints:
|
||||||
|
- web
|
||||||
|
- websecure
|
||||||
|
routes:
|
||||||
|
- match: (Host(`updatecenter.iamworkin.lan`) || Host(`updates.iamworkin.lan`)) && (Method(`GET`) || Method(`HEAD`) || Method(`POST`) || Method(`OPTIONS`))
|
||||||
|
kind: Rule
|
||||||
|
services:
|
||||||
|
- name: updatecenter-web
|
||||||
|
port: 8080
|
||||||
|
tls:
|
||||||
|
secretName: updatecenter-web-tls
|
||||||
|
---
|
||||||
|
apiVersion: traefik.io/v1alpha1
|
||||||
|
kind: IngressRoute
|
||||||
|
metadata:
|
||||||
|
name: updatecenter-web-internal
|
||||||
|
namespace: fc-updater
|
||||||
|
spec:
|
||||||
|
entryPoints:
|
||||||
|
- web
|
||||||
|
- websecure
|
||||||
|
routes:
|
||||||
|
- match: Host(`updatecenter-internal.iamworkin.lan`)
|
||||||
|
kind: Rule
|
||||||
|
services:
|
||||||
|
- name: updatecenter-web
|
||||||
|
port: 8080
|
||||||
|
tls:
|
||||||
|
secretName: updatecenter-web-internal-tls
|
||||||
|
---
|
||||||
|
apiVersion: traefik.io/v1alpha1
|
||||||
|
kind: IngressRoute
|
||||||
|
metadata:
|
||||||
|
name: updatecenter-web-public
|
||||||
|
namespace: fc-updater
|
||||||
|
spec:
|
||||||
|
entryPoints:
|
||||||
|
- websecure
|
||||||
|
routes:
|
||||||
|
- match: (Host(`update.flowercore.io`) || Host(`updates.flowercore.io`)) && (Method(`GET`) || Method(`HEAD`) || Method(`POST`) || Method(`OPTIONS`))
|
||||||
|
kind: Rule
|
||||||
|
services:
|
||||||
|
- name: updatecenter-web
|
||||||
|
port: 8080
|
||||||
|
tls:
|
||||||
|
secretName: cf-origin-flowercore-io
|
||||||
7
apps/fc-updater/kustomization.yaml
Normal file
7
apps/fc-updater/kustomization.yaml
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
# ArgoCD's bluejay-infra ApplicationSet uses a directory generator and does
|
||||||
|
# not require kustomization.yaml. Keep this anyway as the manifest inventory
|
||||||
|
# and for local `kubectl kustomize apps/fc-updater` previews.
|
||||||
|
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||||
|
kind: Kustomization
|
||||||
|
resources:
|
||||||
|
- fc-updater.yaml
|
||||||
@@ -1,5 +1,10 @@
|
|||||||
# FlowerCore Tenant — flowercore.io (main brand)
|
# FlowerCore Tenant — retired flowercore.io placeholder.
|
||||||
# Public-facing placeholder landing page served by nginx
|
#
|
||||||
|
# Public flowercore.io/www.flowercore.io routing is now owned by
|
||||||
|
# apps/fc-landing/fc-landing.yaml. This tenant placeholder remains available
|
||||||
|
# only as an in-cluster service; do not create a duplicate public
|
||||||
|
# IngressRoute here because it competes with fc-landing and requires a
|
||||||
|
# namespace-local cf-origin-flowercore-io Secret.
|
||||||
# ArgoCD managed - BlueJay Lab
|
# ArgoCD managed - BlueJay Lab
|
||||||
---
|
---
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
@@ -10,12 +15,6 @@ metadata:
|
|||||||
app.kubernetes.io/part-of: bluejay-infra
|
app.kubernetes.io/part-of: bluejay-infra
|
||||||
flowercore.io/tenant: flowercore
|
flowercore.io/tenant: flowercore
|
||||||
---
|
---
|
||||||
# NOTE: The existing cf-origin-flowercore-io secret (covering *.flowercore.io)
|
|
||||||
# must be copied into this namespace. It already exists in other namespaces.
|
|
||||||
# Copy with: kubectl get secret cf-origin-flowercore-io -n fc-system -o yaml \
|
|
||||||
# | sed 's/namespace: .*/namespace: tenant-flowercore/' \
|
|
||||||
# | kubectl apply -f -
|
|
||||||
---
|
|
||||||
# Landing page HTML
|
# Landing page HTML
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: ConfigMap
|
kind: ConfigMap
|
||||||
@@ -311,22 +310,3 @@ spec:
|
|||||||
- port: 80
|
- port: 80
|
||||||
targetPort: 80
|
targetPort: 80
|
||||||
name: http
|
name: http
|
||||||
---
|
|
||||||
# Traefik IngressRoute — public via Cloudflare
|
|
||||||
# Uses existing cf-origin-flowercore-io cert (must be copied to this namespace)
|
|
||||||
apiVersion: traefik.io/v1alpha1
|
|
||||||
kind: IngressRoute
|
|
||||||
metadata:
|
|
||||||
name: flowercore-web
|
|
||||||
namespace: tenant-flowercore
|
|
||||||
spec:
|
|
||||||
entryPoints:
|
|
||||||
- websecure
|
|
||||||
routes:
|
|
||||||
- match: Host(`flowercore.io`) || Host(`www.flowercore.io`)
|
|
||||||
kind: Rule
|
|
||||||
services:
|
|
||||||
- name: flowercore-web
|
|
||||||
port: 80
|
|
||||||
tls:
|
|
||||||
secretName: cf-origin-flowercore-io
|
|
||||||
|
|||||||
@@ -46,7 +46,7 @@ spec:
|
|||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: intranet-web
|
- name: intranet-web
|
||||||
image: localhost/fc-intranet-web:v20260429-1646
|
image: localhost/fc-intranet-web:v20260508-brochure-w1
|
||||||
imagePullPolicy: Never
|
imagePullPolicy: Never
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 5300
|
- containerPort: 5300
|
||||||
|
|||||||
@@ -241,8 +241,12 @@ spec:
|
|||||||
kind: ClusterIssuer
|
kind: ClusterIssuer
|
||||||
dnsNames:
|
dnsNames:
|
||||||
- knowledge.iamworkin.lan
|
- knowledge.iamworkin.lan
|
||||||
duration: 2160h # 90d
|
# step-ca ACME caps lifetime at 30d; the 90d request was silently capped, which
|
||||||
renewBefore: 720h # 30d
|
# made renewBefore=cert-lifetime → perpetual renewal loop (10888+ CRs
|
||||||
|
# in 18h on 2026-05-07). Match working 720h/240h pattern from other
|
||||||
|
# FC services.
|
||||||
|
duration: 720h # 30d (step-ca cap)
|
||||||
|
renewBefore: 240h # 10d
|
||||||
---
|
---
|
||||||
apiVersion: traefik.io/v1alpha1
|
apiVersion: traefik.io/v1alpha1
|
||||||
kind: IngressRoute
|
kind: IngressRoute
|
||||||
|
|||||||
487
apps/kubevirt-vms/ci1.yaml
Normal file
487
apps/kubevirt-vms/ci1.yaml
Normal file
@@ -0,0 +1,487 @@
|
|||||||
|
# =============================================================================
|
||||||
|
# ci1 — Windows Server 2025 KubeVirt VM (GitHub Actions Self-Hosted Runner)
|
||||||
|
# =============================================================================
|
||||||
|
# Purpose: dedicated CI runner for FlowerCore.Updater Sandbox E2E nightly +
|
||||||
|
# future fleet WPF AAT lanes. Replaces the never-registered
|
||||||
|
# `bluejay-ws-sandbox-1` runner placeholder. Andrew explicitly does NOT want
|
||||||
|
# BLUEJAY-WS registered as a runner (workstation has personal/operator state).
|
||||||
|
#
|
||||||
|
# Storage layout (2026-05-08):
|
||||||
|
# * ISO is now sourced from Synology NFS (Path B) — see
|
||||||
|
# win2025-iso-nfs-pv.yaml. The Longhorn Filesystem PVC
|
||||||
|
# `windows-server-2025-iso` below is RETAINED but UNUSED so the prior
|
||||||
|
# CDI upload state is preserved as a fallback (and so ArgoCD doesn't
|
||||||
|
# prune it on this commit). It can be deleted in a follow-up commit
|
||||||
|
# after the NFS path is proven on a successful Windows install.
|
||||||
|
#
|
||||||
|
# Status (2026-05-08): LIVE — Phase 1 prereqs satisfied:
|
||||||
|
# * Multus CNI v4.2.2 thick-plugin DaemonSet running on all 3 RKE2 nodes
|
||||||
|
# (apps/multus/multus.yaml; ApplicationSet `infra-multus` Synced/Healthy)
|
||||||
|
# * CDI v1.65.0 operator + CR Deployed (apps/cdi/; ApplicationSet
|
||||||
|
# `infra-cdi` Synced/Healthy; uploadproxy reachable via kubectl port-forward)
|
||||||
|
# * Windows Server 2025 ISO uploaded via CDI virtctl image-upload to
|
||||||
|
# PVC windows-server-2025-iso (7.7 GiB → 10Gi PVC, Bound, Upload Complete)
|
||||||
|
# * Local Administrator password generated, stored in 1Password vault
|
||||||
|
# IAmWorkin (qaphopopkryhbg353ukzhhuqoq) item id h3ix4mgfk65gmkcmvh6ly3d3hu
|
||||||
|
# * NetworkAttachmentDefinition prod-vlan57 registered (apps/kubevirt-vms/
|
||||||
|
# prod-vlan57-nad.yaml). VM still uses pod-network masquerade until Phase 1.5
|
||||||
|
# host bridge work lands (Puppet br-prod + enp86s0.57); switching is a
|
||||||
|
# one-line YAML edit + git push.
|
||||||
|
#
|
||||||
|
# See docs/infrastructure/windows-server-build-runner-plan.md "Phase 1 readiness gate".
|
||||||
|
#
|
||||||
|
# Network choice in this draft: **pod-network fallback** (Calico default).
|
||||||
|
# Outbound-only is fine for the Updater Sandbox E2E runner workload (the runner
|
||||||
|
# polls GitHub Actions over HTTPS; no inbound listener needed). Switch to a
|
||||||
|
# Multus PROD VLAN NetworkAttachmentDefinition (prod-vlan57) once the Phase 1.5 host bridge work lands and the
|
||||||
|
# operator wants L2 access from `ci1` to other PROD VLAN services.
|
||||||
|
#
|
||||||
|
# Sizing: 8 vCPU / 16 GB RAM / 200 GB disk on Longhorn (default storageClass).
|
||||||
|
# Capacity check 2026-05-08: each RKE2 node has 16 vCPU / ~64Gi allocatable;
|
||||||
|
# 8 vCPU is 50% of one node's allocatable vCPU (~17% of the 3-node cluster total); it still fits on a single node.
|
||||||
|
#
|
||||||
|
# Apply (after operator approval + ISO loaded):
|
||||||
|
# kubectl --kubeconfig $env:USERPROFILE\.kube\rke2.yaml apply -f apps/kubevirt-vms/ci1.yaml
|
||||||
|
#
|
||||||
|
# Connect to console for Windows install:
|
||||||
|
# virtctl --kubeconfig $env:USERPROFILE\.kube\rke2.yaml vnc ci1 -n kubevirt-vms
|
||||||
|
# (Or via Guacamole once a connection profile is added.)
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Namespace
|
||||||
|
metadata:
|
||||||
|
name: kubevirt-vms
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/part-of: kubevirt-stack
|
||||||
|
pod-security.kubernetes.io/enforce: privileged
|
||||||
|
|
||||||
|
---
|
||||||
|
# ISO PVC — populated via CDI virtctl image-upload (CDI is now installed).
|
||||||
|
#
|
||||||
|
# **Volume mode (2026-05-08 status):** Filesystem-mode PVC. A migration to
|
||||||
|
# `volumeMode: Block` via DataVolume was attempted to address an OVMF SATA
|
||||||
|
# CDROM read timeout, but CDI v1.65.0's upload-target pod runs as uid 107
|
||||||
|
# with `capabilities.drop: [ALL]` and cannot open the underlying block
|
||||||
|
# device (`blockdev: cannot open /dev/cdi-block-volume: Permission denied`).
|
||||||
|
# Reverted to Filesystem PVC pending one of:
|
||||||
|
# - CDI deployment override granting CAP_SYS_RAWIO to upload pod
|
||||||
|
# - Pre-populated PVC via privileged init pod that dd's the ISO directly
|
||||||
|
# - Migration to a different storage class that exposes block devices
|
||||||
|
# differently (e.g. iSCSI, where Longhorn's CSI mount path may behave
|
||||||
|
# differently)
|
||||||
|
#
|
||||||
|
# Population workflow (this PVC, Filesystem mode):
|
||||||
|
# 1. virtctl --kubeconfig $env:USERPROFILE\.kube\rke2.yaml image-upload pvc \
|
||||||
|
# windows-server-2025-iso -n kubevirt-vms \
|
||||||
|
# --image-path "$env:USERPROFILE\Downloads\en-us_windows_server_2025_updated_march_2026_x64_dvd_8e06425a.iso" \
|
||||||
|
# --size 10Gi --storage-class longhorn --access-mode ReadWriteOnce \
|
||||||
|
# --uploadproxy-url https://localhost:8443 --insecure
|
||||||
|
# (--uploadproxy-url uses port-forward in practice: `kubectl port-forward
|
||||||
|
# -n cdi service/cdi-uploadproxy 8443:443 &` first.)
|
||||||
|
#
|
||||||
|
# **Open boot issue:** even with the ISO at bootOrder:1, OVMF console showed:
|
||||||
|
# BdsDxe: starting Boot0001 "UEFI QEMU DVD-ROM QM00001 " from ... Sata(...)
|
||||||
|
# BdsDxe: failed to start Boot0001 ... Time out
|
||||||
|
# Diagnosis confirmed PVC content IS a valid bootable ISO9660 image — the
|
||||||
|
# timeout is in OVMF reading from the SATA-CDROM-backed-by-filesystem-PVC.
|
||||||
|
# Block mode would likely fix it; see CDI permission issue above.
|
||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: windows-server-2025-iso
|
||||||
|
namespace: kubevirt-vms
|
||||||
|
labels:
|
||||||
|
app: ci-runner
|
||||||
|
flowercore.io/managed-by: bluejay-infra
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteOnce # Bump to ReadOnlyMany after population for multi-VM use
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 10Gi # Server 2025 ISO is 7.7GB; 10Gi for headroom
|
||||||
|
storageClassName: longhorn
|
||||||
|
|
||||||
|
---
|
||||||
|
# Root disk PVC — empty 200Gi volume that Windows installs into.
|
||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: ci1-rootdisk
|
||||||
|
namespace: kubevirt-vms
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteOnce
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 200Gi
|
||||||
|
storageClassName: longhorn
|
||||||
|
|
||||||
|
---
|
||||||
|
# Sysprep ConfigMap — autounattend.xml for hands-off Windows install.
|
||||||
|
# Sets local Administrator password (encoded Value is already filled in; the 1Password item named in the XML comment is the source of truth), enables RDP,
|
||||||
|
# enables WinRM, sets hostname, and leaves networking on the Windows default (DHCP; no network component is configured).
|
||||||
|
# The ISO + VirtIO drivers handle the rest.
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: ci1-autounattend
|
||||||
|
namespace: kubevirt-vms
|
||||||
|
data:
|
||||||
|
autounattend.xml: |
|
||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<unattend xmlns="urn:schemas-microsoft-com:unattend">
|
||||||
|
|
||||||
|
<!-- Pass 1: WindowsPE — Disk setup and VirtIO driver injection -->
|
||||||
|
<settings pass="windowsPE">
|
||||||
|
<component name="Microsoft-Windows-International-Core-WinPE"
|
||||||
|
processorArchitecture="amd64"
|
||||||
|
publicKeyToken="31bf3856ad364e35"
|
||||||
|
language="neutral" versionScope="nonSxS">
|
||||||
|
<SetupUILanguage>
|
||||||
|
<UILanguage>en-US</UILanguage>
|
||||||
|
</SetupUILanguage>
|
||||||
|
<InputLocale>en-US</InputLocale>
|
||||||
|
<SystemLocale>en-US</SystemLocale>
|
||||||
|
<UILanguage>en-US</UILanguage>
|
||||||
|
<UserLocale>en-US</UserLocale>
|
||||||
|
</component>
|
||||||
|
|
||||||
|
<component name="Microsoft-Windows-PnpCustomizationsWinPE"
|
||||||
|
processorArchitecture="amd64"
|
||||||
|
publicKeyToken="31bf3856ad364e35"
|
||||||
|
language="neutral" versionScope="nonSxS">
|
||||||
|
<DriverPaths>
|
||||||
|
<PathAndCredentials wcm:action="add" wcm:keyValue="1">
|
||||||
|
<Path>E:\amd64\2k25</Path>
|
||||||
|
</PathAndCredentials>
|
||||||
|
</DriverPaths>
|
||||||
|
</component>
|
||||||
|
|
||||||
|
<component name="Microsoft-Windows-Setup"
|
||||||
|
processorArchitecture="amd64"
|
||||||
|
publicKeyToken="31bf3856ad364e35"
|
||||||
|
language="neutral" versionScope="nonSxS">
|
||||||
|
<DiskConfiguration>
|
||||||
|
<Disk wcm:action="add">
|
||||||
|
<DiskID>0</DiskID>
|
||||||
|
<WillWipeDisk>true</WillWipeDisk>
|
||||||
|
<CreatePartitions>
|
||||||
|
<CreatePartition wcm:action="add">
|
||||||
|
<Order>1</Order>
|
||||||
|
<Size>260</Size>
|
||||||
|
<Type>EFI</Type>
|
||||||
|
</CreatePartition>
|
||||||
|
<CreatePartition wcm:action="add">
|
||||||
|
<Order>2</Order>
|
||||||
|
<Size>128</Size>
|
||||||
|
<Type>MSR</Type>
|
||||||
|
</CreatePartition>
|
||||||
|
<CreatePartition wcm:action="add">
|
||||||
|
<Order>3</Order>
|
||||||
|
<Extend>true</Extend>
|
||||||
|
<Type>Primary</Type>
|
||||||
|
</CreatePartition>
|
||||||
|
</CreatePartitions>
|
||||||
|
<ModifyPartitions>
|
||||||
|
<ModifyPartition wcm:action="add">
|
||||||
|
<Order>1</Order>
|
||||||
|
<PartitionID>1</PartitionID>
|
||||||
|
<Format>FAT32</Format>
|
||||||
|
<Label>EFI</Label>
|
||||||
|
</ModifyPartition>
|
||||||
|
<ModifyPartition wcm:action="add">
|
||||||
|
<Order>2</Order>
|
||||||
|
<PartitionID>2</PartitionID>
|
||||||
|
</ModifyPartition>
|
||||||
|
<ModifyPartition wcm:action="add">
|
||||||
|
<Order>3</Order>
|
||||||
|
<PartitionID>3</PartitionID>
|
||||||
|
<Format>NTFS</Format>
|
||||||
|
<Label>Windows</Label>
|
||||||
|
</ModifyPartition>
|
||||||
|
</ModifyPartitions>
|
||||||
|
</Disk>
|
||||||
|
</DiskConfiguration>
|
||||||
|
|
||||||
|
<ImageInstall>
|
||||||
|
<OSImage>
|
||||||
|
<InstallTo>
|
||||||
|
<DiskID>0</DiskID>
|
||||||
|
<PartitionID>3</PartitionID>
|
||||||
|
</InstallTo>
|
||||||
|
<!-- Index 2 = Standard Desktop Experience. Use 4 for Datacenter Desktop. -->
|
||||||
|
<InstallFrom>
|
||||||
|
<MetaData wcm:action="add">
|
||||||
|
<Key>/IMAGE/INDEX</Key>
|
||||||
|
<Value>2</Value>
|
||||||
|
</MetaData>
|
||||||
|
</InstallFrom>
|
||||||
|
</OSImage>
|
||||||
|
</ImageInstall>
|
||||||
|
|
||||||
|
<UserData>
|
||||||
|
<AcceptEula>true</AcceptEula>
|
||||||
|
<FullName>FlowerCore CI Runner</FullName>
|
||||||
|
<Organization>FlowerCore</Organization>
|
||||||
|
<!-- Eval install — no product key needed for 180-day evaluation -->
|
||||||
|
</UserData>
|
||||||
|
</component>
|
||||||
|
</settings>
|
||||||
|
|
||||||
|
<!-- Pass 4: Specialize — Hostname, RDP, WinRM -->
|
||||||
|
<settings pass="specialize">
|
||||||
|
<component name="Microsoft-Windows-Shell-Setup"
|
||||||
|
processorArchitecture="amd64"
|
||||||
|
publicKeyToken="31bf3856ad364e35"
|
||||||
|
language="neutral" versionScope="nonSxS">
|
||||||
|
<ComputerName>CI1</ComputerName>
|
||||||
|
<TimeZone>Central Standard Time</TimeZone>
|
||||||
|
</component>
|
||||||
|
|
||||||
|
<component name="Microsoft-Windows-TerminalServices-LocalSessionManager"
|
||||||
|
processorArchitecture="amd64"
|
||||||
|
publicKeyToken="31bf3856ad364e35"
|
||||||
|
language="neutral" versionScope="nonSxS">
|
||||||
|
<fDenyTSConnections>false</fDenyTSConnections>
|
||||||
|
</component>
|
||||||
|
</settings>
|
||||||
|
|
||||||
|
<!-- Pass 7: OOBE — Admin account, RDP firewall, WinRM -->
|
||||||
|
<settings pass="oobeSystem">
|
||||||
|
<component name="Microsoft-Windows-Shell-Setup"
|
||||||
|
processorArchitecture="amd64"
|
||||||
|
publicKeyToken="31bf3856ad364e35"
|
||||||
|
language="neutral" versionScope="nonSxS">
|
||||||
|
<OOBE>
|
||||||
|
<HideEULAPage>true</HideEULAPage>
|
||||||
|
<HideLocalAccountScreen>true</HideLocalAccountScreen>
|
||||||
|
<HideOEMRegistrationScreen>true</HideOEMRegistrationScreen>
|
||||||
|
<HideOnlineAccountScreens>true</HideOnlineAccountScreens>
|
||||||
|
<HideWirelessSetupInOOBE>true</HideWirelessSetupInOOBE>
|
||||||
|
<ProtectYourPC>3</ProtectYourPC>
|
||||||
|
</OOBE>
|
||||||
|
<UserAccounts>
|
||||||
|
<AdministratorPassword>
|
||||||
|
<!-- Real password is in 1Password — vault qaphopopkryhbg353ukzhhuqoq,
|
||||||
|
item id h3ix4mgfk65gmkcmvh6ly3d3hu, title:
|
||||||
|
"ci1 Administrator (Windows Server 2025 KubeVirt VM)".
|
||||||
|
Field "autounattend AdministratorPassword Value (UTF-16-LE base64)"
|
||||||
|
matches the Value below.
|
||||||
|
To rotate: regenerate, recompute base64
|
||||||
|
$combined = $pw + "AdministratorPassword"
|
||||||
|
[Convert]::ToBase64String([Text.Encoding]::Unicode.GetBytes($combined))
|
||||||
|
then update both 1P item AND this Value field, recreate VM. -->
|
||||||
|
<Value>bAA3AGsANABOAHcAcgBMAG4AeQBTAHUAYgBBAHQAaQBzAFUAcAB6AEMAWQAhADkAYQBCAEEAZABtAGkAbgBpAHMAdAByAGEAdABvAHIAUABhAHMAcwB3AG8AcgBkAA==</Value>
|
||||||
|
<PlainText>false</PlainText>
|
||||||
|
</AdministratorPassword>
|
||||||
|
</UserAccounts>
|
||||||
|
<FirstLogonCommands>
|
||||||
|
<SynchronousCommand wcm:action="add">
|
||||||
|
<Order>1</Order>
|
||||||
|
<CommandLine>powershell.exe -ExecutionPolicy Bypass -Command "Set-NetFirewallRule -DisplayGroup 'Remote Desktop' -Enabled True"</CommandLine>
|
||||||
|
<Description>Enable RDP firewall rule</Description>
|
||||||
|
</SynchronousCommand>
|
||||||
|
<SynchronousCommand wcm:action="add">
|
||||||
|
<Order>2</Order>
|
||||||
|
<CommandLine>powershell.exe -ExecutionPolicy Bypass -Command "Enable-PSRemoting -Force; Set-Item WSMan:\localhost\Service\Auth\Basic $true; Set-Item WSMan:\localhost\Service\AllowUnencrypted $true"</CommandLine>
|
||||||
|
<Description>Enable WinRM (Phase 2 will pivot to HTTPS via step-ca cert)</Description>
|
||||||
|
</SynchronousCommand>
|
||||||
|
<SynchronousCommand wcm:action="add">
|
||||||
|
<Order>3</Order>
|
||||||
|
<CommandLine>cmd.exe /c reg add "HKLM\SOFTWARE\Microsoft\Windows\CurrentVersion\Policies\System" /v EnableLUA /t REG_DWORD /d 0 /f</CommandLine>
|
||||||
|
<Description>Disable UAC (Phase 2 Puppet will re-evaluate)</Description>
|
||||||
|
</SynchronousCommand>
|
||||||
|
</FirstLogonCommands>
|
||||||
|
</component>
|
||||||
|
</settings>
|
||||||
|
</unattend>
|
||||||
|
|
||||||
|
---
|
||||||
|
# VirtualMachine — Windows Server 2025 CI runner.
|
||||||
|
apiVersion: kubevirt.io/v1
|
||||||
|
kind: VirtualMachine
|
||||||
|
metadata:
|
||||||
|
name: ci1
|
||||||
|
namespace: kubevirt-vms
|
||||||
|
labels:
|
||||||
|
app: ci-runner
|
||||||
|
role: github-actions-runner
|
||||||
|
flowercore.io/managed-by: bluejay-infra
|
||||||
|
spec:
|
||||||
|
# `running: true` is deprecated in favor of `runStrategy`. They are mutually
|
||||||
|
# exclusive — KubeVirt's validating webhook rejects any VM that sets both:
|
||||||
|
# admission webhook "virtualmachine-validator.kubevirt.io" denied the request:
|
||||||
|
# Running and RunStrategy are mutually exclusive.
|
||||||
|
# `Always` keeps a VMI running and restarts it if it crashes/exits — same
|
||||||
|
# semantics as the old `running: true`.
|
||||||
|
#
|
||||||
|
# **2026-05-08 status: VM cannot start due to a stale QEMU flock on the
|
||||||
|
# rootdisk PVC** (qemu reports `Failed to get "write" lock` on
|
||||||
|
# `/var/run/kubevirt-private/vmi-disks/rootdisk/disk.img`). The flock was
|
||||||
|
# left by a previous QEMU process during a force-deleted launcher pod
|
||||||
|
# cycle. Recovery requires either (a) a Longhorn engine restart on
|
||||||
|
# rke2-agent2, (b) a Longhorn volume detach via the longhorn-manager API
|
||||||
|
# (kubectl patch on `volume.longhorn.io/<pvc-name>` does not work — the
|
||||||
|
# spec.nodeID is reconciled back), or (c) a node reboot of rke2-agent2.
|
||||||
|
#
|
||||||
|
# **Confirmed working:** the bootOrder swap (windows-iso=1, rootdisk=2)
|
||||||
|
# and the runStrategy migration (above). The ISO PVC was successfully
|
||||||
|
# repopulated via virtctl image-upload pvc on the Filesystem-mode PVC.
|
||||||
|
#
|
||||||
|
# **Open: SATA CDROM read timeout** — even with bootOrder=1, OVMF reported
|
||||||
|
# `BdsDxe: failed to start Boot0001 ... Time out` reading the SATA CDROM
|
||||||
|
# backed by the Filesystem-mode PVC. A switch to Block-mode DataVolume
|
||||||
|
# was attempted but blocked by a CDI v1.65.0 upload-pod permission issue
|
||||||
|
# (capability drop prevents writing to the underlying block device).
|
||||||
|
# See header docstring on the ISO PVC.
|
||||||
|
runStrategy: Always # LIVE — ISO uploaded 2026-05-08, password in 1P
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: ci-runner
|
||||||
|
role: github-actions-runner
|
||||||
|
kubevirt.io/vm: ci1
|
||||||
|
spec:
|
||||||
|
domain:
|
||||||
|
cpu:
|
||||||
|
cores: 8
|
||||||
|
sockets: 1
|
||||||
|
threads: 1
|
||||||
|
memory:
|
||||||
|
guest: 16Gi
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
memory: 16Gi
|
||||||
|
limits:
|
||||||
|
memory: 16Gi
|
||||||
|
clock:
|
||||||
|
utc: {}
|
||||||
|
timer:
|
||||||
|
hpet:
|
||||||
|
present: false
|
||||||
|
pit:
|
||||||
|
tickPolicy: delay
|
||||||
|
rtc:
|
||||||
|
tickPolicy: catchup
|
||||||
|
hyperv: {}
|
||||||
|
features:
|
||||||
|
acpi: {}
|
||||||
|
apic: {}
|
||||||
|
hyperv:
|
||||||
|
relaxed: {}
|
||||||
|
vapic: {}
|
||||||
|
spinlocks:
|
||||||
|
spinlocks: 8191
|
||||||
|
smm: {}
|
||||||
|
firmware:
|
||||||
|
bootloader:
|
||||||
|
efi:
|
||||||
|
secureBoot: true
|
||||||
|
devices:
|
||||||
|
tpm: {} # Non-persistent vTPM — sufficient for runner; no BitLocker
|
||||||
|
disks:
|
||||||
|
# bootOrder: ISO must be 1 for first-boot install (the rootdisk has no
|
||||||
|
# EFI bootloader yet). After Windows installs, it writes its own UEFI
|
||||||
|
# Boot#### entries pointing at the rootdisk's EFI partition; UEFI then
|
||||||
|
# boots from rootdisk going forward and the ISO remains attached as
|
||||||
|
# a fallback for re-install scenarios.
|
||||||
|
#
|
||||||
|
# Original (broken) order had rootdisk=1, windows-iso=2 — UEFI tried
|
||||||
|
# the empty virtio disk first, got nothing, fell back to the SATA
|
||||||
|
# CDROM at Boot0001 with a short timeout, and timed out before the
|
||||||
|
# CDROM enumerated. Console showed:
|
||||||
|
# BdsDxe: failed to start Boot0001 ... Time out
|
||||||
|
# BdsDxe: No bootable option or device was found.
|
||||||
|
# Confirmed via debug pod: PVC content IS a real bootable ISO9660
|
||||||
|
# (file: "ISO 9660 CD-ROM filesystem data ... (bootable)"), so the
|
||||||
|
# only bug was boot priority.
|
||||||
|
# 2026-05-08 PM: cdrom bus is SCSI (virtio-scsi controller). Bus
|
||||||
|
# choice is no longer load-bearing since the ISO is delivered via
|
||||||
|
# containerDisk (see volumes block below) — both SATA and SCSI
|
||||||
|
# work fine when the cdrom backing isn't a slow PVC. SCSI is kept
|
||||||
|
# because it's the modern bus and matches the standard FC
|
||||||
|
# KubeVirt VM template.
|
||||||
|
- name: windows-iso
|
||||||
|
bootOrder: 1
|
||||||
|
cdrom:
|
||||||
|
bus: scsi
|
||||||
|
- name: rootdisk
|
||||||
|
bootOrder: 2
|
||||||
|
disk:
|
||||||
|
bus: virtio
|
||||||
|
- name: virtio-drivers
|
||||||
|
cdrom:
|
||||||
|
bus: sata
|
||||||
|
- name: sysprep
|
||||||
|
cdrom:
|
||||||
|
bus: sata
|
||||||
|
interfaces:
|
||||||
|
# Pod-network fallback for Phase 1. To switch to PROD VLAN once Multus
|
||||||
|
# + the prod-vlan57 NAD exist, replace this block with:
|
||||||
|
# - name: prod-net
|
||||||
|
# bridge: {}
|
||||||
|
# model: virtio
|
||||||
|
# and update the networks: stanza to use multus.networkName: kubevirt-vms/prod-vlan57
|
||||||
|
- name: default
|
||||||
|
masquerade: {}
|
||||||
|
model: virtio
|
||||||
|
machine:
|
||||||
|
type: q35
|
||||||
|
networks:
|
||||||
|
- name: default
|
||||||
|
pod: {}
|
||||||
|
volumes:
|
||||||
|
- name: rootdisk
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: ci1-rootdisk
|
||||||
|
- name: windows-iso
|
||||||
|
# 2026-05-08 PM (Path C, CONTAINERDISK): the ISO is now packaged as
|
||||||
|
# a KubeVirt containerDisk OCI image baked from
|
||||||
|
# `FROM scratch ; ADD --chown=107:107 disk.img /disk/disk.img`.
|
||||||
|
# The qemu user (uid 107) reads the ISO directly from a tmpfs view
|
||||||
|
# of the OCI layer, bypassing both:
|
||||||
|
# - Synology NFS export ACL (Path B failed: uid 107 denied at
|
||||||
|
# directory level even with mode 0777, see memory
|
||||||
|
# feedback_synology_iso_export_root_only_uid_107_denied)
|
||||||
|
# - OVMF cdrom read-window timeout (Path A and Path B's SCSI
|
||||||
|
# retry both hit `BdsDxe: failed to start Boot0001 ... Time out`
|
||||||
|
# when the cdrom was backed by a PVC the storage controller
|
||||||
|
# couldn't satisfy reads from fast enough).
|
||||||
|
#
|
||||||
|
# Image build (one-time, per ISO version):
|
||||||
|
# 1. Copy ISO to disk.img, write Dockerfile
|
||||||
|
# 2. podman build --tag localhost/win-server-2025:1.0 . (on noc1)
|
||||||
|
# 3. podman save -o win-server-2025-1.0.tar localhost/win-server-2025:1.0
|
||||||
|
# 4. SCP tar to all 3 RKE2 nodes (rke2-server, rke2-agent1, rke2-agent2)
|
||||||
|
# 5. sudo /var/lib/rancher/rke2/bin/ctr -a /run/k3s/containerd/containerd.sock \
|
||||||
|
# -n k8s.io images import /tmp/win-server-2025-1.0.tar
|
||||||
|
# Standard FC pattern per `feedback_rke2_localhost_imagepullpolicy`.
|
||||||
|
#
|
||||||
|
# When a new Windows ISO version ships, bump the tag (1.1, 1.2, ...),
|
||||||
|
# rebuild + redistribute, and update the image: line below in a new
|
||||||
|
# commit. KubeVirt picks up the new image via a VM restart.
|
||||||
|
#
|
||||||
|
# The legacy NFS PVC + PV (apps/kubevirt-vms/win2025-iso-nfs-pv.yaml)
|
||||||
|
# and CDI Longhorn PVC (`windows-server-2025-iso`) are RETAINED for
|
||||||
|
# this commit so the prior states are recoverable. Once the
|
||||||
|
# containerDisk path is proven by a successful Windows install, both
|
||||||
|
# legacy artifacts can be pruned in a follow-up commit.
|
||||||
|
containerDisk:
|
||||||
|
image: localhost/win-server-2025:1.0
|
||||||
|
imagePullPolicy: Never
|
||||||
|
- name: virtio-drivers
|
||||||
|
containerDisk:
|
||||||
|
# Pinned to v1.8.2 (latest stable as of 2026-05-08).
|
||||||
|
# The :latest tag uses the legacy Docker manifest schema 1, which containerd
|
||||||
|
# 2.1 (RKE2 v1.34.5) refuses to pull with:
|
||||||
|
# "media type application/vnd.docker.distribution.manifest.v1+prettyjws
|
||||||
|
# is no longer supported since containerd v2.1"
|
||||||
|
# v1.8.2 is rebuilt with manifest v2/OCI and works on containerd 2.1.
|
||||||
|
# Bump available: https://quay.io/repository/kubevirt/virtio-container-disk?tab=tags
|
||||||
|
image: quay.io/kubevirt/virtio-container-disk:v1.8.2
|
||||||
|
- name: sysprep
|
||||||
|
sysprep:
|
||||||
|
configMap:
|
||||||
|
name: ci1-autounattend
|
||||||
|
terminationGracePeriodSeconds: 3600
|
||||||
69
apps/kubevirt-vms/prod-vlan57-nad.yaml
Normal file
69
apps/kubevirt-vms/prod-vlan57-nad.yaml
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
# =============================================================================
|
||||||
|
# NetworkAttachmentDefinition — PROD VLAN 57 bridge
|
||||||
|
# =============================================================================
|
||||||
|
# Purpose: makes KubeVirt VMs reachable on the PROD VLAN (10.0.57.0/24)
|
||||||
|
# alongside the existing pod network. Required for ci1 to bridge onto PROD
|
||||||
|
# (e.g. to provision/scrape edge1, edge2, kiosks, Pis on the same L2 segment).
|
||||||
|
#
|
||||||
|
# **DEPLOY GATE — Phase 1.5 host work required first**:
|
||||||
|
# On every RKE2 node (rke2-server, rke2-agent1, rke2-agent2):
|
||||||
|
# 1. Switch port (UniFi USL16LP) trunks VLAN 57 to the node — usually
|
||||||
|
# already true since BLUEJAY-WS reaches 10.0.57.x services. Verify
|
||||||
|
# with `ip link show enp86s0.57` after configuring sub-interface, OR
|
||||||
|
# `tcpdump -ni enp86s0 vlan 57` and ping a known PROD host.
|
||||||
|
# 2. Linux bridge `br-prod` enslaving `enp86s0.57` (VLAN sub-interface).
|
||||||
|
# NetworkManager profile examples in the runbook below.
|
||||||
|
# 3. Verify Multus DaemonSet `kube-multus-ds` is Ready on all nodes.
|
||||||
|
#
|
||||||
|
# Without those, applying this NAD has no effect except to create the NAD object.
|
||||||
|
# A VM that requests this NAD with no bridge present will fail with:
|
||||||
|
# `error adding pod kubevirt-vms_ci1 to CNI network "prod-vlan57": failed to
|
||||||
|
# plumb VLAN: open /sys/class/net/br-prod/master: no such file or directory`
|
||||||
|
#
|
||||||
|
# Configuration notes:
|
||||||
|
# - cniVersion 0.3.1 to match Multus daemon-config.json
|
||||||
|
# - mtu 1500 (matches enp86s0 default; bump if jumbo frames configured)
|
||||||
|
# - bridge name `br-prod` is convention; if Puppet picks a different name
|
||||||
|
# (e.g. `br57`, `br-vlan57`), edit BOTH this NAD and the ci1.yaml
|
||||||
|
# interface block. Keep them in sync.
|
||||||
|
# - vlan: 0 because frames reaching the host bridge are already untagged (br-prod sits
|
||||||
|
# on top of `enp86s0.57`). If we instead used a VLAN-aware bridge with
|
||||||
|
# trunk port, set vlan: 57 here. Current convention is VLAN-stripped at
|
||||||
|
# the sub-interface, so the bridge passes untagged frames.
|
||||||
|
#
|
||||||
|
# Apply:
|
||||||
|
# kubectl --kubeconfig $env:USERPROFILE\.kube\rke2.yaml apply -f apps/kubevirt-vms/prod-vlan57-nad.yaml
|
||||||
|
#
|
||||||
|
# Then update ci1.yaml networks: stanza to:
|
||||||
|
# - name: prod-net
|
||||||
|
# multus:
|
||||||
|
# networkName: kubevirt-vms/prod-vlan57
|
||||||
|
# and the interface block from `masquerade` to `bridge`.
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
---
|
||||||
|
# Namespace must exist already (created by ci1.yaml's first document).
|
||||||
|
# This file creates a NAD in that same namespace.
|
||||||
|
apiVersion: k8s.cni.cncf.io/v1
|
||||||
|
kind: NetworkAttachmentDefinition
|
||||||
|
metadata:
|
||||||
|
name: prod-vlan57
|
||||||
|
namespace: kubevirt-vms
|
||||||
|
annotations:
|
||||||
|
bluejay.iamworkin.lan/host-bridge: "br-prod (enslaves enp86s0.57)"
|
||||||
|
bluejay.iamworkin.lan/cidr: "10.0.57.0/24"
|
||||||
|
bluejay.iamworkin.lan/gateway: "10.0.57.1"
|
||||||
|
bluejay.iamworkin.lan/dns: "10.0.56.1 (pfSense Unbound)"
|
||||||
|
spec:
|
||||||
|
config: |
|
||||||
|
{
|
||||||
|
"cniVersion": "0.3.1",
|
||||||
|
"name": "prod-vlan57",
|
||||||
|
"type": "bridge",
|
||||||
|
"bridge": "br-prod",
|
||||||
|
"ipam": {},
|
||||||
|
"mtu": 1500,
|
||||||
|
"vlan": 0,
|
||||||
|
"promiscMode": true,
|
||||||
|
"preserveDefaultVlan": false
|
||||||
|
}
|
||||||
99
apps/kubevirt-vms/win2025-iso-nfs-pv.yaml
Normal file
99
apps/kubevirt-vms/win2025-iso-nfs-pv.yaml
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
# =============================================================================
|
||||||
|
# Windows Server 2025 ISO — Static NFS PV (Path B for SATA-CDROM timeout)
|
||||||
|
# =============================================================================
|
||||||
|
# Purpose: Mount the ISO from Synology NAS via NFS instead of from a Longhorn-
|
||||||
|
# backed Filesystem PVC.
|
||||||
|
#
|
||||||
|
# Why: SATA-CDROM emulation reading from a Longhorn-backed Filesystem PVC is
|
||||||
|
# too slow for OVMF's boot read window — the DVD-ROM enumeration times out
|
||||||
|
# before the bootloader can be read. Symptom on the serial console:
|
||||||
|
# BdsDxe: failed to start Boot0001 "UEFI QEMU DVD-ROM QM00001 " from ...
|
||||||
|
# BdsDxe: failed to start Boot0001 ... Time out
|
||||||
|
# BdsDxe: No bootable option or device was found
|
||||||
|
# Diagnosis confirmed the ISO content is a perfectly valid bootable ISO9660
|
||||||
|
# image — the bug is in the timing path between OVMF and Longhorn-backed
|
||||||
|
# storage, not in the ISO itself.
|
||||||
|
#
|
||||||
|
# Block-mode PVC was tried (`volumeMode: Block` via DataVolume) and would
|
||||||
|
# likely fix the timing, but CDI v1.65.0's upload-target pod cannot open the
|
||||||
|
# block device due to runAsUser:107 + capabilities.drop:[ALL] and we got:
|
||||||
|
# blockdev: cannot open /dev/cdi-block-volume: Permission denied
|
||||||
|
#
|
||||||
|
# NFS-mounted ISO bypasses both issues: no Longhorn slowness, no CDI upload
|
||||||
|
# pod permission concerns. The ISO is read directly from the NAS over a
|
||||||
|
# native NFSv4.1 mount that QEMU's SATA emulator can read at full LAN speed.
|
||||||
|
#
|
||||||
|
# Layout on Synology:
|
||||||
|
# /volume1/ISOs/ (existing export, RKE2 ACL)
|
||||||
|
# en-us_windows_server_2025_updated_march_2026_x64_dvd_8e06425a.iso
|
||||||
|
# win2025-iso-disk/ (new subdir, 2026-05-08)
|
||||||
|
# disk.img -> hardlink to ../en-us_windows_server_2025_..._8e06425a.iso
|
||||||
|
#
|
||||||
|
# KubeVirt's launcher pod expects a PVC mounted at
|
||||||
|
# /var/run/kubevirt-private/vmi-disks/<diskName>/disk.img — by mounting the
|
||||||
|
# `win2025-iso-disk/` subdir as the NFS PV root, `disk.img` lives at the PV's
|
||||||
|
# root and KubeVirt's CDROM emulator finds it without any path manipulation.
|
||||||
|
#
|
||||||
|
# A symlink would NOT work for sub-path NFS mounts (the relative target
|
||||||
|
# `../...iso` falls outside the sub-mount root). A hardlink works because it
|
||||||
|
# references the same inode regardless of mount point.
|
||||||
|
#
|
||||||
|
# Memory references:
|
||||||
|
# - feedback_synology_nfs_volume1_kubernetes_export_scoped (Synology export
|
||||||
|
# scoping pattern — but /volume1/ISOs export, unlike /volume1/kubernetes,
|
||||||
|
# does support sub-path mounts because Synology NFS is configured with
|
||||||
|
# pseudo-fs in NFSv4.1)
|
||||||
|
# - feedback_kubevirt_iso_first_install_bootorder_and_runstrategy (boot
|
||||||
|
# order / runStrategy gotchas, separate from the storage timing issue)
|
||||||
|
#
|
||||||
|
# Validation (2026-05-08, from rke2-server / rke2-agent1 / rke2-agent2):
|
||||||
|
# mount -t nfs -o nfsvers=4.1,ro 10.0.58.3:/volume1/ISOs/win2025-iso-disk /tmp/m
|
||||||
|
# file /tmp/m/disk.img
|
||||||
|
# -> ISO 9660 CD-ROM filesystem data 'SSS_X64FRE_EN-US_DV9' (bootable)
|
||||||
|
# All 3 RKE2 nodes can mount and read.
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolume
|
||||||
|
metadata:
|
||||||
|
name: windows-server-2025-iso-nfs
|
||||||
|
labels:
|
||||||
|
flowercore.io/iso: windows-server-2025
|
||||||
|
flowercore.io/managed-by: bluejay-infra
|
||||||
|
spec:
|
||||||
|
capacity:
|
||||||
|
storage: 8Gi
|
||||||
|
accessModes:
|
||||||
|
- ReadOnlyMany
|
||||||
|
volumeMode: Filesystem
|
||||||
|
persistentVolumeReclaimPolicy: Retain
|
||||||
|
storageClassName: "" # static, no provisioner
|
||||||
|
mountOptions:
|
||||||
|
- nfsvers=4.1
|
||||||
|
- ro
|
||||||
|
- hard
|
||||||
|
- timeo=600
|
||||||
|
- retrans=3
|
||||||
|
nfs:
|
||||||
|
server: 10.0.58.3 # BlueJayNAS Synology DS1621+ on HOME VLAN 58
|
||||||
|
path: /volume1/ISOs/win2025-iso-disk
|
||||||
|
readOnly: true
|
||||||
|
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: windows-server-2025-iso-nfs
|
||||||
|
namespace: kubevirt-vms
|
||||||
|
labels:
|
||||||
|
app: ci-runner
|
||||||
|
flowercore.io/managed-by: bluejay-infra
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadOnlyMany
|
||||||
|
volumeMode: Filesystem
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 8Gi
|
||||||
|
storageClassName: ""
|
||||||
|
volumeName: windows-server-2025-iso-nfs
|
||||||
762
apps/monitoring/fc-updatecenter-dashboard.grafana.txt
Normal file
762
apps/monitoring/fc-updatecenter-dashboard.grafana.txt
Normal file
@@ -0,0 +1,762 @@
|
|||||||
|
{
|
||||||
|
"annotations": {
|
||||||
|
"list": []
|
||||||
|
},
|
||||||
|
"editable": true,
|
||||||
|
"fiscalYearStartMonth": 0,
|
||||||
|
"graphTooltip": 1,
|
||||||
|
"id": null,
|
||||||
|
"links": [
|
||||||
|
{
|
||||||
|
"icon": "external link",
|
||||||
|
"includeVars": false,
|
||||||
|
"keepTime": false,
|
||||||
|
"targetBlank": true,
|
||||||
|
"title": "Open Service",
|
||||||
|
"type": "link",
|
||||||
|
"url": "https://updatecenter.iamworkin.lan/"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "thresholds"
|
||||||
|
},
|
||||||
|
"mappings": [
|
||||||
|
{
|
||||||
|
"options": {
|
||||||
|
"0": {
|
||||||
|
"color": "#f87171",
|
||||||
|
"index": 1,
|
||||||
|
"text": "DOWN"
|
||||||
|
},
|
||||||
|
"1": {
|
||||||
|
"color": "#4ade80",
|
||||||
|
"index": 0,
|
||||||
|
"text": "UP"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"type": "value"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{
|
||||||
|
"color": "#f87171",
|
||||||
|
"value": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"color": "#4ade80",
|
||||||
|
"value": 1
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 4,
|
||||||
|
"w": 8,
|
||||||
|
"x": 0,
|
||||||
|
"y": 0
|
||||||
|
},
|
||||||
|
"id": 1,
|
||||||
|
"options": {
|
||||||
|
"colorMode": "background",
|
||||||
|
"graphMode": "none",
|
||||||
|
"justifyMode": "center",
|
||||||
|
"orientation": "auto",
|
||||||
|
"reduceOptions": {
|
||||||
|
"calcs": [
|
||||||
|
"lastNotNull"
|
||||||
|
],
|
||||||
|
"fields": "",
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"textMode": "value_and_name"
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"expr": "probe_success{job=\"probe-traefik-services\",instance=\"updatecenter.iamworkin.lan\"}",
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "Availability"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Service Availability",
|
||||||
|
"transparent": true,
|
||||||
|
"type": "stat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "thresholds"
|
||||||
|
},
|
||||||
|
"decimals": 2,
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{
|
||||||
|
"color": "#f87171",
|
||||||
|
"value": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"color": "#fbbf24",
|
||||||
|
"value": 95
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"color": "#FFB300",
|
||||||
|
"value": 99
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"color": "#4ade80",
|
||||||
|
"value": 99.9
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"unit": "percent"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 4,
|
||||||
|
"w": 8,
|
||||||
|
"x": 8,
|
||||||
|
"y": 0
|
||||||
|
},
|
||||||
|
"id": 2,
|
||||||
|
"options": {
|
||||||
|
"colorMode": "background_solid",
|
||||||
|
"graphMode": "area",
|
||||||
|
"justifyMode": "center",
|
||||||
|
"orientation": "auto",
|
||||||
|
"reduceOptions": {
|
||||||
|
"calcs": [
|
||||||
|
"lastNotNull"
|
||||||
|
],
|
||||||
|
"fields": "",
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"textMode": "value_and_name"
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"expr": "avg_over_time(probe_success{job=\"probe-traefik-services\",instance=\"updatecenter.iamworkin.lan\"}[24h]) * 100",
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "24h Uptime"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "24-Hour Uptime",
|
||||||
|
"transparent": true,
|
||||||
|
"type": "stat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "thresholds"
|
||||||
|
},
|
||||||
|
"max": 30,
|
||||||
|
"min": 0,
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{
|
||||||
|
"color": "#f87171",
|
||||||
|
"value": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"color": "#fbbf24",
|
||||||
|
"value": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"color": "#4ade80",
|
||||||
|
"value": 7
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"unit": "d"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 4,
|
||||||
|
"w": 8,
|
||||||
|
"x": 16,
|
||||||
|
"y": 0
|
||||||
|
},
|
||||||
|
"id": 3,
|
||||||
|
"options": {
|
||||||
|
"minVizHeight": 75,
|
||||||
|
"minVizWidth": 75,
|
||||||
|
"orientation": "auto",
|
||||||
|
"reduceOptions": {
|
||||||
|
"calcs": [
|
||||||
|
"lastNotNull"
|
||||||
|
],
|
||||||
|
"fields": "",
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"showThresholdLabels": false,
|
||||||
|
"showThresholdMarkers": true
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"expr": "(probe_ssl_earliest_cert_expiry{job=\"probe-traefik-services\",instance=\"updatecenter.iamworkin.lan\"} - time()) / 86400",
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "Days Remaining"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Cert Expiry (Days)",
|
||||||
|
"transparent": true,
|
||||||
|
"type": "gauge"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"axisBorderShow": false,
|
||||||
|
"axisCenteredZero": false,
|
||||||
|
"axisColorMode": "text",
|
||||||
|
"axisLabel": "Response Time (seconds)",
|
||||||
|
"drawStyle": "line",
|
||||||
|
"fillOpacity": 12,
|
||||||
|
"gradientMode": "scheme",
|
||||||
|
"lineInterpolation": "smooth",
|
||||||
|
"lineWidth": 2,
|
||||||
|
"pointSize": 4,
|
||||||
|
"showPoints": "never",
|
||||||
|
"spanNulls": true,
|
||||||
|
"thresholdsStyle": {
|
||||||
|
"mode": "dashed"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{
|
||||||
|
"color": "#4ade80",
|
||||||
|
"value": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"color": "#fbbf24",
|
||||||
|
"value": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"color": "#f87171",
|
||||||
|
"value": 5
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"unit": "s"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 14,
|
||||||
|
"x": 0,
|
||||||
|
"y": 4
|
||||||
|
},
|
||||||
|
"id": 4,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"calcs": [
|
||||||
|
"lastNotNull",
|
||||||
|
"mean",
|
||||||
|
"max"
|
||||||
|
],
|
||||||
|
"displayMode": "table",
|
||||||
|
"placement": "right"
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "single",
|
||||||
|
"sort": "none"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"expr": "probe_duration_seconds{job=\"probe-traefik-services\",instance=\"updatecenter.iamworkin.lan\"}",
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "Probe Duration"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"timeFrom": "1h",
|
||||||
|
"title": "Response Time (1h Trend)",
|
||||||
|
"transparent": true,
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 10,
|
||||||
|
"x": 14,
|
||||||
|
"y": 4
|
||||||
|
},
|
||||||
|
"id": 5,
|
||||||
|
"options": {
|
||||||
|
"alertInstanceLabelFilter": "{instance=\"updatecenter.iamworkin.lan\"}",
|
||||||
|
"alertName": "",
|
||||||
|
"dashboardAlerts": false,
|
||||||
|
"groupBy": [],
|
||||||
|
"groupMode": "default",
|
||||||
|
"maxItems": 10,
|
||||||
|
"sortOrder": 1,
|
||||||
|
"stateFilter": {
|
||||||
|
"error": true,
|
||||||
|
"firing": true,
|
||||||
|
"noData": true,
|
||||||
|
"normal": false,
|
||||||
|
"pending": true
|
||||||
|
},
|
||||||
|
"viewMode": "list"
|
||||||
|
},
|
||||||
|
"title": "Active Alerts",
|
||||||
|
"type": "alertlist"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapsed": false,
|
||||||
|
"gridPos": {
|
||||||
|
"h": 1,
|
||||||
|
"w": 24,
|
||||||
|
"x": 0,
|
||||||
|
"y": 12
|
||||||
|
},
|
||||||
|
"id": 20,
|
||||||
|
"title": "OTEL Counters — Track 1D",
|
||||||
|
"type": "row"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"lineWidth": 1,
|
||||||
|
"fillOpacity": 10
|
||||||
|
},
|
||||||
|
"unit": "reqps"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 0,
|
||||||
|
"y": 13
|
||||||
|
},
|
||||||
|
"id": 21,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"displayMode": "table",
|
||||||
|
"placement": "right",
|
||||||
|
"calcs": ["mean", "lastNotNull"]
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "multi",
|
||||||
|
"sort": "desc"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"expr": "sum by (status) (rate(updatecenter_manifest_requests_total[5m]))",
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "status={{status}}"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Manifest Requests rate by status (5m)",
|
||||||
|
"transparent": true,
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"lineWidth": 1,
|
||||||
|
"fillOpacity": 10
|
||||||
|
},
|
||||||
|
"unit": "Bps"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 12,
|
||||||
|
"y": 13
|
||||||
|
},
|
||||||
|
"id": 22,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"displayMode": "table",
|
||||||
|
"placement": "right",
|
||||||
|
"calcs": ["mean", "lastNotNull"]
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "multi",
|
||||||
|
"sort": "desc"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"expr": "sum by (slug) (rate(updatecenter_bundle_download_bytes_total[5m]))",
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "{{slug}}"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Bundle Download Throughput by slug (5m)",
|
||||||
|
"transparent": true,
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"lineWidth": 1,
|
||||||
|
"fillOpacity": 10
|
||||||
|
},
|
||||||
|
"unit": "reqps"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 0,
|
||||||
|
"y": 21
|
||||||
|
},
|
||||||
|
"id": 23,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"displayMode": "table",
|
||||||
|
"placement": "right",
|
||||||
|
"calcs": ["mean", "lastNotNull"]
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "multi",
|
||||||
|
"sort": "desc"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"expr": "sum by (status) (rate(updatecenter_checkins_total[5m]))",
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "status={{status}}"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Agent Check-in Rate by status (5m)",
|
||||||
|
"transparent": true,
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "thresholds"
|
||||||
|
},
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{ "color": "#4ade80", "value": null },
|
||||||
|
{ "color": "#f87171", "value": 1 }
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"unit": "none",
|
||||||
|
"decimals": 2
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 6,
|
||||||
|
"x": 12,
|
||||||
|
"y": 21
|
||||||
|
},
|
||||||
|
"id": 24,
|
||||||
|
"options": {
|
||||||
|
"colorMode": "background",
|
||||||
|
"graphMode": "area",
|
||||||
|
"justifyMode": "center",
|
||||||
|
"orientation": "auto",
|
||||||
|
"reduceOptions": {
|
||||||
|
"calcs": ["sum"],
|
||||||
|
"fields": "",
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"textMode": "value_and_name"
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"expr": "increase(updatecenter_signature_verify_failures_total[1h])",
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "Sig Verify Failures (1h)"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Signature Verify Failures (1h)",
|
||||||
|
"transparent": true,
|
||||||
|
"type": "stat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"lineWidth": 1,
|
||||||
|
"fillOpacity": 10
|
||||||
|
},
|
||||||
|
"unit": "reqps"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 6,
|
||||||
|
"x": 18,
|
||||||
|
"y": 21
|
||||||
|
},
|
||||||
|
"id": 25,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"displayMode": "table",
|
||||||
|
"placement": "right",
|
||||||
|
"calcs": ["mean", "lastNotNull"]
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "multi",
|
||||||
|
"sort": "desc"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"expr": "sum by (slug, channel) (rate(updatecenter_release_publishes_total[5m]))",
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "{{slug}}/{{channel}}"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Release Publishes rate by slug/channel (5m)",
|
||||||
|
"transparent": true,
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"lineWidth": 1,
|
||||||
|
"fillOpacity": 10
|
||||||
|
},
|
||||||
|
"unit": "reqps"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 0,
|
||||||
|
"y": 29
|
||||||
|
},
|
||||||
|
"id": 26,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"displayMode": "table",
|
||||||
|
"placement": "right",
|
||||||
|
"calcs": ["mean", "lastNotNull"]
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "multi",
|
||||||
|
"sort": "desc"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"expr": "sum by (kind, status) (rate(updatecenter_bundle_downloads_total[5m]))",
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "{{kind}} / {{status}}"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Bundle Download Requests by kind/status (5m)",
|
||||||
|
"transparent": true,
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"lineWidth": 2,
|
||||||
|
"fillOpacity": 20
|
||||||
|
},
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{ "color": "#4ade80", "value": null },
|
||||||
|
{ "color": "#f87171", "value": 0.01 }
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"unit": "reqps"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 12,
|
||||||
|
"y": 29
|
||||||
|
},
|
||||||
|
"id": 27,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"displayMode": "table",
|
||||||
|
"placement": "right",
|
||||||
|
"calcs": ["mean", "lastNotNull"]
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "multi",
|
||||||
|
"sort": "desc"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"expr": "rate(updatecenter_signature_verify_failures_total[5m])",
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "Sig verify failures/s"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Signature Verify Failure Rate (5m) — Critical if >0",
|
||||||
|
"transparent": true,
|
||||||
|
"type": "timeseries"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"refresh": "30s",
|
||||||
|
"schemaVersion": 39,
|
||||||
|
"style": "dark",
|
||||||
|
"tags": [
|
||||||
|
"blue-jay",
|
||||||
|
"flowercore",
|
||||||
|
"synthetic",
|
||||||
|
"updatecenter",
|
||||||
|
"otel"
|
||||||
|
],
|
||||||
|
"templating": {
|
||||||
|
"list": []
|
||||||
|
},
|
||||||
|
"time": {
|
||||||
|
"from": "now-24h",
|
||||||
|
"to": "now"
|
||||||
|
},
|
||||||
|
"timezone": "browser",
|
||||||
|
"title": "FlowerCore.UpdateCenter Dashboard",
|
||||||
|
"uid": "fc-updatecenter",
|
||||||
|
"version": 2
|
||||||
|
}
|
||||||
@@ -1024,6 +1024,72 @@ data:
|
|||||||
summary: "Longhorn node {{ $labels.node }} not Ready"
|
summary: "Longhorn node {{ $labels.node }} not Ready"
|
||||||
description: "Node {{ $labels.node }} reports ready=false (reason: {{ $labels.condition_reason }}). Volumes scheduled to this node will be unavailable until it recovers."
|
description: "Node {{ $labels.node }} reports ready=false (reason: {{ $labels.condition_reason }}). Volumes scheduled to this node will be unavailable until it recovers."
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# FC Signage Marquee Performance — Track 3 + 8 (2026-05-06)
|
||||||
|
# Live-mirrored from FlowerCore.Notes/scripts/monitoring/alerts.yml.
|
||||||
|
# Source-of-truth for the live Podman Prometheus on noc1 is the
|
||||||
|
# Notes file; this K8s ConfigMap exists so a future migration to
|
||||||
|
# in-cluster Prometheus inherits the ruleset automatically.
|
||||||
|
# See feedback_monitoring_k8s_target_vs_live_podman.
|
||||||
|
# ============================================================
|
||||||
|
- name: fc-signage-marquee
|
||||||
|
rules:
|
||||||
|
- alert: MarqueeDroppedFramesHigh
|
||||||
|
expr: |
|
||||||
|
(
|
||||||
|
sum by (renderer, phase, node_id) (rate(marquee_dropped_frames_total[5m]))
|
||||||
|
/
|
||||||
|
sum by (renderer, phase, node_id) (rate(marquee_render_latency_ms_count[5m]))
|
||||||
|
) > 0.05
|
||||||
|
unless on()
|
||||||
|
absent_over_time(marquee_dropped_frames_total[7d])
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
service: signage
|
||||||
|
alert_channel: irc
|
||||||
|
annotations:
|
||||||
|
summary: "Marquee dropped-frame rate >5% on {{ $labels.renderer }}/{{ $labels.node_id }} ({{ $labels.phase }})"
|
||||||
|
description: "Renderer {{ $labels.renderer }} on {{ $labels.node_id }} drops >5% of frames during {{ $labels.phase }}. Animation visibly stuttery."
|
||||||
|
|
||||||
|
- alert: MarqueeRenderLatencyP99High
|
||||||
|
expr: |
|
||||||
|
histogram_quantile(
|
||||||
|
0.99,
|
||||||
|
sum by (renderer, phase, node_id, le) (rate(marquee_render_latency_ms_bucket[5m]))
|
||||||
|
) > 16
|
||||||
|
unless on()
|
||||||
|
absent_over_time(marquee_render_latency_ms_bucket[7d])
|
||||||
|
for: 10m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
service: signage
|
||||||
|
alert_channel: irc
|
||||||
|
annotations:
|
||||||
|
summary: "Marquee render latency p99 > 16ms on {{ $labels.renderer }}/{{ $labels.node_id }} ({{ $labels.phase }})"
|
||||||
|
description: "Per-frame render latency p99 has exceeded the Pi-class 16ms budget for 10 minutes."
|
||||||
|
|
||||||
|
- alert: MarqueeAnimationDurationDrift
|
||||||
|
expr: |
|
||||||
|
abs(
|
||||||
|
histogram_quantile(0.5, sum by (renderer, phase, le) (rate(marquee_animation_duration_ms_bucket[15m])))
|
||||||
|
-
|
||||||
|
on (phase) group_left() avg by (phase) (marquee_animation_duration_target_ms)
|
||||||
|
)
|
||||||
|
/
|
||||||
|
on (phase) group_left() avg by (phase) (marquee_animation_duration_target_ms)
|
||||||
|
> 0.10
|
||||||
|
unless on()
|
||||||
|
absent_over_time(marquee_animation_duration_ms_bucket[7d])
|
||||||
|
for: 15m
|
||||||
|
labels:
|
||||||
|
severity: info
|
||||||
|
service: signage
|
||||||
|
alert_channel: irc
|
||||||
|
annotations:
|
||||||
|
summary: "Marquee animation duration drifting > 10% on {{ $labels.renderer }} ({{ $labels.phase }})"
|
||||||
|
description: "Median observed cycle duration deviates from target DurationMs by >10%. Could indicate browser tab throttling, GPU pressure, or phase-advancement bug."
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# ConfigMap: Blackbox Exporter Configuration
|
# ConfigMap: Blackbox Exporter Configuration
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|||||||
286
apps/multus/multus.yaml
Normal file
286
apps/multus/multus.yaml
Normal file
@@ -0,0 +1,286 @@
|
|||||||
|
# =============================================================================
|
||||||
|
# Multus CNI — Meta-CNI for multi-network attachment to pods/VMs
|
||||||
|
# =============================================================================
|
||||||
|
# Purpose: enable KubeVirt VMs (and any future workload) to attach additional
|
||||||
|
# network interfaces beyond the default Calico-managed pod network. Required
|
||||||
|
# for ci1 (Windows Server 2025 KubeVirt VM) to bridge onto PROD VLAN 57.
|
||||||
|
#
|
||||||
|
# Source: upstream k8snetworkplumbingwg/multus-cni v4.2.2
|
||||||
|
# https://github.com/k8snetworkplumbingwg/multus-cni/blob/v4.2.2/deployments/multus-daemonset-thick.yml
|
||||||
|
#
|
||||||
|
# Inlined verbatim (with project header + version pin annotation) for
|
||||||
|
# reproducibility and air-gap safety. Bumping versions = edit this file +
|
||||||
|
# git push. ArgoCD picks up via the bluejay-infra ApplicationSet
|
||||||
|
# (apps/* directory generator on main).
|
||||||
|
#
|
||||||
|
# Why thick plugin (not thin):
|
||||||
|
# - Thick = daemon + thin shim binary; daemon handles NAD watch + CRD reads
|
||||||
|
# centrally so each pod's CNI ADD doesn't hit the K8s API server. Better
|
||||||
|
# for clusters with many NAD-using pods.
|
||||||
|
# - Thin = each CNI ADD process directly contacts K8s API. Simpler but
|
||||||
|
# scales worse and has more failure modes.
|
||||||
|
# - KubeVirt + multi-VM workload pattern fits thick perfectly.
|
||||||
|
#
|
||||||
|
# Cluster context (verified 2026-05-08):
|
||||||
|
# - RKE2 v1.34.5 on 3 nodes (rke2-server, rke2-agent1, rke2-agent2)
|
||||||
|
# - Calico CNI (Tigera-managed) at /etc/cni/net.d + /opt/cni/bin (default)
|
||||||
|
# - openSUSE Leap 16, kernel 6.12, containerd 2.1.5
|
||||||
|
# - host bridge for PROD VLAN 57 = `br-prod` (PUPPET HOST WORK — see Phase 1.5
|
||||||
|
# in docs/infrastructure/windows-server-build-runner-plan.md)
|
||||||
|
#
|
||||||
|
# Version pin: NOT pinned — both containers use the floating `snapshot-thick` tag. Pinning to the v4.2.2 release tag at deploy time
|
||||||
|
# would require a private mirror of the image. Upstream `snapshot-thick` tag
|
||||||
|
# is updated on every release, so for now we trust upstream + Calico's
|
||||||
|
# established pattern. Pin to a specific SHA256 once we mirror to Gitea OCI.
|
||||||
|
#
|
||||||
|
# Apply (once committed to bluejay-infra main, ApplicationSet auto-syncs):
|
||||||
|
# git add apps/multus/multus.yaml && git commit && git push origin main
|
||||||
|
# # ArgoCD `infra-multus` Application appears within 3 min via ApplicationSet
|
||||||
|
#
|
||||||
|
# Verify:
|
||||||
|
# kubectl -n kube-system get ds kube-multus-ds
|
||||||
|
# kubectl -n kube-system rollout status ds kube-multus-ds
|
||||||
|
# kubectl get crd network-attachment-definitions.k8s.cni.cncf.io
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
---
|
||||||
|
apiVersion: apiextensions.k8s.io/v1
|
||||||
|
kind: CustomResourceDefinition
|
||||||
|
metadata:
|
||||||
|
name: network-attachment-definitions.k8s.cni.cncf.io
|
||||||
|
annotations:
|
||||||
|
bluejay.iamworkin.lan/source: "k8snetworkplumbingwg/multus-cni v4.2.2"
|
||||||
|
spec:
|
||||||
|
group: k8s.cni.cncf.io
|
||||||
|
scope: Namespaced
|
||||||
|
names:
|
||||||
|
plural: network-attachment-definitions
|
||||||
|
singular: network-attachment-definition
|
||||||
|
kind: NetworkAttachmentDefinition
|
||||||
|
shortNames:
|
||||||
|
- net-attach-def
|
||||||
|
versions:
|
||||||
|
- name: v1
|
||||||
|
served: true
|
||||||
|
storage: true
|
||||||
|
schema:
|
||||||
|
openAPIV3Schema:
|
||||||
|
description: 'NetworkAttachmentDefinition is a CRD schema specified by the Network Plumbing
|
||||||
|
Working Group to express the intent for attaching pods to one or more logical or physical
|
||||||
|
networks. More information available at: https://github.com/k8snetworkplumbingwg/multi-net-spec'
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
apiVersion:
|
||||||
|
type: string
|
||||||
|
kind:
|
||||||
|
type: string
|
||||||
|
metadata:
|
||||||
|
type: object
|
||||||
|
spec:
|
||||||
|
description: 'NetworkAttachmentDefinition spec defines the desired state of a network attachment'
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
config:
|
||||||
|
description: 'NetworkAttachmentDefinition config is a JSON-formatted CNI configuration'
|
||||||
|
type: string
|
||||||
|
---
|
||||||
|
kind: ClusterRole
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
metadata:
|
||||||
|
name: multus
|
||||||
|
rules:
|
||||||
|
- apiGroups: ["k8s.cni.cncf.io"]
|
||||||
|
resources:
|
||||||
|
- '*'
|
||||||
|
verbs:
|
||||||
|
- '*'
|
||||||
|
- apiGroups:
|
||||||
|
- ""
|
||||||
|
resources:
|
||||||
|
- pods
|
||||||
|
- pods/status
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- list
|
||||||
|
- update
|
||||||
|
- watch
|
||||||
|
- apiGroups:
|
||||||
|
- ""
|
||||||
|
- events.k8s.io
|
||||||
|
resources:
|
||||||
|
- events
|
||||||
|
verbs:
|
||||||
|
- create
|
||||||
|
- patch
|
||||||
|
- update
|
||||||
|
---
|
||||||
|
kind: ClusterRoleBinding
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
metadata:
|
||||||
|
name: multus
|
||||||
|
roleRef:
|
||||||
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
kind: ClusterRole
|
||||||
|
name: multus
|
||||||
|
subjects:
|
||||||
|
- kind: ServiceAccount
|
||||||
|
name: multus
|
||||||
|
namespace: kube-system
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ServiceAccount
|
||||||
|
metadata:
|
||||||
|
name: multus
|
||||||
|
namespace: kube-system
|
||||||
|
---
|
||||||
|
kind: ConfigMap
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: multus-daemon-config
|
||||||
|
namespace: kube-system
|
||||||
|
labels:
|
||||||
|
tier: node
|
||||||
|
app: multus
|
||||||
|
data:
|
||||||
|
daemon-config.json: |
|
||||||
|
{
|
||||||
|
"chrootDir": "/hostroot",
|
||||||
|
"cniVersion": "0.3.1",
|
||||||
|
"logLevel": "verbose",
|
||||||
|
"logToStderr": true,
|
||||||
|
"cniConfigDir": "/host/etc/cni/net.d",
|
||||||
|
"multusAutoconfigDir": "/host/etc/cni/net.d",
|
||||||
|
"multusConfigFile": "auto",
|
||||||
|
"socketDir": "/host/run/multus/"
|
||||||
|
}
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: DaemonSet
|
||||||
|
metadata:
|
||||||
|
name: kube-multus-ds
|
||||||
|
namespace: kube-system
|
||||||
|
labels:
|
||||||
|
tier: node
|
||||||
|
app: multus
|
||||||
|
name: multus
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
name: multus
|
||||||
|
updateStrategy:
|
||||||
|
type: RollingUpdate
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
tier: node
|
||||||
|
app: multus
|
||||||
|
name: multus
|
||||||
|
spec:
|
||||||
|
hostNetwork: true
|
||||||
|
hostPID: true
|
||||||
|
tolerations:
|
||||||
|
- operator: Exists
|
||||||
|
effect: NoSchedule
|
||||||
|
- operator: Exists
|
||||||
|
effect: NoExecute
|
||||||
|
serviceAccountName: multus
|
||||||
|
containers:
|
||||||
|
- name: kube-multus
|
||||||
|
image: ghcr.io/k8snetworkplumbingwg/multus-cni:snapshot-thick
|
||||||
|
command: [ "/usr/src/multus-cni/bin/multus-daemon" ]
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: "100m"
|
||||||
|
memory: "50Mi"
|
||||||
|
limits:
|
||||||
|
cpu: "100m"
|
||||||
|
memory: "50Mi"
|
||||||
|
securityContext:
|
||||||
|
privileged: true
|
||||||
|
terminationMessagePolicy: FallbackToLogsOnError
|
||||||
|
volumeMounts:
|
||||||
|
- name: cni
|
||||||
|
mountPath: /host/etc/cni/net.d
|
||||||
|
# multus-daemon expects that cnibin path must be identical between pod and container host.
|
||||||
|
# e.g. if the cni bin is in '/opt/cni/bin' on the container host side, then it should be mount to '/opt/cni/bin' in multus-daemon,
|
||||||
|
# not to any other directory, like '/opt/bin' or '/usr/bin'.
|
||||||
|
- name: cnibin
|
||||||
|
mountPath: /opt/cni/bin
|
||||||
|
- name: host-run
|
||||||
|
mountPath: /host/run
|
||||||
|
- name: host-var-lib-cni-multus
|
||||||
|
mountPath: /var/lib/cni/multus
|
||||||
|
- name: host-var-lib-kubelet
|
||||||
|
mountPath: /var/lib/kubelet
|
||||||
|
mountPropagation: HostToContainer
|
||||||
|
- name: host-run-k8s-cni-cncf-io
|
||||||
|
mountPath: /run/k8s.cni.cncf.io
|
||||||
|
- name: host-run-netns
|
||||||
|
mountPath: /run/netns
|
||||||
|
mountPropagation: HostToContainer
|
||||||
|
- name: multus-daemon-config
|
||||||
|
mountPath: /etc/cni/net.d/multus.d
|
||||||
|
readOnly: true
|
||||||
|
- name: hostroot
|
||||||
|
mountPath: /hostroot
|
||||||
|
mountPropagation: HostToContainer
|
||||||
|
- mountPath: /etc/cni/multus/net.d
|
||||||
|
name: multus-conf-dir
|
||||||
|
env:
|
||||||
|
- name: MULTUS_NODE_NAME
|
||||||
|
valueFrom:
|
||||||
|
fieldRef:
|
||||||
|
fieldPath: spec.nodeName
|
||||||
|
initContainers:
|
||||||
|
- name: install-multus-binary
|
||||||
|
image: ghcr.io/k8snetworkplumbingwg/multus-cni:snapshot-thick
|
||||||
|
command:
|
||||||
|
- "sh"
|
||||||
|
- "-c"
|
||||||
|
- "cp /usr/src/multus-cni/bin/multus-shim /host/opt/cni/bin/multus-shim && cp /usr/src/multus-cni/bin/passthru /host/opt/cni/bin/passthru"
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: "10m"
|
||||||
|
memory: "15Mi"
|
||||||
|
securityContext:
|
||||||
|
privileged: true
|
||||||
|
terminationMessagePolicy: FallbackToLogsOnError
|
||||||
|
volumeMounts:
|
||||||
|
- name: cnibin
|
||||||
|
mountPath: /host/opt/cni/bin
|
||||||
|
mountPropagation: Bidirectional
|
||||||
|
terminationGracePeriodSeconds: 10
|
||||||
|
volumes:
|
||||||
|
- name: cni
|
||||||
|
hostPath:
|
||||||
|
path: /etc/cni/net.d
|
||||||
|
- name: cnibin
|
||||||
|
hostPath:
|
||||||
|
path: /opt/cni/bin
|
||||||
|
- name: hostroot
|
||||||
|
hostPath:
|
||||||
|
path: /
|
||||||
|
- name: multus-daemon-config
|
||||||
|
configMap:
|
||||||
|
name: multus-daemon-config
|
||||||
|
items:
|
||||||
|
- key: daemon-config.json
|
||||||
|
path: daemon-config.json
|
||||||
|
- name: host-run
|
||||||
|
hostPath:
|
||||||
|
path: /run
|
||||||
|
- name: host-var-lib-cni-multus
|
||||||
|
hostPath:
|
||||||
|
path: /var/lib/cni/multus
|
||||||
|
- name: host-var-lib-kubelet
|
||||||
|
hostPath:
|
||||||
|
path: /var/lib/kubelet
|
||||||
|
- name: host-run-k8s-cni-cncf-io
|
||||||
|
hostPath:
|
||||||
|
path: /run/k8s.cni.cncf.io
|
||||||
|
- name: host-run-netns
|
||||||
|
hostPath:
|
||||||
|
path: /run/netns/
|
||||||
|
- name: multus-conf-dir
|
||||||
|
hostPath:
|
||||||
|
path: /etc/cni/multus/net.d
|
||||||
210
apps/selenium/network-policy.yaml
Normal file
210
apps/selenium/network-policy.yaml
Normal file
@@ -0,0 +1,210 @@
|
|||||||
|
# Selenium Grid NetworkPolicy.
|
||||||
|
#
|
||||||
|
# Captured into bluejay-infra 2026-05-07 during the regroup audit. This
|
||||||
|
# NetworkPolicy was previously applied via `kubectl apply` directly to
|
||||||
|
# the cluster with no source-of-truth anywhere — a fresh cluster rebuild
|
||||||
|
# would have lost all of it (including the Selenium Grid → Traefik VIP
|
||||||
|
# allow rule for AAT runs against `*.iamworkin.lan` services).
|
||||||
|
#
|
||||||
|
# The Selenium Grid Deployment + Services themselves are still managed
|
||||||
|
# outside ArgoCD (deployed via raw kubectl from the original Selenium
|
||||||
|
# Grid bring-up). Migrating those into bluejay-infra is a separate lane —
|
||||||
|
# this commit only restores GitOps repeatability for the NetworkPolicy.
|
||||||
|
#
|
||||||
|
# Rules captured from the live cluster's `kubectl get netpol -n selenium
|
||||||
|
# selenium-netpol -o yaml` on 2026-05-07. Originally applied 2026-03-15
|
||||||
|
# (from `metadata.creationTimestamp` before the field was stripped).
|
||||||
|
#
|
||||||
|
# Allows:
|
||||||
|
# - Egress: CoreDNS, intra-namespace pod-to-pod (4442/4443/4444/5555),
|
||||||
|
# Traefik VIP for `*.iamworkin.lan` AAT runs, ALL namespaces (empty namespaceSelector) on
|
||||||
|
# 80/443 plus standard FC service ports (5100/5200/5300/5400/8080), pod CIDR
|
||||||
|
# (10.42.0.0/16) + service CIDR (10.43.0.0/16) for the same ports,
|
||||||
|
# LAN gateway range (10.0.56.0/24) on 80/443/8443, edge2 CUPS print
|
||||||
|
# (10.0.57.16:5200), public internet 80/443 (0.0.0.0/0 minus 172.16.0.0/12 and 192.168.0.0/16 only; 10.0.0.0/8 is NOT excluded), and
|
||||||
|
# fc-signage:5190 for the signage AAT lane.
|
||||||
|
# - Ingress: Traefik (4444 + 8089 ACME-solver-style), intra-namespace pod-to-pod (4442/4443/4444/5555),
|
||||||
|
# telephony / gitea / fc-system / fc-signage namespaces on 4444.
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: NetworkPolicy
|
||||||
|
metadata:
|
||||||
|
name: selenium-netpol
|
||||||
|
namespace: selenium
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/part-of: selenium
|
||||||
|
app.kubernetes.io/component: isolation
|
||||||
|
spec:
|
||||||
|
egress:
|
||||||
|
- ports:
|
||||||
|
- port: 53
|
||||||
|
protocol: UDP
|
||||||
|
- port: 53
|
||||||
|
protocol: TCP
|
||||||
|
to:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: kube-system
|
||||||
|
- ports:
|
||||||
|
- port: 4442
|
||||||
|
protocol: TCP
|
||||||
|
- port: 4443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 4444
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5555
|
||||||
|
protocol: TCP
|
||||||
|
to:
|
||||||
|
- podSelector: {}
|
||||||
|
- ports:
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 80
|
||||||
|
protocol: TCP
|
||||||
|
to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.56.200/32
|
||||||
|
- ports:
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 80
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5200
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5300
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5400
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5100
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8080
|
||||||
|
protocol: TCP
|
||||||
|
to:
|
||||||
|
- namespaceSelector: {}
|
||||||
|
- ports:
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 80
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8080
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5200
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5300
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5400
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5100
|
||||||
|
protocol: TCP
|
||||||
|
to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.43.0.0/16
|
||||||
|
- ports:
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 80
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8080
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5200
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5300
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5400
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5100
|
||||||
|
protocol: TCP
|
||||||
|
to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.42.0.0/16
|
||||||
|
- ports:
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 80
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8443
|
||||||
|
protocol: TCP
|
||||||
|
to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.56.0/24
|
||||||
|
- ports:
|
||||||
|
- port: 5200
|
||||||
|
protocol: TCP
|
||||||
|
to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.57.16/32
|
||||||
|
- ports:
|
||||||
|
- port: 80
|
||||||
|
protocol: TCP
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 0.0.0.0/0
|
||||||
|
except:
|
||||||
|
- 172.16.0.0/12
|
||||||
|
- 192.168.0.0/16
|
||||||
|
- ports:
|
||||||
|
- port: 5190
|
||||||
|
protocol: TCP
|
||||||
|
to:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: fc-signage
|
||||||
|
ingress:
|
||||||
|
- from:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: traefik-system
|
||||||
|
ports:
|
||||||
|
- port: 4444
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8089
|
||||||
|
protocol: TCP
|
||||||
|
- from:
|
||||||
|
- podSelector: {}
|
||||||
|
ports:
|
||||||
|
- port: 4442
|
||||||
|
protocol: TCP
|
||||||
|
- port: 4443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 4444
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5555
|
||||||
|
protocol: TCP
|
||||||
|
- from:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: telephony
|
||||||
|
ports:
|
||||||
|
- port: 4444
|
||||||
|
protocol: TCP
|
||||||
|
- from:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: gitea
|
||||||
|
ports:
|
||||||
|
- port: 4444
|
||||||
|
protocol: TCP
|
||||||
|
- from:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: fc-system
|
||||||
|
ports:
|
||||||
|
- port: 4444
|
||||||
|
protocol: TCP
|
||||||
|
- from:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: fc-signage
|
||||||
|
ports:
|
||||||
|
- port: 4444
|
||||||
|
protocol: TCP
|
||||||
|
podSelector: {}
|
||||||
|
policyTypes:
|
||||||
|
- Ingress
|
||||||
|
- Egress
|
||||||
|
|
||||||
60
apps/worldbuilder/README.md
Normal file
60
apps/worldbuilder/README.md
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
# FlowerCore.WorldBuilder
|
||||||
|
|
||||||
|
ArgoCD-managed manifest for FlowerCore.WorldBuilder.Web — comic / storyboard
|
||||||
|
authoring service that drives ComfyUI for panel image generation and
|
||||||
|
QuestPDF for letter / A4 export.
|
||||||
|
|
||||||
|
Source: `D:\git\FlowerCore\FlowerCore.WorldBuilder` (master)
|
||||||
|
|
||||||
|
## Deployment order
|
||||||
|
|
||||||
|
1. **DNS preflight** — `worldbuilder.iamworkin.lan -> 10.0.56.200` MUST exist
|
||||||
|
in pfSense Unbound before this manifest is applied, or cert-manager
|
||||||
|
HTTP-01 silently backs off exponentially for ~2h.
|
||||||
|
Memory: `feedback_pfsense_dns_required_for_acme`.
|
||||||
|
2. **Image import to ALL RKE2 nodes** — pod can schedule to any of
|
||||||
|
`rke2-server` (10.0.56.11), `rke2-agent1` (10.0.56.12),
|
||||||
|
`rke2-agent2` (10.0.56.13). Build with:
|
||||||
|
```bash
|
||||||
|
bash deploy/build.sh # in FlowerCore.WorldBuilder repo
|
||||||
|
podman save localhost/fc-worldbuilder:v<TAG> -o /tmp/fc-worldbuilder-v<TAG>.tar
|
||||||
|
for h in 10.0.56.11 10.0.56.12 10.0.56.13; do
|
||||||
|
scp /tmp/fc-worldbuilder-v<TAG>.tar fcadmin@$h:/tmp/
|
||||||
|
ssh fcadmin@$h \
|
||||||
|
"sudo /var/lib/rancher/rke2/bin/ctr -a /run/k3s/containerd/containerd.sock \
|
||||||
|
-n k8s.io images import /tmp/fc-worldbuilder-v<TAG>.tar"
|
||||||
|
done
|
||||||
|
```
|
||||||
|
Memory: `feedback_rke2_image_import_per_node_scp`.
|
||||||
|
3. **Bump image tag** in `worldbuilder.yaml` and git push.
|
||||||
|
ArgoCD ApplicationSet picks up within ~3 minutes.
|
||||||
|
4. **First production render** — open `https://worldbuilder.iamworkin.lan`,
|
||||||
|
create World → Character → Storyboard → ExportJob, confirm artifact
|
||||||
|
downloads. ComfyUI lives on BLUEJAY-WS at `http://10.0.56.20:8188`.
|
||||||
|
|
||||||
|
## Health probes
|
||||||
|
|
||||||
|
- `startupProbe` + `readinessProbe`: `httpGet /healthz` (registered explicitly
|
||||||
|
in Program.cs — anonymous, no DB or OpenAPI dependency).
|
||||||
|
- `livenessProbe`: `tcpSocket` as a cheap fallback.
|
||||||
|
Memory: `feedback_k8s_probes_must_not_hit_openapi`,
|
||||||
|
`feedback_k8s_probes_behind_auth_middleware`.
|
||||||
|
|
||||||
|
## Storage
|
||||||
|
|
||||||
|
- Longhorn RWO PVC `worldbuilder-data` (5Gi) mounted at `/data`. SQLite DB
|
||||||
|
lives at `/data/worldbuilder.db`, generated images under `/data/gallery/`,
|
||||||
|
PDF/PNG exports under `/data/exports/`.
|
||||||
|
- DataProtection keys persist to the same SQLite via
|
||||||
|
`AddFlowerCoreDataProtection<WorldBuilderDbContext>` — explicit migration
|
||||||
|
`20260429133417_Initial` already creates `fc_dp_keys`.
|
||||||
|
Memory: `feedback_dataprotection_keys_persist_to_app_dbcontext`,
|
||||||
|
`feedback_intranet_dataprotection_table_must_have_explicit_migration`.
|
||||||
|
|
||||||
|
## Image generation backend
|
||||||
|
|
||||||
|
`FlowerCore:WorldBuilder:ImageGeneration:BaseUrl=http://10.0.56.20:8188` —
|
||||||
|
ComfyUI runs on BLUEJAY-WS Windows (R9700 / gfx1201 / ROCm 7.2.1). Pod reaches
|
||||||
|
the workstation directly across the 10.0.56.0/24 VLAN (no Podman-style host-
|
||||||
|
filter issues — K8s pods route via Calico, which is L3-routed across the
|
||||||
|
VLAN).
|
||||||
213
apps/worldbuilder/worldbuilder.yaml
Normal file
213
apps/worldbuilder/worldbuilder.yaml
Normal file
@@ -0,0 +1,213 @@
|
|||||||
|
# FlowerCore.WorldBuilder — comic / storyboard authoring service.
|
||||||
|
#
|
||||||
|
# Deployment + Service + PVC + Certificate + IngressRoute. ArgoCD-managed
|
||||||
|
# end-to-end. See apps/worldbuilder/README.md for the per-deploy runbook.
|
||||||
|
#
|
||||||
|
# Image build (BLUEJAY-WS):
|
||||||
|
# bash deploy/build.sh # in FlowerCore.WorldBuilder repo
|
||||||
|
# podman save localhost/fc-worldbuilder:v<TAG> -o /tmp/fc-worldbuilder-v<TAG>.tar
|
||||||
|
# for h in 10.0.56.11 10.0.56.12 10.0.56.13; do
|
||||||
|
# scp /tmp/fc-worldbuilder-v<TAG>.tar fcadmin@$h:/tmp/
|
||||||
|
# ssh fcadmin@$h "sudo /var/lib/rancher/rke2/bin/ctr -a /run/k3s/containerd/containerd.sock -n k8s.io images import /tmp/fc-worldbuilder-v<TAG>.tar"
|
||||||
|
# done
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Namespace
|
||||||
|
metadata:
|
||||||
|
name: fc-worldbuilder
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
---
|
||||||
|
# SQLite DB + generated image gallery + PDF/PNG exports.
|
||||||
|
# Longhorn RWO — single replica with `Recreate` rollout strategy keeps it safe.
|
||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: worldbuilder-data
|
||||||
|
namespace: fc-worldbuilder
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteOnce
|
||||||
|
storageClassName: longhorn
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 5Gi
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: worldbuilder-web
|
||||||
|
namespace: fc-worldbuilder
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: worldbuilder-web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
revisionHistoryLimit: 3
|
||||||
|
strategy:
|
||||||
|
# RWO PVC + single replica. Recreate avoids multi-attach overlap.
|
||||||
|
type: Recreate
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: worldbuilder-web
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: worldbuilder-web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
annotations:
|
||||||
|
prometheus.io/scrape: "true"
|
||||||
|
prometheus.io/port: "8080"
|
||||||
|
prometheus.io/path: "/metrics/prometheus"
|
||||||
|
spec:
|
||||||
|
securityContext:
|
||||||
|
fsGroup: 1654
|
||||||
|
fsGroupChangePolicy: OnRootMismatch
|
||||||
|
containers:
|
||||||
|
- name: web
|
||||||
|
# Bump tag for each rebuild. Initial deploy: v202605062048
|
||||||
|
image: localhost/fc-worldbuilder:v202605062048
|
||||||
|
imagePullPolicy: Never
|
||||||
|
ports:
|
||||||
|
- containerPort: 8080
|
||||||
|
name: http
|
||||||
|
env:
|
||||||
|
- name: ASPNETCORE_URLS
|
||||||
|
value: "http://+:8080"
|
||||||
|
- name: ASPNETCORE_ENVIRONMENT
|
||||||
|
value: "Production"
|
||||||
|
- name: DOTNET_RUNNING_IN_CONTAINER
|
||||||
|
value: "true"
|
||||||
|
- name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
|
||||||
|
value: "false"
|
||||||
|
# SQLite path overrides (default appsettings uses relative paths).
|
||||||
|
- name: ConnectionStrings__DefaultConnection
|
||||||
|
value: "Data Source=/data/worldbuilder.db"
|
||||||
|
- name: FlowerCore__Database__Provider
|
||||||
|
value: "Sqlite"
|
||||||
|
- name: FlowerCore__Database__ConnectionStrings__Sqlite
|
||||||
|
value: "Data Source=/data/worldbuilder.db"
|
||||||
|
# Generated image gallery + exports persist on /data.
|
||||||
|
- name: FlowerCore__WorldBuilder__ImageStore__RootPath
|
||||||
|
value: "/data/gallery"
|
||||||
|
- name: FlowerCore__WorldBuilder__Export__RootPath
|
||||||
|
value: "/data/exports"
|
||||||
|
# ComfyUI on BLUEJAY-WS (R9700 / gfx1201 / ROCm 7.2.1).
|
||||||
|
- name: FlowerCore__WorldBuilder__ImageGeneration__BaseUrl
|
||||||
|
value: "http://10.0.56.20:8188"
|
||||||
|
- name: FlowerCore__WorldBuilder__ImageGeneration__ClientMode
|
||||||
|
value: "comfyui"
|
||||||
|
resources:
|
||||||
|
# Cluster CPU-request budget runs hot (99% on all 3 nodes at deploy
|
||||||
|
# time) while actual CPU usage is well below capacity. Idle Blazor
|
||||||
|
# Server + SignalR + a single ComfyUI poller uses ~5m, so 25m is
|
||||||
|
# generous. Re-evaluate if active rendering/export workers ever
|
||||||
|
# push past the limit.
|
||||||
|
requests:
|
||||||
|
cpu: 25m
|
||||||
|
memory: 256Mi
|
||||||
|
limits:
|
||||||
|
cpu: 1000m
|
||||||
|
memory: 768Mi
|
||||||
|
# /healthz is registered explicitly in Program.cs (anonymous, no DB
|
||||||
|
# or OpenAPI dependency). Liveness uses tcpSocket as a cheap fallback
|
||||||
|
# in case future middleware changes accidentally gate /healthz.
|
||||||
|
# Memory: feedback_k8s_probes_must_not_hit_openapi,
|
||||||
|
# feedback_k8s_probes_behind_auth_middleware.
|
||||||
|
startupProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /healthz
|
||||||
|
port: 8080
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 5
|
||||||
|
failureThreshold: 30
|
||||||
|
readinessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /healthz
|
||||||
|
port: 8080
|
||||||
|
periodSeconds: 10
|
||||||
|
failureThreshold: 3
|
||||||
|
livenessProbe:
|
||||||
|
tcpSocket:
|
||||||
|
port: 8080
|
||||||
|
initialDelaySeconds: 30
|
||||||
|
periodSeconds: 30
|
||||||
|
failureThreshold: 3
|
||||||
|
securityContext:
|
||||||
|
runAsNonRoot: true
|
||||||
|
runAsUser: 1654
|
||||||
|
runAsGroup: 1654
|
||||||
|
allowPrivilegeEscalation: false
|
||||||
|
readOnlyRootFilesystem: true
|
||||||
|
capabilities:
|
||||||
|
drop:
|
||||||
|
- ALL
|
||||||
|
volumeMounts:
|
||||||
|
- name: data
|
||||||
|
mountPath: /data
|
||||||
|
- name: tmp
|
||||||
|
mountPath: /tmp
|
||||||
|
- name: logs
|
||||||
|
mountPath: /app/logs
|
||||||
|
volumes:
|
||||||
|
- name: data
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: worldbuilder-data
|
||||||
|
- name: tmp
|
||||||
|
emptyDir: {}
|
||||||
|
- name: logs
|
||||||
|
emptyDir: {}
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: worldbuilder-web
|
||||||
|
namespace: fc-worldbuilder
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: worldbuilder-web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app.kubernetes.io/name: worldbuilder-web
|
||||||
|
ports:
|
||||||
|
- name: http
|
||||||
|
port: 80
|
||||||
|
targetPort: 8080
|
||||||
|
---
|
||||||
|
apiVersion: cert-manager.io/v1
|
||||||
|
kind: Certificate
|
||||||
|
metadata:
|
||||||
|
name: worldbuilder-web-tls
|
||||||
|
namespace: fc-worldbuilder
|
||||||
|
spec:
|
||||||
|
secretName: worldbuilder-web-tls
|
||||||
|
issuerRef:
|
||||||
|
name: step-ca-acme
|
||||||
|
kind: ClusterIssuer
|
||||||
|
dnsNames:
|
||||||
|
- worldbuilder.iamworkin.lan
|
||||||
|
# step-ca ACME provisioner caps lifetime at 30d. A requested 90d duration was
|
||||||
|
# silently capped to 30d, making renewBefore 720h (30d) equal to the
|
||||||
|
# actual cert lifetime — triggered a perpetual renewal loop that
|
||||||
|
# generated 2365+ CertificateRequest objects in 18h. Match the working
|
||||||
|
# 720h/240h pattern used by every other FC service cert.
|
||||||
|
duration: 720h # 30d (step-ca cap)
|
||||||
|
renewBefore: 240h # 10d
|
||||||
|
---
|
||||||
|
apiVersion: traefik.io/v1alpha1
|
||||||
|
kind: IngressRoute
|
||||||
|
metadata:
|
||||||
|
name: worldbuilder-web
|
||||||
|
namespace: fc-worldbuilder
|
||||||
|
spec:
|
||||||
|
entryPoints:
|
||||||
|
- websecure
|
||||||
|
routes:
|
||||||
|
- match: Host(`worldbuilder.iamworkin.lan`)
|
||||||
|
kind: Rule
|
||||||
|
services:
|
||||||
|
- name: worldbuilder-web
|
||||||
|
port: 80
|
||||||
|
tls:
|
||||||
|
secretName: worldbuilder-web-tls
|
||||||
@@ -22,10 +22,16 @@ public sealed class FleetManifestLintTests
|
|||||||
// (bootstrap-JWT) so its allowlist is GET||HEAD||POST||OPTIONS — but
|
// (bootstrap-JWT) so its allowlist is GET||HEAD||POST||OPTIONS — but
|
||||||
// PUT/PATCH/DELETE must still 404 at the route. Anything wider than this
|
// PUT/PATCH/DELETE must still 404 at the route. Anything wider than this
|
||||||
// set should fail this lint.
|
// set should fail this lint.
|
||||||
|
//
|
||||||
|
// PUB-1 (2026-05-06): update.flowercore.io / updates.flowercore.io were
|
||||||
|
// added for the Cloudflare-proxied public Update Center edge. They use the
|
||||||
|
// same bounded read-write allowlist as the LAN pair.
|
||||||
private static readonly HashSet<string> PublicReadWriteAllowlistHosts = new(StringComparer.Ordinal)
|
private static readonly HashSet<string> PublicReadWriteAllowlistHosts = new(StringComparer.Ordinal)
|
||||||
{
|
{
|
||||||
"updatecenter.iamworkin.lan",
|
"updatecenter.iamworkin.lan",
|
||||||
"updates.iamworkin.lan",
|
"updates.iamworkin.lan",
|
||||||
|
"update.flowercore.io",
|
||||||
|
"updates.flowercore.io",
|
||||||
};
|
};
|
||||||
|
|
||||||
private static readonly HashSet<string> ApiKeyProtectedDeployments = new(StringComparer.Ordinal)
|
private static readonly HashSet<string> ApiKeyProtectedDeployments = new(StringComparer.Ordinal)
|
||||||
|
|||||||
@@ -6,7 +6,12 @@ package bluejayinfra.public_readwrite_allowlist
|
|||||||
# PUT/PATCH/DELETE must still 404 at the route. Any host in this set MUST
|
# PUT/PATCH/DELETE must still 404 at the route. Any host in this set MUST
|
||||||
# include all four required methods AND MUST NOT include any forbidden
|
# include all four required methods AND MUST NOT include any forbidden
|
||||||
# method.
|
# method.
|
||||||
public_readwrite_hosts := {"updatecenter.iamworkin.lan", "updates.iamworkin.lan"}
|
public_readwrite_hosts := {
|
||||||
|
"updatecenter.iamworkin.lan",
|
||||||
|
"updates.iamworkin.lan",
|
||||||
|
"update.flowercore.io",
|
||||||
|
"updates.flowercore.io",
|
||||||
|
}
|
||||||
|
|
||||||
required_methods := {"GET", "HEAD", "POST", "OPTIONS"}
|
required_methods := {"GET", "HEAD", "POST", "OPTIONS"}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user