Compare commits
147 Commits
codex/agen
...
codex/s54-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e2e93d482c | ||
| 4319cc2b51 | |||
|
|
2bf339ce51 | ||
|
|
5bdedfc5ae | ||
|
|
0307ae16ae | ||
|
|
6c18f69cf2 | ||
|
|
47e2256556 | ||
|
|
9d77f8ba0e | ||
|
|
2f4be19c85 | ||
|
|
2a62c40990 | ||
|
|
7be98e5efc | ||
|
|
a65b356c9d | ||
|
|
08c17ef1b4 | ||
|
|
06f2f002b7 | ||
|
|
7ac4a8b4b7 | ||
|
|
90f2a86819 | ||
|
|
cbdefb2b23 | ||
|
|
1c36fe3a0a | ||
|
|
2b420ce8a4 | ||
|
|
5cbc1a06b1 | ||
|
|
9e7ee39b3a | ||
|
|
ae030a5f33 | ||
| bc8c35896f | |||
|
|
2cc91b6df0 | ||
| 0d2090fe81 | |||
|
|
bc3548e715 | ||
| 74333cc26b | |||
|
|
7310fb88c2 | ||
| 148bc87b9a | |||
|
|
2a1e842100 | ||
| bc28430d24 | |||
|
|
cc92272217 | ||
| d6f4468a9c | |||
|
|
2f796a2ebd | ||
| 1f1f6823db | |||
|
|
b92f74b63a | ||
|
|
cb7f7dbc4d | ||
|
|
03126d5584 | ||
|
|
495e884c41 | ||
|
|
65aa1e6104 | ||
|
|
7f2a3b76b4 | ||
| ea73f00461 | |||
|
|
25ace30a03 | ||
|
|
ca574c2280 | ||
|
|
09387f90e1 | ||
|
|
e641ceab48 | ||
|
|
c263426ea5 | ||
|
|
bacac067cf | ||
| 914fed08d8 | |||
|
|
200aeab032 | ||
|
|
8182616d4c | ||
|
|
f0862ac03c | ||
|
|
46c392605e | ||
| 89b147bbdd | |||
| d7238a5e3b | |||
| fc444a02a1 | |||
| 83d4883d55 | |||
| f8fe3b2688 | |||
| f2ab892ebc | |||
| fef68a9560 | |||
|
|
6fe77225ae | ||
| 634b9c4169 | |||
| b8c7e59005 | |||
| 65ac8d6f01 | |||
| 35844e0dbd | |||
| b1e307151e | |||
| 12b07219c7 | |||
| 9fd32c4415 | |||
| ad670fb344 | |||
|
|
6f6ca50987 | ||
|
|
c7be58c1f7 | ||
|
|
a1f5a393cd | ||
|
|
710340d8be | ||
|
|
7d2daaa4f8 | ||
|
|
e50e103ba0 | ||
|
|
e8094eb0bd | ||
| 8d87d9172c | |||
|
|
cfd9743afa | ||
|
|
5029e209cd | ||
|
|
f298339152 | ||
|
|
6e7d88db49 | ||
|
|
5ae50bd491 | ||
|
|
653d4472f5 | ||
|
|
eb8693e1ce | ||
|
|
667777a653 | ||
|
|
84c9feb893 | ||
|
|
427dbfcef2 | ||
|
|
b651a4e2d0 | ||
|
|
b998f50f48 | ||
|
|
8fd9ae1cd3 | ||
|
|
fc2aca0e9e | ||
|
|
ba18c52130 | ||
|
|
9f6dc1a9d5 | ||
|
|
0bf47dfa33 | ||
|
|
87a7d7c70a | ||
|
|
1c4145a581 | ||
|
|
c50a403f74 | ||
|
|
fb7bd10528 | ||
|
|
6c21d14a98 | ||
|
|
b3529f8e96 | ||
|
|
00c11b4eaa | ||
|
|
04881f46f0 | ||
|
|
c0038e4859 | ||
|
|
dee48831c6 | ||
|
|
0f1dc5f871 | ||
|
|
11c5f6e6cc | ||
|
|
d637fe9b30 | ||
|
|
5bfe41beca | ||
|
|
df22774674 | ||
|
|
c4065b15a3 | ||
|
|
a4aa612373 | ||
|
|
c2eb37dee9 | ||
|
|
bf6f542569 | ||
|
|
e150b2102f | ||
|
|
33a765b0bc | ||
|
|
5484ed7db6 | ||
|
|
2aa84349ea | ||
|
|
851f8e673b | ||
|
|
f78f8c8192 | ||
|
|
9b255fefc1 | ||
|
|
6a89a76e39 | ||
|
|
2489464d4f | ||
|
|
4b777b16ac | ||
|
|
8c60e3a4d3 | ||
|
|
df02b4c3c3 | ||
|
|
c0dceafffd | ||
|
|
490db8f9e6 | ||
|
|
1926bdaf3b | ||
|
|
ca8d062826 | ||
|
|
1889462fc4 | ||
|
|
523ba61232 | ||
|
|
53f67c8713 | ||
|
|
6b9cf3d12c | ||
|
|
0b52093b36 | ||
|
|
7a9098d3bd | ||
|
|
57d7ba46a7 | ||
|
|
9ec2e2d52e | ||
|
|
b4d62a8a50 | ||
|
|
fbbc07023b | ||
|
|
4b0eef0fb0 | ||
|
|
bb09a3786f | ||
|
|
006dbcf671 | ||
|
|
1be71d6ba7 | ||
|
|
0c8026c912 | ||
|
|
621ae47e00 | ||
|
|
ae6b8c0142 | ||
|
|
da55220218 |
2
.gitattributes
vendored
Normal file
2
.gitattributes
vendored
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
/.gitattributes text eol=lf
|
||||||
|
*.sh text eol=lf
|
||||||
7
.gitignore
vendored
Normal file
7
.gitignore
vendored
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
# .NET build outputs (lint test project)
|
||||||
|
**/bin/
|
||||||
|
**/obj/
|
||||||
|
|
||||||
|
# Editor / temp
|
||||||
|
.DS_Store
|
||||||
|
*.swp
|
||||||
26
README.md
26
README.md
@@ -99,10 +99,36 @@ curl -sk -X DELETE https://dns.iamworkin.lan/api/v1/servers/<serverId>/zones/iam
|
|||||||
- **CoreDNS template + ndots:5 collision**: inside pods, `<svc>.<ns>.svc.cluster.local` with <5 dots gets search-expanded through `iamworkin.lan` FIRST and hits the wildcard template → resolves to Traefik VIP, not the real ClusterIP. Use short service names (`<svc>`) in K8s manifests. See memory `feedback_coredns_ndots_template_collision.md`.
|
- **CoreDNS template + ndots:5 collision**: inside pods, `<svc>.<ns>.svc.cluster.local` with <5 dots gets search-expanded through `iamworkin.lan` FIRST and hits the wildcard template → resolves to Traefik VIP, not the real ClusterIP. Use short service names (`<svc>`) in K8s manifests. See memory `feedback_coredns_ndots_template_collision.md`.
|
||||||
- **Image not on node**: pods stuck `ErrImageNeverPull` means the image wasn't imported to the node Kubernetes scheduled the pod onto. `ctr images import` on all of rke2-server, rke2-agent1, rke2-agent2.
|
- **Image not on node**: pods stuck `ErrImageNeverPull` means the image wasn't imported to the node Kubernetes scheduled the pod onto. `ctr images import` on all of rke2-server, rke2-agent1, rke2-agent2.
|
||||||
- **StatefulSet PVC drift**: `volumeClaimTemplates` needs explicit `volumeMode: Filesystem` or ArgoCD SSA self-heals forever. See memory `feedback_argocd_statefulset_pvc_drift.md`.
|
- **StatefulSet PVC drift**: `volumeClaimTemplates` needs explicit `volumeMode: Filesystem` or ArgoCD SSA self-heals forever. See memory `feedback_argocd_statefulset_pvc_drift.md`.
|
||||||
|
- **IngressRoute namespace split**: this RKE2 Traefik install does not allow cross-namespace service refs. Keep the `IngressRoute`, backend `Service`, and TLS secret in the same namespace; if one host is shared across namespaces, duplicate the `Certificate` and move the route next to the destination service.
|
||||||
|
- **Public read-only hosts**: if a public host fronts a service that also exposes admin writes internally, add a Traefik route match like `Host(...) && (Method(GET) || Method(HEAD))` on the public edge instead of trusting the app to reject unsafe methods.
|
||||||
|
- **Public read-write allowlist hosts**: if a public host accepts a tightly bounded write surface (e.g. bootstrap-JWT POST), pin the allowlist as `(Method(GET) || Method(HEAD) || Method(POST) || Method(OPTIONS))`. PUT/PATCH/DELETE must still 404 at the route. Track A's `updatecenter.iamworkin.lan` / `updates.iamworkin.lan` are the canonical example. The lint test enforces this invariant.
|
||||||
|
- **Traefik VIP netpols**: when a `NetworkPolicy` allows `10.0.56.200`, also allow the post-DNAT backend ports (`8443` for TLS plus `8080` or `8000` for HTTP) or Calico will drop the rewritten flow.
|
||||||
|
- **Auth-safe probes**: services behind API-key or global auth middleware should prefer `tcpSocket` probes unless `/health` is explicitly exempted before the middleware runs.
|
||||||
- **ArgoCD must use internal Gitea URL**: `http://gitea-clusterip.gitea.svc.cluster.local:3000/bluejay/bluejay-infra.git`, not the external HTTPS URL (step-ca cert isn't trusted by ArgoCD). The `ApplicationSet` and any hand-created `Application` must both use the internal URL.
|
- **ArgoCD must use internal Gitea URL**: `http://gitea-clusterip.gitea.svc.cluster.local:3000/bluejay/bluejay-infra.git`, not the external HTTPS URL (step-ca cert isn't trusted by ArgoCD). The `ApplicationSet` and any hand-created `Application` must both use the internal URL.
|
||||||
|
|
||||||
|
## Local manifest lint
|
||||||
|
|
||||||
|
The repo now carries a local-first lint pass for the recurring K8s gotchas that have burned the fleet:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
dotnet test tests/bluejay-infra-lint/BluejayInfraLint.Tests.csproj -c Release
|
||||||
|
```
|
||||||
|
|
||||||
|
That test project sweeps `bluejay-infra/apps/**` plus the canonical sibling `FlowerCore.*/k8s` manifests that share the same workspace. Matching `conftest.dev` policy files live under `tests/bluejay-infra-lint/conftest.dev/` for environments that also have `conftest` or `opa`.
|
||||||
|
|
||||||
|
## Non-K8s Pi Artifacts
|
||||||
|
|
||||||
|
Some `apps/*` directories are deployment artifact bundles consumed by Puppet
|
||||||
|
instead of Kubernetes workloads. `apps/fc-signage-pi-player/` carries the
|
||||||
|
Chromium signage Pi player, `apps/fc-divoom-dm-pi-device/` carries the additive
|
||||||
|
edge2 Divoom-as-DeviceManagement-device profile/Hiera contract, and
|
||||||
|
`apps/fc-divoom-tv-pi/` carries the Divoom TV Pi HDMI systemd/Puppet shape.
|
||||||
|
These bundles intentionally avoid Deployment, IngressRoute, Certificate, and
|
||||||
|
OnePasswordItem resources.
|
||||||
|
|
||||||
## References
|
## References
|
||||||
|
|
||||||
|
- OpenVox noc1 durability runbook: `docs/runbooks/openvoxserver-quadlet-durability.md`
|
||||||
- Cert-manager recovery playbook: `FlowerCore.Notes/memory/project_cert_manager_recovery_2026_04_22.md`
|
- Cert-manager recovery playbook: `FlowerCore.Notes/memory/project_cert_manager_recovery_2026_04_22.md`
|
||||||
- Why pfSense DNS is required: `FlowerCore.Notes/memory/feedback_pfsense_dns_required_for_acme.md`
|
- Why pfSense DNS is required: `FlowerCore.Notes/memory/feedback_pfsense_dns_required_for_acme.md`
|
||||||
- Public DNS operator host: `https://dns.iamworkin.lan`
|
- Public DNS operator host: `https://dns.iamworkin.lan`
|
||||||
|
|||||||
@@ -127,6 +127,18 @@ metadata:
|
|||||||
spec:
|
spec:
|
||||||
itemPath: "vaults/IAmWorkin/items/Print.Web API Keys"
|
itemPath: "vaults/IAmWorkin/items/Print.Web API Keys"
|
||||||
|
|
||||||
|
---
|
||||||
|
# Knowledge MCP bearer token for the direct Agent Zero -> Knowledge.Web path.
|
||||||
|
# The 1Password item currently stores the raw token in its concealed PASSWORD
|
||||||
|
# field, which the operator syncs to Secret key `password`.
|
||||||
|
apiVersion: onepassword.com/v1
|
||||||
|
kind: OnePasswordItem
|
||||||
|
metadata:
|
||||||
|
name: knowledge-mcp-tokens
|
||||||
|
namespace: agent-zero
|
||||||
|
spec:
|
||||||
|
itemPath: "vaults/IAmWorkin/items/FlowerCore Knowledge MCP Tokens"
|
||||||
|
|
||||||
---
|
---
|
||||||
apiVersion: apps/v1
|
apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
@@ -242,13 +254,30 @@ spec:
|
|||||||
sed -i 's/^ //' /a0/usr/plugins/_model_config/config.json
|
sed -i 's/^ //' /a0/usr/plugins/_model_config/config.json
|
||||||
# Phase 0 Chat MCP pilot: Agent Zero does not interpolate env vars
|
# Phase 0 Chat MCP pilot: Agent Zero does not interpolate env vars
|
||||||
# inside A0_SET_mcp_servers JSON, so build the final JSON here from
|
# inside A0_SET_mcp_servers JSON, so build the final JSON here from
|
||||||
# the secret-backed CHAT_MCP_API_KEY env var before initialize.sh.
|
# the secret-backed env vars before initialize.sh. Keep the local
|
||||||
# Use the in-cluster Chat service URL rather than the public
|
# corpus_search.py tool mounted either way so outage fallback
|
||||||
# Traefik hostname so the pod stays off the private VIP lane that
|
# remains available even when fc_knowledge is not advertised.
|
||||||
# the default egress rule blocks.
|
export KNOWLEDGE_MCP_ENABLED=false
|
||||||
if [ -n "${CHAT_MCP_API_KEY:-}" ]; then
|
if [ -n "${KNOWLEDGE_MCP_BEARER_TOKEN:-}" ]; then
|
||||||
export A0_SET_mcp_servers="{\"mcpServers\":{\"fc-chat\":{\"type\":\"streamable-http\",\"url\":\"http://chat-web.fc-chat.svc/mcp\",\"headers\":{\"X-Api-Key\":\"${CHAT_MCP_API_KEY}\"}}}}"
|
if curl -sf --connect-timeout 3 "${KNOWLEDGE_MCP_HEALTH_URL}" > /dev/null && \
|
||||||
|
curl -sf --connect-timeout 5 \
|
||||||
|
-H "Authorization: Bearer ${KNOWLEDGE_MCP_BEARER_TOKEN}" \
|
||||||
|
-H "Accept: application/json, text/event-stream" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"jsonrpc":"2.0","id":"fc-knowledge-bootstrap","method":"initialize","params":{"protocolVersion":"2025-03-26","capabilities":{},"clientInfo":{"name":"agent-zero-bootstrap","version":"1.0"}}}' \
|
||||||
|
"${KNOWLEDGE_MCP_URL}" > /dev/null; then
|
||||||
|
export KNOWLEDGE_MCP_ENABLED=true
|
||||||
|
echo "fc_knowledge enabled from ${KNOWLEDGE_MCP_URL}."
|
||||||
|
else
|
||||||
|
echo "fc_knowledge unavailable or unauthorized; keeping local corpus_search.py as the fallback path."
|
||||||
fi
|
fi
|
||||||
|
else
|
||||||
|
echo "fc_knowledge token missing; keeping local corpus_search.py as the fallback path."
|
||||||
|
fi
|
||||||
|
|
||||||
|
export A0_SET_mcp_servers="$(
|
||||||
|
python3 -c 'import json, os; servers = {}; chat_key = os.getenv("CHAT_MCP_API_KEY"); knowledge_enabled = os.getenv("KNOWLEDGE_MCP_ENABLED", "false").lower() == "true"; token = os.getenv("KNOWLEDGE_MCP_BEARER_TOKEN", "") if knowledge_enabled else ""; chat_key and servers.setdefault("fc_chat", {"type": "streamable-http", "url": "http://chat-web.fc-chat.svc/mcp", "headers": {"X-Api-Key": chat_key}}); token and servers.setdefault("fc_knowledge", {"type": "streamable-http", "url": os.getenv("KNOWLEDGE_MCP_URL", "http://knowledge-web.knowledge.svc/mcp"), "headers": {"Authorization": f"Bearer {token}"}}); print(json.dumps({"mcpServers": servers}, separators=(",", ":")))'
|
||||||
|
)"
|
||||||
# Run the original entrypoint
|
# Run the original entrypoint
|
||||||
exec /exe/initialize.sh $BRANCH
|
exec /exe/initialize.sh $BRANCH
|
||||||
ports:
|
ports:
|
||||||
@@ -351,6 +380,19 @@ spec:
|
|||||||
name: chat-mcp-api-key
|
name: chat-mcp-api-key
|
||||||
key: api-key
|
key: api-key
|
||||||
optional: true
|
optional: true
|
||||||
|
# FlowerCore.Knowledge MCP Phase 1 — direct Agent Zero client path.
|
||||||
|
# Probe /healthz first, then try an authenticated initialize call.
|
||||||
|
# If either fails, Agent Zero boots without fc_knowledge and keeps
|
||||||
|
# the local corpus_search.py tool as the outage-safe path.
|
||||||
|
- name: KNOWLEDGE_MCP_URL
|
||||||
|
value: "http://knowledge-web.knowledge.svc/mcp"
|
||||||
|
- name: KNOWLEDGE_MCP_HEALTH_URL
|
||||||
|
value: "http://knowledge-web.knowledge.svc/healthz"
|
||||||
|
- name: KNOWLEDGE_MCP_BEARER_TOKEN
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: knowledge-mcp-tokens
|
||||||
|
key: password
|
||||||
# Print.Web — Thermal printer service on edge2.
|
# Print.Web — Thermal printer service on edge2.
|
||||||
# PRINT_WEB_URL: internal HTTP (bypasses Traefik TLS — print_web.py
|
# PRINT_WEB_URL: internal HTTP (bypasses Traefik TLS — print_web.py
|
||||||
# runs in-cluster and can reach edge2 directly on the PROD VLAN).
|
# runs in-cluster and can reach edge2 directly on the PROD VLAN).
|
||||||
@@ -575,6 +617,17 @@ spec:
|
|||||||
protocol: TCP
|
protocol: TCP
|
||||||
- port: 8080
|
- port: 8080
|
||||||
protocol: TCP
|
protocol: TCP
|
||||||
|
# FlowerCore.Knowledge MCP (Phase 1) — in-cluster direct route with
|
||||||
|
# anonymous /healthz probe plus authenticated /mcp initialize/tool calls.
|
||||||
|
- to:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: knowledge
|
||||||
|
ports:
|
||||||
|
- port: 80
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8080
|
||||||
|
protocol: TCP
|
||||||
# Intranet search API — use in-cluster svc so traffic stays inside
|
# Intranet search API — use in-cluster svc so traffic stays inside
|
||||||
# the cluster and is not blocked by the private-range egress denylist.
|
# the cluster and is not blocked by the private-range egress denylist.
|
||||||
- to:
|
- to:
|
||||||
|
|||||||
@@ -20,7 +20,19 @@ spec:
|
|||||||
nodeSelector:
|
nodeSelector:
|
||||||
kubernetes.io/hostname: rke2-agent1
|
kubernetes.io/hostname: rke2-agent1
|
||||||
hostNetwork: true
|
hostNetwork: true
|
||||||
dnsPolicy: ClusterFirstWithHostNet
|
# Keep the search list free of iamworkin.lan so CoreDNS's wildcard
|
||||||
|
# template cannot hijack public egress like downloads.asterisk.org.
|
||||||
|
dnsPolicy: None
|
||||||
|
dnsConfig:
|
||||||
|
nameservers:
|
||||||
|
- 10.43.0.10
|
||||||
|
searches:
|
||||||
|
- telephony.svc.cluster.local
|
||||||
|
- svc.cluster.local
|
||||||
|
- cluster.local
|
||||||
|
options:
|
||||||
|
- name: ndots
|
||||||
|
value: "2"
|
||||||
securityContext:
|
securityContext:
|
||||||
fsGroup: 0
|
fsGroup: 0
|
||||||
# CoreDNS in this cluster has an iamworkin.lan wildcard that catches
|
# CoreDNS in this cluster has an iamworkin.lan wildcard that catches
|
||||||
|
|||||||
448
apps/authentik/authentik.yaml
Normal file
448
apps/authentik/authentik.yaml
Normal file
@@ -0,0 +1,448 @@
|
|||||||
|
# Authentik OIDC backend
|
||||||
|
# ArgoCD-managed. BlueJay Lab.
|
||||||
|
#
|
||||||
|
# Stack:
|
||||||
|
# - PostgreSQL 16 StatefulSet (single replica, Longhorn RWO 5Gi)
|
||||||
|
# - Redis 7 Deployment (no persistence — session/cache only)
|
||||||
|
# - Authentik server + worker Deployments (image ghcr.io/goauthentik/server:2024.12.3)
|
||||||
|
# - Media PVC shared between server + worker (Longhorn RWO 2Gi)
|
||||||
|
# - Certificate via step-ca-acme ClusterIssuer
|
||||||
|
# - Traefik IngressRoute at id.iamworkin.lan
|
||||||
|
#
|
||||||
|
# Secrets come from 1Password item "authentik-credentials" (IAmWorkin vault, id y6i74ch22q5wvm7znquq4nhhcu)
|
||||||
|
# via the OnePasswordItem CRD, materialized into k8s Secret authentik/authentik-credentials.
|
||||||
|
#
|
||||||
|
# Why the discovery URL is /application/o/pimanager/ : Authentik issues per-application OIDC providers.
|
||||||
|
# The pimanager OIDC application/provider is created after the cluster pods are healthy (manual or
|
||||||
|
# via API once the bootstrap token is available — see Notes substrate).
|
||||||
|
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Namespace
|
||||||
|
metadata:
|
||||||
|
name: authentik
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/part-of: bluejay-infra
|
||||||
|
|
||||||
|
---
|
||||||
|
# 1Password operator pulls the authentik-credentials item into a k8s Secret of the same name.
|
||||||
|
# Field labels in 1P become Secret keys: AUTHENTIK_SECRET_KEY, POSTGRES_PASSWORD, REDIS_PASSWORD,
|
||||||
|
# BOOTSTRAP_ADMIN_PASSWORD, BOOTSTRAP_ADMIN_TOKEN, BOOTSTRAP_ADMIN_EMAIL.
|
||||||
|
apiVersion: onepassword.com/v1
|
||||||
|
kind: OnePasswordItem
|
||||||
|
metadata:
|
||||||
|
name: authentik-credentials
|
||||||
|
namespace: authentik
|
||||||
|
spec:
|
||||||
|
itemPath: "vaults/IAmWorkin/items/authentik-credentials"
|
||||||
|
|
||||||
|
---
|
||||||
|
# Shared media volume for server + worker pods.
|
||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: authentik-media
|
||||||
|
namespace: authentik
|
||||||
|
spec:
|
||||||
|
storageClassName: longhorn
|
||||||
|
accessModes: [ReadWriteOnce]
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 2Gi
|
||||||
|
|
||||||
|
---
|
||||||
|
# PostgreSQL 16 StatefulSet — Authentik's primary store.
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: StatefulSet
|
||||||
|
metadata:
|
||||||
|
name: authentik-postgres
|
||||||
|
namespace: authentik
|
||||||
|
labels:
|
||||||
|
app: authentik-postgres
|
||||||
|
argocd.argoproj.io/instance: infra-authentik
|
||||||
|
spec:
|
||||||
|
persistentVolumeClaimRetentionPolicy:
|
||||||
|
whenDeleted: Retain
|
||||||
|
whenScaled: Retain
|
||||||
|
podManagementPolicy: OrderedReady
|
||||||
|
serviceName: authentik-postgres
|
||||||
|
replicas: 1
|
||||||
|
revisionHistoryLimit: 10
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: authentik-postgres
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: authentik-postgres
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: postgres
|
||||||
|
image: postgres:16-alpine
|
||||||
|
ports:
|
||||||
|
- containerPort: 5432
|
||||||
|
name: postgres
|
||||||
|
env:
|
||||||
|
- name: POSTGRES_USER
|
||||||
|
value: authentik
|
||||||
|
- name: POSTGRES_PASSWORD
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: authentik-credentials
|
||||||
|
key: POSTGRES_PASSWORD
|
||||||
|
- name: POSTGRES_DB
|
||||||
|
value: authentik
|
||||||
|
- name: POSTGRES_INITDB_ARGS
|
||||||
|
value: "--encoding=UTF-8 --lc-collate=C --lc-ctype=C"
|
||||||
|
- name: PGDATA
|
||||||
|
value: /var/lib/postgresql/data/pgdata
|
||||||
|
readinessProbe:
|
||||||
|
exec:
|
||||||
|
command: ["pg_isready", "-U", "authentik"]
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 5
|
||||||
|
livenessProbe:
|
||||||
|
exec:
|
||||||
|
command: ["pg_isready", "-U", "authentik"]
|
||||||
|
initialDelaySeconds: 30
|
||||||
|
periodSeconds: 30
|
||||||
|
resources:
|
||||||
|
requests: { cpu: 100m, memory: 256Mi }
|
||||||
|
limits: { cpu: 1000m, memory: 1Gi }
|
||||||
|
volumeMounts:
|
||||||
|
- name: pgdata
|
||||||
|
mountPath: /var/lib/postgresql/data
|
||||||
|
volumeClaimTemplates:
|
||||||
|
- metadata:
|
||||||
|
name: pgdata
|
||||||
|
spec:
|
||||||
|
storageClassName: longhorn
|
||||||
|
accessModes: [ReadWriteOnce]
|
||||||
|
volumeMode: Filesystem
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 5Gi
|
||||||
|
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: authentik-postgres
|
||||||
|
namespace: authentik
|
||||||
|
spec:
|
||||||
|
clusterIP: None
|
||||||
|
selector:
|
||||||
|
app: authentik-postgres
|
||||||
|
ports:
|
||||||
|
- name: postgres
|
||||||
|
port: 5432
|
||||||
|
targetPort: 5432
|
||||||
|
|
||||||
|
---
|
||||||
|
# Redis 7 — session storage + Celery broker. No persistence needed (cache).
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: authentik-redis
|
||||||
|
namespace: authentik
|
||||||
|
labels:
|
||||||
|
app: authentik-redis
|
||||||
|
argocd.argoproj.io/instance: infra-authentik
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
strategy:
|
||||||
|
type: Recreate
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: authentik-redis
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: authentik-redis
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: redis
|
||||||
|
image: redis:7-alpine
|
||||||
|
args:
|
||||||
|
- "--save"
|
||||||
|
- ""
|
||||||
|
- "--appendonly"
|
||||||
|
- "no"
|
||||||
|
- "--requirepass"
|
||||||
|
- "$(REDIS_PASSWORD)"
|
||||||
|
env:
|
||||||
|
- name: REDIS_PASSWORD
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: authentik-credentials
|
||||||
|
key: REDIS_PASSWORD
|
||||||
|
ports:
|
||||||
|
- containerPort: 6379
|
||||||
|
name: redis
|
||||||
|
readinessProbe:
|
||||||
|
tcpSocket: { port: 6379 }
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 5
|
||||||
|
livenessProbe:
|
||||||
|
tcpSocket: { port: 6379 }
|
||||||
|
initialDelaySeconds: 30
|
||||||
|
periodSeconds: 30
|
||||||
|
resources:
|
||||||
|
requests: { cpu: 50m, memory: 64Mi }
|
||||||
|
limits: { cpu: 500m, memory: 256Mi }
|
||||||
|
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: authentik-redis
|
||||||
|
namespace: authentik
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
app: authentik-redis
|
||||||
|
ports:
|
||||||
|
- name: redis
|
||||||
|
port: 6379
|
||||||
|
targetPort: 6379
|
||||||
|
|
||||||
|
---
|
||||||
|
# Authentik server Deployment — HTTP frontend on :9000.
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: authentik-server
|
||||||
|
namespace: authentik
|
||||||
|
labels:
|
||||||
|
app: authentik-server
|
||||||
|
argocd.argoproj.io/instance: infra-authentik
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
strategy:
|
||||||
|
type: Recreate # shares /media RWO PVC with worker
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: authentik-server
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: authentik-server
|
||||||
|
spec:
|
||||||
|
securityContext:
|
||||||
|
# Authentik image runs as uid 1000 "authentik" but the Longhorn PVC mounts
|
||||||
|
# root:root by default. fsGroup recursively chgrp + chmod g+rwx so the
|
||||||
|
# non-root container can mkdir /media/public during the tenant_files migration.
|
||||||
|
fsGroup: 1000
|
||||||
|
containers:
|
||||||
|
- name: server
|
||||||
|
image: ghcr.io/goauthentik/server:2024.12.3
|
||||||
|
args: ["server"]
|
||||||
|
ports:
|
||||||
|
- containerPort: 9000
|
||||||
|
name: http
|
||||||
|
- containerPort: 9443
|
||||||
|
name: https
|
||||||
|
env:
|
||||||
|
- name: AUTHENTIK_SECRET_KEY
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: authentik-credentials
|
||||||
|
key: AUTHENTIK_SECRET_KEY
|
||||||
|
- name: AUTHENTIK_REDIS__HOST
|
||||||
|
value: authentik-redis
|
||||||
|
- name: AUTHENTIK_REDIS__PASSWORD
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: authentik-credentials
|
||||||
|
key: REDIS_PASSWORD
|
||||||
|
- name: AUTHENTIK_POSTGRESQL__HOST
|
||||||
|
value: authentik-postgres
|
||||||
|
- name: AUTHENTIK_POSTGRESQL__NAME
|
||||||
|
value: authentik
|
||||||
|
- name: AUTHENTIK_POSTGRESQL__USER
|
||||||
|
value: authentik
|
||||||
|
- name: AUTHENTIK_POSTGRESQL__PASSWORD
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: authentik-credentials
|
||||||
|
key: POSTGRES_PASSWORD
|
||||||
|
- name: AUTHENTIK_BOOTSTRAP_PASSWORD
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: authentik-credentials
|
||||||
|
key: BOOTSTRAP_ADMIN_PASSWORD
|
||||||
|
- name: AUTHENTIK_BOOTSTRAP_TOKEN
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: authentik-credentials
|
||||||
|
key: BOOTSTRAP_ADMIN_TOKEN
|
||||||
|
- name: AUTHENTIK_BOOTSTRAP_EMAIL
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: authentik-credentials
|
||||||
|
key: BOOTSTRAP_ADMIN_EMAIL
|
||||||
|
- name: AUTHENTIK_DISABLE_UPDATE_CHECK
|
||||||
|
value: "true"
|
||||||
|
- name: AUTHENTIK_ERROR_REPORTING__ENABLED
|
||||||
|
value: "false"
|
||||||
|
- name: AUTHENTIK_LOG_LEVEL
|
||||||
|
value: info
|
||||||
|
# First-boot Authentik can take 3+ min on the migration phase
|
||||||
|
# (waiting on DB lock while worker also runs migrations). Initial
|
||||||
|
# delays are generous so kubelet doesn't kill the pod mid-migration;
|
||||||
|
# periodSeconds keeps post-startup probing responsive.
|
||||||
|
readinessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /-/health/ready/
|
||||||
|
port: 9000
|
||||||
|
initialDelaySeconds: 60
|
||||||
|
periodSeconds: 10
|
||||||
|
timeoutSeconds: 5
|
||||||
|
failureThreshold: 12
|
||||||
|
livenessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /-/health/live/
|
||||||
|
port: 9000
|
||||||
|
initialDelaySeconds: 300
|
||||||
|
periodSeconds: 30
|
||||||
|
timeoutSeconds: 10
|
||||||
|
failureThreshold: 3
|
||||||
|
startupProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /-/health/live/
|
||||||
|
port: 9000
|
||||||
|
initialDelaySeconds: 30
|
||||||
|
periodSeconds: 15
|
||||||
|
timeoutSeconds: 10
|
||||||
|
failureThreshold: 40 # 30s + 40*15s = 10.5 min budget
|
||||||
|
resources:
|
||||||
|
requests: { cpu: 150m, memory: 512Mi }
|
||||||
|
limits: { cpu: 1500m, memory: 1Gi }
|
||||||
|
volumeMounts:
|
||||||
|
- name: media
|
||||||
|
mountPath: /media
|
||||||
|
volumes:
|
||||||
|
- name: media
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: authentik-media
|
||||||
|
|
||||||
|
---
|
||||||
|
# Authentik worker Deployment — runs Celery background tasks.
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: authentik-worker
|
||||||
|
namespace: authentik
|
||||||
|
labels:
|
||||||
|
app: authentik-worker
|
||||||
|
argocd.argoproj.io/instance: infra-authentik
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
strategy:
|
||||||
|
type: Recreate # shares /media RWO PVC with server
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: authentik-worker
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: authentik-worker
|
||||||
|
spec:
|
||||||
|
securityContext:
|
||||||
|
# Same as server pod — non-root uid 1000 needs PVC group write.
|
||||||
|
fsGroup: 1000
|
||||||
|
containers:
|
||||||
|
- name: worker
|
||||||
|
image: ghcr.io/goauthentik/server:2024.12.3
|
||||||
|
args: ["worker"]
|
||||||
|
env:
|
||||||
|
- name: AUTHENTIK_SECRET_KEY
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: authentik-credentials
|
||||||
|
key: AUTHENTIK_SECRET_KEY
|
||||||
|
- name: AUTHENTIK_REDIS__HOST
|
||||||
|
value: authentik-redis
|
||||||
|
- name: AUTHENTIK_REDIS__PASSWORD
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: authentik-credentials
|
||||||
|
key: REDIS_PASSWORD
|
||||||
|
- name: AUTHENTIK_POSTGRESQL__HOST
|
||||||
|
value: authentik-postgres
|
||||||
|
- name: AUTHENTIK_POSTGRESQL__NAME
|
||||||
|
value: authentik
|
||||||
|
- name: AUTHENTIK_POSTGRESQL__USER
|
||||||
|
value: authentik
|
||||||
|
- name: AUTHENTIK_POSTGRESQL__PASSWORD
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: authentik-credentials
|
||||||
|
key: POSTGRES_PASSWORD
|
||||||
|
- name: AUTHENTIK_DISABLE_UPDATE_CHECK
|
||||||
|
value: "true"
|
||||||
|
- name: AUTHENTIK_ERROR_REPORTING__ENABLED
|
||||||
|
value: "false"
|
||||||
|
- name: AUTHENTIK_LOG_LEVEL
|
||||||
|
value: info
|
||||||
|
resources:
|
||||||
|
requests: { cpu: 100m, memory: 256Mi }
|
||||||
|
limits: { cpu: 1000m, memory: 768Mi }
|
||||||
|
volumeMounts:
|
||||||
|
- name: media
|
||||||
|
mountPath: /media
|
||||||
|
volumes:
|
||||||
|
- name: media
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: authentik-media
|
||||||
|
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: authentik-server
|
||||||
|
namespace: authentik
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
app: authentik-server
|
||||||
|
ports:
|
||||||
|
- name: http
|
||||||
|
port: 9000
|
||||||
|
targetPort: 9000
|
||||||
|
- name: https
|
||||||
|
port: 9443
|
||||||
|
targetPort: 9443
|
||||||
|
|
||||||
|
---
|
||||||
|
# step-ca leaf certificate for id.iamworkin.lan.
|
||||||
|
# step-ca container resolver uses pfSense Unbound, so the public A record for id.iamworkin.lan
|
||||||
|
# MUST exist before this Certificate is applied (cert-manager HTTP-01 will silently 2h-backoff
|
||||||
|
# otherwise). Added 2026-05-25 via scripts/pfsense-add-id-host.py.
|
||||||
|
apiVersion: cert-manager.io/v1
|
||||||
|
kind: Certificate
|
||||||
|
metadata:
|
||||||
|
name: authentik-tls
|
||||||
|
namespace: authentik
|
||||||
|
spec:
|
||||||
|
secretName: authentik-tls
|
||||||
|
dnsNames:
|
||||||
|
- id.iamworkin.lan
|
||||||
|
issuerRef:
|
||||||
|
name: step-ca-acme
|
||||||
|
kind: ClusterIssuer
|
||||||
|
|
||||||
|
---
|
||||||
|
apiVersion: traefik.io/v1alpha1
|
||||||
|
kind: IngressRoute
|
||||||
|
metadata:
|
||||||
|
name: authentik
|
||||||
|
namespace: authentik
|
||||||
|
spec:
|
||||||
|
entryPoints: [websecure]
|
||||||
|
routes:
|
||||||
|
- match: Host(`id.iamworkin.lan`)
|
||||||
|
kind: Rule
|
||||||
|
services:
|
||||||
|
- name: authentik-server
|
||||||
|
port: 9000
|
||||||
|
tls:
|
||||||
|
secretName: authentik-tls
|
||||||
69
apps/cdi/README.md
Normal file
69
apps/cdi/README.md
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
# CDI — Containerized Data Importer
|
||||||
|
|
||||||
|
KubeVirt's `containerized-data-importer` for populating PVCs from external
|
||||||
|
sources (HTTP, HTTPS, container registry, S3, virtctl upload). Required to
|
||||||
|
import the Windows Server 2025 ISO into the `windows-server-2025-iso` PVC
|
||||||
|
that `apps/kubevirt-vms/ci1.yaml` mounts as a CDROM.
|
||||||
|
|
||||||
|
## Files
|
||||||
|
|
||||||
|
| File | Source | Purpose |
|
||||||
|
| ----------------- | ----------------------------------------------------------------------------------------------------------------- | -------------------------------------------------- |
|
||||||
|
| `cdi-operator.yaml` | [`v1.65.0`](https://github.com/kubevirt/containerized-data-importer/releases/tag/v1.65.0) — verbatim copy | Installs operator + CRDs (5779 lines, large) |
|
||||||
|
| `cdi-cr.yaml` | [`v1.65.0`](https://github.com/kubevirt/containerized-data-importer/releases/tag/v1.65.0) — annotated + commented | Tells operator to deploy CDI components |
|
||||||
|
|
||||||
|
`cdi-operator.yaml` is **vendored verbatim** from the upstream release for
|
||||||
|
air-gap reproducibility (no internet fetch at deploy time, ArgoCD prune
|
||||||
|
contracts hold). To bump versions:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CDI_VER=v1.66.0 # for example
|
||||||
|
curl -sL "https://github.com/kubevirt/containerized-data-importer/releases/download/${CDI_VER}/cdi-operator.yaml" \
|
||||||
|
-o apps/cdi/cdi-operator.yaml
|
||||||
|
curl -sL "https://github.com/kubevirt/containerized-data-importer/releases/download/${CDI_VER}/cdi-cr.yaml" \
|
||||||
|
-o /tmp/cdi-cr-new.yaml # then re-apply project header diff
|
||||||
|
git diff apps/cdi/ # review
|
||||||
|
git add apps/cdi/ && git commit -m "cdi: bump to ${CDI_VER}" && git push
|
||||||
|
```
|
||||||
|
|
||||||
|
## Verify after deploy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
kubectl -n cdi get pods # operator + apiserver + deployment + uploadproxy
|
||||||
|
kubectl get cdis cdi -o jsonpath='{.status.phase}' # "Deployed"
|
||||||
|
kubectl get crd | grep cdi.kubevirt.io
|
||||||
|
# Expected CRDs: datavolumes.cdi.kubevirt.io, cdiconfigs.cdi.kubevirt.io,
|
||||||
|
# storageprofiles.cdi.kubevirt.io, dataimportcrons.cdi.kubevirt.io,
|
||||||
|
# datasources.cdi.kubevirt.io, objecttransfers.cdi.kubevirt.io
|
||||||
|
```
|
||||||
|
|
||||||
|
## Use after install
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# Example DataVolume that imports from HTTP
|
||||||
|
apiVersion: cdi.kubevirt.io/v1beta1
|
||||||
|
kind: DataVolume
|
||||||
|
metadata:
|
||||||
|
name: my-iso
|
||||||
|
spec:
|
||||||
|
source:
|
||||||
|
http:
|
||||||
|
url: "https://server/path/to.iso"
|
||||||
|
pvc:
|
||||||
|
accessModes: [ReadWriteOnce]
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 10Gi
|
||||||
|
storageClassName: longhorn
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Or upload from local disk via virtctl
|
||||||
|
virtctl image-upload pvc my-iso \
|
||||||
|
--image-path ./my.iso \
|
||||||
|
--size 10Gi \
|
||||||
|
--storage-class longhorn \
|
||||||
|
--access-mode ReadWriteOnce \
|
||||||
|
--uploadproxy-url https://cdi-uploadproxy.cdi.svc:443 \
|
||||||
|
--insecure
|
||||||
|
```
|
||||||
36
apps/cdi/cdi-cr.yaml
Normal file
36
apps/cdi/cdi-cr.yaml
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
# =============================================================================
|
||||||
|
# CDI CR — Tells the CDI operator to install CDI components into the cluster.
|
||||||
|
# =============================================================================
|
||||||
|
# After cdi-operator.yaml is applied, the operator watches for THIS resource
|
||||||
|
# (CDI named "cdi"). When found, it deploys cdi-apiserver, cdi-deployment,
|
||||||
|
# cdi-uploadproxy, cdi-cronjob, and the importer/uploadserver/cloner pods.
|
||||||
|
#
|
||||||
|
# Configuration:
|
||||||
|
# - HonorWaitForFirstConsumer: PVCs created by DataVolumes wait for first
|
||||||
|
# pod to schedule before binding (lets storage class pick best node).
|
||||||
|
# - WebhookPvcRendering: mutating webhook renders PVC specs from the StorageProfile.
|
||||||
|
# - imagePullPolicy IfNotPresent: pull only when the image is not already on the node (e.g. after a tag change).
|
||||||
|
# - nodeSelector linux: pin to Linux nodes (no Windows worker support).
|
||||||
|
#
|
||||||
|
# Andrew may want to add an `uploadProxyURLOverride` later to expose the
|
||||||
|
# uploadproxy via Traefik IngressRoute for `virtctl image-upload` from
|
||||||
|
# BLUEJAY-WS without `kubectl port-forward`. Phase 2 enhancement.
|
||||||
|
# =============================================================================
|
||||||
|
apiVersion: cdi.kubevirt.io/v1beta1
|
||||||
|
kind: CDI
|
||||||
|
metadata:
|
||||||
|
name: cdi
|
||||||
|
annotations:
|
||||||
|
bluejay.iamworkin.lan/source: "kubevirt/containerized-data-importer v1.65.0"
|
||||||
|
spec:
|
||||||
|
config:
|
||||||
|
featureGates:
|
||||||
|
- HonorWaitForFirstConsumer
|
||||||
|
- WebhookPvcRendering
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
infra:
|
||||||
|
nodeSelector:
|
||||||
|
kubernetes.io/os: linux
|
||||||
|
workload:
|
||||||
|
nodeSelector:
|
||||||
|
kubernetes.io/os: linux
|
||||||
5779
apps/cdi/cdi-operator.yaml
Normal file
5779
apps/cdi/cdi-operator.yaml
Normal file
File diff suppressed because it is too large
Load Diff
@@ -30,3 +30,41 @@ spec:
|
|||||||
port: 80
|
port: 80
|
||||||
tls:
|
tls:
|
||||||
secretName: chat-web-tls
|
secretName: chat-web-tls
|
||||||
|
---
|
||||||
|
# Public host profile marker. The app treats this header as authoritative for
|
||||||
|
# the public twin, while the internal chat.iamworkin.lan route does not attach
|
||||||
|
# it and keeps the operator-oriented UI.
|
||||||
|
apiVersion: traefik.io/v1alpha1
|
||||||
|
kind: Middleware
|
||||||
|
metadata:
|
||||||
|
name: chat-public-profile-header
|
||||||
|
namespace: fc-chat
|
||||||
|
spec:
|
||||||
|
headers:
|
||||||
|
customRequestHeaders:
|
||||||
|
X-FC-Chat-Host-Profile: "public"
|
||||||
|
---
|
||||||
|
# Public Cloudflare-fronted twin for the anonymous chat surface. Operator
|
||||||
|
# paths are intentionally absent from the allowlist below, so /admin,
|
||||||
|
# /operator, /console, /ops, /api/operator, and /operatorhub miss this route
|
||||||
|
# and return Traefik 404 before reaching the pod. Operator action still needed:
|
||||||
|
# create/verify Cloudflare DNS chat.flowercore.io -> public Traefik endpoint
|
||||||
|
# and mirror the cf-origin-flowercore-io TLS secret into namespace fc-chat.
|
||||||
|
apiVersion: traefik.io/v1alpha1
|
||||||
|
kind: IngressRoute
|
||||||
|
metadata:
|
||||||
|
name: chat-web-public
|
||||||
|
namespace: fc-chat
|
||||||
|
spec:
|
||||||
|
entryPoints:
|
||||||
|
- websecure
|
||||||
|
routes:
|
||||||
|
- match: Host(`chat.flowercore.io`) && (Path(`/`) || Path(`/chat`) || PathPrefix(`/_blazor`) || PathPrefix(`/_framework`) || PathPrefix(`/_content`) || PathPrefix(`/avatars`) || PathPrefix(`/css`) || PathPrefix(`/js`) || PathPrefix(`/favicon`) || PathPrefix(`/chathub`)) && (Method(`GET`) || Method(`HEAD`) || Method(`POST`) || Method(`OPTIONS`))
|
||||||
|
kind: Rule
|
||||||
|
middlewares:
|
||||||
|
- name: chat-public-profile-header
|
||||||
|
services:
|
||||||
|
- name: chat-web
|
||||||
|
port: 80
|
||||||
|
tls:
|
||||||
|
secretName: cf-origin-flowercore-io
|
||||||
|
|||||||
@@ -1,5 +1,18 @@
|
|||||||
# FlowerCore Remote Desktop — TLS + Ingress
|
# FlowerCore Remote Desktop — TLS + Ingress
|
||||||
# Deployment and Service managed by deploy script (not ArgoCD)
|
#
|
||||||
|
# Source-of-truth split:
|
||||||
|
# - bluejay-infra OWNS: Certificate, IngressRoute, all NetworkPolicies
|
||||||
|
# (see network-policies.yaml in this directory).
|
||||||
|
# - FlowerCore.RemoteDesktop scripts/deploy-web.sh OWNS: Deployment +
|
||||||
|
# Service. Reason: image refs like `localhost/fc-desktop:linux-xfce`
|
||||||
|
# only exist on each node's containerd after a manual import, so a
|
||||||
|
# Deployment manifest in bluejay-infra would race the image-import
|
||||||
|
# step and crash-loop.
|
||||||
|
#
|
||||||
|
# NetworkPolicies moved into bluejay-infra 2026-05-07 — previously they
|
||||||
|
# were applied via the deploy script's kubectl apply calls, which broke
|
||||||
|
# cluster-rebuild repeatability. See
|
||||||
|
# feedback_networkpolicies_belong_in_bluejay_infra.md.
|
||||||
---
|
---
|
||||||
apiVersion: cert-manager.io/v1
|
apiVersion: cert-manager.io/v1
|
||||||
kind: Certificate
|
kind: Certificate
|
||||||
|
|||||||
332
apps/fc-desktop/network-policies.yaml
Normal file
332
apps/fc-desktop/network-policies.yaml
Normal file
@@ -0,0 +1,332 @@
|
|||||||
|
# FlowerCore Remote Desktop — NetworkPolicies (GitOps-managed)
|
||||||
|
#
|
||||||
|
# Moved into bluejay-infra 2026-05-07 as part of the regroup audit. These
|
||||||
|
# four policies were previously applied via FlowerCore.RemoteDesktop's
|
||||||
|
# scripts/deploy-web.sh `kubectl apply` calls, which meant a fresh cluster
|
||||||
|
# rebuild from bluejay-infra alone would miss them — Browser Lab session
|
||||||
|
# isolation, control-plane allow-list, and HTTP-01 cert renewal would all
|
||||||
|
# silently fail to come up.
|
||||||
|
#
|
||||||
|
# Source-of-truth contract:
|
||||||
|
# - bluejay-infra OWNS all NetworkPolicy + Certificate + IngressRoute
|
||||||
|
# resources for fc-desktop.
|
||||||
|
# - FlowerCore.RemoteDesktop's scripts/deploy-web.sh continues to own
|
||||||
|
# the Deployment + Service apply (because the image ref
|
||||||
|
# `localhost/fc-desktop:linux-xfce` only exists on each node's
|
||||||
|
# containerd after a manual import — it can't be pulled from a
|
||||||
|
# registry, so a Deployment manifest in bluejay-infra would race the
|
||||||
|
# image-import step and crash-loop).
|
||||||
|
---
|
||||||
|
# 1) desktop-isolation — Browser Lab session pods.
|
||||||
|
#
|
||||||
|
# Locks down pods labeled `app.kubernetes.io/name=remote-desktop` (every
|
||||||
|
# session pod regardless of template). Allows guacd ingress for the VNC/RDP
|
||||||
|
# display lane and remotedesktop-web's pre-handoff probing. Egress: NFS to
|
||||||
|
# Synology, DNS, Traefik (cluster + LB VIP), Intranet (Browser Lab home).
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: NetworkPolicy
|
||||||
|
metadata:
|
||||||
|
name: desktop-isolation
|
||||||
|
namespace: fc-desktop
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/part-of: remotedesktop
|
||||||
|
app.kubernetes.io/component: isolation
|
||||||
|
spec:
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: remote-desktop
|
||||||
|
policyTypes:
|
||||||
|
- Ingress
|
||||||
|
- Egress
|
||||||
|
ingress:
|
||||||
|
- from:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: guacamole
|
||||||
|
ports:
|
||||||
|
- port: 3000
|
||||||
|
protocol: TCP
|
||||||
|
- port: 3001
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5901
|
||||||
|
protocol: TCP
|
||||||
|
- port: 3389
|
||||||
|
protocol: TCP
|
||||||
|
- from:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: fc-desktop
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: remotedesktop-web
|
||||||
|
ports:
|
||||||
|
- port: 3000
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5901
|
||||||
|
protocol: TCP
|
||||||
|
egress:
|
||||||
|
# NFS to Synology
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.58.3/32
|
||||||
|
ports:
|
||||||
|
- port: 2049
|
||||||
|
protocol: TCP
|
||||||
|
- port: 2049
|
||||||
|
protocol: UDP
|
||||||
|
- port: 111
|
||||||
|
protocol: TCP
|
||||||
|
- port: 111
|
||||||
|
protocol: UDP
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.58.3/32
|
||||||
|
ports:
|
||||||
|
- port: 445
|
||||||
|
protocol: TCP
|
||||||
|
- to: []
|
||||||
|
ports:
|
||||||
|
- port: 53
|
||||||
|
protocol: UDP
|
||||||
|
- port: 53
|
||||||
|
protocol: TCP
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.56.200/32
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.43.33.87/32
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: traefik-system
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: traefik
|
||||||
|
ports:
|
||||||
|
- port: 80
|
||||||
|
protocol: TCP
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8000
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8443
|
||||||
|
protocol: TCP
|
||||||
|
- to:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: intranet
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: intranet-web
|
||||||
|
ports:
|
||||||
|
- port: 5300
|
||||||
|
protocol: TCP
|
||||||
|
---
|
||||||
|
# 2) fc-desktop-default-deny — namespace-wide catch-all.
|
||||||
|
#
|
||||||
|
# Selects every pod EXCEPT remotedesktop-web (the public-surface control
|
||||||
|
# plane) and applies default-deny semantics for both Ingress and Egress.
|
||||||
|
# Closes the gap where session pods land WITHOUT the desktop-isolation
|
||||||
|
# policy's `app.kubernetes.io/name=remote-desktop` label, plus prevents
|
||||||
|
# arbitrary debug sidecars / kubectl debug images from getting cluster
|
||||||
|
# access.
|
||||||
|
#
|
||||||
|
# CRITICAL: also catches transient cm-acme-http-solver pods (that's the
|
||||||
|
# bug this whole regroup chased). The cm-acme-http-solver-allow policy
|
||||||
|
# below is the explicit carve-out.
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: NetworkPolicy
|
||||||
|
metadata:
|
||||||
|
name: fc-desktop-default-deny
|
||||||
|
namespace: fc-desktop
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/part-of: remotedesktop
|
||||||
|
app.kubernetes.io/component: isolation
|
||||||
|
spec:
|
||||||
|
podSelector:
|
||||||
|
matchExpressions:
|
||||||
|
- key: app.kubernetes.io/name
|
||||||
|
operator: NotIn
|
||||||
|
values:
|
||||||
|
- remotedesktop-web
|
||||||
|
policyTypes:
|
||||||
|
- Ingress
|
||||||
|
- Egress
|
||||||
|
---
|
||||||
|
# 3) remotedesktop-web-isolation — control plane explicit allow-list.
|
||||||
|
#
|
||||||
|
# remotedesktop-web is the only pod label the default-deny excludes, so
|
||||||
|
# without this policy the control plane would have wide-open Ingress AND
|
||||||
|
# Egress. This re-introduces a tight allow-list:
|
||||||
|
# - Ingress: Traefik only on TCP/8080
|
||||||
|
# - Egress: CoreDNS, K8s API, Guacamole admin, NFS, Intranet,
|
||||||
|
# Traefik (cluster + LB), and the fc-desktop namespace itself
|
||||||
|
# (for session pod readiness probing).
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: NetworkPolicy
|
||||||
|
metadata:
|
||||||
|
name: remotedesktop-web-isolation
|
||||||
|
namespace: fc-desktop
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/part-of: remotedesktop
|
||||||
|
app.kubernetes.io/component: isolation
|
||||||
|
spec:
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: remotedesktop-web
|
||||||
|
policyTypes:
|
||||||
|
- Ingress
|
||||||
|
- Egress
|
||||||
|
ingress:
|
||||||
|
- from:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: traefik-system
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: traefik
|
||||||
|
ports:
|
||||||
|
- port: 8080
|
||||||
|
protocol: TCP
|
||||||
|
egress:
|
||||||
|
# CoreDNS
|
||||||
|
- to:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: kube-system
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
k8s-app: kube-dns
|
||||||
|
ports:
|
||||||
|
- port: 53
|
||||||
|
protocol: UDP
|
||||||
|
- port: 53
|
||||||
|
protocol: TCP
|
||||||
|
# K8s API server — NOTE: the empty `to` below matches ANY destination on these ports, not only the API server
|
||||||
|
- to: []
|
||||||
|
ports:
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 6443
|
||||||
|
protocol: TCP
|
||||||
|
# Guacamole admin
|
||||||
|
- to:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: guacamole
|
||||||
|
ports:
|
||||||
|
- port: 8080
|
||||||
|
protocol: TCP
|
||||||
|
# NFS to Synology
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.58.3/32
|
||||||
|
ports:
|
||||||
|
- port: 2049
|
||||||
|
protocol: TCP
|
||||||
|
- port: 2049
|
||||||
|
protocol: UDP
|
||||||
|
- port: 111
|
||||||
|
protocol: TCP
|
||||||
|
- port: 111
|
||||||
|
protocol: UDP
|
||||||
|
# Intranet web
|
||||||
|
- to:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: intranet
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: intranet-web
|
||||||
|
ports:
|
||||||
|
- port: 5300
|
||||||
|
protocol: TCP
|
||||||
|
# Cluster Traefik pods (in-cluster service resolution + Guacamole
|
||||||
|
# routing handoff where web app builds URLs against the public host
|
||||||
|
# but resolves internally).
|
||||||
|
- to:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: traefik-system
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: traefik
|
||||||
|
ports:
|
||||||
|
- port: 80
|
||||||
|
protocol: TCP
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8080
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8443
|
||||||
|
protocol: TCP
|
||||||
|
# fc-desktop namespace — session pod probing during browser-access
|
||||||
|
# readiness checks.
|
||||||
|
- to:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: fc-desktop
|
||||||
|
ports:
|
||||||
|
- port: 3000
|
||||||
|
protocol: TCP
|
||||||
|
- port: 3001
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5901
|
||||||
|
protocol: TCP
|
||||||
|
- port: 3389
|
||||||
|
protocol: TCP
|
||||||
|
---
|
||||||
|
# 4) cm-acme-http-solver-allow — cert-manager HTTP-01 carve-out.
|
||||||
|
#
|
||||||
|
# Without this, fc-desktop-default-deny catches the transient solver pods
|
||||||
|
# cert-manager creates for each renewal (they don't carry the
|
||||||
|
# remotedesktop-web label). Caused 8-day silent renewal failure on
|
||||||
|
# desktop.iamworkin.lan in 2026-04-28..2026-05-07 (see
|
||||||
|
# feedback_certmanager_renewal_stuck_when_solver_blocked_by_namespace_default_deny.md).
|
||||||
|
#
|
||||||
|
# Authorizes:
|
||||||
|
# - Ingress on TCP/8089 from cluster Traefik (which proxies the external
|
||||||
|
# HTTP-01 GET on port 80 through to the solver).
|
||||||
|
# - Egress for cluster DNS (defensive — newer cert-manager probes from
|
||||||
|
# inside the solver too).
|
||||||
|
#
|
||||||
|
# The `acme.cert-manager.io/http01-solver=true` label is set by
|
||||||
|
# cert-manager itself on every solver pod automatically.
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: NetworkPolicy
|
||||||
|
metadata:
|
||||||
|
name: cm-acme-http-solver-allow
|
||||||
|
namespace: fc-desktop
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/part-of: remotedesktop
|
||||||
|
app.kubernetes.io/component: cert-renewal
|
||||||
|
spec:
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
acme.cert-manager.io/http01-solver: "true"
|
||||||
|
policyTypes:
|
||||||
|
- Ingress
|
||||||
|
- Egress
|
||||||
|
ingress:
|
||||||
|
- from:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: traefik-system
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: traefik
|
||||||
|
ports:
|
||||||
|
- port: 8089
|
||||||
|
protocol: TCP
|
||||||
|
egress:
|
||||||
|
- to:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: kube-system
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
k8s-app: kube-dns
|
||||||
|
ports:
|
||||||
|
- port: 53
|
||||||
|
protocol: UDP
|
||||||
|
- port: 53
|
||||||
|
protocol: TCP
|
||||||
26
apps/fc-devicemgmt/1password-item.yaml
Normal file
26
apps/fc-devicemgmt/1password-item.yaml
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
# Runtime secrets for FlowerCore.DeviceManagement.
|
||||||
|
#
|
||||||
|
# OnePasswordItem operator syncs this item into a Kubernetes Secret with the
|
||||||
|
# same name. Expected fields:
|
||||||
|
# DB-Password
|
||||||
|
# mtls-ca.pem
|
||||||
|
# mtls-client.crt
|
||||||
|
# mtls-client.key
|
||||||
|
# mtls-chain.pem
|
||||||
|
#
|
||||||
|
# Do not add literal secret values to this repo. Runtime pods consume the
|
||||||
|
# synced Secret through env vars and read-only mounts.
|
||||||
|
apiVersion: onepassword.com/v1
|
||||||
|
kind: OnePasswordItem
|
||||||
|
metadata:
|
||||||
|
name: fc-devicemgmt-runtime
|
||||||
|
namespace: fc-devicemgmt
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: fc-devicemgmt
|
||||||
|
app.kubernetes.io/component: secrets
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
spec:
|
||||||
|
itemPath: "vaults/IAmWorkin/items/FlowerCore DeviceManagement Runtime"
|
||||||
30
apps/fc-devicemgmt/certificate-web.yaml
Normal file
30
apps/fc-devicemgmt/certificate-web.yaml
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
# Certificate for devices.iamworkin.lan.
|
||||||
|
#
|
||||||
|
# Preflight gate: FlowerCore.DNS / pfSense must contain an explicit A record:
|
||||||
|
# devices.iamworkin.lan -> 10.0.56.200
|
||||||
|
# before this Certificate is synced. step-ca ACME cannot see the CoreDNS
|
||||||
|
# wildcard, so missing pfSense DNS produces cert-manager HTTP-01 backoff
|
||||||
|
# (feedback_pfsense_dns_required_for_acme).
|
||||||
|
apiVersion: cert-manager.io/v1
|
||||||
|
kind: Certificate
|
||||||
|
metadata:
|
||||||
|
name: fc-devicemgmt-web-tls
|
||||||
|
namespace: fc-devicemgmt
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: fc-devicemgmt-web
|
||||||
|
app.kubernetes.io/component: web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
annotations:
|
||||||
|
flowercore.io/dns-preflight: "devices.iamworkin.lan must resolve to 10.0.56.200 before ACME sync"
|
||||||
|
spec:
|
||||||
|
secretName: fc-devicemgmt-web-tls
|
||||||
|
issuerRef:
|
||||||
|
name: step-ca-acme
|
||||||
|
kind: ClusterIssuer
|
||||||
|
dnsNames:
|
||||||
|
- devices.iamworkin.lan
|
||||||
|
duration: 720h
|
||||||
|
renewBefore: 240h
|
||||||
81
apps/fc-devicemgmt/clusterrole-operator.yaml
Normal file
81
apps/fc-devicemgmt/clusterrole-operator.yaml
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: ClusterRole
|
||||||
|
metadata:
|
||||||
|
name: fc-devicemgmt-operator
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: fc-devicemgmt-operator
|
||||||
|
app.kubernetes.io/component: operator
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
rules:
|
||||||
|
- apiGroups:
|
||||||
|
- devices.flowercore.io
|
||||||
|
resources:
|
||||||
|
- '*'
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- list
|
||||||
|
- watch
|
||||||
|
- create
|
||||||
|
- update
|
||||||
|
- patch
|
||||||
|
- delete
|
||||||
|
- apiGroups:
|
||||||
|
- devices.flowercore.io
|
||||||
|
resources:
|
||||||
|
- devices/status
|
||||||
|
- devices/finalizers
|
||||||
|
- devicegroups/status
|
||||||
|
- devicegroups/finalizers
|
||||||
|
- devicepolicies/status
|
||||||
|
- devicepolicies/finalizers
|
||||||
|
- remotecommands/status
|
||||||
|
- remotecommands/finalizers
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- update
|
||||||
|
- patch
|
||||||
|
- apiGroups:
|
||||||
|
- apps
|
||||||
|
resources:
|
||||||
|
- deployments
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- apiGroups:
|
||||||
|
- ""
|
||||||
|
resources:
|
||||||
|
- pods
|
||||||
|
- services
|
||||||
|
- configmaps
|
||||||
|
- secrets
|
||||||
|
- events
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- list
|
||||||
|
- watch
|
||||||
|
- create
|
||||||
|
- update
|
||||||
|
- patch
|
||||||
|
- delete
|
||||||
|
- apiGroups:
|
||||||
|
- batch
|
||||||
|
resources:
|
||||||
|
- jobs
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- list
|
||||||
|
- watch
|
||||||
|
- create
|
||||||
|
- update
|
||||||
|
- patch
|
||||||
|
- delete
|
||||||
|
- apiGroups:
|
||||||
|
- networking.k8s.io
|
||||||
|
resources:
|
||||||
|
- networkpolicies
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- list
|
||||||
|
- watch
|
||||||
19
apps/fc-devicemgmt/clusterrolebinding-operator.yaml
Normal file
19
apps/fc-devicemgmt/clusterrolebinding-operator.yaml
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: ClusterRoleBinding
|
||||||
|
metadata:
|
||||||
|
name: fc-devicemgmt-operator
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: fc-devicemgmt-operator
|
||||||
|
app.kubernetes.io/component: operator
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
roleRef:
|
||||||
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
kind: ClusterRole
|
||||||
|
name: fc-devicemgmt-operator
|
||||||
|
subjects:
|
||||||
|
- kind: ServiceAccount
|
||||||
|
name: fc-devicemgmt-operator
|
||||||
|
namespace: fc-devicemgmt
|
||||||
109
apps/fc-devicemgmt/deployment-operator.yaml
Normal file
109
apps/fc-devicemgmt/deployment-operator.yaml
Normal file
@@ -0,0 +1,109 @@
|
|||||||
|
# FlowerCore.DeviceManagement Operator.
|
||||||
|
#
|
||||||
|
# KubeOps controller for devices.flowercore.io resources. Operator-created
|
||||||
|
# children must set OwnerReferences + traceability labels/annotations per
|
||||||
|
# k8s-pod-ownership-and-traceability-standard.md. clusterrole-operator.yaml grants
|
||||||
|
# apps/deployments/get so the process can resolve its own Deployment UID.
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: fc-devicemgmt-operator
|
||||||
|
namespace: fc-devicemgmt
|
||||||
|
labels:
|
||||||
|
app: fc-devicemgmt-operator
|
||||||
|
app.kubernetes.io/name: fc-devicemgmt-operator
|
||||||
|
app.kubernetes.io/component: operator
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
annotations:
|
||||||
|
flowercore.io/traceability-standard: k8s-pod-ownership-and-traceability-standard
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
revisionHistoryLimit: 3
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: fc-devicemgmt-operator
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: fc-devicemgmt-operator
|
||||||
|
app.kubernetes.io/name: fc-devicemgmt-operator
|
||||||
|
app.kubernetes.io/component: operator
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
annotations:
|
||||||
|
prometheus.io/scrape: "true"
|
||||||
|
prometheus.io/port: "8080"
|
||||||
|
prometheus.io/path: "/metrics"
|
||||||
|
flowercore.io/audit-trace-id: "runtime-activity-trace"
|
||||||
|
spec:
|
||||||
|
serviceAccountName: fc-devicemgmt-operator
|
||||||
|
securityContext:
|
||||||
|
fsGroup: 1654
|
||||||
|
fsGroupChangePolicy: OnRootMismatch
|
||||||
|
containers:
|
||||||
|
- name: operator
|
||||||
|
image: localhost/fc-devicemgmt-operator:v20260519-sp34cl3-fix
|
||||||
|
imagePullPolicy: Never
|
||||||
|
ports:
|
||||||
|
- name: metrics
|
||||||
|
containerPort: 8080
|
||||||
|
env:
|
||||||
|
- name: ASPNETCORE_ENVIRONMENT
|
||||||
|
value: "Production"
|
||||||
|
- name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
|
||||||
|
value: "false"
|
||||||
|
- name: POD_NAME
|
||||||
|
valueFrom:
|
||||||
|
fieldRef:
|
||||||
|
fieldPath: metadata.name
|
||||||
|
- name: POD_NAMESPACE
|
||||||
|
valueFrom:
|
||||||
|
fieldRef:
|
||||||
|
fieldPath: metadata.namespace
|
||||||
|
- name: FLOWERCORE_KUBERNETES_OWNER_DEPLOYMENT
|
||||||
|
value: "fc-devicemgmt-operator"
|
||||||
|
- name: FlowerCore__Service__Name
|
||||||
|
value: "FlowerCore.DeviceManagement.Operator"
|
||||||
|
- name: FlowerCore__DeviceManagement__DefaultTenantId
|
||||||
|
value: "system"
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 50m
|
||||||
|
memory: 128Mi
|
||||||
|
limits:
|
||||||
|
cpu: 500m
|
||||||
|
memory: 512Mi
|
||||||
|
readinessProbe:
|
||||||
|
tcpSocket:
|
||||||
|
port: 8080
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 10
|
||||||
|
livenessProbe:
|
||||||
|
tcpSocket:
|
||||||
|
port: 8080
|
||||||
|
initialDelaySeconds: 20
|
||||||
|
periodSeconds: 30
|
||||||
|
securityContext:
|
||||||
|
runAsNonRoot: true
|
||||||
|
runAsUser: 1654
|
||||||
|
runAsGroup: 1654
|
||||||
|
allowPrivilegeEscalation: false
|
||||||
|
readOnlyRootFilesystem: true
|
||||||
|
capabilities:
|
||||||
|
drop:
|
||||||
|
- ALL
|
||||||
|
volumeMounts:
|
||||||
|
- name: tmp
|
||||||
|
mountPath: /tmp
|
||||||
|
- name: logs
|
||||||
|
mountPath: /app/logs
|
||||||
|
volumes:
|
||||||
|
- name: tmp
|
||||||
|
emptyDir: {}
|
||||||
|
- name: logs
|
||||||
|
emptyDir: {}
|
||||||
151
apps/fc-devicemgmt/deployment-web.yaml
Normal file
151
apps/fc-devicemgmt/deployment-web.yaml
Normal file
@@ -0,0 +1,151 @@
|
|||||||
|
# FlowerCore.DeviceManagement Web.
|
||||||
|
#
|
||||||
|
# Source repo is expected to ship FlowerCore.DeviceManagement.Web in a later
|
||||||
|
# Sprint 9+ lane. This manifest is static-valid without requiring the image to
|
||||||
|
# exist yet; import localhost/fc-devicemgmt-web:<tag> to all schedulable RKE2
|
||||||
|
# nodes before letting ArgoCD sync a live rollout.
|
||||||
|
#
|
||||||
|
# SCALED TO 0 — 2026-05-19 morning-routine cleanup.
|
||||||
|
# The Web pod cannot start until TWO upstream gaps close:
|
||||||
|
# 1. MySQL DB instance `flowercore_devicemgmt` (user `fc_devicemgmt`) is
|
||||||
|
# provisioned via fc-mysql Manager. The cluster currently has ZERO
|
||||||
|
# MySqlInstanceCrds and no `mysql.fc-mysql.svc:3306` Service, so the
|
||||||
|
# deployment-web container env `FlowerCore__Database__Host=mysql.fc-mysql.svc`
|
||||||
|
# points at nothing. Provision via the fc-mysql Manager UI/REST/MCP.
|
||||||
|
# 2. 1Password vault item `IAmWorkin/FlowerCore DeviceManagement Runtime`
|
||||||
|
# with 5 fields (DB-Password, mtls-ca.pem, mtls-client.crt, mtls-client.key,
|
||||||
|
# mtls-chain.pem) — see apps/fc-devicemgmt/1password-item.yaml. Mint mTLS
|
||||||
|
# from step-ca-agent ClusterIssuer per ADR-126; DB-Password must match the
|
||||||
|
# password configured for the MySQL user.
|
||||||
|
# Re-enable: change replicas back to 2 after both gaps close. The image tag
|
||||||
|
# in this file (v20260512-cx5) MAY also need a refresh — it predates the
|
||||||
|
# Sprint 34 Cl-3 operator fix; Web may have an analogous bug.
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: fc-devicemgmt-web
|
||||||
|
namespace: fc-devicemgmt
|
||||||
|
labels:
|
||||||
|
app: fc-devicemgmt-web
|
||||||
|
app.kubernetes.io/name: fc-devicemgmt-web
|
||||||
|
app.kubernetes.io/component: web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
annotations:
|
||||||
|
flowercore.io/traceability-standard: k8s-pod-ownership-and-traceability-standard
|
||||||
|
spec:
|
||||||
|
replicas: 0
|
||||||
|
revisionHistoryLimit: 3
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: fc-devicemgmt-web
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: fc-devicemgmt-web
|
||||||
|
app.kubernetes.io/name: fc-devicemgmt-web
|
||||||
|
app.kubernetes.io/component: web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
annotations:
|
||||||
|
prometheus.io/scrape: "true"
|
||||||
|
prometheus.io/port: "8080"
|
||||||
|
prometheus.io/path: "/metrics"
|
||||||
|
flowercore.io/audit-trace-id: "runtime-activity-trace"
|
||||||
|
spec:
|
||||||
|
securityContext:
|
||||||
|
fsGroup: 1654
|
||||||
|
fsGroupChangePolicy: OnRootMismatch
|
||||||
|
containers:
|
||||||
|
- name: web
|
||||||
|
image: localhost/fc-devicemgmt-web:v20260512-cx5
|
||||||
|
imagePullPolicy: Never
|
||||||
|
ports:
|
||||||
|
- name: http
|
||||||
|
containerPort: 8080
|
||||||
|
env:
|
||||||
|
- name: ASPNETCORE_URLS
|
||||||
|
value: "http://+:8080"
|
||||||
|
- name: ASPNETCORE_ENVIRONMENT
|
||||||
|
value: "Production"
|
||||||
|
- name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
|
||||||
|
value: "false"
|
||||||
|
- name: FlowerCore__Service__Name
|
||||||
|
value: "FlowerCore.DeviceManagement.Web"
|
||||||
|
- name: FlowerCore__DeviceManagement__DefaultTenantId
|
||||||
|
value: "system"
|
||||||
|
- name: FlowerCore__Database__Provider
|
||||||
|
value: "MySql"
|
||||||
|
- name: FlowerCore__Database__Host
|
||||||
|
value: "mysql.fc-mysql.svc"
|
||||||
|
- name: FlowerCore__Database__Database
|
||||||
|
value: "flowercore_devicemgmt"
|
||||||
|
- name: FlowerCore__Database__User
|
||||||
|
value: "fc_devicemgmt"
|
||||||
|
- name: FlowerCore__Database__Password
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: fc-devicemgmt-runtime
|
||||||
|
key: DB-Password
|
||||||
|
- name: FlowerCore__DeviceManagement__AgentMtls__CaPath
|
||||||
|
value: "/secrets/devicemgmt-mtls/mtls-ca.pem"
|
||||||
|
- name: FlowerCore__DeviceManagement__AgentMtls__ClientCertificatePath
|
||||||
|
value: "/secrets/devicemgmt-mtls/mtls-client.crt"
|
||||||
|
- name: FlowerCore__DeviceManagement__AgentMtls__ClientKeyPath
|
||||||
|
value: "/secrets/devicemgmt-mtls/mtls-client.key"
|
||||||
|
- name: FlowerCore__EventBus__Redis__Configuration
|
||||||
|
value: "redis.fc-redis.svc:6379"
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 100m
|
||||||
|
memory: 256Mi
|
||||||
|
limits:
|
||||||
|
cpu: 1000m
|
||||||
|
memory: 768Mi
|
||||||
|
startupProbe:
|
||||||
|
tcpSocket:
|
||||||
|
port: 8080
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 5
|
||||||
|
failureThreshold: 30
|
||||||
|
readinessProbe:
|
||||||
|
tcpSocket:
|
||||||
|
port: 8080
|
||||||
|
periodSeconds: 10
|
||||||
|
failureThreshold: 3
|
||||||
|
livenessProbe:
|
||||||
|
tcpSocket:
|
||||||
|
port: 8080
|
||||||
|
initialDelaySeconds: 30
|
||||||
|
periodSeconds: 30
|
||||||
|
failureThreshold: 3
|
||||||
|
securityContext:
|
||||||
|
runAsNonRoot: true
|
||||||
|
runAsUser: 1654
|
||||||
|
runAsGroup: 1654
|
||||||
|
allowPrivilegeEscalation: false
|
||||||
|
readOnlyRootFilesystem: true
|
||||||
|
capabilities:
|
||||||
|
drop:
|
||||||
|
- ALL
|
||||||
|
volumeMounts:
|
||||||
|
- name: tmp
|
||||||
|
mountPath: /tmp
|
||||||
|
- name: logs
|
||||||
|
mountPath: /app/logs
|
||||||
|
- name: devicemgmt-mtls
|
||||||
|
mountPath: /secrets/devicemgmt-mtls
|
||||||
|
readOnly: true
|
||||||
|
volumes:
|
||||||
|
- name: tmp
|
||||||
|
emptyDir: {}
|
||||||
|
- name: logs
|
||||||
|
emptyDir: {}
|
||||||
|
- name: devicemgmt-mtls
|
||||||
|
secret:
|
||||||
|
secretName: fc-devicemgmt-runtime
|
||||||
|
defaultMode: 0400
|
||||||
55
apps/fc-devicemgmt/ingressroute-web.yaml
Normal file
55
apps/fc-devicemgmt/ingressroute-web.yaml
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
# LAN ingress for FlowerCore.DeviceManagement Web.
|
||||||
|
#
|
||||||
|
# RKE2 Traefik has no built-in ACME resolver configured. Keep TLS certificate
|
||||||
|
# ownership in cert-manager Certificate/fc-devicemgmt-web-tls.
|
||||||
|
apiVersion: traefik.io/v1alpha1
|
||||||
|
kind: IngressRoute
|
||||||
|
metadata:
|
||||||
|
name: fc-devicemgmt-web
|
||||||
|
namespace: fc-devicemgmt
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: fc-devicemgmt-web
|
||||||
|
app.kubernetes.io/component: web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
spec:
|
||||||
|
entryPoints:
|
||||||
|
- websecure
|
||||||
|
routes:
|
||||||
|
- match: Host(`devices.iamworkin.lan`)
|
||||||
|
kind: Rule
|
||||||
|
services:
|
||||||
|
- name: fc-devicemgmt-web
|
||||||
|
port: 80
|
||||||
|
tls:
|
||||||
|
secretName: fc-devicemgmt-web-tls
|
||||||
|
|
||||||
|
# Future public agent/update host gate (OFF by default):
|
||||||
|
#
|
||||||
|
# Do not enable `update.flowercore.io` here until Authentik OIDC Q-OIDC-1
|
||||||
|
# resolves the public-device-management auth model and route ownership with
|
||||||
|
# UpdateCenter. When enabled, use a separate public IngressRoute with an
|
||||||
|
# explicit Method allowlist, public-host auth middleware, and public TLS
|
||||||
|
# certificate strategy. Leaving this as comments keeps ArgoCD from stealing
|
||||||
|
# live UpdateCenter traffic.
|
||||||
|
#
|
||||||
|
# apiVersion: traefik.io/v1alpha1
|
||||||
|
# kind: IngressRoute
|
||||||
|
# metadata:
|
||||||
|
# name: fc-devicemgmt-web-public
|
||||||
|
# namespace: fc-devicemgmt
|
||||||
|
# annotations:
|
||||||
|
# flowercore.io/public-host-gate: "disabled-until-Q-OIDC-1"
|
||||||
|
# spec:
|
||||||
|
# entryPoints:
|
||||||
|
# - websecure
|
||||||
|
# routes:
|
||||||
|
# - match: Host(`update.flowercore.io`) && (Method(`GET`) || Method(`HEAD`) || Method(`POST`) || Method(`OPTIONS`))
|
||||||
|
# kind: Rule
|
||||||
|
# services:
|
||||||
|
# - name: fc-devicemgmt-web
|
||||||
|
# port: 80
|
||||||
|
# tls:
|
||||||
|
# secretName: fc-devicemgmt-public-tls
|
||||||
13
apps/fc-devicemgmt/namespace.yaml
Normal file
13
apps/fc-devicemgmt/namespace.yaml
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
# FlowerCore.DeviceManagement namespace.
|
||||||
|
#
|
||||||
|
# ArgoCD discovers this directory as Application `infra-fc-devicemgmt`.
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Namespace
|
||||||
|
metadata:
|
||||||
|
name: fc-devicemgmt
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: fc-devicemgmt
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
224
apps/fc-devicemgmt/network-policy.yaml
Normal file
224
apps/fc-devicemgmt/network-policy.yaml
Normal file
@@ -0,0 +1,224 @@
|
|||||||
|
# FlowerCore.DeviceManagement NetworkPolicies.
|
||||||
|
#
|
||||||
|
# NetworkPolicies belong in bluejay-infra so ArgoCD owns rebuild state.
|
||||||
|
# Rules include Traefik post-DNAT backend ports per
|
||||||
|
# feedback_netpol_dnat_backend_port and Synology NFS egress for the requested
|
||||||
|
# cold-tier / future artifact path.
|
||||||
|
---
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: NetworkPolicy
|
||||||
|
metadata:
|
||||||
|
name: fc-devicemgmt-web-isolation
|
||||||
|
namespace: fc-devicemgmt
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: fc-devicemgmt-web
|
||||||
|
app.kubernetes.io/component: web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
spec:
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: fc-devicemgmt-web
|
||||||
|
policyTypes:
|
||||||
|
- Ingress
|
||||||
|
- Egress
|
||||||
|
ingress:
  # LAN edge: only cluster Traefik should reach the Web pod for
  # devices.iamworkin.lan.
  - from:
      - namespaceSelector:
          matchLabels:
            kubernetes.io/metadata.name: traefik-system
        podSelector:
          matchLabels:
            app.kubernetes.io/name: traefik
    ports:
      - port: 8080
        protocol: TCP
  # Metrics scrape: the Web pod template advertises prometheus.io/scrape on
  # port 8080, so admit the monitoring namespace the same way the operator
  # policy in this file does. Without this rule the scrape is dropped.
  - from:
      - namespaceSelector:
          matchLabels:
            kubernetes.io/metadata.name: monitoring
    ports:
      - port: 8080
        protocol: TCP
  # Direct LAN diagnostics are allowed only from FlowerCore LAN/VPN ranges.
  - from:
      - ipBlock:
          cidr: 10.0.56.0/24
      - ipBlock:
          cidr: 10.0.57.0/24
      - ipBlock:
          cidr: 10.0.58.0/24
      - ipBlock:
          cidr: 10.0.68.0/27
    ports:
      - port: 8080
        protocol: TCP
|
||||||
|
egress:
|
||||||
|
# CoreDNS.
|
||||||
|
- to:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: kube-system
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
k8s-app: kube-dns
|
||||||
|
ports:
|
||||||
|
- port: 53
|
||||||
|
protocol: UDP
|
||||||
|
- port: 53
|
||||||
|
protocol: TCP
|
||||||
|
# Database namespace.
|
||||||
|
- to:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: fc-mysql
|
||||||
|
ports:
|
||||||
|
- port: 3306
|
||||||
|
protocol: TCP
|
||||||
|
# Redis backplane for multi-replica SignalR / live-status fan-out.
|
||||||
|
- to:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: fc-redis
|
||||||
|
ports:
|
||||||
|
- port: 6379
|
||||||
|
protocol: TCP
|
||||||
|
# Traefik VIP / in-cluster Traefik for self-callbacks and public URL
|
||||||
|
# generation tests. Include post-DNAT backend ports 8443 + 8080.
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.56.200/32
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: traefik-system
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: traefik
|
||||||
|
ports:
|
||||||
|
- port: 80
|
||||||
|
protocol: TCP
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8080
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8443
|
||||||
|
protocol: TCP
|
||||||
|
# Agent egress: LAN/VPN devices may run DM Agent in Generic, Kiosk, Pi,
|
||||||
|
# ThinClient, or Server mode. Keep this private-range only.
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.56.0/24
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.57.0/24
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.58.0/24
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.68.0/27
|
||||||
|
ports:
|
||||||
|
- port: 80
|
||||||
|
protocol: TCP
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8080
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5000
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5001
|
||||||
|
protocol: TCP
|
||||||
|
# Synology NFS cold-tier / artifact mount allowance.
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.58.3/32
|
||||||
|
ports:
|
||||||
|
- port: 2049
|
||||||
|
protocol: TCP
|
||||||
|
- port: 2049
|
||||||
|
protocol: UDP
|
||||||
|
- port: 111
|
||||||
|
protocol: TCP
|
||||||
|
- port: 111
|
||||||
|
protocol: UDP
|
||||||
|
---
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: NetworkPolicy
|
||||||
|
metadata:
|
||||||
|
name: fc-devicemgmt-operator-isolation
|
||||||
|
namespace: fc-devicemgmt
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: fc-devicemgmt-operator
|
||||||
|
app.kubernetes.io/component: operator
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
spec:
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: fc-devicemgmt-operator
|
||||||
|
policyTypes:
|
||||||
|
- Ingress
|
||||||
|
- Egress
|
||||||
|
ingress:
|
||||||
|
- from:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: monitoring
|
||||||
|
ports:
|
||||||
|
- port: 8080
|
||||||
|
protocol: TCP
|
||||||
|
egress:
|
||||||
|
# CoreDNS.
|
||||||
|
- to:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: kube-system
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
k8s-app: kube-dns
|
||||||
|
ports:
|
||||||
|
- port: 53
|
||||||
|
protocol: UDP
|
||||||
|
- port: 53
|
||||||
|
protocol: TCP
|
||||||
|
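# Kubernetes API for KubeOps reconciliation and Deployment UID lookup.
# NOTE: the empty `to: []` below matches ALL destinations, so this rule allows
# egress to any host on TCP 443/6443, not only the API server.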
|
||||||
|
- to: []
|
||||||
|
ports:
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 6443
|
||||||
|
protocol: TCP
|
||||||
|
# Agent egress for operator-initiated probes / fallback command dispatch.
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.56.0/24
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.57.0/24
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.58.0/24
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.68.0/27
|
||||||
|
ports:
|
||||||
|
- port: 80
|
||||||
|
protocol: TCP
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8080
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5000
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5001
|
||||||
|
protocol: TCP
|
||||||
|
# Synology NFS allowance for future cold-tier/audit archival jobs.
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.58.3/32
|
||||||
|
ports:
|
||||||
|
- port: 2049
|
||||||
|
protocol: TCP
|
||||||
|
- port: 2049
|
||||||
|
protocol: UDP
|
||||||
|
- port: 111
|
||||||
|
protocol: TCP
|
||||||
|
- port: 111
|
||||||
|
protocol: UDP
|
||||||
22
apps/fc-devicemgmt/service-web.yaml
Normal file
22
apps/fc-devicemgmt/service-web.yaml
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: fc-devicemgmt-web
|
||||||
|
namespace: fc-devicemgmt
|
||||||
|
labels:
|
||||||
|
app: fc-devicemgmt-web
|
||||||
|
app.kubernetes.io/name: fc-devicemgmt-web
|
||||||
|
app.kubernetes.io/component: web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: fc-devicemgmt-web
|
||||||
|
ports:
|
||||||
|
- name: http
|
||||||
|
port: 80
|
||||||
|
targetPort: 8080
|
||||||
|
protocol: TCP
|
||||||
12
apps/fc-devicemgmt/serviceaccount-operator.yaml
Normal file
12
apps/fc-devicemgmt/serviceaccount-operator.yaml
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: ServiceAccount
|
||||||
|
metadata:
|
||||||
|
name: fc-devicemgmt-operator
|
||||||
|
namespace: fc-devicemgmt
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: fc-devicemgmt-operator
|
||||||
|
app.kubernetes.io/component: operator
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
@@ -118,7 +118,7 @@ spec:
|
|||||||
# dotnet.exe publish -c Release -o deploy/app \
|
# dotnet.exe publish -c Release -o deploy/app \
|
||||||
# src/FlowerCore.Distribution.Web/FlowerCore.Distribution.Web.csproj
|
# src/FlowerCore.Distribution.Web/FlowerCore.Distribution.Web.csproj
|
||||||
# podman build -t localhost/fc-distribution:v<tag> -f deploy/Dockerfile.deploy deploy
|
# podman build -t localhost/fc-distribution:v<tag> -f deploy/Dockerfile.deploy deploy
|
||||||
image: localhost/fc-distribution:v202604240010
|
image: localhost/fc-distribution:v202605061948
|
||||||
imagePullPolicy: Never
|
imagePullPolicy: Never
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 8080
|
- containerPort: 8080
|
||||||
@@ -151,6 +151,10 @@ spec:
|
|||||||
value: "/signing/aistation-field/chain.pem"
|
value: "/signing/aistation-field/chain.pem"
|
||||||
- name: FlowerCore__Distribution__Signing__EditionCerts__aistation-field__KeyPath
|
- name: FlowerCore__Distribution__Signing__EditionCerts__aistation-field__KeyPath
|
||||||
value: "/signing/aistation-field/private-key.pem"
|
value: "/signing/aistation-field/private-key.pem"
|
||||||
|
# Public distribution host is GET/HEAD-only at Traefik; this
|
||||||
|
# entitlement list controls which editions are readable there.
|
||||||
|
- name: FlowerCore__Distribution__EntitlementPublic__PublicEditions__0
|
||||||
|
value: "*"
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 100m
|
cpu: 100m
|
||||||
@@ -262,8 +266,12 @@ spec:
|
|||||||
kind: ClusterIssuer
|
kind: ClusterIssuer
|
||||||
dnsNames:
|
dnsNames:
|
||||||
- dist.iamworkin.lan
|
- dist.iamworkin.lan
|
||||||
duration: 2160h # 90d
|
# step-ca ACME caps lifetime at 30d; requesting 90d silently capped
|
||||||
renewBefore: 720h # 30d
|
# made renewBefore=cert-lifetime → perpetual renewal loop (10880+ CRs
|
||||||
|
# in 18h on 2026-05-07). Match working 720h/240h pattern from other
|
||||||
|
# FC services.
|
||||||
|
duration: 720h # 30d (step-ca cap)
|
||||||
|
renewBefore: 240h # 10d
|
||||||
---
|
---
|
||||||
apiVersion: traefik.io/v1alpha1
|
apiVersion: traefik.io/v1alpha1
|
||||||
kind: IngressRoute
|
kind: IngressRoute
|
||||||
|
|||||||
45
apps/fc-divoom-dm-pi-device/README.md
Normal file
45
apps/fc-divoom-dm-pi-device/README.md
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
# FlowerCore Divoom DM Pi Device
|
||||||
|
|
||||||
|
Source-controlled Puppet/Hiera deployment contract for registering the edge2
|
||||||
|
Divoom MiniToo panel as a FlowerCore DeviceManagement-managed Pi device.
|
||||||
|
|
||||||
|
This is not a Kubernetes application. The live panel remains the existing
|
||||||
|
edge2 `flowercore-divoom.service` managed by `FlowerCore.Puppet`
|
||||||
|
`profile::pi::service::divoom`, with the .NET payload deployed out of band
|
||||||
|
and `/opt/flowercore/divoom/data` plus the Bluetooth shell wrappers preserved.
|
||||||
|
Because edge2 is already Hiera-driven through `profile::pi::service::apps`,
|
||||||
|
the deploy home is additive `profile::pi::service` data/profile source, not
|
||||||
|
`profile::edge::service::apps` and not an ArgoCD/K8s app.
|
||||||
|
|
||||||
|
## Scope
|
||||||
|
|
||||||
|
- Stage DeviceManagement registration metadata for the edge2 Divoom MiniToo.
|
||||||
|
- Stage a separate, disabled-by-default DM Agent executor unit for privileged
|
||||||
|
Bluetooth operations once the DM-RPC lane lands.
|
||||||
|
- Keep `flowercore-divoom.service` and `flowercore-divoom-bt.service`
|
||||||
|
untouched: no service replacement, no restart subscription, no K8s surface.
|
||||||
|
- Preserve the current wrapper contract:
|
||||||
|
`/opt/flowercore/divoom/bt-link.sh`,
|
||||||
|
`/opt/flowercore/divoom/bt-reset.sh`, and
|
||||||
|
`/opt/flowercore/divoom/audio-link.sh`.
|
||||||
|
- Keep FM radio disabled and require visible render proof; device-info echo is
|
||||||
|
not render proof.
|
||||||
|
|
||||||
|
## Artifact Map
|
||||||
|
|
||||||
|
| Path | Use |
|
||||||
|
| --- | --- |
|
||||||
|
| `hiera/edge2-divoom-dm-device.overlay.yaml` | Additive Hiera overlay for edge2. Merge into the existing node YAML without removing `fc-pimanager` or `fc-divoom`. |
|
||||||
|
| `puppet/profile/pi/service/divoom_dm_device.pp` | Puppet profile shape to vendor into `FlowerCore.Puppet` after the DM-RPC executor binary exists. |
|
||||||
|
| `puppet/templates/divoom-device-registration.json.epp` | DM device registration metadata rendered on edge2. |
|
||||||
|
| `puppet/templates/flowercore-divoom-dm-agent.service.epp` | Separate DM Agent systemd unit. Defaults are stopped and disabled until a later cutover. |
|
||||||
|
|
||||||
|
## Rollout Notes
|
||||||
|
|
||||||
|
1. Land these artifacts in bluejay-infra as the deploy contract.
|
||||||
|
2. Vendor the Puppet profile and EPP templates into `FlowerCore.Puppet`.
|
||||||
|
3. Merge the Hiera overlay into `data/nodes/edge2.iamworkin.lan.yaml`.
|
||||||
|
4. Run Puppet in noop first, preferably with a node-local validation directory
|
||||||
|
under `~/.fcv` rather than `/tmp`.
|
||||||
|
5. Only enable the DM Agent service after the DeviceManagement BT executor has
|
||||||
|
landed and passed operator-eyeball render proof.
|
||||||
@@ -0,0 +1,32 @@
|
|||||||
|
---
|
||||||
|
# Merge into FlowerCore.Puppet data/nodes/edge2.iamworkin.lan.yaml.
|
||||||
|
# Additive overlay only: keep the existing fc-pimanager version/tarball entry,
|
||||||
|
# keep fc-divoom enabled, and do not move Divoom into Kubernetes.
|
||||||
|
|
||||||
|
profile::pi::service::apps:
|
||||||
|
fc-pimanager:
|
||||||
|
binary: 'FlowerCore.PiManager.Web'
|
||||||
|
install_dir: '/opt/fc-pimanager'
|
||||||
|
port: 5000
|
||||||
|
environment: 'edge2'
|
||||||
|
version: '2026.05.28.1646'
|
||||||
|
tarball_source: 'puppet:///modules/profile/pi/builds/fc-pimanager.tar.gz'
|
||||||
|
fc-divoom:
|
||||||
|
enabled: true
|
||||||
|
|
||||||
|
profile::pi::service::divoom_dm_device::ensure: 'present'
|
||||||
|
profile::pi::service::divoom_dm_device::service_enabled: false
|
||||||
|
profile::pi::service::divoom_dm_device::service_ensure: 'stopped'
|
||||||
|
profile::pi::service::divoom_dm_device::device_id: 'edge2-divoom-minitoo'
|
||||||
|
profile::pi::service::divoom_dm_device::display_name: 'edge2 Divoom MiniToo'
|
||||||
|
profile::pi::service::divoom_dm_device::host_fqdn: 'edge2.iamworkin.lan'
|
||||||
|
# NOTE(review): the Web IngressRoute in apps/fc-devicemgmt/ingressroute-web.yaml
# only matches Host(`devices.iamworkin.lan`). Confirm that
# `devicemgmt.iamworkin.lan` resolves and routes to DeviceManagement Web before
# the DM Agent service is enabled.
profile::pi::service::divoom_dm_device::dm_web_url: 'https://devicemgmt.iamworkin.lan'
|
||||||
|
profile::pi::service::divoom_dm_device::divoom_install_dir: '/opt/flowercore/divoom'
|
||||||
|
profile::pi::service::divoom_dm_device::agent_install_dir: '/opt/flowercore/devicemanagement-agent'
|
||||||
|
profile::pi::service::divoom_dm_device::bt_candidate_channels:
|
||||||
|
- '1'
|
||||||
|
- '10'
|
||||||
|
profile::pi::service::divoom_dm_device::default_bt_channel: '1'
|
||||||
|
profile::pi::service::divoom_dm_device::a2dp_default_state: 'off'
|
||||||
|
profile::pi::service::divoom_dm_device::fm_radio_enabled: false
|
||||||
|
profile::pi::service::divoom_dm_device::visible_render_proof_required: true
|
||||||
@@ -0,0 +1,140 @@
|
|||||||
|
# Drop into FlowerCore.Puppet site-modules/profile/manifests/pi/service/divoom_dm_device.pp.
|
||||||
|
# This profile is additive to profile::pi::service::divoom. It must not manage,
|
||||||
|
# restart, replace, or subscribe the existing flowercore-divoom.service.
|
||||||
|
# @summary Stages the Divoom DM device registration file and a separate DM Agent
#   systemd unit, without managing the existing flowercore-divoom.service.
#
# @param ensure
#   'present' renders the registration JSON and the unit file and manages the
#   service; 'absent' stops/disables the service and removes both files.
# @param service_enabled
#   Passed to the service resource as `enable`.
# @param service_ensure
#   Passed to the service resource as `ensure`.
# @param service_name
#   systemd unit name (without the `.service` suffix).
# @param device_id
#   Used as the registration file name and passed to both templates.
# @param display_name
#   Passed to the registration template.
# @param host_fqdn
#   Passed to the registration template.
# @param dm_web_url
#   Passed to the unit template.
#   NOTE(review): the only Web IngressRoute visible alongside this profile
#   routes `devices.iamworkin.lan`; confirm the default host here is reachable.
# @param divoom_install_dir
#   Passed to both templates.
# @param agent_install_dir
#   Directory that holds the agent binary; passed to the unit template.
# @param agent_binary
#   File name appended to `agent_install_dir` to form the agent binary path.
# @param bt_candidate_channels
#   Rendered as a JSON array of strings for the registration template.
# @param default_bt_channel
#   Passed to the registration template.
# @param a2dp_default_state
#   Passed to the registration template.
# @param fm_radio_enabled
#   Passed to the registration template.
# @param visible_render_proof_required
#   Passed to the registration template.
class profile::pi::service::divoom_dm_device (
  Enum['present', 'absent'] $ensure = 'present',
  Boolean $service_enabled = false,
  Enum['running', 'stopped'] $service_ensure = 'stopped',
  String $service_name = 'flowercore-divoom-dm-agent',
  String $device_id = 'edge2-divoom-minitoo',
  String $display_name = 'edge2 Divoom MiniToo',
  String $host_fqdn = 'edge2.iamworkin.lan',
  String $dm_web_url = 'https://devicemgmt.iamworkin.lan',
  String $divoom_install_dir = '/opt/flowercore/divoom',
  String $agent_install_dir = '/opt/flowercore/devicemanagement-agent',
  String $agent_binary = 'FlowerCore.DeviceManagement.Agent',
  Array[String] $bt_candidate_channels = ['1', '10'],
  String $default_bt_channel = '1',
  Enum['on', 'off'] $a2dp_default_state = 'off',
  Boolean $fm_radio_enabled = false,
  Boolean $visible_render_proof_required = true,
) {
  include profile::workstation::safe_account_exclusion

  $safe_account = $profile::workstation::safe_account_exclusion::safe_account
  $config_dir = '/etc/flowercore/device-management/devices'
  $state_dir = '/var/lib/flowercore/divoom-dm-agent'
  $log_dir = '/var/log/flowercore/divoom-dm-agent'
  $registration_path = "${config_dir}/${device_id}.json"
  $agent_binary_path = "${agent_install_dir}/${agent_binary}"
  $unit_path = "/etc/systemd/system/${service_name}.service"
  $bt_channels_json = inline_template('[<%= @bt_candidate_channels.map { |c| "\"#{c}\"" }.join(", ") %>]')

  if $safe_account {
    # Safe-account hosts get a notice (and, off Windows, an audit marker file)
    # instead of any Divoom DM resources.
    notify { 'fc-divoom-dm-device safe-account exclusion':
      message => 'SAFE-ACCOUNT-EXCLUSION: Divoom DM Pi device profile refused to apply on operator workstation',
    }

    if $facts['os']['family'] != 'windows' {
      ensure_resource('file', '/var/log/flowercore-audit', {
        'ensure' => 'directory',
        'owner'  => 'root',
        'group'  => 'root',
        'mode'   => '0755',
      })

      file { '/var/log/flowercore-audit/safe-account-noop-fc-divoom-dm-device.log':
        ensure  => file,
        owner   => 'root',
        group   => 'root',
        mode    => '0644',
        content => "noop: divoom dm pi device profile refused to apply on safe-account host\n",
        require => File['/var/log/flowercore-audit'],
      }
    }
  } elsif $ensure == 'absent' {
    service { $service_name:
      ensure => stopped,
      enable => false,
    }

    # Stop the service before deleting its unit file, then refresh the
    # daemon-reload exec. The exec is refreshonly, so without this notify
    # systemd would keep the deleted unit loaded.
    file { $unit_path:
      ensure  => absent,
      require => Service[$service_name],
      notify  => Exec['fc-divoom-dm-agent-systemd-reload'],
    }

    file { $registration_path:
      ensure  => absent,
      require => Service[$service_name],
    }

    exec { 'fc-divoom-dm-agent-systemd-reload':
      command     => '/usr/bin/systemctl daemon-reload',
      refreshonly => true,
      path        => ['/usr/bin', '/bin'],
    }
  } else {
    case $facts['os']['family'] {
      'Debian': {}
      default: { fail("profile::pi::service::divoom_dm_device only supports Debian-family OS, got ${facts['os']['family']}") }
    }

    # NOTE(review): parent directories are not managed here; presumably another
    # profile creates them - confirm, since `file` does not create parents.
    # NOTE(review): the unit template runs the agent as User=stoltz with
    # ReadWritePaths limited to $state_dir and $log_dir, yet both are created
    # root:root 0755 below, so the agent cannot write to them as-is. Confirm the
    # intended owner before enabling the service.
    file { [$config_dir, $state_dir, $log_dir]:
      ensure => directory,
      owner  => 'root',
      group  => 'root',
      mode   => '0755',
    }

    file { $registration_path:
      ensure  => file,
      owner   => 'root',
      group   => 'root',
      mode    => '0644',
      content => epp('profile/pi/fc_divoom_dm/divoom-device-registration.json.epp', {
        'device_id'                     => $device_id,
        'display_name'                  => $display_name,
        'host_fqdn'                     => $host_fqdn,
        'divoom_install_dir'            => $divoom_install_dir,
        'bt_channels_json'              => $bt_channels_json,
        'default_bt_channel'            => $default_bt_channel,
        'a2dp_default_state'            => $a2dp_default_state,
        'fm_radio_enabled'              => $fm_radio_enabled,
        'visible_render_proof_required' => $visible_render_proof_required,
      }),
      require => File[$config_dir],
    }

    file { $unit_path:
      ensure  => file,
      owner   => 'root',
      group   => 'root',
      mode    => '0644',
      content => epp('profile/pi/fc_divoom_dm/flowercore-divoom-dm-agent.service.epp', {
        'service_name'       => $service_name,
        'device_id'          => $device_id,
        'dm_web_url'         => $dm_web_url,
        'registration_path'  => $registration_path,
        'divoom_install_dir' => $divoom_install_dir,
        'agent_install_dir'  => $agent_install_dir,
        'agent_binary_path'  => $agent_binary_path,
        'state_dir'          => $state_dir,
        'log_dir'            => $log_dir,
      }),
      notify  => Exec['fc-divoom-dm-agent-systemd-reload'],
      require => File[$registration_path],
    }

    # Runs only when the unit file changes (refreshonly).
    exec { 'fc-divoom-dm-agent-systemd-reload':
      command     => '/usr/bin/systemctl daemon-reload',
      refreshonly => true,
      path        => ['/usr/bin', '/bin'],
    }

    service { $service_name:
      ensure  => $service_ensure,
      enable  => $service_enabled,
      require => [
        File[$unit_path],
        File[$registration_path],
        Exec['fc-divoom-dm-agent-systemd-reload'],
      ],
    }
  }
}
|
||||||
@@ -0,0 +1,34 @@
|
|||||||
|
{
|
||||||
|
"deviceId": "<%= $device_id %>",
|
||||||
|
"displayName": "<%= $display_name %>",
|
||||||
|
"hostFqdn": "<%= $host_fqdn %>",
|
||||||
|
"kind": "DivoomMiniToo",
|
||||||
|
"managedBy": "FlowerCore.DeviceManagement",
|
||||||
|
"executionMode": "Pi",
|
||||||
|
"transport": {
|
||||||
|
"kind": "BluetoothSerial",
|
||||||
|
"candidateChannels": <%= $bt_channels_json %>,
|
||||||
|
"defaultChannel": "<%= $default_bt_channel %>",
|
||||||
|
"deviceInfoIsRenderProof": false,
|
||||||
|
"visibleRenderProofRequired": <%= $visible_render_proof_required %>
|
||||||
|
},
|
||||||
|
"paths": {
|
||||||
|
"divoomInstallDir": "<%= $divoom_install_dir %>",
|
||||||
|
"btLink": "<%= $divoom_install_dir %>/bt-link.sh",
|
||||||
|
"btReset": "<%= $divoom_install_dir %>/bt-reset.sh",
|
||||||
|
"audioLink": "<%= $divoom_install_dir %>/audio-link.sh"
|
||||||
|
},
|
||||||
|
"capabilities": {
|
||||||
|
"supportsBluetoothSerial": true,
|
||||||
|
"supportsBtChannelRedetect": true,
|
||||||
|
"supportsBtHardReset": true,
|
||||||
|
"supportsBtAudioProfileSwitch": true,
|
||||||
|
"a2dpDefaultState": "<%= $a2dp_default_state %>",
|
||||||
|
"fmRadioEnabled": <%= $fm_radio_enabled %>
|
||||||
|
},
|
||||||
|
"safety": {
|
||||||
|
"preserveExistingService": "flowercore-divoom.service",
|
||||||
|
"preserveDataDirectory": "<%= $divoom_install_dir %>/data",
|
||||||
|
"doNotEnableFmRadio": true
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,36 @@
|
|||||||
|
[Unit]
Description=FlowerCore Divoom DM Agent Bluetooth executor
Documentation=https://github.com/astoltz/FlowerCore.Notes/blob/master/docs/standards/divoom-tv-hdmi-multitarget-render-substrate.md
Wants=network-online.target
After=network-online.target bluetooth.service
Requires=bluetooth.service
# The unit is skipped (not failed) unless the agent binary, the registration
# file, and all three Bluetooth wrapper scripts are present.
ConditionPathExists=<%= $agent_binary_path %>
ConditionPathExists=<%= $registration_path %>
ConditionPathExists=<%= $divoom_install_dir %>/bt-link.sh
ConditionPathExists=<%= $divoom_install_dir %>/bt-reset.sh
ConditionPathExists=<%= $divoom_install_dir %>/audio-link.sh
# Start rate limiting is a [Unit] setting (see systemd.unit(5)); it was
# previously placed in [Service], where StartLimitIntervalSec= is not a
# documented option. At most 3 starts per 300s.
StartLimitIntervalSec=300s
StartLimitBurst=3

[Service]
Type=simple
User=stoltz
Group=stoltz
WorkingDirectory=<%= $agent_install_dir %>
Environment=DOTNET_CLI_TELEMETRY_OPTOUT=1
Environment=FLOWERCORE_DM_DEVICE_REGISTRATION=<%= $registration_path %>
Environment=Divoom__Bluetooth__DeviceInfoIsRenderProof=false
Environment=Divoom__Bluetooth__VisibleRenderProofRequired=true
Environment=Divoom__Bluetooth__A2dpDefaultState=off
ExecStart=<%= $agent_binary_path %> --mode=Pi --device-id=<%= $device_id %> --dm-web-url=<%= $dm_web_url %> --registration=<%= $registration_path %>
Restart=on-failure
RestartSec=10s
SupplementaryGroups=bluetooth audio dialout
NoNewPrivileges=true
PrivateTmp=true
# Read-only filesystem view except for the state and log directories.
ProtectSystem=strict
ProtectHome=true
ReadWritePaths=<%= $state_dir %> <%= $log_dir %>

[Install]
WantedBy=multi-user.target
|
||||||
44
apps/fc-divoom-tv-pi/README.md
Normal file
44
apps/fc-divoom-tv-pi/README.md
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
# FlowerCore Divoom TV Pi HDMI
|
||||||
|
|
||||||
|
Source-controlled deploy shape for the native `FlowerCore.Divoom.Tv`
|
||||||
|
Avalonia HDMI renderer on a Raspberry Pi connected to a TV.
|
||||||
|
|
||||||
|
This is a Puppet/systemd appliance bundle, not a Kubernetes application. It
|
||||||
|
mirrors the existing `fc-signage-pi-player` pattern: bluejay-infra carries the
|
||||||
|
systemd units, scripts, Hiera shape, and Puppet profile source that
|
||||||
|
`FlowerCore.Puppet` vendors and installs.
|
||||||
|
|
||||||
|
## Scope
|
||||||
|
|
||||||
|
- Launch the future `FlowerCore.Divoom.Tv` linux-arm64 self-contained payload
|
||||||
|
from `/opt/flowercore/divoom-tv/FlowerCore.Divoom.Tv`.
|
||||||
|
- Prefer `cage` as the Wayland fullscreen compositor, with direct app launch as
|
||||||
|
a fallback for development images.
|
||||||
|
- Restart the app after HDMI hotplug with a 2 second DRM settle delay.
|
||||||
|
- Keep all runtime state local: `/var/lib/fc-divoom-tv` and
|
||||||
|
`/var/log/fc-divoom-tv`.
|
||||||
|
- Avoid CDN/runtime fetches; the app renders the in-house Divoom scene catalog
|
||||||
|
locally.
|
||||||
|
|
||||||
|
## Artifact Map
|
||||||
|
|
||||||
|
| Path | Use |
|
||||||
|
| --- | --- |
|
||||||
|
| `systemd/flowercore-divoom-tv.service` | Fullscreen Avalonia HDMI app service. |
|
||||||
|
| `systemd/flowercore-divoom-tv-hdmi.service` | HDMI hotplug responder service. |
|
||||||
|
| `systemd/99-flowercore-divoom-tv-hdmi.rules` | DRM udev hotplug rule. |
|
||||||
|
| `scripts/flowercore-divoom-tv-prelaunch.sh` | Preflight checks and local directory creation. |
|
||||||
|
| `scripts/flowercore-divoom-tv-launch.sh` | Cage-first fullscreen launcher. |
|
||||||
|
| `scripts/flowercore-divoom-tv-hdmi-respond.sh` | Hotplug settle and restart script. |
|
||||||
|
| `puppet/profile/pi/service/divoom_tv.pp` | Puppet profile shape to vendor into `FlowerCore.Puppet`. |
|
||||||
|
| `hiera/example-divoom-tv-pi.iamworkin.lan.yaml` | Example node Hiera for a Divoom TV Pi. |
|
||||||
|
|
||||||
|
## Rollout Notes
|
||||||
|
|
||||||
|
1. Build `FlowerCore.Divoom.Tv` with `dotnet.exe publish -c Release -r linux-arm64 --self-contained`.
|
||||||
|
2. Stage the payload to `/opt/flowercore/divoom-tv/` through the standard noc1
|
||||||
|
jump path and avoid `/tmp` for unprivileged Pi scratch.
|
||||||
|
3. Vendor the profile and static files into `FlowerCore.Puppet`.
|
||||||
|
4. Run Puppet noop, then apply on the target Pi.
|
||||||
|
5. Prove deployment with `systemctl is-active flowercore-divoom-tv.service`,
|
||||||
|
journal lines showing frames presented, and a visible HDMI display check.
|
||||||
@@ -0,0 +1,19 @@
|
|||||||
|
---
|
||||||
|
# Example node data for a dedicated Pi -> HDMI -> TV Divoom renderer.
|
||||||
|
# Copy into FlowerCore.Puppet data/nodes/<hostname>.iamworkin.lan.yaml only
|
||||||
|
# after the Pi has a static DHCP/DNS entry and the linux-arm64 payload exists.
|
||||||
|
|
||||||
|
facts:
|
||||||
|
role: pi_prototype
|
||||||
|
|
||||||
|
profile::motd::role: 'Divoom TV HDMI Renderer'
|
||||||
|
|
||||||
|
profile::pi::service::divoom_tv::ensure: 'present'
|
||||||
|
profile::pi::service::divoom_tv::service_enabled: true
|
||||||
|
profile::pi::service::divoom_tv::service_ensure: 'running'
|
||||||
|
profile::pi::service::divoom_tv::install_dir: '/opt/flowercore/divoom-tv'
|
||||||
|
profile::pi::service::divoom_tv::state_dir: '/var/lib/fc-divoom-tv'
|
||||||
|
profile::pi::service::divoom_tv::log_dir: '/var/log/fc-divoom-tv'
|
||||||
|
profile::pi::service::divoom_tv::presentation_mode: 'PillarboxSquare'
|
||||||
|
profile::pi::service::divoom_tv::startup_scene: 'bluejay-clock'
|
||||||
|
profile::pi::service::divoom_tv::reduced_motion: false
|
||||||
149
apps/fc-divoom-tv-pi/puppet/profile/pi/service/divoom_tv.pp
Normal file
149
apps/fc-divoom-tv-pi/puppet/profile/pi/service/divoom_tv.pp
Normal file
@@ -0,0 +1,149 @@
|
|||||||
|
# Drop into FlowerCore.Puppet site-modules/profile/manifests/pi/service/divoom_tv.pp.
# Static files come from profile/pi/fc_divoom_tv/ after this bluejay-infra
# bundle is vendored into the Puppet control repo.
#
# @summary Installs or removes the FlowerCore Divoom TV HDMI renderer appliance.
#
# On an operator workstation (safe-account exclusion) the profile only emits a
# notify and manages nothing.
#
# NOTE(review): the unit files and scripts are shipped as static `source`
# files that hard-code the default user, group and directory values.
# Overriding $user, $group, $install_dir, $state_dir or $log_dir only changes
# what Puppet creates, not what the unit runs with -- confirm before overriding.
#
# @param ensure 'present' installs the appliance; 'absent' stops the service
#   and removes the managed units, udev rule, scripts and env file.
# @param service_enabled Whether the renderer unit is enabled at boot.
# @param service_ensure Desired runtime state of the renderer unit.
# @param service_name Base name of the systemd unit (without `.service`).
# @param user System account that owns the install/state/log directories.
# @param group Primary group of $user.
# @param install_dir Directory that holds the published renderer payload.
# @param state_dir Local runtime state directory (also the account's home).
# @param log_dir Local log directory.
# @param presentation_mode Written to FC_DIVOOM_TV_PRESENTATION_MODE.
# @param startup_scene Written to FC_DIVOOM_TV_START_SCENE.
# @param reduced_motion Written to FC_DIVOOM_TV_REDUCED_MOTION.
class profile::pi::service::divoom_tv (
  Enum['present', 'absent']  $ensure            = 'present',
  Boolean                    $service_enabled   = false,
  Enum['running', 'stopped'] $service_ensure    = 'stopped',
  String                     $service_name      = 'flowercore-divoom-tv',
  String                     $user              = 'fc-divoom-tv',
  String                     $group             = 'fc-divoom-tv',
  String                     $install_dir       = '/opt/flowercore/divoom-tv',
  String                     $state_dir         = '/var/lib/fc-divoom-tv',
  String                     $log_dir           = '/var/log/fc-divoom-tv',
  String                     $presentation_mode = 'PillarboxSquare',
  String                     $startup_scene     = 'bluejay-clock',
  Boolean                    $reduced_motion    = false,
) {
  include profile::workstation::safe_account_exclusion

  $safe_account = $profile::workstation::safe_account_exclusion::safe_account

  # Managed paths, named once so the present and absent branches cannot drift.
  $main_unit_path = "/etc/systemd/system/${service_name}.service"
  $hdmi_unit_path = "/etc/systemd/system/${service_name}-hdmi.service"
  $udev_rule_path = '/etc/udev/rules.d/99-flowercore-divoom-tv-hdmi.rules'
  $env_file_path  = '/etc/flowercore/divoom-tv.env'
  $prelaunch_path = '/usr/local/bin/flowercore-divoom-tv-prelaunch.sh'
  $launch_path    = '/usr/local/bin/flowercore-divoom-tv-launch.sh'
  $respond_path   = '/usr/local/bin/flowercore-divoom-tv-hdmi-respond.sh'

  if $safe_account {
    notify { 'fc-divoom-tv safe-account exclusion':
      message => 'SAFE-ACCOUNT-EXCLUSION: Divoom TV Pi profile refused to apply on operator workstation',
    }
  } else {
    # Refresh-only reloads shared by install and removal, so systemd and udev
    # always pick up added, changed or deleted files.
    exec { 'fc-divoom-tv-systemd-reload':
      command     => 'systemctl daemon-reload',
      refreshonly => true,
      path        => ['/usr/sbin', '/usr/bin', '/sbin', '/bin'],
    }

    exec { 'fc-divoom-tv-udev-reload':
      command     => 'udevadm control --reload-rules',
      refreshonly => true,
      path        => ['/usr/sbin', '/usr/bin', '/sbin', '/bin'],
    }

    if $ensure == 'absent' {
      service { $service_name:
        ensure => stopped,
        enable => false,
      }

      # Stop the service before its unit disappears, then reload systemd so
      # the deleted units are forgotten.
      file { [$main_unit_path, $hdmi_unit_path]:
        ensure  => absent,
        require => Service[$service_name],
        notify  => Exec['fc-divoom-tv-systemd-reload'],
      }

      file { $udev_rule_path:
        ensure => absent,
        notify => Exec['fc-divoom-tv-udev-reload'],
      }

      file { [$prelaunch_path, $launch_path, $respond_path, $env_file_path]:
        ensure  => absent,
        require => Service[$service_name],
      }
    } else {
      case $facts['os']['family'] {
        'Debian': {}
        default: { fail("profile::pi::service::divoom_tv only supports Debian-family OS, got ${facts['os']['family']}") }
      }

      package { ['cage', 'libgbm1', 'libdrm2', 'libxkbcommon0', 'fonts-dejavu-core']:
        ensure => installed,
      }

      group { $group:
        ensure => present,
        system => true,
      }

      user { $user:
        ensure     => present,
        system     => true,
        gid        => $group,
        home       => $state_dir,
        managehome => false,
        shell      => '/usr/sbin/nologin',
        require    => Group[$group],
      }

      # NOTE(review): Puppet does not create missing parent directories; the
      # parent of $install_dir (e.g. /opt/flowercore) must already exist.
      file { [$install_dir, $state_dir, $log_dir, '/etc/flowercore']:
        ensure => directory,
        owner  => $user,
        group  => $group,
        mode   => '0755',
      }

      file { $env_file_path:
        ensure  => file,
        owner   => 'root',
        group   => 'root',
        mode    => '0644',
        content => "FC_DIVOOM_TV_PRESENTATION_MODE=${presentation_mode}\nFC_DIVOOM_TV_START_SCENE=${startup_scene}\nFC_DIVOOM_TV_REDUCED_MOTION=${reduced_motion}\n",
        require => File['/etc/flowercore'],
      }

      $script_map = {
        $prelaunch_path => 'profile/pi/fc_divoom_tv/flowercore-divoom-tv-prelaunch.sh',
        $launch_path    => 'profile/pi/fc_divoom_tv/flowercore-divoom-tv-launch.sh',
        $respond_path   => 'profile/pi/fc_divoom_tv/flowercore-divoom-tv-hdmi-respond.sh',
      }

      $script_map.each |$dest, $src| {
        file { $dest:
          ensure => file,
          owner  => 'root',
          group  => 'root',
          mode   => '0755',
          source => "puppet:///modules/${src}",
        }
      }

      $unit_map = {
        $main_unit_path => 'profile/pi/fc_divoom_tv/flowercore-divoom-tv.service',
        $hdmi_unit_path => 'profile/pi/fc_divoom_tv/flowercore-divoom-tv-hdmi.service',
      }

      $unit_map.each |$dest, $src| {
        file { $dest:
          ensure => file,
          owner  => 'root',
          group  => 'root',
          mode   => '0644',
          source => "puppet:///modules/${src}",
          notify => Exec['fc-divoom-tv-systemd-reload'],
        }
      }

      file { $udev_rule_path:
        ensure => file,
        owner  => 'root',
        group  => 'root',
        mode   => '0644',
        source => 'puppet:///modules/profile/pi/fc_divoom_tv/99-flowercore-divoom-tv-hdmi.rules',
        notify => Exec['fc-divoom-tv-udev-reload'],
      }

      # Subscribe (not just require) so that a changed unit, env file or
      # launcher restarts a running renderer; otherwise new Hiera values are
      # written to disk but never take effect. The daemon-reload exec is
      # required so it always runs before any such restart.
      service { $service_name:
        ensure    => $service_ensure,
        enable    => $service_enabled,
        require   => Exec['fc-divoom-tv-systemd-reload'],
        subscribe => [
          File[$main_unit_path],
          File[$env_file_path],
          File[$prelaunch_path],
          File[$launch_path],
        ],
      }
    }
  }
}
|
||||||
@@ -0,0 +1,5 @@
|
|||||||
|
#!/usr/bin/env bash
# HDMI hotplug responder: waits for DRM to settle, then restarts the renderer.
# Run by flowercore-divoom-tv-hdmi.service.
set -euo pipefail

UNIT="flowercore-divoom-tv.service"

# Give the DRM stack 2 seconds to settle after the hotplug event.
sleep 2

# A plain `restart` also starts a stopped unit. Only act when the renderer is
# meant to be running (enabled at boot or currently active), so a hotplug never
# starts a renderer that was deliberately stopped and disabled.
if systemctl is-enabled --quiet "${UNIT}" || systemctl is-active --quiet "${UNIT}"; then
  systemctl restart "${UNIT}"
else
  echo "[$(date -Is)] ${UNIT} is neither enabled nor active; ignoring HDMI hotplug" >&2
fi
|
||||||
25
apps/fc-divoom-tv-pi/scripts/flowercore-divoom-tv-launch.sh
Normal file
25
apps/fc-divoom-tv-pi/scripts/flowercore-divoom-tv-launch.sh
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
APP_BIN="${FC_DIVOOM_TV_BIN:-/opt/flowercore/divoom-tv/FlowerCore.Divoom.Tv}"
|
||||||
|
STATE_DIR="${FC_DIVOOM_TV_STATE_DIR:-/var/lib/fc-divoom-tv}"
|
||||||
|
LOG_DIR="${FC_DIVOOM_TV_LOG_DIR:-/var/log/fc-divoom-tv}"
|
||||||
|
PRESENTATION_MODE="${FC_DIVOOM_TV_PRESENTATION_MODE:-PillarboxSquare}"
|
||||||
|
START_SCENE="${FC_DIVOOM_TV_START_SCENE:-bluejay-clock}"
|
||||||
|
REDUCED_MOTION="${FC_DIVOOM_TV_REDUCED_MOTION:-false}"
|
||||||
|
|
||||||
|
COMMON_ARGS=(
|
||||||
|
"--target=hdmi"
|
||||||
|
"--presentation-mode=${PRESENTATION_MODE}"
|
||||||
|
"--startup-scene=${START_SCENE}"
|
||||||
|
"--reduced-motion=${REDUCED_MOTION}"
|
||||||
|
"--state-dir=${STATE_DIR}"
|
||||||
|
"--log-dir=${LOG_DIR}"
|
||||||
|
)
|
||||||
|
|
||||||
|
if command -v cage >/dev/null 2>&1; then
|
||||||
|
exec cage -- "${APP_BIN}" "${COMMON_ARGS[@]}" "$@"
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "[$(date -Is)] cage not found; launching FlowerCore.Divoom.Tv directly" >&2
|
||||||
|
exec "${APP_BIN}" "${COMMON_ARGS[@]}" "$@"
|
||||||
@@ -0,0 +1,23 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
APP_BIN="${FC_DIVOOM_TV_BIN:-/opt/flowercore/divoom-tv/FlowerCore.Divoom.Tv}"
|
||||||
|
STATE_DIR="${FC_DIVOOM_TV_STATE_DIR:-/var/lib/fc-divoom-tv}"
|
||||||
|
LOG_DIR="${FC_DIVOOM_TV_LOG_DIR:-/var/log/fc-divoom-tv}"
|
||||||
|
|
||||||
|
mkdir -p "${STATE_DIR}" "${LOG_DIR}"
|
||||||
|
|
||||||
|
if [[ ! -x "${APP_BIN}" ]]; then
|
||||||
|
echo "[$(date -Is)] missing executable ${APP_BIN}" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ -d /sys/class/drm ]] && ! find /sys/class/drm -maxdepth 1 -name 'card*-HDMI-A-*' -print -quit | grep -q .; then
|
||||||
|
echo "[$(date -Is)] no HDMI connector visible yet; continuing so the app can wait for display" >&2
|
||||||
|
fi
|
||||||
|
|
||||||
|
if command -v cage >/dev/null 2>&1; then
|
||||||
|
echo "[$(date -Is)] cage available for fullscreen Wayland launch"
|
||||||
|
else
|
||||||
|
echo "[$(date -Is)] cage not installed; direct launch fallback will be used" >&2
|
||||||
|
fi
|
||||||
@@ -0,0 +1,2 @@
|
|||||||
|
# HDMI hotplug: hand off to the responder unit, which runs the script that
# waits 2s for DRM to settle and restarts the fullscreen Avalonia renderer.
# The kernel emits DRM hotplug uevents on the card device (card0, card1, ...)
# with HOTPLUG=1, not on the per-connector cardN-HDMI-A-N nodes, so match the
# card. --no-block queues the start job and returns immediately, so the udev
# worker is not held for the responder's settle delay.
ACTION=="change", SUBSYSTEM=="drm", KERNEL=="card[0-9]*", ENV{HOTPLUG}=="1", RUN+="/usr/bin/systemctl --no-block start flowercore-divoom-tv-hdmi.service"
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
[Unit]
|
||||||
|
Description=FlowerCore Divoom TV HDMI hotplug responder
|
||||||
|
DefaultDependencies=no
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=oneshot
|
||||||
|
ExecStart=/usr/local/bin/flowercore-divoom-tv-hdmi-respond.sh
|
||||||
40
apps/fc-divoom-tv-pi/systemd/flowercore-divoom-tv.service
Normal file
40
apps/fc-divoom-tv-pi/systemd/flowercore-divoom-tv.service
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
# Fullscreen renderer service: runs the prelaunch check, then the cage-first
# launcher, as the unprivileged fc-divoom-tv account on tty1.
[Unit]
Description=FlowerCore Divoom TV HDMI Renderer (Avalonia fullscreen)
Documentation=https://github.com/astoltz/FlowerCore.Notes/blob/master/docs/standards/divoom-tv-hdmi-multitarget-render-substrate.md
Wants=network-online.target
After=network-online.target systemd-user-sessions.service
ConditionPathExists=/opt/flowercore/divoom-tv/FlowerCore.Divoom.Tv
# Start-rate limiting is a [Unit] setting. Under [Service] systemd does not
# recognize StartLimitIntervalSec=, so the intended "5 starts per 300s" limit
# was not being enforced.
StartLimitBurst=5
StartLimitIntervalSec=300s

[Service]
Type=simple
User=fc-divoom-tv
Group=fc-divoom-tv
WorkingDirectory=/opt/flowercore/divoom-tv
# Leading "-" makes the env file optional; the launcher has built-in defaults.
EnvironmentFile=-/etc/flowercore/divoom-tv.env
Environment=DOTNET_CLI_TELEMETRY_OPTOUT=1
Environment=XDG_RUNTIME_DIR=/run/fc-divoom-tv
RuntimeDirectory=fc-divoom-tv
RuntimeDirectoryMode=0700
ExecStartPre=/usr/local/bin/flowercore-divoom-tv-prelaunch.sh
ExecStart=/usr/local/bin/flowercore-divoom-tv-launch.sh
Restart=always
RestartSec=10s
MemoryMax=2G
MemoryHigh=1500M
PrivateTmp=true
NoNewPrivileges=true
ProtectSystem=strict
ProtectHome=true
ReadWritePaths=/var/lib/fc-divoom-tv /var/log/fc-divoom-tv /run/fc-divoom-tv
# NOTE(review): this unit has no PAMName= and no SupplementaryGroups=; confirm
# that the fc-divoom-tv account can actually open the DRM and input devices
# (via seatd or group membership) when cage starts on the target image.
TTYPath=/dev/tty1
StandardInput=tty
StandardOutput=journal
StandardError=journal
TTYReset=yes
TTYVHangup=yes
TTYVTDisallocate=yes

[Install]
# NOTE(review): only starts at boot when the default target is
# graphical.target -- confirm on the Pi image.
WantedBy=graphical.target
|
||||||
@@ -87,6 +87,20 @@ spec:
|
|||||||
prometheus.io/port: "8080"
|
prometheus.io/port: "8080"
|
||||||
prometheus.io/path: "/metrics"
|
prometheus.io/path: "/metrics"
|
||||||
spec:
|
spec:
|
||||||
|
# Use an explicit DNS policy so external FQDNs like api.anthropic.com are
|
||||||
|
# resolved directly instead of being expanded through the cluster search
|
||||||
|
# path that includes iamworkin.lan.
|
||||||
|
dnsPolicy: None
|
||||||
|
dnsConfig:
|
||||||
|
nameservers:
|
||||||
|
- 10.43.0.10
|
||||||
|
searches:
|
||||||
|
- fc-llm-bridge.svc.cluster.local
|
||||||
|
- svc.cluster.local
|
||||||
|
- cluster.local
|
||||||
|
options:
|
||||||
|
- name: ndots
|
||||||
|
value: "2"
|
||||||
securityContext:
|
securityContext:
|
||||||
fsGroup: 1654
|
fsGroup: 1654
|
||||||
fsGroupChangePolicy: OnRootMismatch
|
fsGroupChangePolicy: OnRootMismatch
|
||||||
@@ -97,7 +111,7 @@ spec:
|
|||||||
# dotnet.exe publish -c Release -o deploy/app \
|
# dotnet.exe publish -c Release -o deploy/app \
|
||||||
# src/FlowerCore.LlmBridge.Web/FlowerCore.LlmBridge.Web.csproj
|
# src/FlowerCore.LlmBridge.Web/FlowerCore.LlmBridge.Web.csproj
|
||||||
# podman build -t localhost/fc-llm-bridge:v<tag> -f deploy/Dockerfile.deploy deploy
|
# podman build -t localhost/fc-llm-bridge:v<tag> -f deploy/Dockerfile.deploy deploy
|
||||||
image: localhost/fc-llm-bridge:v202604292028
|
image: localhost/fc-llm-bridge:v202604300022
|
||||||
imagePullPolicy: Never
|
imagePullPolicy: Never
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 8080
|
- containerPort: 8080
|
||||||
@@ -211,17 +225,6 @@ spec:
|
|||||||
port: 8080
|
port: 8080
|
||||||
initialDelaySeconds: 15
|
initialDelaySeconds: 15
|
||||||
periodSeconds: 30
|
periodSeconds: 30
|
||||||
# Lower ndots so external FQDNs like api.anthropic.com are tried BEFORE
|
|
||||||
# the ndots:5 default expands them through the cluster search path, which
|
|
||||||
# includes iamworkin.lan. CoreDNS has a `template IN A iamworkin.lan`
|
|
||||||
# wildcard that answers `api.anthropic.com.iamworkin.lan` with the
|
|
||||||
# Traefik VIP, which then serves a TRAEFIK-DEFAULT-CERT TLS cert and
|
|
||||||
# breaks egress to the real Anthropic API (memory:
|
|
||||||
# feedback_coredns_ndots_template_collision, generalized to external DNS).
|
|
||||||
dnsConfig:
|
|
||||||
options:
|
|
||||||
- name: ndots
|
|
||||||
value: "2"
|
|
||||||
volumes:
|
volumes:
|
||||||
- name: data
|
- name: data
|
||||||
persistentVolumeClaim:
|
persistentVolumeClaim:
|
||||||
|
|||||||
@@ -69,16 +69,14 @@ spec:
|
|||||||
memory: "512Mi"
|
memory: "512Mi"
|
||||||
cpu: "500m"
|
cpu: "500m"
|
||||||
livenessProbe:
|
livenessProbe:
|
||||||
httpGet:
|
tcpSocket:
|
||||||
path: /health
|
|
||||||
port: 8080
|
port: 8080
|
||||||
initialDelaySeconds: 10
|
initialDelaySeconds: 10
|
||||||
periodSeconds: 30
|
periodSeconds: 30
|
||||||
timeoutSeconds: 5
|
timeoutSeconds: 5
|
||||||
failureThreshold: 3
|
failureThreshold: 3
|
||||||
readinessProbe:
|
readinessProbe:
|
||||||
httpGet:
|
tcpSocket:
|
||||||
path: /health
|
|
||||||
port: 8080
|
port: 8080
|
||||||
initialDelaySeconds: 10
|
initialDelaySeconds: 10
|
||||||
periodSeconds: 10
|
periodSeconds: 10
|
||||||
|
|||||||
171
apps/fc-redis/fc-redis.yaml
Normal file
171
apps/fc-redis/fc-redis.yaml
Normal file
@@ -0,0 +1,171 @@
|
|||||||
|
# fc-redis — SignalR backplane for cross-product event bus
|
||||||
|
#
|
||||||
|
# Lands per Q-SO-1 resolution (2026-05-11 PM): SignalR backplane in Phase A,
|
||||||
|
# not Phase C as originally drafted. Operator directive: "Redis can be
|
||||||
|
# deployed just fine as it's another FlowerCore technology we'll want to
|
||||||
|
# manage."
|
||||||
|
#
|
||||||
|
# Phase A scope (this file):
|
||||||
|
# - Single Redis 7.x Alpine pod
|
||||||
|
# - 1Gi Longhorn RWO PVC for AOF persistence
|
||||||
|
# - ClusterIP Service at `redis.fc-redis.svc.cluster.local:6379`
|
||||||
|
# - No AUTH (in-cluster only; not exposed externally)
|
||||||
|
# - No IngressRoute (backplane is server-to-server only)
|
||||||
|
#
|
||||||
|
# Consumers (Phase A IMPL across FC services):
|
||||||
|
# - FlowerCore.Signage.Web (OpsConsoleHub)
|
||||||
|
# - FlowerCore.Scoreboard.Web (ScoreboardHub)
|
||||||
|
# - FlowerCore.SignalControl.Web
|
||||||
|
# - FlowerCore.DMS.Web
|
||||||
|
# - Any other product joining the cross-product event bus
|
||||||
|
#
|
||||||
|
# Each consumer adds:
|
||||||
|
# services.AddSignalR()
|
||||||
|
# .AddStackExchangeRedis(
|
||||||
|
# "redis.fc-redis.svc.cluster.local:6379",
|
||||||
|
# opts => opts.Configuration.ChannelPrefix =
|
||||||
|
# StackExchange.Redis.RedisChannel.Literal("fc-opsconsole"));
|
||||||
|
#
|
||||||
|
# Phase B / C follow-ons (out of scope here):
|
||||||
|
# - Redis Sentinel for HA (3-node)
|
||||||
|
# - AUTH password from 1Password Connect (rotate via /rotate-password)
|
||||||
|
# - redis_exporter sidecar for Prometheus scrape
|
||||||
|
# - Network policies restricting which namespaces can dial 6379
|
||||||
|
#
|
||||||
|
# Design: docs/signage/operations-console-phase-2-design.md §3.5
|
||||||
|
# Decision: Q-SO-1 (RESOLVED 2026-05-11 PM)
|
||||||
|
# Memory: feedback_blooming_ui_pattern_no_iframes
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Namespace
|
||||||
|
metadata:
|
||||||
|
name: fc-redis
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: fc-redis-data
|
||||||
|
namespace: fc-redis
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteOnce
|
||||||
|
storageClassName: longhorn
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 1Gi
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: fc-redis-config
|
||||||
|
namespace: fc-redis
|
||||||
|
data:
|
||||||
|
redis.conf: |
|
||||||
|
# Phase A — minimal config; no AUTH, no replication.
|
||||||
|
bind 0.0.0.0
|
||||||
|
protected-mode no
|
||||||
|
port 6379
|
||||||
|
tcp-backlog 511
|
||||||
|
timeout 0
|
||||||
|
tcp-keepalive 300
|
||||||
|
|
||||||
|
# Persistence: AOF (fsync every second is the standard SignalR-backplane
|
||||||
|
# durability sweet spot — the backplane only needs to survive Redis
|
||||||
|
# restarts, not absolute zero loss).
|
||||||
|
appendonly yes
|
||||||
|
appendfsync everysec
|
||||||
|
auto-aof-rewrite-percentage 100
|
||||||
|
auto-aof-rewrite-min-size 64mb
|
||||||
|
|
||||||
|
# Reasonable defaults — let Redis pick most things.
|
||||||
|
maxmemory-policy allkeys-lru
|
||||||
|
maxmemory 256mb
|
||||||
|
|
||||||
|
# Logging
|
||||||
|
loglevel notice
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: fc-redis
|
||||||
|
namespace: fc-redis
|
||||||
|
labels:
|
||||||
|
app: fc-redis
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
strategy:
|
||||||
|
type: Recreate # RWO PVC; do not do rolling update
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: fc-redis
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: fc-redis
|
||||||
|
spec:
|
||||||
|
securityContext:
|
||||||
|
runAsNonRoot: true
|
||||||
|
runAsUser: 999 # redis:7-alpine default uid
|
||||||
|
runAsGroup: 999
|
||||||
|
fsGroup: 999
|
||||||
|
containers:
|
||||||
|
- name: redis
|
||||||
|
image: redis:7-alpine
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
command: ["redis-server", "/etc/redis/redis.conf"]
|
||||||
|
ports:
|
||||||
|
- name: redis
|
||||||
|
containerPort: 6379
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: "50m"
|
||||||
|
memory: "128Mi"
|
||||||
|
limits:
|
||||||
|
cpu: "500m"
|
||||||
|
memory: "384Mi"
|
||||||
|
volumeMounts:
|
||||||
|
- name: data
|
||||||
|
mountPath: /data
|
||||||
|
- name: config
|
||||||
|
mountPath: /etc/redis
|
||||||
|
readOnly: true
|
||||||
|
livenessProbe:
|
||||||
|
tcpSocket:
|
||||||
|
port: 6379
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 10
|
||||||
|
readinessProbe:
|
||||||
|
exec:
|
||||||
|
command: ["redis-cli", "ping"]
|
||||||
|
initialDelaySeconds: 2
|
||||||
|
periodSeconds: 5
|
||||||
|
securityContext:
|
||||||
|
allowPrivilegeEscalation: false
|
||||||
|
readOnlyRootFilesystem: true
|
||||||
|
capabilities:
|
||||||
|
drop: [ALL]
|
||||||
|
volumes:
|
||||||
|
- name: data
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: fc-redis-data
|
||||||
|
- name: config
|
||||||
|
configMap:
|
||||||
|
name: fc-redis-config
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: redis
|
||||||
|
namespace: fc-redis
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: fc-redis
|
||||||
|
ports:
|
||||||
|
- name: redis
|
||||||
|
port: 6379
|
||||||
|
targetPort: 6379
|
||||||
|
protocol: TCP
|
||||||
14
apps/fc-signage-appletv/README.md
Normal file
14
apps/fc-signage-appletv/README.md
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
# fc-signage-appletv
|
||||||
|
|
||||||
|
Apple TV signage is a sealed appliance running the `FlowerCore.Signage.Agent.AppleTv` tvOS app per ADR-134.
|
||||||
|
|
||||||
|
This ApplicationSet entry is documentation and inventory metadata only. It intentionally creates no `Deployment`, `Service`, or `Pod`.
|
||||||
|
|
||||||
|
The Apple TV app connects outbound to existing FC.Signage.Web surfaces:
|
||||||
|
|
||||||
|
- `https://signage.iamworkin.lan/hub/signage` for SignalR live status.
|
||||||
|
- `GET /api/v1/nodes/{nodeId}/state` for the 30 second polling fallback.
|
||||||
|
- `POST /api/v1/nodes/register` and `POST /api/v1/nodes/{nodeId}/enroll` for pairing and mTLS enrollment.
|
||||||
|
- `POST /api/v1/nodes/{nodeId}/heartbeat` for metrics, current content identity, and local audit excerpts.
|
||||||
|
|
||||||
|
Distribution is via Apple Developer Enterprise Program or TestFlight plus FC.Distribution / UpdateCenter publishing once Apple credentials are available.
|
||||||
5
apps/fc-signage-appletv/kustomization.yaml
Normal file
5
apps/fc-signage-appletv/kustomization.yaml
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||||
|
kind: Kustomization
|
||||||
|
|
||||||
|
resources:
|
||||||
|
- manifest.yaml
|
||||||
26
apps/fc-signage-appletv/manifest.yaml
Normal file
26
apps/fc-signage-appletv/manifest.yaml
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
# Apple TV signage is a sealed tvOS appliance. This ArgoCD app intentionally
|
||||||
|
# carries documentation metadata only; no Deployment, Service, or Pod resources
|
||||||
|
# are created for the player.
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: fc-signage-appletv-docs
|
||||||
|
namespace: fc-signage
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: fc-signage-appletv
|
||||||
|
app.kubernetes.io/part-of: flowercore-signage
|
||||||
|
flowercore.io/manifest-kind: docs-only
|
||||||
|
data:
|
||||||
|
README: |
|
||||||
|
FlowerCore.Signage.Agent.AppleTv is distributed through Apple Developer
|
||||||
|
Enterprise Program or TestFlight, not Kubernetes.
|
||||||
|
|
||||||
|
The app connects outbound to FC.Signage.Web:
|
||||||
|
- SignalR: https://signage.iamworkin.lan/hub/signage
|
||||||
|
- Polling fallback: GET /api/v1/nodes/{nodeId}/state
|
||||||
|
- Enrollment: POST /api/v1/nodes/register, then POST /api/v1/nodes/{nodeId}/enroll
|
||||||
|
- Heartbeat: POST /api/v1/nodes/{nodeId}/heartbeat
|
||||||
|
|
||||||
|
This placeholder gives ArgoCD and inventory dashboards a first-class
|
||||||
|
Apple TV signage app entry without creating runtime pods.
|
||||||
17
apps/fc-signage-pi-player/README.md
Normal file
17
apps/fc-signage-pi-player/README.md
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
# FlowerCore Signage Pi Player
|
||||||
|
|
||||||
|
Phase 1 Raspberry Pi signage player packaging for Chromium kiosk deployments.
|
||||||
|
This bundle is intentionally air-gap friendly: systemd units, shell scripts,
|
||||||
|
udev rules, and Chromium managed policy are all checked into the repo and are
|
||||||
|
installed by `FlowerCore.Puppet`.
|
||||||
|
|
||||||
|
## Scope
|
||||||
|
|
||||||
|
- Bootstrap a stable node identity and mTLS client certificate.
|
||||||
|
- Launch Chromium in kiosk mode against `FC.Signage.Web` player routes.
|
||||||
|
- Restart the kiosk on HDMI hotplug.
|
||||||
|
- Renew mTLS certificates daily when fewer than 30 days remain.
|
||||||
|
- Detect display capabilities at boot, daily, and on HDMI hotplug.
|
||||||
|
|
||||||
|
Phase 2 native Avalonia rendering is documented separately in Notes and remains
|
||||||
|
deferred.
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
{
|
||||||
|
"AutofillAddressEnabled": false,
|
||||||
|
"AutofillCreditCardEnabled": false,
|
||||||
|
"PasswordManagerEnabled": false,
|
||||||
|
"BrowserSignin": 0,
|
||||||
|
"MetricsReportingEnabled": false,
|
||||||
|
"SafeBrowsingProtectionLevel": 0,
|
||||||
|
"DefaultNotificationsSetting": 2,
|
||||||
|
"DefaultPopupsSetting": 2,
|
||||||
|
"BackgroundModeEnabled": false,
|
||||||
|
"DefaultBrowserSettingEnabled": false,
|
||||||
|
"PromotionalTabsEnabled": false,
|
||||||
|
"CommandLineFlagSecurityWarningsEnabled": false,
|
||||||
|
"ExtensionInstallBlocklist": ["*"]
|
||||||
|
}
|
||||||
132
apps/fc-signage-pi-player/scripts/fc-signage-detect-display
Normal file
132
apps/fc-signage-pi-player/scripts/fc-signage-detect-display
Normal file
@@ -0,0 +1,132 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
NODE_JSON="/etc/flowercore/signage-node.json"
|
||||||
|
CERT_DIR="/etc/fc-signage-player"
|
||||||
|
SIGNAGE_URL="${FC_SIGNAGE_URL:-https://signage.iamworkin.lan}"
|
||||||
|
NODE_ID=$(jq -r '.nodeId' "$NODE_JSON")
|
||||||
|
|
||||||
|
CONNECTORS=()
|
||||||
|
for dir in /sys/class/drm/card*-HDMI-A-*; do
|
||||||
|
[[ -e "$dir/status" ]] || continue
|
||||||
|
if [[ "$(cat "$dir/status")" == "connected" ]]; then
|
||||||
|
CONNECTORS+=("$(basename "$dir")")
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
if [[ ${#CONNECTORS[@]} -eq 0 ]]; then
|
||||||
|
CAPABILITIES_JSON=$(jq -n --arg id "$NODE_ID" '{
|
||||||
|
nodeId: $id,
|
||||||
|
platform: "linux-arm64-pi",
|
||||||
|
displayConnected: false,
|
||||||
|
detectedAt: (now | todate),
|
||||||
|
note: "No HDMI display detected"
|
||||||
|
}')
|
||||||
|
else
|
||||||
|
PRIMARY="${CONNECTORS[0]}"
|
||||||
|
EDID_PATH="/sys/class/drm/${PRIMARY}/edid"
|
||||||
|
WIDTH=0
|
||||||
|
HEIGHT=0
|
||||||
|
REFRESH=60
|
||||||
|
HDR=false
|
||||||
|
AUDIO_HDMI=false
|
||||||
|
MFG=""
|
||||||
|
MODEL=""
|
||||||
|
PHYSICAL_SIZE=null
|
||||||
|
|
||||||
|
if [[ -s "$EDID_PATH" ]] && command -v edid-decode >/dev/null 2>&1; then
|
||||||
|
EDID_INFO=$(edid-decode < "$EDID_PATH" 2>/dev/null || true)
|
||||||
|
MFG=$(echo "$EDID_INFO" | grep -m1 -oP 'Manufacturer:\s*\K\S+' || true)
|
||||||
|
MODEL=$(echo "$EDID_INFO" | grep -m1 -oP 'Model:\s*\K\S+' || true)
|
||||||
|
PREF=$(echo "$EDID_INFO" | grep -m1 -oP '\d+x\d+\s*@\s*\d+(?:\.\d+)?\s*Hz' || true)
|
||||||
|
if [[ -n "$PREF" ]]; then
|
||||||
|
WIDTH=$(echo "$PREF" | grep -oP '^\d+')
|
||||||
|
HEIGHT=$(echo "$PREF" | grep -oP 'x\K\d+')
|
||||||
|
REFRESH=$(echo "$PREF" | grep -oP '@\s*\K[\d.]+' | cut -d. -f1)
|
||||||
|
fi
|
||||||
|
if echo "$EDID_INFO" | grep -qiE 'HDR (Static|Dynamic) Metadata Block'; then HDR=true; fi
|
||||||
|
if echo "$EDID_INFO" | grep -qiE 'CEA Audio Block|Audio Format Descriptor'; then AUDIO_HDMI=true; fi
|
||||||
|
PH_W=$(echo "$EDID_INFO" | grep -m1 -oP 'Maximum image size:\s*\K\d+\s*cm\s*x\s*\d+' || true)
|
||||||
|
if [[ -n "$PH_W" ]]; then
|
||||||
|
PH_CM_W=$(echo "$PH_W" | grep -oP '^\d+')
|
||||||
|
PH_CM_H=$(echo "$PH_W" | grep -oP 'x\s*\K\d+')
|
||||||
|
if (( PH_CM_W > 0 && PH_CM_H > 0 )); then
|
||||||
|
PHYSICAL_SIZE=$(awk -v w="$PH_CM_W" -v h="$PH_CM_H" 'BEGIN { printf "%.1f", sqrt(w*w + h*h)/2.54 }')
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [[ "$WIDTH" == "0" ]] && command -v kmsprint >/dev/null 2>&1; then
|
||||||
|
KMS=$(kmsprint 2>/dev/null | grep -A2 "$PRIMARY" | grep -oP '\d+x\d+' | head -1 || true)
|
||||||
|
if [[ -n "$KMS" ]]; then
|
||||||
|
WIDTH=$(echo "$KMS" | grep -oP '^\d+')
|
||||||
|
HEIGHT=$(echo "$KMS" | grep -oP 'x\K\d+')
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
AUDIO_ALSA=false
|
||||||
|
if aplay -l 2>/dev/null | grep -qi 'card.*HDMI'; then AUDIO_ALSA=true; fi
|
||||||
|
HAS_AUDIO=false
|
||||||
|
if [[ "$AUDIO_HDMI" == "true" && "$AUDIO_ALSA" == "true" ]]; then HAS_AUDIO=true; fi
|
||||||
|
|
||||||
|
CAPABILITIES_JSON=$(jq -n \
|
||||||
|
--arg id "$NODE_ID" \
|
||||||
|
--argjson w "$WIDTH" \
|
||||||
|
--argjson h "$HEIGHT" \
|
||||||
|
--argjson r "$REFRESH" \
|
||||||
|
--argjson hdr "$HDR" \
|
||||||
|
--argjson audio "$HAS_AUDIO" \
|
||||||
|
--arg connector "$PRIMARY" \
|
||||||
|
--arg mfg "$MFG" \
|
||||||
|
--arg model "$MODEL" \
|
||||||
|
--argjson size "$PHYSICAL_SIZE" \
|
||||||
|
'{
|
||||||
|
nodeId: $id,
|
||||||
|
platform: "linux-arm64-pi",
|
||||||
|
displayConnected: true,
|
||||||
|
detectedAt: (now | todate),
|
||||||
|
hardware: {
|
||||||
|
maxResolution: { width: $w, height: $h },
|
||||||
|
nativeResolution: { width: $w, height: $h },
|
||||||
|
refreshRateHz: $r,
|
||||||
|
colorDepth: ($hdr | if . then "Color30Hdr" else "Color24" end),
|
||||||
|
hasAudioOutput: $audio,
|
||||||
|
audioChannelCount: ($audio | if . then 2 else 0 end),
|
||||||
|
physicalSizeInches: $size,
|
||||||
|
connector: $connector,
|
||||||
|
manufacturer: $mfg,
|
||||||
|
modelName: $model
|
||||||
|
},
|
||||||
|
render: { codecs: ["h264", "vp9", "mp4"] }
|
||||||
|
}')
|
||||||
|
fi
|
||||||
|
|
||||||
|
ENDPOINT_CANDIDATES=(
|
||||||
|
"${SIGNAGE_URL}/api/v1/nodes/${NODE_ID}/capabilities"
|
||||||
|
"${SIGNAGE_URL}/api/v1/displays/${NODE_ID}/capability-profile"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Try each candidate endpoint in order until one accepts the profile.
# `url` keeps the accepting endpoint for the log line written after the loop.
SUCCESS=false
for url in "${ENDPOINT_CANDIDATES[@]}"; do
  # The response body is never inspected, so discard it instead of writing to
  # a fixed, predictable path under /tmp.
  # On a transport failure curl already prints "000" for %{http_code}; the
  # fallback is assigned outside the substitution so the value is not doubled.
  # NOTE(review): -k disables server certificate verification even though a
  # client certificate is presented -- confirm whether a CA bundle is available
  # so --cacert can replace it.
  HTTP_STATUS=$(curl -sk -o /dev/null -w "%{http_code}" \
    --max-time 10 \
    --cert "$CERT_DIR/client.crt" --key "$CERT_DIR/client.key" \
    -X POST "$url" \
    -H "Content-Type: application/json" \
    -d "$CAPABILITIES_JSON") || HTTP_STATUS="000"
  case "$HTTP_STATUS" in
    200|201|204)
      SUCCESS=true
      break
      ;;
  esac
done
|
||||||
|
|
||||||
|
mkdir -p /var/log/fc-signage-player
|
||||||
|
if [[ "$SUCCESS" != "true" ]]; then
|
||||||
|
echo "[$(date -Is)] capability declare: no endpoint accepted the profile; logging locally" \
|
||||||
|
| tee -a /var/log/fc-signage-player/capabilities.log
|
||||||
|
echo "$CAPABILITIES_JSON" | tee -a /var/log/fc-signage-player/capabilities.log
|
||||||
|
else
|
||||||
|
echo "[$(date -Is)] capability declare: ok ($url)" | tee -a /var/log/fc-signage-player/capabilities.log
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "$CAPABILITIES_JSON"
|
||||||
@@ -0,0 +1,144 @@
|
|||||||
|
#!/usr/bin/env bash
#
# flowercore-signage-bootstrap.sh - first-boot identity + mTLS enrollment.
#
# Steps: load or create the node identity, register with the signage server,
# optionally self-approve with a setup code, wait (up to 30 min) for approval,
# generate an ECDSA P-256 key + CSR, enroll, bundle the result as PKCS#12 and
# finally queue display detection and the kiosk service.
#
# Exit codes: 0 = enrolled (or already enrolled), 1 = unusable local identity,
#             2 = registration failed, 3 = approval timed out,
#             4 = enrollment failed.
#
# NOTE(review): every curl call uses -k, so the server certificate is never
# verified while enrolling. Pin the deployment CA with --cacert once one is
# provisioned on the image.
set -euo pipefail

NODE_JSON="/etc/flowercore/signage-node.json"
CERT_DIR="/etc/fc-signage-player"
SIGNAGE_URL="${FC_SIGNAGE_URL:-https://signage.iamworkin.lan}"
SETUP_CODE_FILE="/etc/flowercore/signage-setup-code"

# Response bodies go to a private scratch directory instead of fixed,
# predictable names under /tmp.
WORK_DIR=$(mktemp -d)
trap 'rm -rf "$WORK_DIR"' EXIT
REGISTER_RESPONSE="$WORK_DIR/register-response.json"
ENROLL_RESPONSE="$WORK_DIR/enroll-response.json"

# Rewrite NODE_JSON through a jq filter and restore the owner/mode given to it
# at creation (a plain "jq > tmp && mv" leaves a root-owned file behind).
# Usage: update_node_json <jq options and filter...>
update_node_json() {
  jq "$@" "$NODE_JSON" > "${NODE_JSON}.tmp"
  mv "${NODE_JSON}.tmp" "$NODE_JSON"
  chown fc-signage:fc-signage "$NODE_JSON"
  chmod 0640 "$NODE_JSON"
}

# POST a JSON body and print the HTTP status; prints "000" when curl produced
# no status at all. Never fails, so callers can branch on the printed value.
# Usage: post_json <url> <json-body> <response-file>
post_json() {
  local url=$1 body=$2 out=$3 status
  status=$(curl -sk -o "$out" -w "%{http_code}" \
    --max-time 15 \
    -X POST "$url" \
    -H "Content-Type: application/json" \
    -d "$body") || true
  printf '%s\n' "${status:-000}"
}

# True when the server-side node status allows enrollment to proceed.
is_approved() {
  [[ "$1" == "Approved" || "$1" == "Enrolled" || "$1" == "Online" ]]
}

mkdir -p /etc/flowercore "$CERT_DIR" /var/log/fc-signage-player
chown fc-signage:fc-signage /etc/flowercore "$CERT_DIR" /var/log/fc-signage-player
chmod 0750 "$CERT_DIR"

# Idempotency: a node with an identity file, a PKCS#12 bundle and an
# enrollment timestamp has nothing left to do.
if [[ -s "$NODE_JSON" && -s "$CERT_DIR/client.p12" ]]; then
  ENROLLED=$(jq -r '.enrolledAt // empty' "$NODE_JSON")
  if [[ -n "$ENROLLED" ]]; then
    echo "[$(date -Is)] bootstrap: already enrolled at $ENROLLED; skipping"
    exit 0
  fi
fi

# Reuse the stored identity when present so retries register the same machine.
if [[ -s "$NODE_JSON" ]]; then
  NODE_UUID=$(jq -r '.nodeUuid // empty' "$NODE_JSON")
  MACHINE_ID=$(jq -r '.machineId // empty' "$NODE_JSON")
else
  NODE_UUID=$(uuidgen)
  MACHINE_ID=$(echo "$NODE_UUID" | tr -d '-' | cut -c1-16)
  jq -n --arg uuid "$NODE_UUID" --arg machine "$MACHINE_ID" --arg host "$(hostname -f)" --arg ts "$(date -Is)" \
    '{nodeUuid: $uuid, machineId: $machine, hostname: $host, platform: "linux-arm64-pi", createdAt: $ts}' \
    > "$NODE_JSON"
  chmod 0640 "$NODE_JSON"
  chown fc-signage:fc-signage "$NODE_JSON"
fi

if [[ -z "$MACHINE_ID" ]]; then
  echo "[$(date -Is)] bootstrap: $NODE_JSON has no machineId; remove it to regenerate the identity" >&2
  exit 1
fi

SETUP_CODE=""
if [[ -s "$SETUP_CODE_FILE" ]]; then
  SETUP_CODE=$(tr -d '\r\n\t ' < "$SETUP_CODE_FILE")
fi

MODEL=$(tr -d '\0' < /sys/firmware/devicetree/base/model 2>/dev/null || echo Unknown)
REG_PAYLOAD=$(jq -n \
  --arg machine "$MACHINE_ID" \
  --arg name "$(hostname -f)" \
  --arg setup "$SETUP_CODE" \
  --arg resolution "1920x1080" \
  --arg model "$MODEL" \
  '{
    machineId: $machine,
    name: $name,
    setupCode: ($setup | if . == "" then null else . end),
    resolution: $resolution,
    hardwareModel: $model,
    platform: "linux-arm64-pi"
  }')

# Registration is retried once to ride out first-call races on the server.
HTTP_STATUS="000"
for attempt in 1 2; do
  HTTP_STATUS=$(post_json "${SIGNAGE_URL}/api/v1/nodes/register" "$REG_PAYLOAD" "$REGISTER_RESPONSE")
  if [[ "$HTTP_STATUS" == "200" || "$HTTP_STATUS" == "201" ]]; then
    break
  fi
  echo "[$(date -Is)] bootstrap: register attempt $attempt returned $HTTP_STATUS" >&2
  if (( attempt < 2 )); then
    sleep 5
  fi
done

if [[ "$HTTP_STATUS" != "200" && "$HTTP_STATUS" != "201" ]]; then
  echo "[$(date -Is)] bootstrap: register failed after 2 attempts" >&2
  exit 2
fi

# A non-JSON body is treated like a missing nodeId instead of a bare jq error.
NODE_ID=$(jq -r '.nodeId // empty' "$REGISTER_RESPONSE" 2>/dev/null || true)
if [[ -z "$NODE_ID" ]]; then
  echo "[$(date -Is)] bootstrap: register response did not include nodeId" >&2
  exit 2
fi
update_node_json --arg id "$NODE_ID" '.nodeId = $id'

# Best-effort self-approval. The payload is built with jq so a setup code
# containing quotes or backslashes cannot break the JSON document.
if [[ -n "$SETUP_CODE" ]]; then
  APPROVE_PAYLOAD=$(jq -n --arg code "$SETUP_CODE" '{setupCode: $code}')
  post_json "${SIGNAGE_URL}/api/v1/nodes/${NODE_ID}/approve-via-setup-code" \
    "$APPROVE_PAYLOAD" /dev/null > /dev/null || true
fi

# Poll for approval for up to 30 minutes.
STATUS=""
DEADLINE=$(( $(date +%s) + 1800 ))
while (( $(date +%s) < DEADLINE )); do
  # With pipefail + errexit a single curl timeout would otherwise end the
  # whole wait; treat any transport or parse failure as "not approved yet".
  STATUS=$(curl -sk --max-time 5 "${SIGNAGE_URL}/api/v1/nodes/${NODE_ID}/status" \
    | jq -r '.status // empty' 2>/dev/null) || STATUS=""
  if is_approved "$STATUS"; then
    break
  fi
  sleep 15
done

if ! is_approved "$STATUS"; then
  echo "[$(date -Is)] bootstrap: approval not granted within 30min budget" >&2
  exit 3
fi

# Everything created from here on is key material: keep it private from the
# moment it is written, then widen selected files explicitly below.
umask 077

KEY_PATH="${CERT_DIR}/client.key"
CSR_PATH="${CERT_DIR}/client.csr"
openssl ecparam -genkey -name prime256v1 -out "$KEY_PATH"
openssl req -new -key "$KEY_PATH" -out "$CSR_PATH" \
  -subj "/CN=${NODE_ID}/O=FlowerCore/OU=SignagePlayer-Pi"

ENROLL_PAYLOAD=$(jq -n --arg csr "$(cat "$CSR_PATH")" '{certificateSigningRequest: $csr}')
HTTP_STATUS=$(post_json "${SIGNAGE_URL}/api/v1/nodes/${NODE_ID}/enroll" "$ENROLL_PAYLOAD" "$ENROLL_RESPONSE")

if [[ "$HTTP_STATUS" != "200" && "$HTTP_STATUS" != "201" ]]; then
  echo "[$(date -Is)] bootstrap: enroll failed with HTTP $HTTP_STATUS" >&2
  exit 4
fi

# jq -e fails on null, so a response without the PEM fields is rejected
# instead of writing the literal text "null" into the certificate files.
if ! jq -er '.clientCertificatePem // .signedCertificatePem' "$ENROLL_RESPONSE" > "${CERT_DIR}/client.crt" \
  || ! jq -er '.caCertificatePem' "$ENROLL_RESPONSE" > "${CERT_DIR}/ca-chain.pem"; then
  echo "[$(date -Is)] bootstrap: enroll response did not include certificate material" >&2
  exit 4
fi

P12_PASS=$(openssl rand -hex 24)
printf '%s' "$P12_PASS" > "${CERT_DIR}/client.p12.pass"
chmod 0600 "${CERT_DIR}/client.p12.pass"

# The password is read from the file so it never appears in the process list.
openssl pkcs12 -export \
  -inkey "$KEY_PATH" \
  -in "${CERT_DIR}/client.crt" \
  -certfile "${CERT_DIR}/ca-chain.pem" \
  -out "${CERT_DIR}/client.p12" \
  -passout "file:${CERT_DIR}/client.p12.pass"

chown fc-signage:fc-signage "${CERT_DIR}"/* "$NODE_JSON"
chmod 0640 "${CERT_DIR}/client.p12" "${CERT_DIR}/client.crt" "${CERT_DIR}/ca-chain.pem"
chmod 0600 "$KEY_PATH" "${CERT_DIR}/client.p12.pass"

EXPIRY=$(openssl x509 -in "${CERT_DIR}/client.crt" -enddate -noout | sed 's/notAfter=//')
update_node_json --arg ts "$(date -Is)" --arg exp "$EXPIRY" \
  '.enrolledAt = $ts | .certExpiry = $exp'

# --no-block: the detect-display unit is ordered After= the bootstrap service
# and the bootstrap service is ordered Before= the player unit, so a blocking
# start issued from inside this script would wait on this script's own exit.
systemctl start --no-block flowercore-signage-detect-display.service || true
systemctl start --no-block flowercore-signage-player-pi.service || true
echo "[$(date -Is)] bootstrap: enrolled; display detection and kiosk start queued (NodeId=${NODE_ID})"
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
#!/usr/bin/env bash
#
# HDMI hotplug responder: give the display link two seconds to settle,
# redeclare the display capabilities (best effort), then restart the kiosk.
set -euo pipefail

sleep 2

# A failed capability declaration must not prevent the kiosk restart.
if ! systemctl start flowercore-signage-detect-display.service; then
  :
fi

systemctl restart flowercore-signage-player-pi.service
|
||||||
@@ -0,0 +1,44 @@
|
|||||||
|
#!/usr/bin/env bash
#
# flowercore-signage-launch.sh - resolve the player URL and exec Chromium in
# kiosk mode.
#
# The URL carries the SHA-256 fingerprint of the client certificate as its
# token. The /embed endpoint is probed first; when it does not answer with
# 200/301/302 the divergence is logged and the bare /player URL is used.
# Exits 1 when the node id or the certificate fingerprint cannot be determined.
set -euo pipefail

NODE_JSON="/etc/flowercore/signage-node.json"
SIGNAGE_URL="${FC_SIGNAGE_URL:-https://signage.iamworkin.lan}"
CERT_DIR="/etc/fc-signage-player"
LOG_DIR="/var/log/fc-signage-player"

# "// empty" keeps a missing nodeId from turning into the literal text "null".
NODE_ID=$(jq -r '.nodeId // empty' "$NODE_JSON")
if [[ -z "$NODE_ID" ]]; then
  echo "[$(date -Is)] launch: $NODE_JSON has no nodeId" >&2
  exit 1
fi

# -clcerts limits the output to the client certificate (the bundle also holds
# the CA chain), matching what the prelaunch check inspects.
CERT_THUMB=$(openssl pkcs12 -in "$CERT_DIR/client.p12" -passin file:"$CERT_DIR/client.p12.pass" -nodes -nokeys -clcerts 2>/dev/null \
  | openssl x509 -fingerprint -sha256 -noout \
  | sed 's/.*=//' \
  | tr -d ':') || CERT_THUMB=""
if [[ -z "$CERT_THUMB" ]]; then
  echo "[$(date -Is)] launch: could not read the client certificate fingerprint from $CERT_DIR/client.p12" >&2
  exit 1
fi

PLAYER_URL="${SIGNAGE_URL}/player/${NODE_ID}/embed?token=${CERT_THUMB}"

# curl already prints 000 through -w when the transfer fails, so a failed
# probe only needs its exit status ignored.
HTTP_STATUS=$(curl -sk -o /dev/null -w "%{http_code}" --max-time 5 \
  --cert-type P12 --cert "$CERT_DIR/client.p12:$(cat "$CERT_DIR/client.p12.pass")" \
  "$PLAYER_URL") || true
HTTP_STATUS=${HTTP_STATUS:-000}

mkdir -p "$LOG_DIR"
if [[ "$HTTP_STATUS" != "200" && "$HTTP_STATUS" != "301" && "$HTTP_STATUS" != "302" ]]; then
  echo "[$(date -Is)] /embed returned $HTTP_STATUS; falling back to /player/${NODE_ID}" \
    >> "$LOG_DIR/url-divergence.log"
  PLAYER_URL="${SIGNAGE_URL}/player/${NODE_ID}?token=${CERT_THUMB}"
fi

# exec so the browser replaces this shell as the process the service tracks.
exec chromium-browser \
  --kiosk \
  --noerrdialogs \
  --disable-infobars \
  --disable-translate \
  --disable-features=TranslateUI,InfiniteSessionRestore \
  --autoplay-policy=no-user-gesture-required \
  --password-store=basic \
  --user-data-dir=/var/lib/fc-signage-player/profile \
  --disk-cache-dir=/var/lib/fc-signage-player/cache \
  --disk-cache-size=104857600 \
  --no-first-run \
  --no-default-browser-check \
  --check-for-update-interval=2592000 \
  --enable-features=OverlayScrollbar \
  --start-fullscreen \
  --window-position=0,0 \
  --window-size=1920,1080 \
  "$PLAYER_URL"
|
||||||
@@ -0,0 +1,20 @@
|
|||||||
|
#!/usr/bin/env bash
#
# flowercore-signage-prelaunch.sh - sanity checks run before the kiosk starts.
#
# Exits 1 when the node identity, the PKCS#12 bundle or its password file is
# missing/unreadable, or when the bundle cannot be opened. A certificate that
# expires within 7 days only produces a warning.
set -euo pipefail

LOG_DIR="/var/log/fc-signage-player"
LOG_FILE="$LOG_DIR/prelaunch.log"

mkdir -p "$LOG_DIR"

for f in /etc/flowercore/signage-node.json /etc/fc-signage-player/client.p12 /etc/fc-signage-player/client.p12.pass; do
  if [[ ! -r "$f" ]]; then
    echo "[$(date -Is)] prelaunch: missing or unreadable $f" >&2
    exit 1
  fi
done

# Extract the client certificate first so that a corrupt bundle or a wrong
# password is reported as such instead of as an upcoming expiry.
if ! CLIENT_CERT=$(openssl pkcs12 -in /etc/fc-signage-player/client.p12 -passin file:/etc/fc-signage-player/client.p12.pass -nokeys -clcerts 2>/dev/null); then
  echo "[$(date -Is)] prelaunch: cannot read the client certificate from /etc/fc-signage-player/client.p12" | tee -a "$LOG_FILE" >&2
  exit 1
fi

# -checkend exits non-zero when the certificate expires inside the window.
if ! printf '%s\n' "$CLIENT_CERT" | openssl x509 -checkend $((7*24*3600)) -noout; then
  echo "[$(date -Is)] prelaunch: client cert expires within 7 days" | tee -a "$LOG_FILE" >&2
fi

echo "[$(date -Is)] prelaunch: ok" | tee -a "$LOG_FILE"
|
||||||
@@ -0,0 +1,46 @@
|
|||||||
|
#!/usr/bin/env bash
#
# flowercore-signage-renew-cert.sh - renew the client certificate once it is
# within 30 days of expiry.
#
# A fresh key + CSR is sent to the renew endpoint, authenticated with the
# current certificate. The new key, certificate, CA chain and PKCS#12 bundle
# are staged as *.new files and only swapped in once all of them exist; on any
# failure the staged files are removed and the old material stays in place.
#
# Exit codes: 0 = nothing to do or renewed, 5 = renewal failed.
set -euo pipefail

CERT_DIR="/etc/fc-signage-player"
NODE_JSON="/etc/flowercore/signage-node.json"
SIGNAGE_URL="${FC_SIGNAGE_URL:-https://signage.iamworkin.lan}"

[[ -s "$CERT_DIR/client.crt" ]] || { echo "no cert to renew"; exit 0; }

if openssl x509 -in "$CERT_DIR/client.crt" -checkend $((30*24*3600)) -noout; then
  exit 0
fi

# "// empty" keeps a missing nodeId from becoming the literal text "null".
NODE_ID=$(jq -r '.nodeId // empty' "$NODE_JSON")
if [[ -z "$NODE_ID" ]]; then
  echo "[$(date -Is)] renew: $NODE_JSON has no nodeId" >&2
  exit 5
fi

NEW_KEY="$CERT_DIR/client.key.new"
NEW_CSR="$CERT_DIR/client.csr.new"
RESPONSE=$(mktemp)

# Remove staged material (including an unused private key) on every exit;
# after a successful swap these paths no longer exist and rm -f is a no-op.
cleanup() {
  rm -f "$RESPONSE" "$NEW_KEY" "$NEW_CSR" "${NODE_JSON}.tmp" \
    "$CERT_DIR/client.crt.new" "$CERT_DIR/ca-chain.pem.new" "$CERT_DIR/client.p12.new"
}
trap cleanup EXIT

# Staged files hold key material: create them private, widen explicitly below.
umask 077

openssl ecparam -genkey -name prime256v1 -out "$NEW_KEY"
openssl req -new -key "$NEW_KEY" -out "$NEW_CSR" \
  -subj "/CN=${NODE_ID}/O=FlowerCore/OU=SignagePlayer-Pi"

# --max-time bounds a hung connection; "|| true" lets a transport failure
# reach the status check below instead of ending the script without a message.
HTTP_STATUS=$(curl -sk -o "$RESPONSE" -w "%{http_code}" \
  --max-time 15 \
  --cert "$CERT_DIR/client.crt" --key "$CERT_DIR/client.key" \
  -X POST "${SIGNAGE_URL}/api/v1/nodes/${NODE_ID}/renew" \
  -H "Content-Type: application/json" \
  -d "$(jq -n --arg csr "$(cat "$NEW_CSR")" '{certificateSigningRequest: $csr}')") || true
HTTP_STATUS=${HTTP_STATUS:-000}

if [[ "$HTTP_STATUS" != "200" && "$HTTP_STATUS" != "201" ]]; then
  echo "[$(date -Is)] renew: failed HTTP $HTTP_STATUS; leaving old cert in place" >&2
  exit 5
fi

# jq -e fails on null, so a response without the PEM fields is rejected.
if ! jq -er '.clientCertificatePem // .signedCertificatePem' "$RESPONSE" > "$CERT_DIR/client.crt.new" \
  || ! jq -er '.caCertificatePem' "$RESPONSE" > "$CERT_DIR/ca-chain.pem.new"; then
  echo "[$(date -Is)] renew: response did not include certificate material; leaving old cert in place" >&2
  exit 5
fi

# The existing bundle password is reused and read from its file so it never
# appears in the process list.
openssl pkcs12 -export -inkey "$NEW_KEY" -in "$CERT_DIR/client.crt.new" \
  -certfile "$CERT_DIR/ca-chain.pem.new" \
  -out "$CERT_DIR/client.p12.new" -passout "file:$CERT_DIR/client.p12.pass"

# Give the staged files their final owner and mode before the swap, using the
# same modes the bootstrap script applies to the non-key files.
chown fc-signage:fc-signage "$NEW_KEY" "$CERT_DIR/client.crt.new" \
  "$CERT_DIR/ca-chain.pem.new" "$CERT_DIR/client.p12.new"
chmod 0600 "$NEW_KEY"
chmod 0640 "$CERT_DIR/client.crt.new" "$CERT_DIR/ca-chain.pem.new" "$CERT_DIR/client.p12.new"

mv "$CERT_DIR/client.key.new" "$CERT_DIR/client.key"
mv "$CERT_DIR/client.crt.new" "$CERT_DIR/client.crt"
mv "$CERT_DIR/ca-chain.pem.new" "$CERT_DIR/ca-chain.pem"
mv "$CERT_DIR/client.p12.new" "$CERT_DIR/client.p12"

chown fc-signage:fc-signage "$CERT_DIR"/client.*

# Keep the certExpiry recorded at enrollment in step with the new certificate.
# Best effort: the renewal itself has already succeeded at this point.
EXPIRY=$(openssl x509 -in "$CERT_DIR/client.crt" -enddate -noout | sed 's/notAfter=//')
if jq --arg exp "$EXPIRY" '.certExpiry = $exp' "$NODE_JSON" > "${NODE_JSON}.tmp"; then
  mv "${NODE_JSON}.tmp" "$NODE_JSON"
  chown fc-signage:fc-signage "$NODE_JSON"
  chmod 0640 "$NODE_JSON"
else
  echo "[$(date -Is)] renew: could not update certExpiry in $NODE_JSON" >&2
fi

systemctl restart flowercore-signage-player-pi.service
|
||||||
@@ -0,0 +1,2 @@
|
|||||||
|
# On an HDMI change, start the hotplug responder service: it lets DRM settle for 2s, redeclares display capabilities, then restarts Chromium.
|
||||||
|
SUBSYSTEM=="drm", KERNEL=="card?-HDMI-A-?", ACTION=="change", RUN+="/usr/bin/systemctl start flowercore-signage-player-pi-hdmi.service"
|
||||||
@@ -0,0 +1,16 @@
|
|||||||
|
# One-shot enrollment unit; runs the bootstrap script once the network is up
# and is ordered before the kiosk player.
[Unit]
Description=FlowerCore Signage Pi: first-boot identity + mTLS enrollment
Before=flowercore-signage-player-pi.service
After=network-online.target
Wants=network-online.target

[Service]
Type=oneshot
RemainAfterExit=yes
# The bootstrap script waits up to 1800s for approval; leave headroom on top.
TimeoutStartSec=2100
ExecStart=/usr/local/bin/flowercore-signage-bootstrap.sh
StandardError=journal
StandardOutput=journal

[Install]
WantedBy=multi-user.target
|
||||||
@@ -0,0 +1,8 @@
|
|||||||
|
# One-shot display probe, run as the unprivileged signage user and ordered
# after enrollment.
[Unit]
Description=FlowerCore Signage Pi: detect connected display + declare capabilities
After=flowercore-signage-bootstrap.service

[Service]
User=fc-signage
Type=oneshot
ExecStart=/usr/local/bin/fc-signage-detect-display
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
# Fires 30s after boot and then once a day (with up to 1h of jitter);
# Persistent=true catches up on a run missed while powered off.
[Unit]
Description=Daily FlowerCore Signage Pi display capability redeclaration

[Timer]
OnBootSec=30s
OnCalendar=daily
Persistent=true
RandomizedDelaySec=1h

[Install]
WantedBy=timers.target
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
# One-shot wrapper around the HDMI hotplug responder script; started on
# demand, hence no [Install] section.
[Unit]
DefaultDependencies=no
Description=FlowerCore Signage Pi Player HDMI hotplug responder

[Service]
ExecStart=/usr/local/bin/flowercore-signage-hdmi-respond.sh
Type=oneshot
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
# Chromium kiosk player. Starts only once the node identity and the PKCS#12
# client bundle exist, runs as the unprivileged signage user and is restarted
# whenever it exits.
[Unit]
Description=FlowerCore Digital Signage Pi Player (Chromium kiosk)
Documentation=https://github.com/astoltz/FlowerCore.Notes/blob/master/docs/standards/appletv-pi-signage-agents-design.md
Wants=network-online.target
After=network-online.target graphical.target
ConditionPathExists=/etc/flowercore/signage-node.json
ConditionPathExists=/etc/fc-signage-player/client.p12
# Start rate limiting is configured in [Unit]; StartLimitIntervalSec= is not
# a [Service] key, so the 300s window only takes effect from here.
StartLimitBurst=5
StartLimitIntervalSec=300s

[Service]
Type=simple
User=fc-signage
Group=fc-signage
WorkingDirectory=/var/lib/fc-signage-player
# Leading "-": the environment file is optional.
EnvironmentFile=-/etc/flowercore/signage-player.env
ExecStartPre=/usr/local/bin/flowercore-signage-prelaunch.sh
ExecStart=/usr/local/bin/flowercore-signage-launch.sh
Restart=always
RestartSec=10s
# Soft throttle at 1500M, hard cap at 2G.
MemoryMax=2G
MemoryHigh=1500M
# Read-only system view; only the state and log directories stay writable.
ProtectSystem=strict
ProtectHome=true
ReadWritePaths=/var/lib/fc-signage-player /var/log/fc-signage-player
PrivateTmp=true
NoNewPrivileges=true

[Install]
WantedBy=graphical.target
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
# One-shot worker that runs the certificate renewal script.
[Unit]
Description=FlowerCore Signage Pi: cert renewal worker

[Service]
ExecStart=/usr/local/bin/flowercore-signage-renew-cert.sh
Type=oneshot
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
# Daily renewal check with up to 2h of jitter; Persistent=true catches up on
# a run missed while powered off.
[Unit]
Description=Daily check for FlowerCore Signage Pi cert renewal

[Timer]
Persistent=true
OnCalendar=daily
RandomizedDelaySec=2h

[Install]
WantedBy=timers.target
|
||||||
22
apps/fc-signage-pi-player/tests/display_capability.bats
Normal file
22
apps/fc-signage-pi-player/tests/display_capability.bats
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
#!/usr/bin/env bats
|
||||||
|
|
||||||
|
setup() {
|
||||||
|
APP_ROOT="$(cd "$BATS_TEST_DIRNAME/.." && pwd)"
|
||||||
|
DETECT="$APP_ROOT/scripts/fc-signage-detect-display"
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "display detection emits graceful disconnected profile when no hdmi connector is present" {
|
||||||
|
script="$(cat "$DETECT")"
|
||||||
|
[[ "$script" == *"displayConnected: false"* ]]
|
||||||
|
[[ "$script" == *"No HDMI display detected"* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "display detection parses edid, falls back to kmsprint, and logs endpoint failures locally" {
|
||||||
|
script="$(cat "$DETECT")"
|
||||||
|
[[ "$script" == *"edid-decode"* ]]
|
||||||
|
[[ "$script" == *"HDR (Static|Dynamic) Metadata Block"* ]]
|
||||||
|
[[ "$script" == *"kmsprint"* ]]
|
||||||
|
[[ "$script" == *"/api/v1/nodes/\${NODE_ID}/capabilities"* ]]
|
||||||
|
[[ "$script" == *"/api/v1/displays/\${NODE_ID}/capability-profile"* ]]
|
||||||
|
[[ "$script" == *"capabilities.log"* ]]
|
||||||
|
}
|
||||||
64
apps/fc-signage-pi-player/tests/identity_bootstrap.bats
Normal file
64
apps/fc-signage-pi-player/tests/identity_bootstrap.bats
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
#!/usr/bin/env bats
|
||||||
|
|
||||||
|
setup() {
|
||||||
|
APP_ROOT="$(cd "$BATS_TEST_DIRNAME/.." && pwd)"
|
||||||
|
BOOTSTRAP="$APP_ROOT/scripts/flowercore-signage-bootstrap.sh"
|
||||||
|
RENEW="$APP_ROOT/scripts/flowercore-signage-renew-cert.sh"
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "bootstrap is idempotent when node is already enrolled" {
|
||||||
|
script="$(cat "$BOOTSTRAP")"
|
||||||
|
[[ "$script" == *'[[ -s "$NODE_JSON" && -s "$CERT_DIR/client.p12" ]]'* ]]
|
||||||
|
[[ "$script" == *"already enrolled"* ]]
|
||||||
|
[[ "$script" == *"exit 0"* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "bootstrap generates a stable node uuid and machine id" {
|
||||||
|
script="$(cat "$BOOTSTRAP")"
|
||||||
|
[[ "$script" == *"uuidgen"* ]]
|
||||||
|
[[ "$script" == *"nodeUuid"* ]]
|
||||||
|
[[ "$script" == *"machineId"* ]]
|
||||||
|
[[ "$script" == *"cut -c1-16"* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "bootstrap posts to the canonical register endpoint" {
|
||||||
|
grep -q '/api/v1/nodes/register' "$BOOTSTRAP"
|
||||||
|
grep -q '"linux-arm64-pi"' "$BOOTSTRAP"
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "bootstrap retries registration once for first-call races" {
|
||||||
|
script="$(cat "$BOOTSTRAP")"
|
||||||
|
[[ "$script" == *"for attempt in 1 2"* ]]
|
||||||
|
[[ "$script" == *"register attempt \$attempt returned"* ]]
|
||||||
|
[[ "$script" == *"sleep 5"* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "bootstrap supports setup-code approval with manual polling fallback" {
|
||||||
|
script="$(cat "$BOOTSTRAP")"
|
||||||
|
[[ "$script" == *"signage-setup-code"* ]]
|
||||||
|
[[ "$script" == *"approve-via-setup-code"* ]]
|
||||||
|
[[ "$script" == *"+ 1800"* ]]
|
||||||
|
[[ "$script" == *"sleep 15"* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "bootstrap generates an ecdsa p256 csr for the signage pi subject" {
|
||||||
|
script="$(cat "$BOOTSTRAP")"
|
||||||
|
[[ "$script" == *"ecparam -genkey -name prime256v1"* ]]
|
||||||
|
[[ "$script" == *'/CN=${NODE_ID}/O=FlowerCore/OU=SignagePlayer-Pi'* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "bootstrap writes pkcs12 bundle with restrictive permissions" {
|
||||||
|
script="$(cat "$BOOTSTRAP")"
|
||||||
|
[[ "$script" == *"openssl pkcs12 -export"* ]]
|
||||||
|
[[ "$script" == *"client.p12.pass"* ]]
|
||||||
|
[[ "$script" == *"chmod 0640"* ]]
|
||||||
|
[[ "$script" == *"chmod 0600"* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "renewal only calls renew endpoint inside the thirty-day window and swaps atomically" {
|
||||||
|
script="$(cat "$RENEW")"
|
||||||
|
[[ "$script" == *'-checkend $((30*24*3600))'* ]]
|
||||||
|
[[ "$script" == *"/api/v1/nodes/\${NODE_ID}/renew"* ]]
|
||||||
|
[[ "$script" == *"client.key.new"* ]]
|
||||||
|
[[ "$script" == *'mv "$CERT_DIR/client.p12.new" "$CERT_DIR/client.p12"'* ]]
|
||||||
|
}
|
||||||
68
apps/fc-signage-pi-player/tests/systemd_kiosk_wrapper.bats
Normal file
68
apps/fc-signage-pi-player/tests/systemd_kiosk_wrapper.bats
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
#!/usr/bin/env bats
|
||||||
|
|
||||||
|
setup() {
|
||||||
|
APP_ROOT="$(cd "$BATS_TEST_DIRNAME/.." && pwd)"
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "player unit exists" {
|
||||||
|
[ -f "$APP_ROOT/systemd/flowercore-signage-player-pi.service" ]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "player unit uses simple chromium service with restart backoff" {
|
||||||
|
unit="$(cat "$APP_ROOT/systemd/flowercore-signage-player-pi.service")"
|
||||||
|
[[ "$unit" == *"Type=simple"* ]]
|
||||||
|
[[ "$unit" == *"Restart=always"* ]]
|
||||||
|
[[ "$unit" == *"RestartSec=10s"* ]]
|
||||||
|
[[ "$unit" == *"StartLimitBurst=5"* ]]
|
||||||
|
[[ "$unit" == *"StartLimitIntervalSec=300s"* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "player unit caps chromium memory at two gigabytes" {
|
||||||
|
grep -q '^MemoryMax=2G$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
|
||||||
|
grep -q '^MemoryHigh=1500M$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "player unit condition-gates startup on identity and p12 certificate" {
|
||||||
|
grep -q '^ConditionPathExists=/etc/flowercore/signage-node.json$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
|
||||||
|
grep -q '^ConditionPathExists=/etc/fc-signage-player/client.p12$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "player unit runs prelaunch checks before chromium" {
|
||||||
|
grep -q '^ExecStartPre=/usr/local/bin/flowercore-signage-prelaunch.sh$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
|
||||||
|
grep -q '^ExecStart=/usr/local/bin/flowercore-signage-launch.sh$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "hdmi udev rule routes through the two-second settle service" {
|
||||||
|
rule="$(cat "$APP_ROOT/systemd/99-flowercore-signage-hdmi.rules")"
|
||||||
|
[[ "$rule" == *'KERNEL=="card?-HDMI-A-?"'* ]]
|
||||||
|
[[ "$rule" == *"systemctl start flowercore-signage-player-pi-hdmi.service"* ]]
|
||||||
|
[[ "$rule" != *"systemctl restart flowercore-signage-player-pi.service"* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "hdmi responder settles, declares display, then restarts chromium" {
|
||||||
|
responder="$(cat "$APP_ROOT/scripts/flowercore-signage-hdmi-respond.sh")"
|
||||||
|
[[ "$responder" == *"sleep 2"* ]]
|
||||||
|
[[ "$responder" == *"systemctl start flowercore-signage-detect-display.service"* ]]
|
||||||
|
[[ "$responder" == *"systemctl restart flowercore-signage-player-pi.service"* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "chromium policy json is valid and disables credential prompts" {
|
||||||
|
command -v jq >/dev/null || skip "jq not installed"
|
||||||
|
jq -e '.AutofillAddressEnabled == false and .AutofillCreditCardEnabled == false and .PasswordManagerEnabled == false' \
|
||||||
|
"$APP_ROOT/chromium-policies/flowercore-signage.json" >/dev/null
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "launch script tries embed URL and logs bare-player fallback" {
|
||||||
|
launch="$(cat "$APP_ROOT/scripts/flowercore-signage-launch.sh")"
|
||||||
|
[[ "$launch" == *'/player/${NODE_ID}/embed?token=${CERT_THUMB}'* ]]
|
||||||
|
[[ "$launch" == *"url-divergence.log"* ]]
|
||||||
|
[[ "$launch" == *'/player/${NODE_ID}?token=${CERT_THUMB}'* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "prelaunch script validates required node and cert files" {
|
||||||
|
prelaunch="$(cat "$APP_ROOT/scripts/flowercore-signage-prelaunch.sh")"
|
||||||
|
[[ "$prelaunch" == *"/etc/flowercore/signage-node.json"* ]]
|
||||||
|
[[ "$prelaunch" == *"/etc/fc-signage-player/client.p12"* ]]
|
||||||
|
[[ "$prelaunch" == *"/etc/fc-signage-player/client.p12.pass"* ]]
|
||||||
|
[[ "$prelaunch" == *"exit 1"* ]]
|
||||||
|
}
|
||||||
@@ -76,15 +76,13 @@ spec:
|
|||||||
memory: "512Mi"
|
memory: "512Mi"
|
||||||
cpu: "500m"
|
cpu: "500m"
|
||||||
livenessProbe:
|
livenessProbe:
|
||||||
httpGet:
|
tcpSocket:
|
||||||
path: /health
|
|
||||||
port: http
|
port: http
|
||||||
initialDelaySeconds: 30
|
initialDelaySeconds: 30
|
||||||
periodSeconds: 30
|
periodSeconds: 30
|
||||||
timeoutSeconds: 5
|
timeoutSeconds: 5
|
||||||
readinessProbe:
|
readinessProbe:
|
||||||
httpGet:
|
tcpSocket:
|
||||||
path: /health
|
|
||||||
port: http
|
port: http
|
||||||
initialDelaySeconds: 10
|
initialDelaySeconds: 10
|
||||||
periodSeconds: 10
|
periodSeconds: 10
|
||||||
|
|||||||
@@ -30,6 +30,7 @@ import logging
|
|||||||
import re
|
import re
|
||||||
import shlex
|
import shlex
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import unicodedata
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from fastapi import FastAPI, HTTPException
|
from fastapi import FastAPI, HTTPException
|
||||||
@@ -60,6 +61,189 @@ class TtsRequest(BaseModel):
|
|||||||
volume: int = 100 # 0-200
|
volume: int = 100 # 0-200
|
||||||
|
|
||||||
|
|
||||||
|
HEBREW_CHAR_RE = re.compile(r"[\u0590-\u05FF]")
HEBREW_WORD_RE = re.compile(r"[\u0590-\u05FF]+")

# eSpeak-NG's Hebrew voice can spell unpointed Hebrew as Unicode character
# names on some builds. For source-text study reads, prefer a stable
# scholarly transliteration so words sound like words even without niqqud.
HEBREW_WORD_TRANSLITERATIONS = {
    "אב": "av",
    "אבא": "abba",
    "אברהם": "Avraham",
    "אדמה": "adamah",
    "אדני": "Adonai",
    "אדם": "adam",
    "אור": "or",
    "אלהים": "Elohim",
    "אלוהים": "Elohim",
    "אמן": "amen",
    "אם": "em",
    "אמת": "emet",
    "ארץ": "eretz",
    "אש": "esh",
    "את": "et",
    "בית": "beit",
    "בן": "ben",
    "ברא": "bara",
    "בראשית": "bereshit",
    "ברית": "berit",
    "ברוך": "barukh",
    "בת": "bat",
    "גוי": "goy",
    "גוים": "goyim",
    "גויים": "goyim",
    "דבר": "davar",
    "דברים": "devarim",
    "דוד": "David",
    "הלל": "hallel",
    "הארץ": "ha-aretz",
    "הברית": "ha-berit",
    "החדשה": "ha-chadashah",
    "השמים": "ha-shamayim",
    "השמיים": "ha-shamayim",
    "ויאמר": "vayomer",
    "יהוה": "Adonai",
    "יוסף": "Yosef",
    "יוחנן": "Yochanan",
    "ישראל": "Yisrael",
    "ישוע": "Yeshua",
    "יצחק": "Yitzchak",
    "יעקב": "Yaakov",
    "ירושלים": "Yerushalayim",
    "כהן": "kohen",
    "כהנים": "kohanim",
    "מים": "mayim",
    "מות": "mavet",
    "מושיע": "moshia",
    "מלך": "melekh",
    "מלכות": "malkhut",
    "מרים": "Miriam",
    "משה": "Moshe",
    "משיח": "Mashiach",
    "נביא": "navi",
    "נביאים": "neviim",
    "עם": "am",
    "עולם": "olam",
    "צדק": "tzedek",
    "קדוש": "qadosh",
    "קדושים": "qedoshim",
    "קול": "qol",
    "רוח": "ruach",
    "שאול": "Shaul",
    "שמים": "shamayim",
    "שמיים": "shamayim",
    "שמעון": "Shimon",
    "שלום": "Shalom",
    "תורה": "torah",
    "חכמה": "chokhmah",
    "חסד": "chesed",
    "חיים": "chayim",
    "חושך": "choshekh",
}

# Per-letter consonant values used when a word is not in the table above.
HEBREW_LETTERS = {
    "א": "a",
    "ב": "b",
    "ג": "g",
    "ד": "d",
    "ה": "h",
    "ו": "v",
    "ז": "z",
    "ח": "kh",
    "ט": "t",
    "י": "y",
    "כ": "kh",
    "ך": "kh",
    "ל": "l",
    "מ": "m",
    "ם": "m",
    "נ": "n",
    "ן": "n",
    "ס": "s",
    "ע": "a",
    "פ": "p",
    "ף": "f",
    "צ": "ts",
    "ץ": "ts",
    "ק": "q",
    "ר": "r",
    "ש": "sh",
    "ת": "t",
}

HEBREW_VOWELISH = {"a", "e", "i", "o", "u"}

# Maqaf (U+05BE) joins two separate words; HEBREW_WORD_RE matches it as part
# of a "word", so it is treated as a separator before any dictionary lookup.
HEBREW_MAQAF = "\u05be"

# Punctuation inside the Hebrew block that HEBREW_WORD_RE also matches but
# that carries no sound: paseq, sof pasuq, nun hafukha, geresh, gershayim and
# maqaf. Left attached, it makes the dictionary lookup miss the word.
HEBREW_SILENT_PUNCTUATION = frozenset(
    "\u05c0\u05c3\u05c6\u05f3\u05f4" + HEBREW_MAQAF
)


def _strip_hebrew_marks(value: str) -> str:
    """Return ``value`` without combining marks and Hebrew punctuation.

    The text is NFD-normalized, then every character of category ``Mn``
    (niqqud, cantillation) and every character in
    ``HEBREW_SILENT_PUNCTUATION`` is dropped.
    """
    decomposed = unicodedata.normalize("NFD", value)
    return "".join(
        ch for ch in decomposed
        if unicodedata.category(ch) != "Mn" and ch not in HEBREW_SILENT_PUNCTUATION
    )


def _fallback_hebrew_transliteration(word: str) -> str:
    """Build a rough letter-by-letter transliteration of an unpointed word.

    Letters map through ``HEBREW_LETTERS``; a final he becomes "ah", and a
    non-initial yod / vav become "i" / "o". An "a" is inserted between two
    adjacent consonant tokens so the result stays pronounceable. Characters
    without a mapping are skipped; if nothing maps, ``word`` is returned
    unchanged.
    """
    tokens: list[str] = []
    chars = list(word)
    for index, ch in enumerate(chars):
        token = HEBREW_LETTERS.get(ch)
        if token is None:
            continue
        if ch == "ה" and index == len(chars) - 1:
            token = "ah"
        elif ch == "י" and index > 0:
            token = "i"
        elif ch == "ו" and index > 0:
            token = "o"
        tokens.append(token)

    if not tokens:
        return word

    spoken: list[str] = []
    for index, token in enumerate(tokens):
        spoken.append(token)
        next_token = tokens[index + 1] if index + 1 < len(tokens) else ""
        if (
            token[-1:] not in HEBREW_VOWELISH
            and next_token
            and next_token[:1] not in HEBREW_VOWELISH
        ):
            spoken.append("a")
    return "".join(spoken)


def _transliterate_normalized_hebrew(normalized: str) -> str:
    """Transliterate one mark-free, maqaf-free Hebrew word.

    Lookup order: the whole word in ``HEBREW_WORD_TRANSLITERATIONS``; the
    word minus a leading vav ("ve-" prefix); the word minus a leading he
    ("ha-" prefix); finally the letter-by-letter fallback.
    """
    direct = HEBREW_WORD_TRANSLITERATIONS.get(normalized)
    if direct:
        return direct

    if len(normalized) > 1:
        for prefix_letter, spoken_prefix in (("ו", "ve"), ("ה", "ha")):
            if normalized.startswith(prefix_letter):
                rest = HEBREW_WORD_TRANSLITERATIONS.get(normalized[1:])
                if rest:
                    return f"{spoken_prefix}-{rest}"

    return _fallback_hebrew_transliteration(normalized)


def _transliterate_hebrew_word(match: re.Match[str]) -> str:
    """``re.sub`` callback: transliterate one ``HEBREW_WORD_RE`` match.

    The match is split on maqaf so that joined words are looked up
    individually; the spoken parts are re-joined with "-". When the match
    holds no speakable content (marks or punctuation only) the original text
    is returned unchanged.
    """
    original = match.group(0)
    spoken_parts: list[str] = []
    for segment in original.split(HEBREW_MAQAF):
        normalized = _strip_hebrew_marks(segment)
        if normalized:
            spoken_parts.append(_transliterate_normalized_hebrew(normalized))

    if not spoken_parts:
        return original
    return "-".join(spoken_parts)


def _prepare_synthesis_input(text: str, language: str, voice: str) -> tuple[str, str]:
    """Return the ``(text, voice)`` pair to hand to the synthesizer.

    For a language code starting with "he" and text containing Hebrew
    characters, every Hebrew word is replaced by its transliteration and the
    voice is switched to "en-us". Otherwise ``text`` and ``voice`` are
    returned unchanged.
    """
    if language.lower().startswith("he") and HEBREW_CHAR_RE.search(text):
        spoken = HEBREW_WORD_RE.sub(_transliterate_hebrew_word, text)
        return spoken, "en-us"
    return text, voice
|
||||||
|
|
||||||
|
|
||||||
def _resolve_voice(req: TtsRequest) -> str:
|
def _resolve_voice(req: TtsRequest) -> str:
|
||||||
if req.voice:
|
if req.voice:
|
||||||
return req.voice.strip()
|
return req.voice.strip()
|
||||||
@@ -115,14 +299,15 @@ def tts(req: TtsRequest) -> Response:
|
|||||||
raise HTTPException(status_code=400, detail="text is required")
|
raise HTTPException(status_code=400, detail="text is required")
|
||||||
|
|
||||||
voice = _resolve_voice(req)
|
voice = _resolve_voice(req)
|
||||||
|
spoken_text, synth_voice = _prepare_synthesis_input(req.text, req.language, voice)
|
||||||
args = [
|
args = [
|
||||||
"--stdout",
|
"--stdout",
|
||||||
"-v", voice,
|
"-v", synth_voice,
|
||||||
"-s", str(max(80, min(450, req.rate))),
|
"-s", str(max(80, min(450, req.rate))),
|
||||||
"-p", str(max(0, min(99, req.pitch))),
|
"-p", str(max(0, min(99, req.pitch))),
|
||||||
"-a", str(max(0, min(200, req.volume))),
|
"-a", str(max(0, min(200, req.volume))),
|
||||||
]
|
]
|
||||||
wav = _run_espeak(args, req.text.encode("utf-8"))
|
wav = _run_espeak(args, spoken_text.encode("utf-8"))
|
||||||
if not wav:
|
if not wav:
|
||||||
raise HTTPException(status_code=500, detail="espeak-ng returned empty stdout")
|
raise HTTPException(status_code=500, detail="espeak-ng returned empty stdout")
|
||||||
return Response(content=wav, media_type="audio/wav")
|
return Response(content=wav, media_type="audio/wav")
|
||||||
@@ -153,9 +338,9 @@ def tts(req: TtsRequest) -> Response:
|
|||||||
PHONEME_DURATION_RE = re.compile(r"^\s*\S+\s+(\d+)\s+", re.MULTILINE)
|
PHONEME_DURATION_RE = re.compile(r"^\s*\S+\s+(\d+)\s+", re.MULTILINE)
|
||||||
|
|
||||||
|
|
||||||
def _estimate_total_ms(req: TtsRequest, voice: str) -> int:
|
def _estimate_total_ms(req: TtsRequest, voice: str, spoken_text: str) -> int:
|
||||||
args = ["--pho", "--quiet", "-v", voice, "-s", str(req.rate)]
|
args = ["--pho", "--quiet", "-v", voice, "-s", str(req.rate)]
|
||||||
out = _run_espeak(args, req.text.encode("utf-8"))
|
out = _run_espeak(args, spoken_text.encode("utf-8"))
|
||||||
text = out.decode("utf-8", errors="replace")
|
text = out.decode("utf-8", errors="replace")
|
||||||
total = 0
|
total = 0
|
||||||
for match in PHONEME_DURATION_RE.finditer(text):
|
for match in PHONEME_DURATION_RE.finditer(text):
|
||||||
@@ -175,7 +360,8 @@ def timings(req: TtsRequest):
|
|||||||
if not req.text.strip():
|
if not req.text.strip():
|
||||||
raise HTTPException(status_code=400, detail="text is required")
|
raise HTTPException(status_code=400, detail="text is required")
|
||||||
voice = _resolve_voice(req)
|
voice = _resolve_voice(req)
|
||||||
total_ms = _estimate_total_ms(req, voice)
|
spoken_text, synth_voice = _prepare_synthesis_input(req.text, req.language, voice)
|
||||||
|
total_ms = _estimate_total_ms(req, synth_voice, spoken_text)
|
||||||
|
|
||||||
# Distribute total_ms across whitespace-split words proportional to
|
# Distribute total_ms across whitespace-split words proportional to
|
||||||
# character count. Punctuation-only tokens are folded into the previous
|
# character count. Punctuation-only tokens are folded into the previous
|
||||||
@@ -204,7 +390,7 @@ def timings(req: TtsRequest):
|
|||||||
{
|
{
|
||||||
"text": req.text,
|
"text": req.text,
|
||||||
"language": req.language,
|
"language": req.language,
|
||||||
"voice": voice,
|
"voice": synth_voice,
|
||||||
"words": out_words,
|
"words": out_words,
|
||||||
"durationMs": total_ms,
|
"durationMs": total_ms,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -37,6 +37,19 @@ spec:
|
|||||||
app.kubernetes.io/name: ttsreader-piper
|
app.kubernetes.io/name: ttsreader-piper
|
||||||
app.kubernetes.io/part-of: flowercore
|
app.kubernetes.io/part-of: flowercore
|
||||||
spec:
|
spec:
|
||||||
|
# Bypass CoreDNS's *.iamworkin.lan wildcard so the init container reaches
|
||||||
|
# huggingface.co directly when it seeds voice models.
|
||||||
|
dnsPolicy: None
|
||||||
|
dnsConfig:
|
||||||
|
nameservers:
|
||||||
|
- 10.43.0.10
|
||||||
|
searches:
|
||||||
|
- fc-ttsreader.svc.cluster.local
|
||||||
|
- svc.cluster.local
|
||||||
|
- cluster.local
|
||||||
|
options:
|
||||||
|
- name: ndots
|
||||||
|
value: "2"
|
||||||
initContainers:
|
initContainers:
|
||||||
- name: seed-voices
|
- name: seed-voices
|
||||||
image: rhasspy/wyoming-piper:latest
|
image: rhasspy/wyoming-piper:latest
|
||||||
@@ -346,7 +359,7 @@ spec:
|
|||||||
runAsUser: 1654
|
runAsUser: 1654
|
||||||
containers:
|
containers:
|
||||||
- name: biblical-tts
|
- name: biblical-tts
|
||||||
image: localhost/fc-biblical-tts:v1
|
image: localhost/fc-biblical-tts:v20260506-hebrew-translit
|
||||||
imagePullPolicy: Never
|
imagePullPolicy: Never
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 10402
|
- containerPort: 10402
|
||||||
@@ -519,7 +532,7 @@ spec:
|
|||||||
fsGroupChangePolicy: OnRootMismatch
|
fsGroupChangePolicy: OnRootMismatch
|
||||||
containers:
|
containers:
|
||||||
- name: web
|
- name: web
|
||||||
image: localhost/fc-ttsreader-web:v202604291817
|
image: localhost/fc-ttsreader-web:v20260603-s54cx14-pr29-schema
|
||||||
imagePullPolicy: Never
|
imagePullPolicy: Never
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 5217
|
- containerPort: 5217
|
||||||
@@ -537,12 +550,20 @@ spec:
|
|||||||
value: "/usr/bin/ffmpeg"
|
value: "/usr/bin/ffmpeg"
|
||||||
- name: TtsReader__Bible__CorpusRoot
|
- name: TtsReader__Bible__CorpusRoot
|
||||||
value: "/data/corpus-cache/world-english-bible/eng/usx"
|
value: "/data/corpus-cache/world-english-bible/eng/usx"
|
||||||
|
- name: TtsReader__ChapterContext__DatabasePath
|
||||||
|
value: "/data/chapter-context.db"
|
||||||
- name: TtsReader__Jobs__Root
|
- name: TtsReader__Jobs__Root
|
||||||
value: "/data/jobs"
|
value: "/data/jobs"
|
||||||
|
- name: TtsReader__Export__LocalCasRoot
|
||||||
|
value: "/data/bundles/cas"
|
||||||
- name: TtsReader__Piper__Host
|
- name: TtsReader__Piper__Host
|
||||||
value: "ttsreader-piper.fc-ttsreader.svc.cluster.local."
|
value: "10.0.57.17"
|
||||||
- name: TtsReader__Piper__Port
|
- name: TtsReader__Piper__Port
|
||||||
value: "10200"
|
value: "8500"
|
||||||
|
- name: TtsReader__Piper__Transport
|
||||||
|
value: "http"
|
||||||
|
- name: TtsReader__Piper__HttpPath
|
||||||
|
value: "/tts"
|
||||||
- name: TtsReader__Kokoro__Enabled
|
- name: TtsReader__Kokoro__Enabled
|
||||||
value: "true"
|
value: "true"
|
||||||
- name: TtsReader__Kokoro__BaseUrl
|
- name: TtsReader__Kokoro__BaseUrl
|
||||||
@@ -553,6 +574,14 @@ spec:
|
|||||||
value: "http://ttsreader-kokoro.fc-ttsreader.svc.cluster.local.:8880"
|
value: "http://ttsreader-kokoro.fc-ttsreader.svc.cluster.local.:8880"
|
||||||
- name: TtsReader__Kokoro__TimeoutSeconds
|
- name: TtsReader__Kokoro__TimeoutSeconds
|
||||||
value: "120"
|
value: "120"
|
||||||
|
- name: FlowerCore__Tts__BiblicalTts__Enabled
|
||||||
|
value: "true"
|
||||||
|
- name: FlowerCore__Tts__BiblicalTts__BaseUrl
|
||||||
|
value: "http://ttsreader-biblical.fc-ttsreader.svc.cluster.local.:10402"
|
||||||
|
- name: FlowerCore__Tts__BiblicalTts__TimeoutSeconds
|
||||||
|
value: "60"
|
||||||
|
- name: FlowerCore__Tts__BiblicalTts__DefaultLanguage
|
||||||
|
value: "grc"
|
||||||
- name: Speech__Alignment__Enabled
|
- name: Speech__Alignment__Enabled
|
||||||
# Cluster-native faster-whisper (Lane F, 2026-04-25). The
|
# Cluster-native faster-whisper (Lane F, 2026-04-25). The
|
||||||
# ttsreader-align deployment in this manifest wraps
|
# ttsreader-align deployment in this manifest wraps
|
||||||
@@ -588,6 +617,8 @@ spec:
|
|||||||
# the writable PVC mount.
|
# the writable PVC mount.
|
||||||
- name: TtsReader__Preview__CacheDirectory
|
- name: TtsReader__Preview__CacheDirectory
|
||||||
value: "/data/voice-previews"
|
value: "/data/voice-previews"
|
||||||
|
- name: TtsReader__VoiceLibrary__ReferenceClip__Directory
|
||||||
|
value: "/data/voice-reference-clips"
|
||||||
# Sprint E XXL Phase 4γ — content-addressed CDN bundle dir for
|
# Sprint E XXL Phase 4γ — content-addressed CDN bundle dir for
|
||||||
# POST /api/v1/render. Default "wwwroot/cdn" resolves under the
|
# POST /api/v1/render. Default "wwwroot/cdn" resolves under the
|
||||||
# read-only app filesystem, so pin to the writable PVC mount
|
# read-only app filesystem, so pin to the writable PVC mount
|
||||||
@@ -609,7 +640,10 @@ spec:
|
|||||||
optional: true
|
optional: true
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 100m
|
# The cluster is currently saturated on requested CPU by
|
||||||
|
# remotedesktop workloads even when real usage is low.
|
||||||
|
# Keep the web frontend schedulable under that pressure.
|
||||||
|
cpu: 10m
|
||||||
memory: 256Mi
|
memory: 256Mi
|
||||||
limits:
|
limits:
|
||||||
cpu: 500m
|
cpu: 500m
|
||||||
|
|||||||
47
apps/fc-updater/README.md
Normal file
47
apps/fc-updater/README.md
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
# fc-updater — Update Center GitOps adoption
|
||||||
|
|
||||||
|
**Status:** adopted into `bluejay-infra` on 2026-05-06. The live ArgoCD
|
||||||
|
Application is `infra-fc-updater`, generated by the `bluejay-infra`
|
||||||
|
ApplicationSet with automated sync, `prune: true`, and `selfHeal: true`.
|
||||||
|
|
||||||
|
## Managed manifest set
|
||||||
|
|
||||||
|
`apps/fc-updater/fc-updater.yaml` manages:
|
||||||
|
|
||||||
|
- `Namespace/fc-updater`
|
||||||
|
- `PersistentVolumeClaim/updatecenter-data`
|
||||||
|
- `Deployment/updatecenter-web`
|
||||||
|
- `Service/updatecenter-web`
|
||||||
|
- `Certificate/updatecenter-web-tls`
|
||||||
|
- `Certificate/updatecenter-web-internal-tls`
|
||||||
|
- `IngressRoute/updatecenter-web`
|
||||||
|
- `IngressRoute/updatecenter-web-internal`
|
||||||
|
- `IngressRoute/updatecenter-web-public`
|
||||||
|
|
||||||
|
The Deployment intentionally sets `revisionHistoryLimit: 3` and
|
||||||
|
`strategy.type: Recreate`. The service is a singleton that keeps its SQLite database and local bundle
|
||||||
|
storage on `PersistentVolumeClaim/updatecenter-data`, pinned to
|
||||||
|
`rke2-server`.
|
||||||
|
|
||||||
|
## Runtime dependencies intentionally not stored here
|
||||||
|
|
||||||
|
These live Secrets are pre-existing runtime material and are not committed to
|
||||||
|
Git:
|
||||||
|
|
||||||
|
- `updater-bootstrap-auth`
|
||||||
|
- `updater-signing`
|
||||||
|
- `updater-webhooks`
|
||||||
|
- `cf-origin-flowercore-io`
|
||||||
|
|
||||||
|
Rotate the Cloudflare Origin Certificate through
|
||||||
|
`FlowerCore.Notes/docs/standards/code-signing-rotation-runbook.md`; the
|
||||||
|
shared origin cert must exist in every namespace that serves a
|
||||||
|
`*.flowercore.io` public IngressRoute.
|
||||||
|
|
||||||
|
## Verification
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
kubectl.exe --kubeconfig C:\Users\AndrewStoltz\.kube\rke2.yaml -n argocd get application infra-fc-updater
|
||||||
|
kubectl.exe --kubeconfig C:\Users\AndrewStoltz\.kube\rke2.yaml -n fc-updater get deploy,svc,ingressroute,certificate,pvc
|
||||||
|
curl.exe -sk https://update.flowercore.io/api/v1/manifests/_schema
|
||||||
|
```
|
||||||
271
apps/fc-updater/fc-updater.yaml
Normal file
271
apps/fc-updater/fc-updater.yaml
Normal file
@@ -0,0 +1,271 @@
|
|||||||
|
# FlowerCore Update Center
|
||||||
|
# GitOps adoption of the live fc-updater namespace after PUB-1/PUB-3.
|
||||||
|
# Runtime credentials remain in existing K8s Secrets; do not store them here.
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Namespace
|
||||||
|
metadata:
|
||||||
|
name: fc-updater
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: updatecenter-data
|
||||||
|
namespace: fc-updater
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: updatecenter-web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteOnce
|
||||||
|
storageClassName: longhorn
|
||||||
|
volumeMode: Filesystem
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
# Sized for fleet bundle storage (LocalFsBundleStore.MaxTotalBytes
|
||||||
|
# soft cap at 25 GiB per project_uc_remaining_4_apps_signed_2026_05_06).
|
||||||
|
# Mike Bundle alone is ~5.1 GiB; cluster live capacity is already
|
||||||
|
# 20 GiB after a manual expand. PVCs cannot shrink, so git must track
|
||||||
|
# at least the live size to avoid the OutOfSync loop.
|
||||||
|
storage: 25Gi
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: updatecenter-web
|
||||||
|
namespace: fc-updater
|
||||||
|
labels:
|
||||||
|
app: updatecenter-web
|
||||||
|
app.kubernetes.io/name: updatecenter-web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
revisionHistoryLimit: 3
|
||||||
|
strategy:
|
||||||
|
# SQLite + local bundle storage live on a single RWO PVC. Recreate avoids
|
||||||
|
# two pods overlapping the same write path during future image bumps.
|
||||||
|
type: Recreate
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: updatecenter-web
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: updatecenter-web
|
||||||
|
spec:
|
||||||
|
nodeName: rke2-server
|
||||||
|
containers:
|
||||||
|
- name: web
|
||||||
|
image: localhost/fc-updater-web:v202605310029-7974fc4
|
||||||
|
imagePullPolicy: Never
|
||||||
|
ports:
|
||||||
|
- containerPort: 8080
|
||||||
|
name: http
|
||||||
|
env:
|
||||||
|
- name: ASPNETCORE_URLS
|
||||||
|
value: http://+:8080
|
||||||
|
- name: FlowerCore__Updater__Database__Provider
|
||||||
|
value: sqlite
|
||||||
|
- name: FlowerCore__Updater__Database__ConnectionString
|
||||||
|
value: Data Source=/data/updatecenter.db
|
||||||
|
- name: FlowerCore__Updater__BundleStorage__LocalFs__RootDirectory
|
||||||
|
value: /data/bundles
|
||||||
|
- name: FlowerCore__Updater__PublicShares__RequirePublicVisibilityOnPublicHosts
|
||||||
|
value: "true"
|
||||||
|
- name: FlowerCore__Updater__PublicShares__Links__0__Code
|
||||||
|
value: 8f3c2a9e7d41
|
||||||
|
- name: FlowerCore__Updater__PublicShares__Links__0__AppId
|
||||||
|
value: flowercore.faith-ai-mike
|
||||||
|
- name: FlowerCore__Updater__PublicShares__Links__0__Channel
|
||||||
|
value: stable
|
||||||
|
- name: FlowerCore__Updater__PublicShares__Links__0__RuntimeId
|
||||||
|
value: win-x64
|
||||||
|
- name: FlowerCore__Updater__PublicShares__Links__0__DisplayName
|
||||||
|
value: Faith AI Mike Edition
|
||||||
|
- name: FlowerCore__Updater__PublicShares__Links__0__Headline
|
||||||
|
value: Faith AI Mike Edition
|
||||||
|
- name: FlowerCore__Updater__PublicShares__Links__0__Description
|
||||||
|
value: Private release link for Mike's Faith AI bundle.
|
||||||
|
- name: FlowerCore__Audit__Sinks__Loki__Enabled
|
||||||
|
value: "false"
|
||||||
|
- name: FlowerCore__Updater__Auth__Bootstrap__Enabled
|
||||||
|
value: "true"
|
||||||
|
- name: FlowerCore__Updater__Auth__Bootstrap__Username
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: updater-bootstrap-auth
|
||||||
|
key: username
|
||||||
|
- name: FlowerCore__Updater__Auth__Bootstrap__Password
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: updater-bootstrap-auth
|
||||||
|
key: password
|
||||||
|
- name: FlowerCore__Updater__Auth__Bootstrap__SigningKey
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: updater-bootstrap-auth
|
||||||
|
key: signing-key
|
||||||
|
- name: FlowerCore__Updater__Signing__AutoSignOnPublish
|
||||||
|
value: "true"
|
||||||
|
- name: FlowerCore__Updater__Signing__RequireSignatureOnPublish
|
||||||
|
value: "true"
|
||||||
|
- name: FlowerCore__Updater__Signing__PfxBase64
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: updater-signing
|
||||||
|
key: pfx-base64
|
||||||
|
- name: FlowerCore__Updater__Signing__PfxPassword
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: updater-signing
|
||||||
|
key: pfx-password
|
||||||
|
- name: FlowerCore__Updater__Signing__OpItemReference
|
||||||
|
value: op://FlowerCore/step-ca-codesign
|
||||||
|
- name: FlowerCore__Updater__Signing__TrustAnchorPath
|
||||||
|
value: /etc/flowercore-updater/signing/root-ca.pem
|
||||||
|
- name: FlowerCore__Updater__GitHub__Token
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: updater-webhooks
|
||||||
|
key: github-token
|
||||||
|
- name: FlowerCore__Updater__GitHub__WebhookSecret
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: updater-webhooks
|
||||||
|
key: github-webhook-secret
|
||||||
|
- name: FlowerCore__Updater__Gitea__Token
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: updater-webhooks
|
||||||
|
key: gitea-token
|
||||||
|
- name: FlowerCore__Updater__Gitea__WebhookSecret
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: updater-webhooks
|
||||||
|
key: gitea-webhook-secret
|
||||||
|
readinessProbe:
|
||||||
|
tcpSocket:
|
||||||
|
port: http
|
||||||
|
initialDelaySeconds: 10
|
||||||
|
periodSeconds: 15
|
||||||
|
livenessProbe:
|
||||||
|
tcpSocket:
|
||||||
|
port: http
|
||||||
|
initialDelaySeconds: 30
|
||||||
|
periodSeconds: 30
|
||||||
|
volumeMounts:
|
||||||
|
- name: data
|
||||||
|
mountPath: /data
|
||||||
|
- name: signing
|
||||||
|
mountPath: /etc/flowercore-updater/signing
|
||||||
|
readOnly: true
|
||||||
|
volumes:
|
||||||
|
- name: data
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: updatecenter-data
|
||||||
|
- name: signing
|
||||||
|
secret:
|
||||||
|
secretName: updater-signing
|
||||||
|
items:
|
||||||
|
- key: root-ca.pem
|
||||||
|
path: root-ca.pem
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: updatecenter-web
|
||||||
|
namespace: fc-updater
|
||||||
|
labels:
|
||||||
|
app: updatecenter-web
|
||||||
|
app.kubernetes.io/name: updatecenter-web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: updatecenter-web
|
||||||
|
ports:
|
||||||
|
- name: http
|
||||||
|
port: 8080
|
||||||
|
targetPort: http
|
||||||
|
---
|
||||||
|
apiVersion: cert-manager.io/v1
|
||||||
|
kind: Certificate
|
||||||
|
metadata:
|
||||||
|
name: updatecenter-web-tls
|
||||||
|
namespace: fc-updater
|
||||||
|
spec:
|
||||||
|
secretName: updatecenter-web-tls
|
||||||
|
issuerRef:
|
||||||
|
name: step-ca-acme
|
||||||
|
kind: ClusterIssuer
|
||||||
|
dnsNames:
|
||||||
|
- updatecenter.iamworkin.lan
|
||||||
|
- updates.iamworkin.lan
|
||||||
|
---
|
||||||
|
apiVersion: cert-manager.io/v1
|
||||||
|
kind: Certificate
|
||||||
|
metadata:
|
||||||
|
name: updatecenter-web-internal-tls
|
||||||
|
namespace: fc-updater
|
||||||
|
spec:
|
||||||
|
secretName: updatecenter-web-internal-tls
|
||||||
|
issuerRef:
|
||||||
|
name: step-ca-acme
|
||||||
|
kind: ClusterIssuer
|
||||||
|
dnsNames:
|
||||||
|
- updatecenter-internal.iamworkin.lan
|
||||||
|
---
|
||||||
|
apiVersion: traefik.io/v1alpha1
|
||||||
|
kind: IngressRoute
|
||||||
|
metadata:
|
||||||
|
name: updatecenter-web
|
||||||
|
namespace: fc-updater
|
||||||
|
spec:
|
||||||
|
entryPoints:
|
||||||
|
- web
|
||||||
|
- websecure
|
||||||
|
routes:
|
||||||
|
- match: (Host(`updatecenter.iamworkin.lan`) || Host(`updates.iamworkin.lan`)) && (Method(`GET`) || Method(`HEAD`) || Method(`POST`) || Method(`OPTIONS`))
|
||||||
|
kind: Rule
|
||||||
|
services:
|
||||||
|
- name: updatecenter-web
|
||||||
|
port: 8080
|
||||||
|
tls:
|
||||||
|
secretName: updatecenter-web-tls
|
||||||
|
---
|
||||||
|
apiVersion: traefik.io/v1alpha1
|
||||||
|
kind: IngressRoute
|
||||||
|
metadata:
|
||||||
|
name: updatecenter-web-internal
|
||||||
|
namespace: fc-updater
|
||||||
|
spec:
|
||||||
|
entryPoints:
|
||||||
|
- web
|
||||||
|
- websecure
|
||||||
|
routes:
|
||||||
|
- match: Host(`updatecenter-internal.iamworkin.lan`)
|
||||||
|
kind: Rule
|
||||||
|
services:
|
||||||
|
- name: updatecenter-web
|
||||||
|
port: 8080
|
||||||
|
tls:
|
||||||
|
secretName: updatecenter-web-internal-tls
|
||||||
|
---
|
||||||
|
apiVersion: traefik.io/v1alpha1
|
||||||
|
kind: IngressRoute
|
||||||
|
metadata:
|
||||||
|
name: updatecenter-web-public
|
||||||
|
namespace: fc-updater
|
||||||
|
spec:
|
||||||
|
entryPoints:
|
||||||
|
- websecure
|
||||||
|
routes:
|
||||||
|
- match: (Host(`update.flowercore.io`) || Host(`updates.flowercore.io`)) && (Method(`GET`) || Method(`HEAD`) || Method(`POST`) || Method(`OPTIONS`))
|
||||||
|
kind: Rule
|
||||||
|
services:
|
||||||
|
- name: updatecenter-web
|
||||||
|
port: 8080
|
||||||
|
tls:
|
||||||
|
secretName: cf-origin-flowercore-io
|
||||||
7
apps/fc-updater/kustomization.yaml
Normal file
7
apps/fc-updater/kustomization.yaml
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
# ArgoCD's bluejay-infra ApplicationSet uses a directory generator and does
|
||||||
|
# not require kustomization.yaml. Keep this anyway as the manifest inventory
|
||||||
|
# and for local `kubectl kustomize apps/fc-updater` previews.
|
||||||
|
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||||
|
kind: Kustomization
|
||||||
|
resources:
|
||||||
|
- fc-updater.yaml
|
||||||
@@ -1,5 +1,10 @@
|
|||||||
# FlowerCore Tenant — flowercore.io (main brand)
|
# FlowerCore Tenant — retired flowercore.io placeholder.
|
||||||
# Public-facing placeholder landing page served by nginx
|
#
|
||||||
|
# Public flowercore.io/www.flowercore.io routing is now owned by
|
||||||
|
# apps/fc-landing/fc-landing.yaml. This tenant placeholder remains available
|
||||||
|
# only as an in-cluster service; do not create a duplicate public
|
||||||
|
# IngressRoute here because it competes with fc-landing and requires a
|
||||||
|
# namespace-local cf-origin-flowercore-io Secret.
|
||||||
# ArgoCD managed - BlueJay Lab
|
# ArgoCD managed - BlueJay Lab
|
||||||
---
|
---
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
@@ -10,12 +15,6 @@ metadata:
|
|||||||
app.kubernetes.io/part-of: bluejay-infra
|
app.kubernetes.io/part-of: bluejay-infra
|
||||||
flowercore.io/tenant: flowercore
|
flowercore.io/tenant: flowercore
|
||||||
---
|
---
|
||||||
# NOTE: The existing cf-origin-flowercore-io secret (covering *.flowercore.io)
|
|
||||||
# must be copied into this namespace. It already exists in other namespaces.
|
|
||||||
# Copy with: kubectl get secret cf-origin-flowercore-io -n fc-system -o yaml \
|
|
||||||
# | sed 's/namespace: .*/namespace: tenant-flowercore/' \
|
|
||||||
# | kubectl apply -f -
|
|
||||||
---
|
|
||||||
# Landing page HTML
|
# Landing page HTML
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: ConfigMap
|
kind: ConfigMap
|
||||||
@@ -311,22 +310,3 @@ spec:
|
|||||||
- port: 80
|
- port: 80
|
||||||
targetPort: 80
|
targetPort: 80
|
||||||
name: http
|
name: http
|
||||||
---
|
|
||||||
# Traefik IngressRoute — public via Cloudflare
|
|
||||||
# Uses existing cf-origin-flowercore-io cert (must be copied to this namespace)
|
|
||||||
apiVersion: traefik.io/v1alpha1
|
|
||||||
kind: IngressRoute
|
|
||||||
metadata:
|
|
||||||
name: flowercore-web
|
|
||||||
namespace: tenant-flowercore
|
|
||||||
spec:
|
|
||||||
entryPoints:
|
|
||||||
- websecure
|
|
||||||
routes:
|
|
||||||
- match: Host(`flowercore.io`) || Host(`www.flowercore.io`)
|
|
||||||
kind: Rule
|
|
||||||
services:
|
|
||||||
- name: flowercore-web
|
|
||||||
port: 80
|
|
||||||
tls:
|
|
||||||
secretName: cf-origin-flowercore-io
|
|
||||||
|
|||||||
2
apps/github-runner/.gitattributes
vendored
Normal file
2
apps/github-runner/.gitattributes
vendored
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
*.sh text eol=lf
|
||||||
|
Dockerfile text eol=lf
|
||||||
54
apps/github-runner/Dockerfile
Normal file
54
apps/github-runner/Dockerfile
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
FROM myoung34/github-runner:latest
|
||||||
|
|
||||||
|
# Build-time pins: the Ruby patch release to compile, the minor-version
# directory name that PATH below refers to, and the ruby-build release tag
# downloaded in the first RUN step.
ARG RUBY_VERSION=3.3.11
|
||||||
|
ARG RUBY_MINOR=3.3
|
||||||
|
ARG RUBY_BUILD_VERSION=v20260326
|
||||||
|
# UID/GID forwarded to install-ruby-toolcache.sh in the final RUN step.
ARG RUNNER_UID=1001
|
||||||
|
ARG RUNNER_GID=1001
|
||||||
|
|
||||||
|
# Tool-cache location under the runner home directory.
ENV RUNNER_TOOL_CACHE=/home/runner/_tool
|
||||||
|
# Passed as TOOLCACHE_ROOT to install-ruby-toolcache.sh in the final RUN step.
ENV RUNNER_RUBY_TOOLCACHE=/opt/runner-toolcache
|
||||||
|
# Prepend both Ruby bin directories (the /home/runner/_tool one first, then
# the /opt/runner-toolcache one) to the inherited PATH.
ENV PATH="/home/runner/_tool/Ruby/${RUBY_MINOR}/x64/bin:/opt/runner-toolcache/Ruby/${RUBY_MINOR}/x64/bin:${PATH}"
|
||||||
|
|
||||||
|
USER root
|
||||||
|
|
||||||
|
# Bake the IAmWorkin step-ca root CA into the system trust store. Without
|
||||||
|
# this, .NET HttpClient calls from CI tests against *.iamworkin.lan
|
||||||
|
# (e.g. https://selenium.iamworkin.lan/session) fail with `PartialChain`
|
||||||
|
# because the runner image's default Ubuntu trust bundle doesn't include
|
||||||
|
# our internal Root CA. update-ca-certificates regenerates
|
||||||
|
# /etc/ssl/certs/ca-certificates.crt, which OpenSSL + .NET on Linux read
|
||||||
|
# automatically — no SSL_CERT_FILE env var needed.
|
||||||
|
COPY step-ca-root.crt /usr/local/share/ca-certificates/iamworkin-step-ca-root.crt
|
||||||
|
|
||||||
|
# Single layer: install the packages needed to compile Ruby, run
# update-ca-certificates (after the root CA was copied in above), download and
# install the pinned ruby-build release, then delete the download and the apt
# package lists.
RUN apt-get update \
|
||||||
|
&& DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
||||||
|
autoconf \
|
||||||
|
bison \
|
||||||
|
build-essential \
|
||||||
|
ca-certificates \
|
||||||
|
curl \
|
||||||
|
libdb-dev \
|
||||||
|
libffi-dev \
|
||||||
|
libgdbm-dev \
|
||||||
|
libgmp-dev \
|
||||||
|
libncurses-dev \
|
||||||
|
libreadline-dev \
|
||||||
|
libssl-dev \
|
||||||
|
libyaml-dev \
|
||||||
|
patch \
|
||||||
|
pkg-config \
|
||||||
|
uuid-dev \
|
||||||
|
zlib1g-dev \
|
||||||
|
&& update-ca-certificates \
|
||||||
|
&& curl -fsSL "https://github.com/rbenv/ruby-build/archive/refs/tags/${RUBY_BUILD_VERSION}.tar.gz" -o /tmp/ruby-build.tar.gz \
|
||||||
|
&& mkdir -p /tmp/ruby-build \
|
||||||
|
&& tar -xzf /tmp/ruby-build.tar.gz --strip-components=1 -C /tmp/ruby-build \
|
||||||
|
&& /tmp/ruby-build/install.sh \
|
||||||
|
&& rm -rf /tmp/ruby-build /tmp/ruby-build.tar.gz /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
COPY install-ruby-toolcache.sh /usr/local/bin/install-ruby-toolcache.sh
|
||||||
|
|
||||||
|
# Make the helper executable, run it with the build args declared above, then
# run `ruby -v` so the image build fails if no ruby is reachable on PATH.
RUN chmod +x /usr/local/bin/install-ruby-toolcache.sh \
|
||||||
|
&& RUBY_VERSION="${RUBY_VERSION}" RUBY_MINOR="${RUBY_MINOR}" TOOLCACHE_ROOT="${RUNNER_RUBY_TOOLCACHE}" RUNNER_UID="${RUNNER_UID}" RUNNER_GID="${RUNNER_GID}" /usr/local/bin/install-ruby-toolcache.sh \
|
||||||
|
&& ruby -v
|
||||||
133
apps/github-runner/README.md
Normal file
133
apps/github-runner/README.md
Normal file
@@ -0,0 +1,133 @@
|
|||||||
|
# GitHub Runner Fleet
|
||||||
|
|
||||||
|
ArgoCD owns `apps/github-runner/github-runner.yaml`. Do not patch live runner
|
||||||
|
Deployments with `kubectl`; update this manifest and let ArgoCD reconcile.
|
||||||
|
|
||||||
|
## Runner Shape
|
||||||
|
|
||||||
|
All repo-scoped Linux runners use:
|
||||||
|
|
||||||
|
- `localhost/fc-github-runner:v20260525-ruby3.3.11-stepca`, derived from
|
||||||
|
`myoung34/github-runner:latest`
|
||||||
|
- `ACCESS_TOKEN` from the `github-runner-token` Secret
|
||||||
|
- `RUN_AS_ROOT=false`
|
||||||
|
- `EPHEMERAL=true`
|
||||||
|
- `LABELS=self-hosted,linux,fc-build-linux`
|
||||||
|
- writable non-root paths under `/home/runner` for .NET, NuGet, XDG cache, and
|
||||||
|
Actions tool cache
|
||||||
|
- Ruby 3.3.11 seeded into `/home/runner/_tool/Ruby/3.3/x64` from the baked
|
||||||
|
`/opt/runner-toolcache` copy so `ruby/setup-ruby@v1` can discover it on
|
||||||
|
self-hosted `ubuntu-20.04-x64` runners
|
||||||
|
|
||||||
|
`github-runner` for `FlowerCore.Common` is single-replica because it retains the
|
||||||
|
original Longhorn ReadWriteOnce NuGet PVC. Every other repo-scoped runner uses
|
||||||
|
two replicas with per-pod `emptyDir` caches. That is the safe backlog-drain
|
||||||
|
strategy: no two pods share one RWO PVC.
|
||||||
|
|
||||||
|
Sprint 32 final long-tail wave adds 16 two-replica Deployments:
|
||||||
|
`FlowerCore.Knowledge`, `FlowerCore.LlmBridge`, `FlowerCore.Media`,
|
||||||
|
`FlowerCore.Presentations`, `FlowerCore.RemoteDesktop`, `FlowerCore.DNS`,
|
||||||
|
`FlowerCore.Distribution`, `FlowerCore.Scoreboard`,
|
||||||
|
`FlowerCore.SegmentDisplay`, `FlowerCore.Signage.Contracts`,
|
||||||
|
`FlowerCore.SignalControl`, `FlowerCore.Intranet.Web`,
|
||||||
|
`FlowerCore.Provisioning`, `FlowerCore.Redis`, `FlowerCore.MessageBoard`, and
|
||||||
|
`FlowerCore.MenuBoard`.
|
||||||
|
|
||||||
|
## Image Build
|
||||||
|
|
||||||
|
Ruby is baked with a pinned `ruby-build` release and Ruby patch version. The pod
|
||||||
|
still mounts an `emptyDir` over `/home/runner`, so the `setup-runner-home` init
|
||||||
|
container copies the baked toolcache from `/opt/runner-toolcache/Ruby` into
|
||||||
|
`/home/runner/_tool/Ruby` before the runner container starts.
|
||||||
|
|
||||||
|
The IAmWorkin step-ca root CA is also baked into the system trust store
|
||||||
|
(`/usr/local/share/ca-certificates/iamworkin-step-ca-root.crt`, registered by
|
||||||
|
`update-ca-certificates`). Without it, .NET HttpClient calls from CI tests
|
||||||
|
against `*.iamworkin.lan` (e.g. `https://selenium.iamworkin.lan/session`)
|
||||||
|
fail with `PartialChain`. To refresh the bundled cert when the root rotates,
|
||||||
|
re-extract from the cluster and overwrite `step-ca-root.crt`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
kubectl get secret -n cert-manager step-ca-root \
|
||||||
|
-o jsonpath='{.data.ca\.crt}' | base64 -d > step-ca-root.crt
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd apps/github-runner
|
||||||
|
podman build -t localhost/fc-github-runner:v20260525-ruby3.3.11-stepca .
|
||||||
|
podman run --rm localhost/fc-github-runner:v20260525-ruby3.3.11-stepca ruby -v
|
||||||
|
podman run --rm localhost/fc-github-runner:v20260525-ruby3.3.11-stepca \
|
||||||
|
test -f /opt/runner-toolcache/Ruby/3.3/x64.complete
|
||||||
|
podman save localhost/fc-github-runner:v20260525-ruby3.3.11-stepca \
|
||||||
|
-o fc-github-runner-v20260525-ruby3.3.11-stepca.tar
|
||||||
|
```
|
||||||
|
|
||||||
|
Import the saved image on every schedulable RKE2 node before ArgoCD rolls the
|
||||||
|
Deployments:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
for node in rke2-server rke2-agent1 rke2-agent2; do
|
||||||
|
scp fc-github-runner-v20260525-ruby3.3.11-stepca.tar "$node:/tmp/"
|
||||||
|
ssh "$node" 'sudo ctr -a /run/k3s/containerd/containerd.sock -n k8s.io images rm localhost/fc-github-runner:v20260525-ruby3.3.11-stepca || true'
|
||||||
|
ssh "$node" 'sudo ctr -a /run/k3s/containerd/containerd.sock -n k8s.io images import /tmp/fc-github-runner-v20260525-ruby3.3.11-stepca.tar'
|
||||||
|
done
|
||||||
|
```
|
||||||
|
|
||||||
|
## Post-Merge Proof
|
||||||
|
|
||||||
|
After the PR is merged and ArgoCD syncs, verify the runner fleet:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
kubectl -n github-runner get deploy,pods,pvc
|
||||||
|
```
|
||||||
|
|
||||||
|
Verify the Ruby toolcache in a fresh pod:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
kubectl -n github-runner exec deploy/github-runner-puppet -c runner -- ruby -v
|
||||||
|
kubectl -n github-runner exec deploy/github-runner-puppet -c runner -- sh -c \
|
||||||
|
'echo "$RUNNER_TOOL_CACHE" && test -f "$RUNNER_TOOL_CACHE/Ruby/3.3/x64.complete"'
|
||||||
|
```
|
||||||
|
|
||||||
|
Verify GitHub registration for the repo-scoped runners:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
for repo in FlowerCore.Common FlowerCore.Shared.Pos FlowerCore.Puppet FlowerCore.Signage \
|
||||||
|
FlowerCore.DMS FlowerCore.Telephony FlowerCore.Print.Web FlowerCore.Chat \
|
||||||
|
FlowerCore.MySQL FlowerCore.Kiosk.Linux FlowerCore.Marquee FlowerCore.TtsReader \
|
||||||
|
FlowerCore.Knowledge FlowerCore.LlmBridge FlowerCore.Media \
|
||||||
|
FlowerCore.Presentations FlowerCore.RemoteDesktop FlowerCore.DNS \
|
||||||
|
FlowerCore.Distribution FlowerCore.Scoreboard FlowerCore.SegmentDisplay \
|
||||||
|
FlowerCore.Signage.Contracts FlowerCore.SignalControl FlowerCore.Intranet.Web \
|
||||||
|
FlowerCore.Provisioning FlowerCore.Redis FlowerCore.MessageBoard \
|
||||||
|
FlowerCore.MenuBoard; do
|
||||||
|
echo "=== $repo ==="
|
||||||
|
gh api "/repos/astoltz/$repo/actions/runners" \
|
||||||
|
--jq '.runners[] | select(.labels[].name == "fc-build-linux") | {name,status,busy,labels:[.labels[].name]}'
|
||||||
|
done
|
||||||
|
```
|
||||||
|
|
||||||
|
Shared.Pos publish proof after the runner pod is online:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
gh run list --repo astoltz/FlowerCore.Shared.Pos \
|
||||||
|
--workflow "Build, Test & Publish" --branch main --limit 5
|
||||||
|
```
|
||||||
|
|
||||||
|
If the latest run is still queued after runner registration, rerun the workflow
|
||||||
|
from GitHub Actions and verify it lands on an `rke2-linux-*` runner.
|
||||||
|
|
||||||
|
## Failure Notes
|
||||||
|
|
||||||
|
- `actions/setup-dotnet` permission error at `/usr/share/dotnet`: check that
|
||||||
|
`DOTNET_INSTALL_DIR=/home/runner/.dotnet` and related cache env vars are
|
||||||
|
present on the runner pod.
|
||||||
|
- `ruby/setup-ruby@v1` says self-hosted runners must install Ruby in
|
||||||
|
`$RUNNER_TOOL_CACHE`: check that the init container copied
|
||||||
|
`/opt/runner-toolcache/Ruby` into `/home/runner/_tool/Ruby` and that
|
||||||
|
`/home/runner/_tool/Ruby/3.3/x64.complete` exists.
|
||||||
|
- `404` during runner registration: the fine-grained PAT is valid but missing
|
||||||
|
repository access for that repo. Add the repo to the PAT access list; the PAT
|
||||||
|
value does not change.
|
||||||
|
- `Multi-Attach` volume error: only the Common runner uses an RWO PVC and it must
|
||||||
|
stay single-replica. New multi-replica runners use `emptyDir`.
|
||||||
4592
apps/github-runner/github-runner.yaml
Normal file
4592
apps/github-runner/github-runner.yaml
Normal file
File diff suppressed because it is too large
Load Diff
19
apps/github-runner/install-ruby-toolcache.sh
Normal file
19
apps/github-runner/install-ruby-toolcache.sh
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
#!/usr/bin/env bash
# Build Ruby with ruby-build into a GitHub Actions tool-cache layout:
#   ${TOOLCACHE_ROOT}/Ruby/<version>/x64            install prefix
#   ${TOOLCACHE_ROOT}/Ruby/<version>/x64.complete   "install finished" marker
#   ${TOOLCACHE_ROOT}/Ruby/<minor> -> <version>     convenience symlink
#
# Every setting can be overridden from the environment:
#   RUBY_VERSION         full version to build            (default 3.3.11)
#   RUBY_MINOR           name of the minor-version symlink (default: RUBY_VERSION
#                        with its last component removed, e.g. 3.3.11 -> 3.3)
#   TOOLCACHE_ROOT       tool-cache root directory        (default /opt/runner-toolcache)
#   RUNNER_UID/GID       owner applied to the tool cache  (default 1001:1001)
#   RUBY_CONFIGURE_OPTS  options handed to ruby-build     (default disables docs + YJIT)
set -euo pipefail

RUBY_VERSION="${RUBY_VERSION:-3.3.11}"
# Derive the minor alias from RUBY_VERSION so overriding only RUBY_VERSION
# cannot produce a mislabelled symlink such as "3.3 -> 3.4.1".
RUBY_MINOR="${RUBY_MINOR:-${RUBY_VERSION%.*}}"
TOOLCACHE_ROOT="${TOOLCACHE_ROOT:-/opt/runner-toolcache}"
RUNNER_UID="${RUNNER_UID:-1001}"
RUNNER_GID="${RUNNER_GID:-1001}"
RUBY_PREFIX="${TOOLCACHE_ROOT}/Ruby/${RUBY_VERSION}/x64"

mkdir -p "${TOOLCACHE_ROOT}/Ruby"
RUBY_CONFIGURE_OPTS="${RUBY_CONFIGURE_OPTS:---disable-install-doc --disable-yjit}" ruby-build "${RUBY_VERSION}" "${RUBY_PREFIX}"

# Smoke-test the interpreter BEFORE publishing the completion marker: with
# `set -e` a broken build aborts here and never leaves a cache entry that
# claims to be complete.
"${RUBY_PREFIX}/bin/ruby" -v

touch "${TOOLCACHE_ROOT}/Ruby/${RUBY_VERSION}/x64.complete"
ln -sfn "${RUBY_VERSION}" "${TOOLCACHE_ROOT}/Ruby/${RUBY_MINOR}"

# Hand the tree to the runner user and keep it world-readable (X = directories
# and already-executable files only).
chown -R "${RUNNER_UID}:${RUNNER_GID}" "${TOOLCACHE_ROOT}"
chmod -R a+rX "${TOOLCACHE_ROOT}"
|
||||||
12
apps/github-runner/step-ca-root.crt
Normal file
12
apps/github-runner/step-ca-root.crt
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
-----BEGIN CERTIFICATE-----
|
||||||
|
MIIBxDCCAWqgAwIBAgIRAPY357G6ow6zMAL5+4bS2kkwCgYIKoZIzj0EAwIwQDEa
|
||||||
|
MBgGA1UEChMRSUFtV29ya2luIEFDTUUgQ0ExIjAgBgNVBAMTGUlBbVdvcmtpbiBB
|
||||||
|
Q01FIENBIFJvb3QgQ0EwHhcNMjYwMzA4MTgwNzExWhcNMzYwMzA1MTgwNzExWjBA
|
||||||
|
MRowGAYDVQQKExFJQW1Xb3JraW4gQUNNRSBDQTEiMCAGA1UEAxMZSUFtV29ya2lu
|
||||||
|
IEFDTUUgQ0EgUm9vdCBDQTBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABJ2n04X1
|
||||||
|
JZo5Zdq/i1Idv8+fqwZyAzBh7whbqj0SWsJL8UWRabCMqYCs7+dXO0xRSzqkwFDL
|
||||||
|
x+vooOai8RgRNhajRTBDMA4GA1UdDwEB/wQEAwIBBjASBgNVHRMBAf8ECDAGAQH/
|
||||||
|
AgEBMB0GA1UdDgQWBBRnuPPQR6iM/H6vOluiU3Sygayz8jAKBggqhkjOPQQDAgNI
|
||||||
|
ADBFAiEArQK9dYPGmAZsdYnjziuFVVE5NKZUcceYvGfGC+tLXUsCIAudF2zJrCRq
|
||||||
|
3mK50ZZET/fwTkJwiEF4824mjP8p1CKM
|
||||||
|
-----END CERTIFICATE-----
|
||||||
@@ -466,11 +466,11 @@ spec:
|
|||||||
itemPath: vaults/IAmWorkin/items/Guacamole JSON Auth
|
itemPath: vaults/IAmWorkin/items/Guacamole JSON Auth
|
||||||
---
|
---
|
||||||
---
|
---
|
||||||
# 1Password-backed credentials for Mac mini VNC access (Phase 1 — 2026-04-28)
|
# 1Password-backed credentials for Mac mini VNC access (Phase 1 — 2026-04-28)
|
||||||
# The operator mints Secret 'macmini-vnc-creds' with keys: username, password, VNC Password
|
# The operator mints Secret 'macmini-vnc-creds' with keys: username, password, VNC Password
|
||||||
# Note: '1Password' field label 'VNC Password' -> K8s Secret key 'VNC Password' (space retained)
|
# Note: '1Password' field label 'VNC Password' -> K8s Secret key 'VNC Password' (space retained)
|
||||||
# Guacamole VNC connection password is sourced from the 'VNC Password' field.
|
# Guacamole VNC connection password is sourced from the 'VNC Password' field.
|
||||||
# Actual IP is 10.0.56.115 (INFRA VLAN) — the 1P item 'IP' field is kept as backup reference.
|
# Actual IP is 10.0.56.115 (INFRA VLAN) — the 1P item 'IP' field is kept as backup reference.
|
||||||
apiVersion: onepassword.com/v1
|
apiVersion: onepassword.com/v1
|
||||||
kind: OnePasswordItem
|
kind: OnePasswordItem
|
||||||
metadata:
|
metadata:
|
||||||
@@ -481,6 +481,7 @@ metadata:
|
|||||||
app.kubernetes.io/part-of: flowercore
|
app.kubernetes.io/part-of: flowercore
|
||||||
spec:
|
spec:
|
||||||
itemPath: vaults/IAmWorkin/items/Mac Mini
|
itemPath: vaults/IAmWorkin/items/Mac Mini
|
||||||
|
---
|
||||||
# Blue Jay Branding Extension (CSS + translations)
|
# Blue Jay Branding Extension (CSS + translations)
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: ConfigMap
|
kind: ConfigMap
|
||||||
|
|||||||
@@ -46,7 +46,7 @@ spec:
|
|||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: intranet-web
|
- name: intranet-web
|
||||||
image: localhost/fc-intranet-web:v20260429-1646
|
image: localhost/fc-intranet-web:v20260531-ttsreader-bridge
|
||||||
imagePullPolicy: Never
|
imagePullPolicy: Never
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 5300
|
- containerPort: 5300
|
||||||
|
|||||||
@@ -5,7 +5,9 @@ Phase 2.4 closed. Pod running, certificate issued (step-ca-acme), PVC
|
|||||||
bound (Longhorn 20Gi RWO), ArgoCD `infra-knowledge` synced. `/healthz`
|
bound (Longhorn 20Gi RWO), ArgoCD `infra-knowledge` synced. `/healthz`
|
||||||
returns 200, `/api/v1/editions` returns `[]` (initial-deploy state — no
|
returns 200, `/api/v1/editions` returns `[]` (initial-deploy state — no
|
||||||
*.db files in the PVC yet; Phase 2.5+ admin UI handles bulk
|
*.db files in the PVC yet; Phase 2.5+ admin UI handles bulk
|
||||||
population).
|
population). Phase 1 of the Agent Zero MCP rollout keeps `/healthz`
|
||||||
|
anonymous and gates `/mcp` behind `Authorization: Bearer <token>` built
|
||||||
|
from the 1Password item `FlowerCore Knowledge MCP Tokens`.
|
||||||
|
|
||||||
- Plan: [`../../../FlowerCore.Notes/docs/ai-agents/flowercore-knowledge-service-plan.md`](../../../FlowerCore.Notes/docs/ai-agents/flowercore-knowledge-service-plan.md)
|
- Plan: [`../../../FlowerCore.Notes/docs/ai-agents/flowercore-knowledge-service-plan.md`](../../../FlowerCore.Notes/docs/ai-agents/flowercore-knowledge-service-plan.md)
|
||||||
- Sprint: [`../../../FlowerCore.Notes/docs/ai-station/sprint-e-xxl-plan.md`](../../../FlowerCore.Notes/docs/ai-station/sprint-e-xxl-plan.md) (Track B)
|
- Sprint: [`../../../FlowerCore.Notes/docs/ai-station/sprint-e-xxl-plan.md`](../../../FlowerCore.Notes/docs/ai-station/sprint-e-xxl-plan.md) (Track B)
|
||||||
@@ -19,6 +21,12 @@ search to the rest of the FC ecosystem (Agent Zero, Chat.Web persona
|
|||||||
memory, AiStation embeddings explorer, TtsReader chapter context, BMO
|
memory, AiStation embeddings explorer, TtsReader chapter context, BMO
|
||||||
bot, Pi nodes via `fc-index sync`).
|
bot, Pi nodes via `fc-index sync`).
|
||||||
|
|
||||||
|
Phase 1 MCP routing is explicit:
|
||||||
|
|
||||||
|
- in-cluster Agent Zero → `http://knowledge-web.knowledge.svc/mcp`
|
||||||
|
- workstation Agent Zero → `https://knowledge.iamworkin.lan/mcp`
|
||||||
|
- probe URL for both lanes → `/healthz`
|
||||||
|
|
||||||
## Deployment order (do NOT skip / reorder)
|
## Deployment order (do NOT skip / reorder)
|
||||||
|
|
||||||
### 1. FlowerCore.DNS public A record — knowledge.iamworkin.lan -> 10.0.56.200
|
### 1. FlowerCore.DNS public A record — knowledge.iamworkin.lan -> 10.0.56.200
|
||||||
|
|||||||
@@ -40,16 +40,16 @@ metadata:
|
|||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/part-of: bluejay-infra
|
app.kubernetes.io/part-of: bluejay-infra
|
||||||
---
|
---
|
||||||
# MCP API key — synced from 1Password so /mcp stays gated without baking
|
# MCP bearer token for the read-only Agent Zero Phase 1 lane. The 1Password
|
||||||
# secrets into Git. The PASSWORD category maps the concealed field to Secret
|
# item currently stores the raw token in its concealed PASSWORD field, which
|
||||||
# key `password`, which the Deployment reads into FlowerCore:Mcp:ApiKey:Key.
|
# the operator syncs into the namespaced Secret key `password`.
|
||||||
apiVersion: onepassword.com/v1
|
apiVersion: onepassword.com/v1
|
||||||
kind: OnePasswordItem
|
kind: OnePasswordItem
|
||||||
metadata:
|
metadata:
|
||||||
name: knowledge-mcp-api-key
|
name: knowledge-mcp-tokens
|
||||||
namespace: knowledge
|
namespace: knowledge
|
||||||
spec:
|
spec:
|
||||||
itemPath: "vaults/IAmWorkin/items/KnowledgeApiKey"
|
itemPath: "vaults/IAmWorkin/items/FlowerCore Knowledge MCP Tokens"
|
||||||
---
|
---
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: PersistentVolumeClaim
|
kind: PersistentVolumeClaim
|
||||||
@@ -102,8 +102,17 @@ spec:
|
|||||||
- name: web
|
- name: web
|
||||||
# Placeholder tag — bump to the image you built + imported to ALL
|
# Placeholder tag — bump to the image you built + imported to ALL
|
||||||
# RKE2 nodes via scripts/deploy-knowledge.sh before applying.
|
# RKE2 nodes via scripts/deploy-knowledge.sh before applying.
|
||||||
image: localhost/fc-knowledge-web:v202604272200
|
image: localhost/fc-knowledge-web:v20260429232635
|
||||||
imagePullPolicy: Never
|
imagePullPolicy: Never
|
||||||
|
command:
|
||||||
|
- /bin/sh
|
||||||
|
- -c
|
||||||
|
args:
|
||||||
|
- |
|
||||||
|
if [ -n "${KNOWLEDGE_MCP_BEARER_TOKEN:-}" ]; then
|
||||||
|
export FlowerCore__Mcp__ApiKey__Key="Bearer ${KNOWLEDGE_MCP_BEARER_TOKEN}"
|
||||||
|
fi
|
||||||
|
exec dotnet FlowerCore.Knowledge.Web.dll
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 8080
|
- containerPort: 8080
|
||||||
name: http
|
name: http
|
||||||
@@ -115,7 +124,7 @@ spec:
|
|||||||
- name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
|
- name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
|
||||||
value: "false"
|
value: "false"
|
||||||
# Vector-store directory + embedding model + edition profile dir.
|
# Vector-store directory + embedding model + edition profile dir.
|
||||||
# Profile JSON is baked into the image at /app/editions via the
|
# Profile JSON is baked into the image at /home/app/editions via the
|
||||||
# csproj Content-link from FlowerCore.Common/editions/.
|
# csproj Content-link from FlowerCore.Common/editions/.
|
||||||
- name: Knowledge__VectorStoresDirectory
|
- name: Knowledge__VectorStoresDirectory
|
||||||
value: "/data/vector-stores"
|
value: "/data/vector-stores"
|
||||||
@@ -126,7 +135,7 @@ spec:
|
|||||||
- name: Knowledge__MaxLimit
|
- name: Knowledge__MaxLimit
|
||||||
value: "50"
|
value: "50"
|
||||||
- name: FlowerCore__Editions__ProfileDirectory
|
- name: FlowerCore__Editions__ProfileDirectory
|
||||||
value: "/app/editions"
|
value: "/home/app/editions"
|
||||||
# Embed via edge1 Pi 5 + AI HAT+ (10.0.57.17:11434). Cluster
|
# Embed via edge1 Pi 5 + AI HAT+ (10.0.57.17:11434). Cluster
|
||||||
# services do not depend on BLUEJAY-WS (private dev hardware) per
|
# services do not depend on BLUEJAY-WS (private dev hardware) per
|
||||||
# bluejay-infra@0f9d56e. Query-time embedding is fast enough on
|
# bluejay-infra@0f9d56e. Query-time embedding is fast enough on
|
||||||
@@ -138,7 +147,14 @@ spec:
|
|||||||
- name: FlowerCore__Mcp__ApiKey__Key
|
- name: FlowerCore__Mcp__ApiKey__Key
|
||||||
valueFrom:
|
valueFrom:
|
||||||
secretKeyRef:
|
secretKeyRef:
|
||||||
name: knowledge-mcp-api-key
|
name: knowledge-mcp-tokens
|
||||||
|
key: password
|
||||||
|
- name: FlowerCore__Mcp__ApiKey__HeaderName
|
||||||
|
value: "Authorization"
|
||||||
|
- name: KNOWLEDGE_MCP_BEARER_TOKEN
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: knowledge-mcp-tokens
|
||||||
key: password
|
key: password
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
@@ -185,7 +201,7 @@ spec:
|
|||||||
- name: tmp
|
- name: tmp
|
||||||
mountPath: /tmp
|
mountPath: /tmp
|
||||||
- name: logs
|
- name: logs
|
||||||
mountPath: /app/logs
|
mountPath: /home/app/logs
|
||||||
volumes:
|
volumes:
|
||||||
- name: vector-store
|
- name: vector-store
|
||||||
persistentVolumeClaim:
|
persistentVolumeClaim:
|
||||||
@@ -225,8 +241,12 @@ spec:
|
|||||||
kind: ClusterIssuer
|
kind: ClusterIssuer
|
||||||
dnsNames:
|
dnsNames:
|
||||||
- knowledge.iamworkin.lan
|
- knowledge.iamworkin.lan
|
||||||
duration: 2160h # 90d
|
# step-ca ACME caps lifetime at 30d; requesting 90d silently capped
|
||||||
renewBefore: 720h # 30d
|
# made renewBefore=cert-lifetime → perpetual renewal loop (10888+ CRs
|
||||||
|
# in 18h on 2026-05-07). Match working 720h/240h pattern from other
|
||||||
|
# FC services.
|
||||||
|
duration: 720h # 30d (step-ca cap)
|
||||||
|
renewBefore: 240h # 10d
|
||||||
---
|
---
|
||||||
apiVersion: traefik.io/v1alpha1
|
apiVersion: traefik.io/v1alpha1
|
||||||
kind: IngressRoute
|
kind: IngressRoute
|
||||||
|
|||||||
93
apps/kubevirt-vms/ci1.yaml
Normal file
93
apps/kubevirt-vms/ci1.yaml
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
# =============================================================================
|
||||||
|
# ci1 - Windows Server 2025 KubeVirt VM (GitHub Actions Self-Hosted Runner)
|
||||||
|
# =============================================================================
|
||||||
|
# Boots from the sysprepped containerDisk template built by the Windows VM
|
||||||
|
# sysprep pipeline. See docs/infrastructure/windows-vm-sysprep-pipeline.md.
|
||||||
|
# Path A/B/C install history is preserved in git log only.
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Namespace
|
||||||
|
metadata:
|
||||||
|
name: kubevirt-vms
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/part-of: kubevirt-stack
|
||||||
|
pod-security.kubernetes.io/enforce: privileged
|
||||||
|
|
||||||
|
---
|
||||||
|
apiVersion: kubevirt.io/v1
|
||||||
|
kind: VirtualMachine
|
||||||
|
metadata:
|
||||||
|
name: ci1
|
||||||
|
namespace: kubevirt-vms
|
||||||
|
labels:
|
||||||
|
app: ci-runner
|
||||||
|
role: github-actions-runner
|
||||||
|
flowercore.io/managed-by: bluejay-infra
|
||||||
|
spec:
|
||||||
|
runStrategy: Halted
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: ci-runner
|
||||||
|
role: github-actions-runner
|
||||||
|
kubevirt.io/vm: ci1
|
||||||
|
spec:
|
||||||
|
domain:
|
||||||
|
cpu:
|
||||||
|
cores: 8
|
||||||
|
sockets: 1
|
||||||
|
threads: 1
|
||||||
|
memory:
|
||||||
|
guest: 16Gi
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
memory: 16Gi
|
||||||
|
limits:
|
||||||
|
memory: 16Gi
|
||||||
|
clock:
|
||||||
|
utc: {}
|
||||||
|
timer:
|
||||||
|
hpet:
|
||||||
|
present: false
|
||||||
|
pit:
|
||||||
|
tickPolicy: delay
|
||||||
|
rtc:
|
||||||
|
tickPolicy: catchup
|
||||||
|
hyperv: {}
|
||||||
|
features:
|
||||||
|
acpi: {}
|
||||||
|
apic: {}
|
||||||
|
hyperv:
|
||||||
|
relaxed: {}
|
||||||
|
vapic: {}
|
||||||
|
spinlocks:
|
||||||
|
spinlocks: 8191
|
||||||
|
smm: {}
|
||||||
|
firmware:
|
||||||
|
bootloader:
|
||||||
|
efi:
|
||||||
|
secureBoot: false
|
||||||
|
devices:
|
||||||
|
tpm: {}
|
||||||
|
disks:
|
||||||
|
- name: rootdisk
|
||||||
|
disk:
|
||||||
|
bus: virtio
|
||||||
|
interfaces:
|
||||||
|
# Pod-network fallback for CI runner outbound traffic. Switch to
|
||||||
|
# prod-vlan57 once the bridge/NAD lane is ready for L2 access.
|
||||||
|
- name: default
|
||||||
|
masquerade: {}
|
||||||
|
model: virtio
|
||||||
|
machine:
|
||||||
|
type: q35
|
||||||
|
networks:
|
||||||
|
- name: default
|
||||||
|
pod: {}
|
||||||
|
volumes:
|
||||||
|
- name: rootdisk
|
||||||
|
containerDisk:
|
||||||
|
image: localhost/fc-win-server-2025:v1
|
||||||
|
imagePullPolicy: Never
|
||||||
|
terminationGracePeriodSeconds: 3600
|
||||||
3
apps/kubevirt-vms/kustomization.yaml
Normal file
3
apps/kubevirt-vms/kustomization.yaml
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
resources:
|
||||||
|
- ci1.yaml
|
||||||
|
- prod-vlan57-nad.yaml
|
||||||
69
apps/kubevirt-vms/prod-vlan57-nad.yaml
Normal file
69
apps/kubevirt-vms/prod-vlan57-nad.yaml
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
# =============================================================================
|
||||||
|
# NetworkAttachmentDefinition — PROD VLAN 57 bridge
|
||||||
|
# =============================================================================
|
||||||
|
# Purpose: makes KubeVirt VMs reachable on the PROD VLAN (10.0.57.0/24)
|
||||||
|
# alongside the existing pod network. Required for ci1 to bridge onto PROD
|
||||||
|
# (e.g. to provision/scrape edge1, edge2, kiosks, Pis on the same L2 segment).
|
||||||
|
#
|
||||||
|
# **DEPLOY GATE — Phase 1.5 host work required first**:
|
||||||
|
# On every RKE2 node (rke2-server, rke2-agent1, rke2-agent2):
|
||||||
|
# 1. Switch port (UniFi USL16LP) trunks VLAN 57 to the node — usually
|
||||||
|
# already true since BLUEJAY-WS reaches 10.0.57.x services. Verify
|
||||||
|
# with `ip link show enp86s0.57` after configuring sub-interface, OR
|
||||||
|
# `tcpdump -ni enp86s0 vlan 57` and ping a known PROD host.
|
||||||
|
# 2. Linux bridge `br-prod` enslaving `enp86s0.57` (VLAN sub-interface).
|
||||||
|
# NetworkManager profile examples in the runbook below.
|
||||||
|
# 3. Verify Multus DaemonSet `kube-multus-ds` is Ready on all nodes.
|
||||||
|
#
|
||||||
|
# Without those, applying this NAD has no effect except to create the NetworkAttachmentDefinition object.
|
||||||
|
# A VM that requests this NAD with no bridge present will fail with:
|
||||||
|
# `error adding pod kubevirt-vms_ci1 to CNI network "prod-vlan57": failed to
|
||||||
|
# plumb VLAN: open /sys/class/net/br-prod/master: no such file or directory`
|
||||||
|
#
|
||||||
|
# Configuration notes:
|
||||||
|
# - cniVersion 0.3.1 to match Multus daemon-config.json
|
||||||
|
# - mtu 1500 (matches enp86s0 default; bump if jumbo frames configured)
|
||||||
|
# - bridge name `br-prod` is convention; if Puppet picks a different name
|
||||||
|
# (e.g. `br57`, `br-vlan57`), edit BOTH this NAD and the ci1.yaml
|
||||||
|
# interface block. Keep them in sync.
|
||||||
|
# - vlan: 0 because the host bridge already strips VLAN tag (br-prod sits
|
||||||
|
# on top of `enp86s0.57`). If we instead used a VLAN-aware bridge with
|
||||||
|
# trunk port, set vlan: 57 here. Current convention is VLAN-stripped at
|
||||||
|
# the sub-interface, so the bridge passes untagged frames.
|
||||||
|
#
|
||||||
|
# Apply:
|
||||||
|
# kubectl --kubeconfig $env:USERPROFILE\.kube\rke2.yaml apply -f apps/kubevirt-vms/prod-vlan57-nad.yaml
|
||||||
|
#
|
||||||
|
# Then update ci1.yaml networks: stanza to:
|
||||||
|
# - name: prod-net
|
||||||
|
# multus:
|
||||||
|
# networkName: kubevirt-vms/prod-vlan57
|
||||||
|
# and the interface block from `masquerade` to `bridge`.
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
---
|
||||||
|
# Namespace must exist already (created by ci1.yaml's first document).
|
||||||
|
# This file imports a NAD into that same namespace.
|
||||||
|
apiVersion: k8s.cni.cncf.io/v1
|
||||||
|
kind: NetworkAttachmentDefinition
|
||||||
|
metadata:
|
||||||
|
name: prod-vlan57
|
||||||
|
namespace: kubevirt-vms
|
||||||
|
annotations:
|
||||||
|
bluejay.iamworkin.lan/host-bridge: "br-prod (enslaves enp86s0.57)"
|
||||||
|
bluejay.iamworkin.lan/cidr: "10.0.57.0/24"
|
||||||
|
bluejay.iamworkin.lan/gateway: "10.0.57.1"
|
||||||
|
bluejay.iamworkin.lan/dns: "10.0.56.1 (pfSense Unbound)"
|
||||||
|
spec:
|
||||||
|
config: |
|
||||||
|
{
|
||||||
|
"cniVersion": "0.3.1",
|
||||||
|
"name": "prod-vlan57",
|
||||||
|
"type": "bridge",
|
||||||
|
"bridge": "br-prod",
|
||||||
|
"ipam": {},
|
||||||
|
"mtu": 1500,
|
||||||
|
"vlan": 0,
|
||||||
|
"promiscMode": true,
|
||||||
|
"preserveDefaultVlan": false
|
||||||
|
}
|
||||||
99
apps/kubevirt-vms/win2025-iso-nfs-pv.yaml
Normal file
99
apps/kubevirt-vms/win2025-iso-nfs-pv.yaml
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
# =============================================================================
|
||||||
|
# Windows Server 2025 ISO — Static NFS PV (Path B for SATA-CDROM timeout)
|
||||||
|
# =============================================================================
|
||||||
|
# Purpose: Mount the ISO from Synology NAS via NFS instead of from a Longhorn-
|
||||||
|
# backed Filesystem PVC.
|
||||||
|
#
|
||||||
|
# Why: SATA-CDROM emulation reading from a Longhorn-backed Filesystem PVC is
|
||||||
|
# too slow for OVMF's boot read window — the DVD-ROM enumeration times out
|
||||||
|
# before the bootloader can be read. Symptom on the serial console:
|
||||||
|
# BdsDxe: failed to start Boot0001 "UEFI QEMU DVD-ROM QM00001 " from ...
|
||||||
|
# BdsDxe: failed to start Boot0001 ... Time out
|
||||||
|
# BdsDxe: No bootable option or device was found
|
||||||
|
# Diagnosis confirmed the ISO content is a perfectly valid bootable ISO9660
|
||||||
|
# image — the bug is in the timing path between OVMF and Longhorn-backed
|
||||||
|
# storage, not in the ISO itself.
|
||||||
|
#
|
||||||
|
# Block-mode PVC was tried (`volumeMode: Block` via DataVolume) and would
|
||||||
|
# likely fix the timing, but CDI v1.65.0's upload-target pod cannot open the
|
||||||
|
# block device due to runAsUser:107 + capabilities.drop:[ALL] and we got:
|
||||||
|
# blockdev: cannot open /dev/cdi-block-volume: Permission denied
|
||||||
|
#
|
||||||
|
# NFS-mounted ISO bypasses both issues: no Longhorn slowness, no CDI upload
|
||||||
|
# pod permission concerns. The ISO is read directly from the NAS over a
|
||||||
|
# native NFSv4.1 mount that QEMU's SATA emulator can read at full LAN speed.
|
||||||
|
#
|
||||||
|
# Layout on Synology:
|
||||||
|
# /volume1/ISOs/ (existing export, RKE2 ACL)
|
||||||
|
# en-us_windows_server_2025_updated_march_2026_x64_dvd_8e06425a.iso
|
||||||
|
# win2025-iso-disk/ (new subdir, 2026-05-08)
|
||||||
|
# disk.img -> hardlink to ../en-us_windows_server_2025_..._8e06425a.iso
|
||||||
|
#
|
||||||
|
# KubeVirt's launcher pod expects a PVC mounted at
|
||||||
|
# /var/run/kubevirt-private/vmi-disks/<diskName>/disk.img — by mounting the
|
||||||
|
# `win2025-iso-disk/` subdir as the NFS PV root, `disk.img` lives at the PV's
|
||||||
|
# root and KubeVirt's CDROM emulator finds it without any path manipulation.
|
||||||
|
#
|
||||||
|
# A symlink would NOT work for sub-path NFS mounts (the relative target
|
||||||
|
# `../...iso` falls outside the sub-mount root). A hardlink works because it
|
||||||
|
# references the same inode regardless of mount point.
|
||||||
|
#
|
||||||
|
# Memory references:
|
||||||
|
# - feedback_synology_nfs_volume1_kubernetes_export_scoped (Synology export
|
||||||
|
# scoping pattern — but /volume1/ISOs export, unlike /volume1/kubernetes,
|
||||||
|
# does support sub-path mounts because Synology NFS is configured with
|
||||||
|
# pseudo-fs in NFSv4.1)
|
||||||
|
# - feedback_kubevirt_iso_first_install_bootorder_and_runstrategy (boot
|
||||||
|
# order / runStrategy gotchas, separate from the storage timing issue)
|
||||||
|
#
|
||||||
|
# Validation (2026-05-08, from rke2-server / rke2-agent1 / rke2-agent2):
|
||||||
|
# mount -t nfs -o nfsvers=4.1,ro 10.0.58.3:/volume1/ISOs/win2025-iso-disk /tmp/m
|
||||||
|
# file /tmp/m/disk.img
|
||||||
|
# -> ISO 9660 CD-ROM filesystem data 'SSS_X64FRE_EN-US_DV9' (bootable)
|
||||||
|
# All 3 RKE2 nodes can mount and read.
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolume
|
||||||
|
metadata:
|
||||||
|
name: windows-server-2025-iso-nfs
|
||||||
|
labels:
|
||||||
|
flowercore.io/iso: windows-server-2025
|
||||||
|
flowercore.io/managed-by: bluejay-infra
|
||||||
|
spec:
|
||||||
|
capacity:
|
||||||
|
storage: 8Gi
|
||||||
|
accessModes:
|
||||||
|
- ReadOnlyMany
|
||||||
|
volumeMode: Filesystem
|
||||||
|
persistentVolumeReclaimPolicy: Retain
|
||||||
|
storageClassName: "" # static, no provisioner
|
||||||
|
mountOptions:
|
||||||
|
- nfsvers=4.1
|
||||||
|
- ro
|
||||||
|
- hard
|
||||||
|
- timeo=600
|
||||||
|
- retrans=3
|
||||||
|
nfs:
|
||||||
|
server: 10.0.58.3 # BlueJayNAS Synology DS1621+ on HOME VLAN 58
|
||||||
|
path: /volume1/ISOs/win2025-iso-disk
|
||||||
|
readOnly: true
|
||||||
|
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: windows-server-2025-iso-nfs
|
||||||
|
namespace: kubevirt-vms
|
||||||
|
labels:
|
||||||
|
app: ci-runner
|
||||||
|
flowercore.io/managed-by: bluejay-infra
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadOnlyMany
|
||||||
|
volumeMode: Filesystem
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 8Gi
|
||||||
|
storageClassName: ""
|
||||||
|
volumeName: windows-server-2025-iso-nfs
|
||||||
@@ -207,20 +207,13 @@ spec:
|
|||||||
- port: 993
|
- port: 993
|
||||||
targetPort: 993
|
targetPort: 993
|
||||||
name: imaps
|
name: imaps
|
||||||
---
|
# --- mail-tls Certificate REMOVED 2026-06-01 ---
|
||||||
# TLS Certificate via cert-manager
|
# mail-tls is now managed OUTSIDE cert-manager: issued from step-ca's JWK 'admin'
|
||||||
apiVersion: cert-manager.io/v1
|
# provisioner and auto-renewed by a systemd timer on noc1 (step ca renew), which
|
||||||
kind: Certificate
|
# writes the mail-tls secret directly. step-ca-acme only has an HTTP-01 (Traefik)
|
||||||
metadata:
|
# solver, but mail.iamworkin.lan must resolve to the dedicated MetalLB IP 10.0.56.202
|
||||||
name: mail-tls
|
# (SMTP/IMAP), so HTTP-01 cannot validate. Do NOT re-add a cert-manager Certificate
|
||||||
namespace: mail
|
# here unless a DNS-01 solver is deployed for step-ca-acme.
|
||||||
spec:
|
|
||||||
secretName: mail-tls
|
|
||||||
issuerRef:
|
|
||||||
name: step-ca-acme
|
|
||||||
kind: ClusterIssuer
|
|
||||||
dnsNames:
|
|
||||||
- mail.iamworkin.lan
|
|
||||||
---
|
---
|
||||||
# Traefik IngressRoute - Webmail placeholder
|
# Traefik IngressRoute - Webmail placeholder
|
||||||
apiVersion: traefik.io/v1alpha1
|
apiVersion: traefik.io/v1alpha1
|
||||||
|
|||||||
762
apps/monitoring/fc-updatecenter-dashboard.grafana.txt
Normal file
762
apps/monitoring/fc-updatecenter-dashboard.grafana.txt
Normal file
@@ -0,0 +1,762 @@
|
|||||||
|
{
|
||||||
|
"annotations": {
|
||||||
|
"list": []
|
||||||
|
},
|
||||||
|
"editable": true,
|
||||||
|
"fiscalYearStartMonth": 0,
|
||||||
|
"graphTooltip": 1,
|
||||||
|
"id": null,
|
||||||
|
"links": [
|
||||||
|
{
|
||||||
|
"icon": "external link",
|
||||||
|
"includeVars": false,
|
||||||
|
"keepTime": false,
|
||||||
|
"targetBlank": true,
|
||||||
|
"title": "Open Service",
|
||||||
|
"type": "link",
|
||||||
|
"url": "https://updatecenter.iamworkin.lan/"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "thresholds"
|
||||||
|
},
|
||||||
|
"mappings": [
|
||||||
|
{
|
||||||
|
"options": {
|
||||||
|
"0": {
|
||||||
|
"color": "#f87171",
|
||||||
|
"index": 1,
|
||||||
|
"text": "DOWN"
|
||||||
|
},
|
||||||
|
"1": {
|
||||||
|
"color": "#4ade80",
|
||||||
|
"index": 0,
|
||||||
|
"text": "UP"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"type": "value"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{
|
||||||
|
"color": "#f87171",
|
||||||
|
"value": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"color": "#4ade80",
|
||||||
|
"value": 1
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 4,
|
||||||
|
"w": 8,
|
||||||
|
"x": 0,
|
||||||
|
"y": 0
|
||||||
|
},
|
||||||
|
"id": 1,
|
||||||
|
"options": {
|
||||||
|
"colorMode": "background",
|
||||||
|
"graphMode": "none",
|
||||||
|
"justifyMode": "center",
|
||||||
|
"orientation": "auto",
|
||||||
|
"reduceOptions": {
|
||||||
|
"calcs": [
|
||||||
|
"lastNotNull"
|
||||||
|
],
|
||||||
|
"fields": "",
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"textMode": "value_and_name"
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"expr": "probe_success{job=\"probe-traefik-services\",instance=\"updatecenter.iamworkin.lan\"}",
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "Availability"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Service Availability",
|
||||||
|
"transparent": true,
|
||||||
|
"type": "stat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "thresholds"
|
||||||
|
},
|
||||||
|
"decimals": 2,
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{
|
||||||
|
"color": "#f87171",
|
||||||
|
"value": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"color": "#fbbf24",
|
||||||
|
"value": 95
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"color": "#FFB300",
|
||||||
|
"value": 99
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"color": "#4ade80",
|
||||||
|
"value": 99.9
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"unit": "percent"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 4,
|
||||||
|
"w": 8,
|
||||||
|
"x": 8,
|
||||||
|
"y": 0
|
||||||
|
},
|
||||||
|
"id": 2,
|
||||||
|
"options": {
|
||||||
|
"colorMode": "background_solid",
|
||||||
|
"graphMode": "area",
|
||||||
|
"justifyMode": "center",
|
||||||
|
"orientation": "auto",
|
||||||
|
"reduceOptions": {
|
||||||
|
"calcs": [
|
||||||
|
"lastNotNull"
|
||||||
|
],
|
||||||
|
"fields": "",
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"textMode": "value_and_name"
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"expr": "avg_over_time(probe_success{job=\"probe-traefik-services\",instance=\"updatecenter.iamworkin.lan\"}[24h]) * 100",
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "24h Uptime"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "24-Hour Uptime",
|
||||||
|
"transparent": true,
|
||||||
|
"type": "stat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "thresholds"
|
||||||
|
},
|
||||||
|
"max": 30,
|
||||||
|
"min": 0,
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{
|
||||||
|
"color": "#f87171",
|
||||||
|
"value": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"color": "#fbbf24",
|
||||||
|
"value": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"color": "#4ade80",
|
||||||
|
"value": 7
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"unit": "d"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 4,
|
||||||
|
"w": 8,
|
||||||
|
"x": 16,
|
||||||
|
"y": 0
|
||||||
|
},
|
||||||
|
"id": 3,
|
||||||
|
"options": {
|
||||||
|
"minVizHeight": 75,
|
||||||
|
"minVizWidth": 75,
|
||||||
|
"orientation": "auto",
|
||||||
|
"reduceOptions": {
|
||||||
|
"calcs": [
|
||||||
|
"lastNotNull"
|
||||||
|
],
|
||||||
|
"fields": "",
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"showThresholdLabels": false,
|
||||||
|
"showThresholdMarkers": true
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"expr": "(probe_ssl_earliest_cert_expiry{job=\"probe-traefik-services\",instance=\"updatecenter.iamworkin.lan\"} - time()) / 86400",
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "Days Remaining"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Cert Expiry (Days)",
|
||||||
|
"transparent": true,
|
||||||
|
"type": "gauge"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"axisBorderShow": false,
|
||||||
|
"axisCenteredZero": false,
|
||||||
|
"axisColorMode": "text",
|
||||||
|
"axisLabel": "Response Time (seconds)",
|
||||||
|
"drawStyle": "line",
|
||||||
|
"fillOpacity": 12,
|
||||||
|
"gradientMode": "scheme",
|
||||||
|
"lineInterpolation": "smooth",
|
||||||
|
"lineWidth": 2,
|
||||||
|
"pointSize": 4,
|
||||||
|
"showPoints": "never",
|
||||||
|
"spanNulls": true,
|
||||||
|
"thresholdsStyle": {
|
||||||
|
"mode": "dashed"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{
|
||||||
|
"color": "#4ade80",
|
||||||
|
"value": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"color": "#fbbf24",
|
||||||
|
"value": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"color": "#f87171",
|
||||||
|
"value": 5
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"unit": "s"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 14,
|
||||||
|
"x": 0,
|
||||||
|
"y": 4
|
||||||
|
},
|
||||||
|
"id": 4,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"calcs": [
|
||||||
|
"lastNotNull",
|
||||||
|
"mean",
|
||||||
|
"max"
|
||||||
|
],
|
||||||
|
"displayMode": "table",
|
||||||
|
"placement": "right"
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "single",
|
||||||
|
"sort": "none"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"expr": "probe_duration_seconds{job=\"probe-traefik-services\",instance=\"updatecenter.iamworkin.lan\"}",
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "Probe Duration"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"timeFrom": "1h",
|
||||||
|
"title": "Response Time (1h Trend)",
|
||||||
|
"transparent": true,
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 10,
|
||||||
|
"x": 14,
|
||||||
|
"y": 4
|
||||||
|
},
|
||||||
|
"id": 5,
|
||||||
|
"options": {
|
||||||
|
"alertInstanceLabelFilter": "{instance=\"updatecenter.iamworkin.lan\"}",
|
||||||
|
"alertName": "",
|
||||||
|
"dashboardAlerts": false,
|
||||||
|
"groupBy": [],
|
||||||
|
"groupMode": "default",
|
||||||
|
"maxItems": 10,
|
||||||
|
"sortOrder": 1,
|
||||||
|
"stateFilter": {
|
||||||
|
"error": true,
|
||||||
|
"firing": true,
|
||||||
|
"noData": true,
|
||||||
|
"normal": false,
|
||||||
|
"pending": true
|
||||||
|
},
|
||||||
|
"viewMode": "list"
|
||||||
|
},
|
||||||
|
"title": "Active Alerts",
|
||||||
|
"type": "alertlist"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapsed": false,
|
||||||
|
"gridPos": {
|
||||||
|
"h": 1,
|
||||||
|
"w": 24,
|
||||||
|
"x": 0,
|
||||||
|
"y": 12
|
||||||
|
},
|
||||||
|
"id": 20,
|
||||||
|
"title": "OTEL Counters — Track 1D",
|
||||||
|
"type": "row"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"lineWidth": 1,
|
||||||
|
"fillOpacity": 10
|
||||||
|
},
|
||||||
|
"unit": "reqps"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 0,
|
||||||
|
"y": 13
|
||||||
|
},
|
||||||
|
"id": 21,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"displayMode": "table",
|
||||||
|
"placement": "right",
|
||||||
|
"calcs": ["mean", "lastNotNull"]
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "multi",
|
||||||
|
"sort": "desc"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"expr": "sum by (status) (rate(updatecenter_manifest_requests_total[5m]))",
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "status={{status}}"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Manifest Requests rate by status (5m)",
|
||||||
|
"transparent": true,
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"lineWidth": 1,
|
||||||
|
"fillOpacity": 10
|
||||||
|
},
|
||||||
|
"unit": "Bps"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 12,
|
||||||
|
"y": 13
|
||||||
|
},
|
||||||
|
"id": 22,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"displayMode": "table",
|
||||||
|
"placement": "right",
|
||||||
|
"calcs": ["mean", "lastNotNull"]
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "multi",
|
||||||
|
"sort": "desc"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"expr": "sum by (slug) (rate(updatecenter_bundle_download_bytes_total[5m]))",
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "{{slug}}"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Bundle Download Throughput by slug (5m)",
|
||||||
|
"transparent": true,
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"lineWidth": 1,
|
||||||
|
"fillOpacity": 10
|
||||||
|
},
|
||||||
|
"unit": "reqps"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 0,
|
||||||
|
"y": 21
|
||||||
|
},
|
||||||
|
"id": 23,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"displayMode": "table",
|
||||||
|
"placement": "right",
|
||||||
|
"calcs": ["mean", "lastNotNull"]
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "multi",
|
||||||
|
"sort": "desc"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"expr": "sum by (status) (rate(updatecenter_checkins_total[5m]))",
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "status={{status}}"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Agent Check-in Rate by status (5m)",
|
||||||
|
"transparent": true,
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "thresholds"
|
||||||
|
},
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{ "color": "#4ade80", "value": null },
|
||||||
|
{ "color": "#f87171", "value": 1 }
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"unit": "none",
|
||||||
|
"decimals": 2
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 6,
|
||||||
|
"x": 12,
|
||||||
|
"y": 21
|
||||||
|
},
|
||||||
|
"id": 24,
|
||||||
|
"options": {
|
||||||
|
"colorMode": "background",
|
||||||
|
"graphMode": "area",
|
||||||
|
"justifyMode": "center",
|
||||||
|
"orientation": "auto",
|
||||||
|
"reduceOptions": {
|
||||||
|
"calcs": ["sum"],
|
||||||
|
"fields": "",
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"textMode": "value_and_name"
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"expr": "increase(updatecenter_signature_verify_failures_total[1h])",
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "Sig Verify Failures (1h)"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Signature Verify Failures (1h)",
|
||||||
|
"transparent": true,
|
||||||
|
"type": "stat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"lineWidth": 1,
|
||||||
|
"fillOpacity": 10
|
||||||
|
},
|
||||||
|
"unit": "reqps"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 6,
|
||||||
|
"x": 18,
|
||||||
|
"y": 21
|
||||||
|
},
|
||||||
|
"id": 25,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"displayMode": "table",
|
||||||
|
"placement": "right",
|
||||||
|
"calcs": ["mean", "lastNotNull"]
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "multi",
|
||||||
|
"sort": "desc"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"expr": "sum by (slug, channel) (rate(updatecenter_release_publishes_total[5m]))",
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "{{slug}}/{{channel}}"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Release Publishes rate by slug/channel (5m)",
|
||||||
|
"transparent": true,
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"lineWidth": 1,
|
||||||
|
"fillOpacity": 10
|
||||||
|
},
|
||||||
|
"unit": "reqps"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 0,
|
||||||
|
"y": 29
|
||||||
|
},
|
||||||
|
"id": 26,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"displayMode": "table",
|
||||||
|
"placement": "right",
|
||||||
|
"calcs": ["mean", "lastNotNull"]
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "multi",
|
||||||
|
"sort": "desc"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"expr": "sum by (kind, status) (rate(updatecenter_bundle_downloads_total[5m]))",
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "{{kind}} / {{status}}"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Bundle Download Requests by kind/status (5m)",
|
||||||
|
"transparent": true,
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"lineWidth": 2,
|
||||||
|
"fillOpacity": 20
|
||||||
|
},
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{ "color": "#4ade80", "value": null },
|
||||||
|
{ "color": "#f87171", "value": 0.01 }
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"unit": "reqps"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 12,
|
||||||
|
"y": 29
|
||||||
|
},
|
||||||
|
"id": 27,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"displayMode": "table",
|
||||||
|
"placement": "right",
|
||||||
|
"calcs": ["mean", "lastNotNull"]
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "multi",
|
||||||
|
"sort": "desc"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"expr": "rate(updatecenter_signature_verify_failures_total[5m])",
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "Sig verify failures/s"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Signature Verify Failure Rate (5m) — Critical if >0",
|
||||||
|
"transparent": true,
|
||||||
|
"type": "timeseries"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"refresh": "30s",
|
||||||
|
"schemaVersion": 39,
|
||||||
|
"style": "dark",
|
||||||
|
"tags": [
|
||||||
|
"blue-jay",
|
||||||
|
"flowercore",
|
||||||
|
"synthetic",
|
||||||
|
"updatecenter",
|
||||||
|
"otel"
|
||||||
|
],
|
||||||
|
"templating": {
|
||||||
|
"list": []
|
||||||
|
},
|
||||||
|
"time": {
|
||||||
|
"from": "now-24h",
|
||||||
|
"to": "now"
|
||||||
|
},
|
||||||
|
"timezone": "browser",
|
||||||
|
"title": "FlowerCore.UpdateCenter Dashboard",
|
||||||
|
"uid": "fc-updatecenter",
|
||||||
|
"version": 2
|
||||||
|
}
|
||||||
@@ -0,0 +1,226 @@
|
|||||||
|
{
|
||||||
|
"annotations": {
|
||||||
|
"list": []
|
||||||
|
},
|
||||||
|
"editable": true,
|
||||||
|
"fiscalYearStartMonth": 0,
|
||||||
|
"graphTooltip": 0,
|
||||||
|
"id": null,
|
||||||
|
"links": [],
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "${DS_PROMETHEUS}"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"unit": "short"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 0,
|
||||||
|
"y": 0
|
||||||
|
},
|
||||||
|
"id": 1,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"displayMode": "table",
|
||||||
|
"placement": "bottom"
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "single"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"editorMode": "code",
|
||||||
|
"expr": "sum by (event) (increase(fc_desktop_session_events_total[$__rate_interval]))",
|
||||||
|
"legendFormat": "{{event}}",
|
||||||
|
"range": true,
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "RemoteDesktop Session Events",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "${DS_PROMETHEUS}"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"unit": "short"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 12,
|
||||||
|
"y": 0
|
||||||
|
},
|
||||||
|
"id": 2,
|
||||||
|
"options": {
|
||||||
|
"orientation": "auto",
|
||||||
|
"reduceOptions": {
|
||||||
|
"calcs": [
|
||||||
|
"lastNotNull"
|
||||||
|
],
|
||||||
|
"fields": "",
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"showUnfilled": true
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"editorMode": "code",
|
||||||
|
"expr": "sum by (template, event) (increase(fc_desktop_session_events_total[24h]))",
|
||||||
|
"legendFormat": "{{template}} {{event}}",
|
||||||
|
"range": true,
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "24h Session Events By Template",
|
||||||
|
"type": "bargauge"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "${DS_PROMETHEUS}"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"unit": "short"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 0,
|
||||||
|
"y": 8
|
||||||
|
},
|
||||||
|
"id": 3,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"displayMode": "table",
|
||||||
|
"placement": "bottom"
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "single"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"editorMode": "code",
|
||||||
|
"expr": "fc_desktop_pool_ready",
|
||||||
|
"legendFormat": "{{template}} ready",
|
||||||
|
"range": true,
|
||||||
|
"refId": "A"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"editorMode": "code",
|
||||||
|
"expr": "fc_desktop_pool_desired",
|
||||||
|
"legendFormat": "{{template}} desired",
|
||||||
|
"range": true,
|
||||||
|
"refId": "B"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Warm Pool Ready vs Desired",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "${DS_PROMETHEUS}"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"mappings": [],
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{
|
||||||
|
"color": "green",
|
||||||
|
"value": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"color": "orange",
|
||||||
|
"value": 1
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"unit": "short"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 12,
|
||||||
|
"y": 8
|
||||||
|
},
|
||||||
|
"id": 4,
|
||||||
|
"options": {
|
||||||
|
"colorMode": "value",
|
||||||
|
"graphMode": "none",
|
||||||
|
"justifyMode": "auto",
|
||||||
|
"orientation": "auto",
|
||||||
|
"reduceOptions": {
|
||||||
|
"calcs": [
|
||||||
|
"lastNotNull"
|
||||||
|
],
|
||||||
|
"fields": "",
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"textMode": "auto"
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"editorMode": "code",
|
||||||
|
"expr": "sum(increase(fc_desktop_session_events_total{event=\"connect\",browser_datasource=\"json\"}[24h])) - sum(increase(fc_desktop_session_events_total{event=\"disconnect\"}[24h]))",
|
||||||
|
"range": true,
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "24h Connect Minus Disconnect",
|
||||||
|
"type": "stat"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"refresh": "30s",
|
||||||
|
"schemaVersion": 39,
|
||||||
|
"style": "dark",
|
||||||
|
"tags": [
|
||||||
|
"flowercore",
|
||||||
|
"remotedesktop",
|
||||||
|
"guacamole"
|
||||||
|
],
|
||||||
|
"templating": {
|
||||||
|
"list": []
|
||||||
|
},
|
||||||
|
"time": {
|
||||||
|
"from": "now-24h",
|
||||||
|
"to": "now"
|
||||||
|
},
|
||||||
|
"timezone": "browser",
|
||||||
|
"title": "FlowerCore RemoteDesktop",
|
||||||
|
"uid": "flowercore-remotedesktop",
|
||||||
|
"version": 1
|
||||||
|
}
|
||||||
@@ -75,6 +75,20 @@ data:
|
|||||||
cluster: "rke2"
|
cluster: "rke2"
|
||||||
role: "agent"
|
role: "agent"
|
||||||
|
|
||||||
|
# Mac mini macOS runner node (INFRA VLAN)
|
||||||
|
- job_name: "macmini-node"
|
||||||
|
scrape_timeout: 15s
|
||||||
|
static_configs:
|
||||||
|
- targets: ["10.0.56.115:9100"]
|
||||||
|
labels:
|
||||||
|
instance: "macmini"
|
||||||
|
host: "macmini.iamworkin.lan"
|
||||||
|
vlan: "infra"
|
||||||
|
arch: "arm64"
|
||||||
|
role: "macos-runner"
|
||||||
|
puppet_managed: "true"
|
||||||
|
puppet_server: "puppet.iamworkin.lan"
|
||||||
|
|
||||||
# In-cluster node-exporter DaemonSet
|
# In-cluster node-exporter DaemonSet
|
||||||
- job_name: "k8s-node-exporter"
|
- job_name: "k8s-node-exporter"
|
||||||
kubernetes_sd_configs:
|
kubernetes_sd_configs:
|
||||||
@@ -266,13 +280,14 @@ data:
|
|||||||
printer_model: "NuPrint 210"
|
printer_model: "NuPrint 210"
|
||||||
|
|
||||||
# Print.Web health (Blazor app on edge2:5200)
|
# Print.Web health (Blazor app on edge2:5200)
|
||||||
|
# Target `/health` (anonymous) — root path requires API key auth and returns 401.
|
||||||
- job_name: "probe-printweb"
|
- job_name: "probe-printweb"
|
||||||
metrics_path: /probe
|
metrics_path: /probe
|
||||||
params:
|
params:
|
||||||
module: [http_2xx]
|
module: [http_2xx]
|
||||||
scrape_interval: 30s
|
scrape_interval: 30s
|
||||||
static_configs:
|
static_configs:
|
||||||
- targets: ["http://10.0.57.16:5200/"]
|
- targets: ["http://10.0.57.16:5200/health"]
|
||||||
labels:
|
labels:
|
||||||
instance: "print-web"
|
instance: "print-web"
|
||||||
service: "print-web"
|
service: "print-web"
|
||||||
@@ -464,11 +479,11 @@ data:
|
|||||||
- "https://gitea.iamworkin.lan/"
|
- "https://gitea.iamworkin.lan/"
|
||||||
- "https://argocd.iamworkin.lan/"
|
- "https://argocd.iamworkin.lan/"
|
||||||
- "https://intranet.iamworkin.lan/"
|
- "https://intranet.iamworkin.lan/"
|
||||||
- "https://signage.iamworkin.lan/"
|
- "https://signage.iamworkin.lan/healthz" # root 401 auth-gated 2026-06-01; /healthz anon 200
|
||||||
- "https://kiosk.iamworkin.lan/"
|
- "https://kiosk.iamworkin.lan/"
|
||||||
- "https://media.iamworkin.lan/"
|
- "https://media.iamworkin.lan/"
|
||||||
- "https://mysql.iamworkin.lan/"
|
- "https://mysql.iamworkin.lan/healthz" # root 401 auth-gated 2026-06-01; /healthz anon 200
|
||||||
- "https://php.iamworkin.lan/"
|
- "https://php.iamworkin.lan/healthz" # root 401 auth-gated 2026-06-01; /healthz anon 200
|
||||||
- "https://zabbix.iamworkin.lan/"
|
- "https://zabbix.iamworkin.lan/"
|
||||||
- "https://desktop.iamworkin.lan/"
|
- "https://desktop.iamworkin.lan/"
|
||||||
- "https://print.iamworkin.lan/"
|
- "https://print.iamworkin.lan/"
|
||||||
@@ -697,6 +712,36 @@ data:
|
|||||||
summary: "Print.Web Ollama runner held for >10m ({{ $labels.model }})"
|
summary: "Print.Web Ollama runner held for >10m ({{ $labels.model }})"
|
||||||
description: "Print.Web reports model {{ $labels.model }} with {{ $value | printf \"%.0f\" }}s of keep-alive remaining. Check concurrent requests before the Pi 5 Ollama lane thrashes."
|
description: "Print.Web reports model {{ $labels.model }} with {{ $value | printf \"%.0f\" }}s of keep-alive remaining. Check concurrent requests before the Pi 5 Ollama lane thrashes."
|
||||||
|
|
||||||
|
- name: macmini-runners
|
||||||
|
rules:
|
||||||
|
- alert: MacMiniRunnerOffline
|
||||||
|
expr: (flowercore_github_runner_online{runner=~"macmini-.*"} == 0) or absent(flowercore_github_runner_online{runner=~"macmini-.*"})
|
||||||
|
for: 10m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
service: github-runner
|
||||||
|
annotations:
|
||||||
|
summary: "Mac mini GitHub runner offline ({{ $labels.runner }})"
|
||||||
|
description: "A macmini-* GitHub Actions runner has not reported online for more than 10 minutes. Puppet manages its LaunchDaemon under /Library/LaunchDaemons/io.flowercore.github-runner-<slug>.plist; runners survive reboot and do not require a GUI session."
|
||||||
|
|
||||||
|
- name: linux-runners
|
||||||
|
rules:
|
||||||
|
- alert: LinuxRunnerOffline
|
||||||
|
expr: |
|
||||||
|
kube_deployment_status_replicas_ready{
|
||||||
|
namespace="github-runner",
|
||||||
|
deployment=~"github-runner(|-(sharedpos|puppet|signage|dms|telephony|print-web|chat|mysql|kiosk-linux))"
|
||||||
|
} == 0
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
alert_channel: irc
|
||||||
|
service: github-runner
|
||||||
|
team: ci
|
||||||
|
annotations:
|
||||||
|
summary: "Linux CI runner offline: {{ $labels.deployment }}"
|
||||||
|
description: "Deployment {{ $labels.deployment }} in namespace github-runner has 0 ready replicas for more than 5 minutes. CI jobs targeting this repo will queue until the runner pod restarts and re-registers with GitHub. Check pods with: kubectl -n github-runner get pods -l app.kubernetes.io/name={{ $labels.deployment }}. Check logs with: kubectl -n github-runner logs -l app.kubernetes.io/name={{ $labels.deployment }} --tail=50. Common causes: PAT missing repo access, runner CrashLoopBackOff, or node/resource pressure."
|
||||||
|
|
||||||
- name: remote-desktop
|
- name: remote-desktop
|
||||||
rules:
|
rules:
|
||||||
- alert: RemoteDesktopWebDown
|
- alert: RemoteDesktopWebDown
|
||||||
@@ -922,6 +967,52 @@ data:
|
|||||||
annotations:
|
annotations:
|
||||||
summary: "Disk usage high on {{ $labels.instance }} ({{ $value | printf \"%.1f\" }}%)"
|
summary: "Disk usage high on {{ $labels.instance }} ({{ $value | printf \"%.1f\" }}%)"
|
||||||
|
|
||||||
|
# Puppet agent + service alerts.
|
||||||
|
# Mirror of FlowerCore.Notes/scripts/monitoring/alerts.yml `puppet` group
|
||||||
|
# so a future migration to in-cluster Prometheus inherits the ruleset.
|
||||||
|
# Source-of-truth for the live Podman Prometheus on noc1 is the Notes file.
|
||||||
|
# See feedback_monitoring_k8s_target_vs_live_podman.
|
||||||
|
- name: puppet
|
||||||
|
rules:
|
||||||
|
- alert: PuppetAgentReportStale
|
||||||
|
expr: puppet_last_run_age_seconds > 7200
|
||||||
|
for: 30m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
alert_channel: irc
|
||||||
|
annotations:
|
||||||
|
summary: "Puppet agent {{ $labels.instance }} hasn't reported in over 2h"
|
||||||
|
description: "Last run age: {{ $value | humanizeDuration }}. The puppet agent on {{ $labels.instance }} may be stopped, the node may be powered off, or noc1 may be unreachable from this node."
|
||||||
|
runbook: "1. SSH to node (via noc1 jumpbox if needed) 2. sudo systemctl status puppet 3. sudo puppet agent -t --noop to force a dry run (no changes applied) 4. Check r10k: ssh fcadmin@10.0.56.10 'sudo podman logs openvoxserver --tail 50' 5. Verify noc1 reachability: ping puppet.iamworkin.lan"
|
||||||
|
|
||||||
|
- alert: PuppetAgentReportCritical
|
||||||
|
expr: puppet_last_run_age_seconds > 86400
|
||||||
|
for: 1h
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
alert_channel: irc
|
||||||
|
annotations:
|
||||||
|
summary: "Puppet agent {{ $labels.instance }} silent for over 24h — node is unmanaged"
|
||||||
|
description: "Last run age: {{ $value | humanizeDuration }}. Node {{ $labels.instance }} has not submitted a Puppet report in over 24 hours. Config drift is accumulating — investigate immediately. If intentional (maintenance), add to the exclusion filter or silence in Grafana."
|
||||||
|
runbook: "URGENT: 1. Check node power state 2. SSH via noc1 jumpbox: ssh fcadmin@10.0.56.10 then ssh <node> 3. sudo systemctl status puppet 4. sudo systemctl start puppet + sudo puppet agent -t 5. Check for network partitions (VLAN connectivity to 10.0.56.10) 6. If node was recently reimaged: sudo puppet agent -t to re-register with new SSL cert"
|
||||||
|
|
||||||
|
# Sprint 33 Cx-7 Phase B (2026-05-25 postmortem follow-up):
|
||||||
|
# Detects puppet.service in failed state — distinct from PuppetAgentReportStale
|
||||||
|
# which catches "agent hasn't run." This catches "systemd gave up restarting it"
|
||||||
|
# (CA-verify loop or other fatal exit). Requires node-exporter systemd collector
|
||||||
|
# enabled with --collector.systemd. If `node_systemd_unit_state` has no series
|
||||||
|
# for a node, the collector is disabled there — flag in postmortem follow-up.
|
||||||
|
- alert: PuppetServiceFailed
|
||||||
|
expr: node_systemd_unit_state{name="puppet.service",state="failed"} == 1
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
alert_channel: irc
|
||||||
|
annotations:
|
||||||
|
summary: "Puppet service failed on {{ $labels.instance }}"
|
||||||
|
description: "puppet.service on {{ $labels.instance }} has been in failed state for 5+ minutes. systemd has stopped auto-restarting (CA-verify-loop or other exit). Confirm manually with `systemctl status puppet`. Run `sudo systemctl start puppet` to recover; investigate journal for root cause."
|
||||||
|
runbook_url: "https://github.com/astoltz/FlowerCore.Notes/blob/master/memory/feedback_puppet_service_dead_after_ca_loop_alert_misreads.md"
|
||||||
|
|
||||||
# K8s pod-state alerts. Require kube-state-metrics scrape (added
|
# K8s pod-state alerts. Require kube-state-metrics scrape (added
|
||||||
# 2026-04-26 — see scrape_configs above). Would have surfaced the
|
# 2026-04-26 — see scrape_configs above). Would have surfaced the
|
||||||
# agent-zero ollama-proxy 172x crash-loop instead of letting it
|
# agent-zero ollama-proxy 172x crash-loop instead of letting it
|
||||||
@@ -974,6 +1065,39 @@ data:
|
|||||||
summary: "Deployment {{ $labels.namespace }}/{{ $labels.deployment }} replica mismatch"
|
summary: "Deployment {{ $labels.namespace }}/{{ $labels.deployment }} replica mismatch"
|
||||||
description: "Spec wants {{ $labels.spec_replicas }} but only {{ $value }} available. Likely a rollout stuck on probe failure, scheduling, or PVC."
|
description: "Spec wants {{ $labels.spec_replicas }} but only {{ $value }} available. Likely a rollout stuck on probe failure, scheduling, or PVC."
|
||||||
|
|
||||||
|
# Q-MR-3 (2026-05-11): multus memory pressure — catches the next OOM
|
||||||
|
# cascade BEFORE multus is OOM-killed cluster-wide. The 2026-05-10
|
||||||
|
# outage (21h) hit because no alert fired on the rising multus working
|
||||||
|
# set — only downstream blackbox / Traefik / service alerts. With
|
||||||
|
# 1Gi limit (bluejay-infra@eb8693e), 80% = ~800MiB; steady-state
|
||||||
|
# runs ~150-250MiB so this only fires when an avalanche starts.
|
||||||
|
- alert: MultusMemoryPressure
|
||||||
|
expr: |
|
||||||
|
container_memory_working_set_bytes{container="kube-multus"}
|
||||||
|
/ container_spec_memory_limit_bytes{container="kube-multus"} > 0.8
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
alert_channel: thermal_print
|
||||||
|
annotations:
|
||||||
|
summary: "kube-multus memory >80% of limit on {{ $labels.node }} for 5m"
|
||||||
|
description: "kube-multus working set is {{ $value | humanizePercentage }} of its memory limit on node {{ $labels.node }}. If this keeps climbing, multus will OOM and all new pod networking will halt cluster-wide (precedent: 2026-05-10 outage)."
|
||||||
|
|
||||||
|
# Q-MR-3 (2026-05-11): namespace pending-pod backlog — catches the
|
||||||
|
# operator-leak avalanche pattern BEFORE it cascades into a multus
|
||||||
|
# CNI OOM. Any FC operator (RemoteDesktop / Distribution / WorldBuilder)
|
||||||
|
# emitting pods without ownerReferences will accumulate them when
|
||||||
|
# the operator crashes. >25 pending pods in any namespace for 30m
|
||||||
|
# is the signal to investigate the reconciler.
|
||||||
|
- alert: NamespacePendingPodBacklog
|
||||||
|
expr: sum by (namespace) (kube_pod_status_phase{phase="Pending"}) > 25
|
||||||
|
for: 30m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: "Namespace {{ $labels.namespace }} has {{ $value }} Pending pods for 30m"
|
||||||
|
description: "Pending pod count in {{ $labels.namespace }} exceeds 25 sustained for 30m. Likely operator-leak avalanche pattern — children emitted without ownerReferences. Risk of multus CNI OOM cascade."
|
||||||
|
|
||||||
# Longhorn storage health alerts. Required: longhorn scrape job
|
# Longhorn storage health alerts. Required: longhorn scrape job
|
||||||
# (added 2026-04-26 — see scrape_configs above). The K8s events
|
# (added 2026-04-26 — see scrape_configs above). The K8s events
|
||||||
# for "snapshot becomes not ready to use" are transient lifecycle
|
# for "snapshot becomes not ready to use" are transient lifecycle
|
||||||
@@ -1024,6 +1148,72 @@ data:
|
|||||||
summary: "Longhorn node {{ $labels.node }} not Ready"
|
summary: "Longhorn node {{ $labels.node }} not Ready"
|
||||||
description: "Node {{ $labels.node }} reports ready=false (reason: {{ $labels.condition_reason }}). Volumes scheduled to this node will be unavailable until it recovers."
|
description: "Node {{ $labels.node }} reports ready=false (reason: {{ $labels.condition_reason }}). Volumes scheduled to this node will be unavailable until it recovers."
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# FC Signage Marquee Performance — Track 3 + 8 (2026-05-06)
|
||||||
|
# Live-mirrored from FlowerCore.Notes/scripts/monitoring/alerts.yml.
|
||||||
|
# Source-of-truth for the live Podman Prometheus on noc1 is the
|
||||||
|
# Notes file; this K8s ConfigMap exists so a future migration to
|
||||||
|
# in-cluster Prometheus inherits the ruleset automatically.
|
||||||
|
# See feedback_monitoring_k8s_target_vs_live_podman.
|
||||||
|
# ============================================================
|
||||||
|
- name: fc-signage-marquee
|
||||||
|
rules:
|
||||||
|
- alert: MarqueeDroppedFramesHigh
|
||||||
|
expr: |
|
||||||
|
(
|
||||||
|
sum by (renderer, phase, node_id) (rate(marquee_dropped_frames_total[5m]))
|
||||||
|
/
|
||||||
|
sum by (renderer, phase, node_id) (rate(marquee_render_latency_ms_count[5m]))
|
||||||
|
) > 0.05
|
||||||
|
unless on()
|
||||||
|
absent_over_time(marquee_dropped_frames_total[7d])
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
service: signage
|
||||||
|
alert_channel: irc
|
||||||
|
annotations:
|
||||||
|
summary: "Marquee dropped-frame rate >5% on {{ $labels.renderer }}/{{ $labels.node_id }} ({{ $labels.phase }})"
|
||||||
|
description: "Renderer {{ $labels.renderer }} on {{ $labels.node_id }} drops >5% of frames during {{ $labels.phase }}. Animation visibly stuttery."
|
||||||
|
|
||||||
|
- alert: MarqueeRenderLatencyP99High
|
||||||
|
expr: |
|
||||||
|
histogram_quantile(
|
||||||
|
0.99,
|
||||||
|
sum by (renderer, phase, node_id, le) (rate(marquee_render_latency_ms_bucket[5m]))
|
||||||
|
) > 16
|
||||||
|
unless on()
|
||||||
|
absent_over_time(marquee_render_latency_ms_bucket[7d])
|
||||||
|
for: 10m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
service: signage
|
||||||
|
alert_channel: irc
|
||||||
|
annotations:
|
||||||
|
summary: "Marquee render latency p99 > 16ms on {{ $labels.renderer }}/{{ $labels.node_id }} ({{ $labels.phase }})"
|
||||||
|
description: "Per-frame render latency p99 has exceeded the Pi-class 16ms budget for 10 minutes."
|
||||||
|
|
||||||
|
- alert: MarqueeAnimationDurationDrift
|
||||||
|
expr: |
|
||||||
|
abs(
|
||||||
|
histogram_quantile(0.5, sum by (renderer, phase, le) (rate(marquee_animation_duration_ms_bucket[15m])))
|
||||||
|
-
|
||||||
|
on (phase) group_left() avg by (phase) (marquee_animation_duration_target_ms)
|
||||||
|
)
|
||||||
|
/
|
||||||
|
on (phase) group_left() avg by (phase) (marquee_animation_duration_target_ms)
|
||||||
|
> 0.10
|
||||||
|
unless on()
|
||||||
|
absent_over_time(marquee_animation_duration_ms_bucket[7d])
|
||||||
|
for: 15m
|
||||||
|
labels:
|
||||||
|
severity: info
|
||||||
|
service: signage
|
||||||
|
alert_channel: irc
|
||||||
|
annotations:
|
||||||
|
summary: "Marquee animation duration drifting > 10% on {{ $labels.renderer }} ({{ $labels.phase }})"
|
||||||
|
description: "Median observed cycle duration deviates from target DurationMs by >10%. Could indicate browser tab throttling, GPU pressure, or phase-advancement bug."
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# ConfigMap: Blackbox Exporter Configuration
|
# ConfigMap: Blackbox Exporter Configuration
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
@@ -1084,24 +1274,55 @@ metadata:
|
|||||||
data:
|
data:
|
||||||
notify.py: |
|
notify.py: |
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""HTTP->IRC alert relay with thermal printer forwarding for Grafana webhooks.
|
"""HTTP->IRC alert relay with thermal-printer DIGEST forwarding.
|
||||||
Listens on :9119, posts to #alerts on UnrealIRCd via raw IRC protocol.
|
|
||||||
Alerts tagged alert_channel=thermal_print also POST to Print.Web /api/print/alert.
|
Listens on :9119, posts to #alerts on UnrealIRCd, forwards to Print.Web
|
||||||
|
/api/print/alert. Thermal printing is BATCHED into hourly digests by
|
||||||
|
default so the printer no longer spam-fires per Grafana webhook.
|
||||||
|
|
||||||
|
Routing (per Grafana webhook alert):
|
||||||
|
- IRC: always per-event (operator likes the stream)
|
||||||
|
- Thermal printer:
|
||||||
|
* label alert_channel=thermal_print_immediate -> flush digest NOW
|
||||||
|
* severity in {critical,disaster,page} OR
|
||||||
|
label alert_channel=thermal_print -> enqueue into hourly digest
|
||||||
|
* everything else -> IRC only
|
||||||
|
- RESOLVED webhooks remove the alert from the digest buffer
|
||||||
|
|
||||||
|
Env vars (defaults preserve old behavior on first deploy):
|
||||||
|
THERMAL_PRINT_ENABLED default "true" - master kill switch
|
||||||
|
BATCH_INTERVAL_MIN default "60" - minutes between digest prints
|
||||||
|
BATCH_MAX_PENDING default "50" - force-flush threshold
|
||||||
|
|
||||||
|
HTTP surface:
|
||||||
|
POST / - Grafana webhook entry
|
||||||
|
POST /flush - manual digest flush (idempotent)
|
||||||
|
GET / - status + config + buffer depth + stats
|
||||||
"""
|
"""
|
||||||
import json, socket, sys, time
|
import json, os, socket, sys, threading, time
|
||||||
|
from collections import defaultdict
|
||||||
|
from datetime import datetime, timezone
|
||||||
from http.server import HTTPServer, BaseHTTPRequestHandler
|
from http.server import HTTPServer, BaseHTTPRequestHandler
|
||||||
from urllib.request import Request, urlopen
|
from urllib.request import Request, urlopen
|
||||||
from urllib.error import URLError
|
|
||||||
|
|
||||||
IRC_HOST = "unrealircd.irc.svc" # short name: CoreDNS ndots:5 + iamworkin.lan template hijacks full .cluster.local (see memory)
|
THERMAL_PRINT_ENABLED = os.environ.get("THERMAL_PRINT_ENABLED", "true").lower() == "true"
|
||||||
IRC_PORT = 6667
|
BATCH_INTERVAL_MIN = int(os.environ.get("BATCH_INTERVAL_MIN", "60"))
|
||||||
IRC_NICK = "grafana-bot"
|
BATCH_MAX_PENDING = int(os.environ.get("BATCH_MAX_PENDING", "50"))
|
||||||
IRC_CHANNEL = "#alerts"
|
|
||||||
PRINT_WEB_URL = "http://10.0.57.16:5200/api/print/alert"
|
IRC_HOST = os.environ.get("IRC_HOST", "unrealircd.irc.svc")
|
||||||
PRINT_ENABLED = True
|
IRC_PORT = int(os.environ.get("IRC_PORT", "6667"))
|
||||||
|
IRC_NICK = os.environ.get("IRC_NICK", "grafana-bot")
|
||||||
|
IRC_CHANNEL = os.environ.get("IRC_CHANNEL", "#alerts")
|
||||||
|
PRINT_WEB_URL = os.environ.get("PRINT_WEB_URL", "http://10.0.57.16:5200/api/print/alert")
|
||||||
|
|
||||||
|
_buffer_lock = threading.Lock()
|
||||||
|
_buffer = {} # fingerprint -> {"alert": dict, "first_seen": float, "last_seen": float}
|
||||||
|
_last_flush_time = time.time()
|
||||||
|
_stats = {"webhooks_received": 0, "irc_sent": 0, "print_immediate": 0,
|
||||||
|
"digest_flushed": 0, "buffer_dedup": 0, "buffer_added": 0,
|
||||||
|
"buffer_resolved": 0, "started_at": time.time()}
|
||||||
|
|
||||||
def send_irc(message):
|
def send_irc(message):
|
||||||
"""Connect, handle PING, join, send, quit."""
|
|
||||||
try:
|
try:
|
||||||
sock = socket.create_connection((IRC_HOST, IRC_PORT), timeout=15)
|
sock = socket.create_connection((IRC_HOST, IRC_PORT), timeout=15)
|
||||||
sock.sendall(f"NICK {IRC_NICK}\r\n".encode())
|
sock.sendall(f"NICK {IRC_NICK}\r\n".encode())
|
||||||
@@ -1134,52 +1355,137 @@ data:
|
|||||||
time.sleep(0.5)
|
time.sleep(0.5)
|
||||||
sock.sendall(b"QUIT :alert delivered\r\n")
|
sock.sendall(b"QUIT :alert delivered\r\n")
|
||||||
sock.close()
|
sock.close()
|
||||||
|
_stats["irc_sent"] += 1
|
||||||
return True
|
return True
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[irc-notify] IRC send failed: {e}", file=sys.stderr)
|
print(f"[irc-notify] IRC send failed: {e}", file=sys.stderr)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def send_thermal_print(alert):
|
def post_thermal(payload, kind):
|
||||||
if not PRINT_ENABLED: return
|
if not THERMAL_PRINT_ENABLED:
|
||||||
labels = alert.get("labels", {})
|
print(f"[irc-notify] thermal disabled; skip {kind} ({payload.get('title','?')[:40]})", file=sys.stderr)
|
||||||
annotations = alert.get("annotations", {})
|
return False
|
||||||
status = alert.get("status", "firing").upper()
|
|
||||||
summary = annotations.get("summary", "")
|
|
||||||
description = annotations.get("description", "")
|
|
||||||
runbook = annotations.get("runbook", "")
|
|
||||||
# Build a useful message: summary + description + runbook steps
|
|
||||||
parts = []
|
|
||||||
if summary: parts.append(summary)
|
|
||||||
if description and description != summary: parts.append(description)
|
|
||||||
if runbook: parts.append("STEPS: " + runbook)
|
|
||||||
message = " | ".join(parts) if parts else labels.get("alertname", "Unknown alert")
|
|
||||||
payload = {
|
|
||||||
"title": labels.get("alertname", "Unknown"),
|
|
||||||
"severity": labels.get("severity", "warning").capitalize(),
|
|
||||||
"host": labels.get("instance", labels.get("host", "unknown")),
|
|
||||||
"message": message,
|
|
||||||
"eventId": alert.get("fingerprint", ""),
|
|
||||||
"source": "Grafana",
|
|
||||||
"status": "RESOLVED" if status == "RESOLVED" else "PROBLEM",
|
|
||||||
"acknowledged": False
|
|
||||||
}
|
|
||||||
try:
|
try:
|
||||||
req = Request(PRINT_WEB_URL, data=json.dumps(payload).encode("utf-8"),
|
req = Request(PRINT_WEB_URL, data=json.dumps(payload).encode("utf-8"),
|
||||||
headers={"Content-Type": "application/json"}, method="POST")
|
headers={"Content-Type": "application/json"}, method="POST")
|
||||||
resp = urlopen(req, timeout=10)
|
resp = urlopen(req, timeout=10)
|
||||||
print(f"[irc-notify] Thermal print sent: {resp.read().decode()}", file=sys.stderr)
|
if kind == "immediate": _stats["print_immediate"] += 1
|
||||||
|
print(f"[irc-notify] thermal {kind} sent: {payload.get('title','?')[:50]}", file=sys.stderr)
|
||||||
|
return True
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[irc-notify] Thermal print failed: {e}", file=sys.stderr)
|
print(f"[irc-notify] thermal {kind} failed: {e}", file=sys.stderr)
|
||||||
|
|
||||||
def should_print(alert):
|
|
||||||
labels = alert.get("labels", {})
|
|
||||||
if labels.get("alert_channel") == "thermal_print": return True
|
|
||||||
if labels.get("severity", "").lower() in ("critical", "disaster"): return True
|
|
||||||
if alert.get("status", "").upper() == "RESOLVED": return False
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def fingerprint_of(alert):
|
||||||
|
fp = alert.get("fingerprint", "")
|
||||||
|
if fp: return fp
|
||||||
|
labels = alert.get("labels", {})
|
||||||
|
target = labels.get("pod") or labels.get("instance") or labels.get("deployment") or labels.get("statefulset") or labels.get("namespace") or ""
|
||||||
|
return f"{labels.get('alertname','?')}/{labels.get('namespace','')}/{target}"
|
||||||
|
|
||||||
|
def is_critical(alert):
|
||||||
|
return alert.get("labels", {}).get("severity", "").lower() in ("critical", "disaster", "page")
|
||||||
|
|
||||||
|
def is_immediate_label(alert):
|
||||||
|
return alert.get("labels", {}).get("alert_channel") == "thermal_print_immediate"
|
||||||
|
|
||||||
|
def is_batched_label(alert):
|
||||||
|
return alert.get("labels", {}).get("alert_channel") == "thermal_print"
|
||||||
|
|
||||||
|
def add_to_digest(alert):
|
||||||
|
"""Add an alert to the digest buffer. Returns True if the buffer GREW
|
||||||
|
(new fingerprint), False if it was a dedup, resolution, or no-op.
|
||||||
|
"""
|
||||||
|
if not THERMAL_PRINT_ENABLED: return False
|
||||||
|
fp = fingerprint_of(alert)
|
||||||
|
status = alert.get("status", "firing").lower()
|
||||||
|
with _buffer_lock:
|
||||||
|
if status == "resolved":
|
||||||
|
if fp in _buffer:
|
||||||
|
del _buffer[fp]
|
||||||
|
_stats["buffer_resolved"] += 1
|
||||||
|
return False
|
||||||
|
if fp in _buffer:
|
||||||
|
_buffer[fp]["last_seen"] = time.time()
|
||||||
|
_buffer[fp]["alert"] = alert
|
||||||
|
_stats["buffer_dedup"] += 1
|
||||||
|
return False
|
||||||
|
_buffer[fp] = {"alert": alert, "first_seen": time.time(), "last_seen": time.time()}
|
||||||
|
_stats["buffer_added"] += 1
|
||||||
|
return True
|
||||||
|
|
||||||
|
def build_digest_payload():
|
||||||
|
with _buffer_lock:
|
||||||
|
items = list(_buffer.values())
|
||||||
|
if not items: return None
|
||||||
|
by_name = defaultdict(list)
|
||||||
|
for item in items:
|
||||||
|
labels = item["alert"].get("labels", {})
|
||||||
|
by_name[labels.get("alertname", "Unknown")].append(item)
|
||||||
|
lines = []
|
||||||
|
for name, group in sorted(by_name.items()):
|
||||||
|
targets = []
|
||||||
|
for it in group[:5]:
|
||||||
|
labels = it["alert"].get("labels", {})
|
||||||
|
t = (labels.get("pod") or labels.get("instance") or labels.get("deployment")
|
||||||
|
or labels.get("statefulset") or labels.get("namespace") or "?")
|
||||||
|
targets.append(t)
|
||||||
|
more = f" (+{len(group)-5})" if len(group) > 5 else ""
|
||||||
|
sevs = sorted({it["alert"].get("labels", {}).get("severity", "warning") for it in group})
|
||||||
|
lines.append(f"[{'/'.join(sevs)}] {name} x{len(group)}: {', '.join(targets)}{more}")
|
||||||
|
now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
|
||||||
|
title = f"Alert digest: {len(items)} firing"
|
||||||
|
body = "\n".join([
|
||||||
|
f"=== {title} ===",
|
||||||
|
f"as of {now}",
|
||||||
|
"",
|
||||||
|
*lines,
|
||||||
|
"",
|
||||||
|
"Stream: #alerts (IRC) | Triage: grafana-noc1.iamworkin.lan",
|
||||||
|
"Force-flush: POST irc-notify.monitoring.svc:9119/flush",
|
||||||
|
])
|
||||||
|
return {"title": title, "severity": "Warning", "host": "monitoring",
|
||||||
|
"message": body, "eventId": f"digest-{int(time.time())}",
|
||||||
|
"source": "Grafana digest", "status": "PROBLEM", "acknowledged": False}
|
||||||
|
|
||||||
|
def flush_digest():
    """Print the buffered alerts as one digest and drop what was printed.

    Returns:
        True when a digest was delivered to Print.Web; False when the buffer
        was empty or the POST did not succeed (including thermal printing
        being disabled).

    Only entries that were present before the payload was built are removed,
    and only after a successful POST. Previously the whole buffer was cleared
    unconditionally, which lost the digest whenever Print.Web was unreachable
    and silently dropped alerts that arrived while the POST was in flight.
    """
    # Snapshot BEFORE building the payload: anything in this snapshot is
    # guaranteed to be in the payload (unless it resolved in between), so it
    # is safe to drop once the print succeeds. Entries added after this point
    # may be printed twice at worst, never lost.
    with _buffer_lock:
        snapshot = dict(_buffer)

    payload = build_digest_payload()
    if payload is None:
        print("[irc-notify] flush: buffer empty, no digest sent", file=sys.stderr)
        return False

    if not post_thermal(payload, "digest"):
        # Keep the buffer so the next tick (or a manual POST /flush) retries.
        # RESOLVED webhooks still prune entries in the meantime.
        print(f"[irc-notify] flush: digest not delivered; keeping {len(snapshot)} buffered alert(s) for retry", file=sys.stderr)
        return False

    with _buffer_lock:
        for fp, entry in snapshot.items():
            # Identity check: a fingerprint that resolved and re-fired during
            # the POST has a NEW entry object that has not been printed yet.
            if _buffer.get(fp) is entry:
                del _buffer[fp]
        _stats["digest_flushed"] += 1
    return True
|
||||||
|
|
||||||
|
def digest_loop():
|
||||||
|
global _last_flush_time
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
now = time.time()
|
||||||
|
elapsed = now - _last_flush_time
|
||||||
|
if elapsed >= BATCH_INTERVAL_MIN * 60:
|
||||||
|
print(f"[irc-notify] digest tick: interval reached ({BATCH_INTERVAL_MIN}m); buffer={len(_buffer)}", file=sys.stderr)
|
||||||
|
flush_digest()
|
||||||
|
_last_flush_time = now
|
||||||
|
elif len(_buffer) >= BATCH_MAX_PENDING:
|
||||||
|
print(f"[irc-notify] digest tick: buffer full ({len(_buffer)}); force flush", file=sys.stderr)
|
||||||
|
flush_digest()
|
||||||
|
_last_flush_time = now
|
||||||
|
time.sleep(15)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"[irc-notify] digest loop error: {e}", file=sys.stderr)
|
||||||
|
time.sleep(60)
|
||||||
|
|
||||||
class Handler(BaseHTTPRequestHandler):
|
class Handler(BaseHTTPRequestHandler):
|
||||||
def do_POST(self):
|
def do_POST(self):
|
||||||
|
if self.path == "/flush":
|
||||||
|
ok = flush_digest()
|
||||||
|
self.send_response(200); self.send_header("Content-Type", "application/json"); self.end_headers()
|
||||||
|
self.wfile.write(json.dumps({"flushed": ok, "buffer_after": len(_buffer)}).encode())
|
||||||
|
return
|
||||||
|
_stats["webhooks_received"] += 1
|
||||||
length = int(self.headers.get("Content-Length", 0))
|
length = int(self.headers.get("Content-Length", 0))
|
||||||
body = json.loads(self.rfile.read(length)) if length else {}
|
body = json.loads(self.rfile.read(length)) if length else {}
|
||||||
for alert in body.get("alerts", []):
|
for alert in body.get("alerts", []):
|
||||||
@@ -1194,22 +1500,56 @@ data:
|
|||||||
msg = f"{icon}{sev_tag} {name}: {summary}"
|
msg = f"{icon}{sev_tag} {name}: {summary}"
|
||||||
if desc: msg += f"\n {desc}"
|
if desc: msg += f"\n {desc}"
|
||||||
send_irc(msg)
|
send_irc(msg)
|
||||||
if should_print(alert): send_thermal_print(alert)
|
# Thermal routing — EVERYTHING (including criticals) goes into
|
||||||
self.send_response(200)
|
# the hourly digest. Only the explicit `alert_channel=thermal_print_immediate`
|
||||||
self.send_header("Content-Type", "application/json")
|
# label bypasses, and even that flushes-the-current-digest rather
|
||||||
self.end_headers()
|
# than printing a standalone job, so the same fingerprint can't
|
||||||
|
# spam the printer per webhook cycle.
|
||||||
|
if status == "RESOLVED":
|
||||||
|
add_to_digest(alert) # removes from buffer
|
||||||
|
continue
|
||||||
|
if is_immediate_label(alert):
|
||||||
|
# Explicit opt-in for "paper this NOW" — first arrival of a
|
||||||
|
# new fingerprint triggers an immediate digest flush; repeat
|
||||||
|
# webhooks for the same fingerprint dedupe in the buffer
|
||||||
|
# until the next interval or until the alert resolves.
|
||||||
|
new_in_buffer = add_to_digest(alert)
|
||||||
|
if new_in_buffer:
|
||||||
|
global _last_flush_time
|
||||||
|
flush_digest()
|
||||||
|
_last_flush_time = time.time()
|
||||||
|
elif is_critical(alert) or is_batched_label(alert):
|
||||||
|
add_to_digest(alert)
|
||||||
|
# else: IRC-only (warnings without thermal_print label)
|
||||||
|
self.send_response(200); self.send_header("Content-Type", "application/json"); self.end_headers()
|
||||||
self.wfile.write(b'{"status":"ok"}')
|
self.wfile.write(b'{"status":"ok"}')
|
||||||
|
|
||||||
def do_GET(self):
|
def do_GET(self):
|
||||||
self.send_response(200)
|
self.send_response(200); self.send_header("Content-Type", "application/json"); self.end_headers()
|
||||||
self.send_header("Content-Type", "application/json")
|
with _buffer_lock:
|
||||||
self.end_headers()
|
alertnames = sorted({it["alert"].get("labels", {}).get("alertname", "?") for it in _buffer.values()})
|
||||||
self.wfile.write(json.dumps({"service":"irc-notify","thermal_print":PRINT_ENABLED}).encode())
|
depth = len(_buffer)
|
||||||
|
info = {
|
||||||
|
"service": "irc-notify",
|
||||||
|
"config": {"thermal_print_enabled": THERMAL_PRINT_ENABLED,
|
||||||
|
"batch_interval_min": BATCH_INTERVAL_MIN,
|
||||||
|
"batch_max_pending": BATCH_MAX_PENDING,
|
||||||
|
"irc_target": f"{IRC_HOST}:{IRC_PORT} {IRC_CHANNEL}",
|
||||||
|
"print_web_url": PRINT_WEB_URL},
|
||||||
|
"buffer": {"depth": depth, "alertnames": alertnames,
|
||||||
|
"seconds_since_last_flush": int(time.time() - _last_flush_time),
|
||||||
|
"seconds_until_next_flush": max(0, int(BATCH_INTERVAL_MIN*60 - (time.time() - _last_flush_time)))},
|
||||||
|
"stats": _stats,
|
||||||
|
}
|
||||||
|
self.wfile.write(json.dumps(info, indent=2).encode())
|
||||||
|
|
||||||
def log_message(self, format, *args):
|
def log_message(self, format, *args):
|
||||||
print(f"[irc-notify] {args[0]}", file=sys.stderr)
|
print(f"[irc-notify] {args[0]}", file=sys.stderr)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
threading.Thread(target=digest_loop, daemon=True).start()
|
||||||
server = HTTPServer(("0.0.0.0", 9119), Handler)
|
server = HTTPServer(("0.0.0.0", 9119), Handler)
|
||||||
print(f"IRC alert relay :9119 -> {IRC_HOST}:{IRC_PORT} {IRC_CHANNEL} (thermal: {PRINT_ENABLED})")
|
print(f"[irc-notify] :9119 -> IRC {IRC_HOST}:{IRC_PORT} {IRC_CHANNEL} | thermal={'ON' if THERMAL_PRINT_ENABLED else 'OFF'} | digest={BATCH_INTERVAL_MIN}m max={BATCH_MAX_PENDING}", file=sys.stderr)
|
||||||
server.serve_forever()
|
server.serve_forever()
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
@@ -3296,6 +3636,39 @@ data:
|
|||||||
relativeTimeRange: {from: 120, to: 0}
|
relativeTimeRange: {from: 120, to: 0}
|
||||||
datasourceUid: __expr__
|
datasourceUid: __expr__
|
||||||
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [600], type: gt}}], refId: C}
|
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [600], type: gt}}], refId: C}
|
||||||
|
- orgId: 1
|
||||||
|
name: CI Runners
|
||||||
|
folder: CI Alerts
|
||||||
|
interval: 1m
|
||||||
|
rules:
|
||||||
|
# LinuxRunnerOffline: fires when a github-runner Deployment has had zero ready
# replicas for 5 minutes.
- uid: linux-runner-offline
  title: LinuxRunnerOffline
  condition: C
  for: 5m
  noDataState: OK
  execErrState: Error
  annotations:
    summary: "Linux CI runner offline: {{ $labels.deployment }}"
    description: "A github-runner namespace Deployment has 0 ready replicas for more than 5 minutes. CI jobs targeting that repo will queue until the runner pod restarts and re-registers."
    runbook: "1. kubectl -n github-runner get pods -l app.kubernetes.io/name={{ $labels.deployment }} 2. kubectl -n github-runner logs -l app.kubernetes.io/name={{ $labels.deployment }} --tail=50 3. Verify PAT repo access if registration returns 404 4. Verify no RWO PVC is shared by scaled runners"
  labels:
    severity: warning
    service: github-runner
    alert_channel: irc
    team: ci
  data:
    # A: `== bool 0` yields 1 for a deployment with zero ready replicas and 0
    # otherwise. A plain `== 0` is a FILTER that keeps the sample value (0),
    # which the `gt 0` threshold in C could never match, so the alert would
    # never fire.
    - refId: A
      relativeTimeRange: {from: 300, to: 0}
      datasourceUid: prometheus
      model: {expr: 'kube_deployment_status_replicas_ready{namespace="github-runner",deployment=~"github-runner(|-(sharedpos|puppet|signage|dms|telephony|print-web|chat|mysql|kiosk-linux))"} == bool 0', instant: true, refId: A}
    # B: collapse each series to its latest value.
    - refId: B
      relativeTimeRange: {from: 300, to: 0}
      datasourceUid: __expr__
      model: {type: reduce, expression: A, reducer: last, refId: B}
    # C: alert on any deployment whose "is offline" flag is 1.
    - refId: C
      relativeTimeRange: {from: 300, to: 0}
      datasourceUid: __expr__
      model: {type: threshold, expression: B, conditions: [{evaluator: {params: [0], type: gt}}], refId: C}
|
||||||
- orgId: 1
|
- orgId: 1
|
||||||
name: Infrastructure
|
name: Infrastructure
|
||||||
folder: AI Stack Alerts
|
folder: AI Stack Alerts
|
||||||
@@ -3328,6 +3701,32 @@ data:
|
|||||||
relativeTimeRange: {from: 120, to: 0}
|
relativeTimeRange: {from: 120, to: 0}
|
||||||
datasourceUid: __expr__
|
datasourceUid: __expr__
|
||||||
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [1], type: lt}}], refId: C}
|
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [1], type: lt}}], refId: C}
|
||||||
|
# MacMiniRunnerOffline: fires when any macmini-* runner reports offline, or
# when the metric is missing entirely, for 10 minutes.
- uid: macmini-runner-offline
  title: MacMiniRunnerOffline
  condition: C
  for: 10m
  noDataState: Alerting
  execErrState: OK
  annotations:
    summary: Mac mini GitHub runner offline
    description: "One or more macmini-* GitHub Actions runners have not reported online for more than 10 minutes. LaunchDaemons survive reboot and do not require the bluejay GUI session."
    runbook: "1. ssh fcadmin@macmini.iamworkin.lan 2. launchctl print system/io.flowercore.github-runner-<slug> 3. Check /Users/fcadmin/Library/Logs/github-runners/<slug>/stderr.log 4. Re-register the repo runner if .runner is missing"
  labels:
    severity: warning
    service: github-runner
  data:
    # A: the `or vector(0)` fallback must sit OUTSIDE min(). Inside the
    # aggregation the label-less vector(0) sample never matches a labelled
    # runner series, so it is always unioned in and min() is always 0, which
    # makes the alert fire permanently. Outside, it only supplies 0 when no
    # runner series exists at all.
    - refId: A
      relativeTimeRange: {from: 600, to: 0}
      datasourceUid: prometheus
      model: {expr: 'min(flowercore_github_runner_online{runner=~"macmini-.*"}) or vector(0)', instant: true, refId: A}
    # B: collapse to the latest value.
    - refId: B
      relativeTimeRange: {from: 600, to: 0}
      datasourceUid: __expr__
      model: {type: reduce, expression: A, reducer: last, refId: B}
    # C: anything below 1 means at least one runner is offline or absent.
    - refId: C
      relativeTimeRange: {from: 600, to: 0}
      datasourceUid: __expr__
      model: {type: threshold, expression: B, conditions: [{evaluator: {params: [1], type: lt}}], refId: C}
|
||||||
- uid: high-cpu
|
- uid: high-cpu
|
||||||
title: High CPU (>85%)
|
title: High CPU (>85%)
|
||||||
condition: C
|
condition: C
|
||||||
|
|||||||
297
apps/multus/multus.yaml
Normal file
297
apps/multus/multus.yaml
Normal file
@@ -0,0 +1,297 @@
|
|||||||
|
# =============================================================================
|
||||||
|
# Multus CNI — Meta-CNI for multi-network attachment to pods/VMs
|
||||||
|
# =============================================================================
|
||||||
|
# Purpose: enable KubeVirt VMs (and any future workload) to attach additional
|
||||||
|
# network interfaces beyond the default Calico-managed pod network. Required
|
||||||
|
# for ci1 (Windows Server 2025 KubeVirt VM) to bridge onto PROD VLAN 57.
|
||||||
|
#
|
||||||
|
# Source: upstream k8snetworkplumbingwg/multus-cni v4.2.2
|
||||||
|
# https://github.com/k8snetworkplumbingwg/multus-cni/blob/v4.2.2/deployments/multus-daemonset-thick.yml
|
||||||
|
#
|
||||||
|
# Inlined verbatim (with project header + version pin annotation) for
|
||||||
|
# reproducibility and air-gap safety. Bumping versions = edit this file +
|
||||||
|
# git push. ArgoCD picks up via the bluejay-infra ApplicationSet
|
||||||
|
# (apps/* directory generator on main).
|
||||||
|
#
|
||||||
|
# Why thick plugin (not thin):
|
||||||
|
# - Thick = daemon + thin shim binary; daemon handles NAD watch + CRD reads
|
||||||
|
# centrally so each pod's CNI ADD doesn't hit the K8s API server. Better
|
||||||
|
# for clusters with many NAD-using pods.
|
||||||
|
# - Thin = each CNI ADD process directly contacts K8s API. Simpler but
|
||||||
|
# scales worse and has more failure modes.
|
||||||
|
# - KubeVirt + multi-VM workload pattern fits thick perfectly.
|
||||||
|
#
|
||||||
|
# Cluster context (verified 2026-05-08):
|
||||||
|
# - RKE2 v1.34.5 on 3 nodes (rke2-server, rke2-agent1, rke2-agent2)
|
||||||
|
# - Calico CNI (Tigera-managed) at /etc/cni/net.d + /opt/cni/bin (default)
|
||||||
|
# - openSUSE Leap 16, kernel 6.12, containerd 2.1.5
|
||||||
|
# - host bridge for PROD VLAN 57 = `br-prod` (PUPPET HOST WORK — see Phase 1.5
|
||||||
|
# in docs/infrastructure/windows-server-build-runner-plan.md)
|
||||||
|
#
|
||||||
|
# Version pin: snapshot-thick → pinning to v4.2.2 release tag at deploy time
|
||||||
|
# would require a private mirror of the image. Upstream `snapshot-thick` tag
|
||||||
|
# is updated on every release, so for now we trust upstream + Calico's
|
||||||
|
# established pattern. Pin to a specific SHA256 once we mirror to Gitea OCI.
|
||||||
|
#
|
||||||
|
# Apply (once committed to bluejay-infra main, ApplicationSet auto-syncs):
|
||||||
|
# git add apps/multus/multus.yaml && git commit && git push origin main
|
||||||
|
# # ArgoCD `infra-multus` Application appears within 3 min via ApplicationSet
|
||||||
|
#
|
||||||
|
# Verify:
|
||||||
|
# kubectl -n kube-system get ds kube-multus-ds
|
||||||
|
# kubectl -n kube-system rollout status ds kube-multus-ds
|
||||||
|
# kubectl get crd network-attachment-definitions.k8s.cni.cncf.io
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
---
|
||||||
|
apiVersion: apiextensions.k8s.io/v1
|
||||||
|
kind: CustomResourceDefinition
|
||||||
|
metadata:
|
||||||
|
name: network-attachment-definitions.k8s.cni.cncf.io
|
||||||
|
annotations:
|
||||||
|
bluejay.iamworkin.lan/source: "k8snetworkplumbingwg/multus-cni v4.2.2"
|
||||||
|
spec:
|
||||||
|
group: k8s.cni.cncf.io
|
||||||
|
scope: Namespaced
|
||||||
|
names:
|
||||||
|
plural: network-attachment-definitions
|
||||||
|
singular: network-attachment-definition
|
||||||
|
kind: NetworkAttachmentDefinition
|
||||||
|
shortNames:
|
||||||
|
- net-attach-def
|
||||||
|
versions:
|
||||||
|
- name: v1
|
||||||
|
served: true
|
||||||
|
storage: true
|
||||||
|
schema:
|
||||||
|
openAPIV3Schema:
|
||||||
|
description: 'NetworkAttachmentDefinition is a CRD schema specified by the Network Plumbing
|
||||||
|
Working Group to express the intent for attaching pods to one or more logical or physical
|
||||||
|
networks. More information available at: https://github.com/k8snetworkplumbingwg/multi-net-spec'
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
apiVersion:
|
||||||
|
type: string
|
||||||
|
kind:
|
||||||
|
type: string
|
||||||
|
metadata:
|
||||||
|
type: object
|
||||||
|
spec:
|
||||||
|
description: 'NetworkAttachmentDefinition spec defines the desired state of a network attachment'
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
config:
|
||||||
|
description: 'NetworkAttachmentDefinition config is a JSON-formatted CNI configuration'
|
||||||
|
type: string
|
||||||
|
---
|
||||||
|
kind: ClusterRole
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
metadata:
|
||||||
|
name: multus
|
||||||
|
rules:
|
||||||
|
- apiGroups: ["k8s.cni.cncf.io"]
|
||||||
|
resources:
|
||||||
|
- '*'
|
||||||
|
verbs:
|
||||||
|
- '*'
|
||||||
|
- apiGroups:
|
||||||
|
- ""
|
||||||
|
resources:
|
||||||
|
- pods
|
||||||
|
- pods/status
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- list
|
||||||
|
- update
|
||||||
|
- watch
|
||||||
|
- apiGroups:
|
||||||
|
- ""
|
||||||
|
- events.k8s.io
|
||||||
|
resources:
|
||||||
|
- events
|
||||||
|
verbs:
|
||||||
|
- create
|
||||||
|
- patch
|
||||||
|
- update
|
||||||
|
---
|
||||||
|
kind: ClusterRoleBinding
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
metadata:
|
||||||
|
name: multus
|
||||||
|
roleRef:
|
||||||
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
kind: ClusterRole
|
||||||
|
name: multus
|
||||||
|
subjects:
|
||||||
|
- kind: ServiceAccount
|
||||||
|
name: multus
|
||||||
|
namespace: kube-system
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ServiceAccount
|
||||||
|
metadata:
|
||||||
|
name: multus
|
||||||
|
namespace: kube-system
|
||||||
|
---
|
||||||
|
kind: ConfigMap
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: multus-daemon-config
|
||||||
|
namespace: kube-system
|
||||||
|
labels:
|
||||||
|
tier: node
|
||||||
|
app: multus
|
||||||
|
data:
|
||||||
|
daemon-config.json: |
|
||||||
|
{
|
||||||
|
"chrootDir": "/hostroot",
|
||||||
|
"cniVersion": "0.3.1",
|
||||||
|
"logLevel": "verbose",
|
||||||
|
"logToStderr": true,
|
||||||
|
"cniConfigDir": "/host/etc/cni/net.d",
|
||||||
|
"multusAutoconfigDir": "/host/etc/cni/net.d",
|
||||||
|
"multusConfigFile": "auto",
|
||||||
|
"socketDir": "/host/run/multus/"
|
||||||
|
}
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: DaemonSet
|
||||||
|
metadata:
|
||||||
|
name: kube-multus-ds
|
||||||
|
namespace: kube-system
|
||||||
|
labels:
|
||||||
|
tier: node
|
||||||
|
app: multus
|
||||||
|
name: multus
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
name: multus
|
||||||
|
updateStrategy:
|
||||||
|
type: RollingUpdate
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
tier: node
|
||||||
|
app: multus
|
||||||
|
name: multus
|
||||||
|
spec:
|
||||||
|
hostNetwork: true
|
||||||
|
hostPID: true
|
||||||
|
tolerations:
|
||||||
|
- operator: Exists
|
||||||
|
effect: NoSchedule
|
||||||
|
- operator: Exists
|
||||||
|
effect: NoExecute
|
||||||
|
serviceAccountName: multus
|
||||||
|
containers:
|
||||||
|
- name: kube-multus
|
||||||
|
image: ghcr.io/k8snetworkplumbingwg/multus-cni:snapshot-thick
|
||||||
|
command: [ "/usr/src/multus-cni/bin/multus-daemon" ]
|
||||||
|
# 2026-05-11: upstream default of 50Mi memory limit OOM-cascades when
|
||||||
|
# an operator-owned namespace accumulates >100 pending pods retrying
|
||||||
|
# CNI ADD. RemoteDesktop emitted 219 orphan rd-browser-only pods
|
||||||
|
# (missing OwnerReferences), kubelet's CNI ADD avalanche pushed multus
|
||||||
|
# over 50Mi, OOMKilled, restarted with even bigger backlog → loop.
|
||||||
|
# 21h cluster outage. See FlowerCore.Notes:
|
||||||
|
# feedback_multus_50mi_limit_oom_orphan_pod_avalanche.md
|
||||||
|
# 1Gi limit / 512Mi request comfortably handles a 200+ pod CNI
|
||||||
|
# catchup burst on 64GB nodes (nodes are <25% used in steady-state).
|
||||||
|
# Drop back toward 256Mi only after MultusMemoryPressure alert
|
||||||
|
# proves steady-state working set sits well below 200Mi.
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: "100m"
|
||||||
|
memory: "512Mi"
|
||||||
|
limits:
|
||||||
|
cpu: "100m"
|
||||||
|
memory: "1Gi"
|
||||||
|
securityContext:
|
||||||
|
privileged: true
|
||||||
|
terminationMessagePolicy: FallbackToLogsOnError
|
||||||
|
volumeMounts:
|
||||||
|
- name: cni
|
||||||
|
mountPath: /host/etc/cni/net.d
|
||||||
|
# multus-daemon expects that cnibin path must be identical between pod and container host.
|
||||||
|
# e.g. if the cni bin is in '/opt/cni/bin' on the container host side, then it should be mounted at '/opt/cni/bin' in multus-daemon,
|
||||||
|
# not to any other directory, like '/opt/bin' or '/usr/bin'.
|
||||||
|
- name: cnibin
|
||||||
|
mountPath: /opt/cni/bin
|
||||||
|
- name: host-run
|
||||||
|
mountPath: /host/run
|
||||||
|
- name: host-var-lib-cni-multus
|
||||||
|
mountPath: /var/lib/cni/multus
|
||||||
|
- name: host-var-lib-kubelet
|
||||||
|
mountPath: /var/lib/kubelet
|
||||||
|
mountPropagation: HostToContainer
|
||||||
|
- name: host-run-k8s-cni-cncf-io
|
||||||
|
mountPath: /run/k8s.cni.cncf.io
|
||||||
|
- name: host-run-netns
|
||||||
|
mountPath: /run/netns
|
||||||
|
mountPropagation: HostToContainer
|
||||||
|
- name: multus-daemon-config
|
||||||
|
mountPath: /etc/cni/net.d/multus.d
|
||||||
|
readOnly: true
|
||||||
|
- name: hostroot
|
||||||
|
mountPath: /hostroot
|
||||||
|
mountPropagation: HostToContainer
|
||||||
|
- mountPath: /etc/cni/multus/net.d
|
||||||
|
name: multus-conf-dir
|
||||||
|
env:
|
||||||
|
- name: MULTUS_NODE_NAME
|
||||||
|
valueFrom:
|
||||||
|
fieldRef:
|
||||||
|
fieldPath: spec.nodeName
|
||||||
|
initContainers:
|
||||||
|
- name: install-multus-binary
|
||||||
|
image: ghcr.io/k8snetworkplumbingwg/multus-cni:snapshot-thick
|
||||||
|
command:
|
||||||
|
- "sh"
|
||||||
|
- "-c"
|
||||||
|
- "cp /usr/src/multus-cni/bin/multus-shim /host/opt/cni/bin/multus-shim && cp /usr/src/multus-cni/bin/passthru /host/opt/cni/bin/passthru"
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: "10m"
|
||||||
|
memory: "15Mi"
|
||||||
|
securityContext:
|
||||||
|
privileged: true
|
||||||
|
terminationMessagePolicy: FallbackToLogsOnError
|
||||||
|
volumeMounts:
|
||||||
|
- name: cnibin
|
||||||
|
mountPath: /host/opt/cni/bin
|
||||||
|
mountPropagation: Bidirectional
|
||||||
|
terminationGracePeriodSeconds: 10
|
||||||
|
volumes:
|
||||||
|
- name: cni
|
||||||
|
hostPath:
|
||||||
|
path: /etc/cni/net.d
|
||||||
|
- name: cnibin
|
||||||
|
hostPath:
|
||||||
|
path: /opt/cni/bin
|
||||||
|
- name: hostroot
|
||||||
|
hostPath:
|
||||||
|
path: /
|
||||||
|
- name: multus-daemon-config
|
||||||
|
configMap:
|
||||||
|
name: multus-daemon-config
|
||||||
|
items:
|
||||||
|
- key: daemon-config.json
|
||||||
|
path: daemon-config.json
|
||||||
|
- name: host-run
|
||||||
|
hostPath:
|
||||||
|
path: /run
|
||||||
|
- name: host-var-lib-cni-multus
|
||||||
|
hostPath:
|
||||||
|
path: /var/lib/cni/multus
|
||||||
|
- name: host-var-lib-kubelet
|
||||||
|
hostPath:
|
||||||
|
path: /var/lib/kubelet
|
||||||
|
- name: host-run-k8s-cni-cncf-io
|
||||||
|
hostPath:
|
||||||
|
path: /run/k8s.cni.cncf.io
|
||||||
|
- name: host-run-netns
|
||||||
|
hostPath:
|
||||||
|
path: /run/netns/
|
||||||
|
- name: multus-conf-dir
|
||||||
|
hostPath:
|
||||||
|
path: /etc/cni/multus/net.d
|
||||||
226
apps/selenium/network-policy.yaml
Normal file
226
apps/selenium/network-policy.yaml
Normal file
@@ -0,0 +1,226 @@
|
|||||||
|
# Selenium Grid NetworkPolicy.
|
||||||
|
#
|
||||||
|
# Captured into bluejay-infra 2026-05-07 during the regroup audit. This
|
||||||
|
# NetworkPolicy was previously applied via `kubectl apply` directly to
|
||||||
|
# the cluster with no source-of-truth anywhere — a fresh cluster rebuild
|
||||||
|
# would have lost all of it (including the Selenium Grid → Traefik VIP
|
||||||
|
# allow rule for AAT runs against `*.iamworkin.lan` services).
|
||||||
|
#
|
||||||
|
# At capture time (2026-05-07) the Selenium Grid Deployment + Services were
|
||||||
|
# still managed outside ArgoCD (deployed via raw kubectl from the original
|
||||||
|
# Selenium Grid bring-up). They were brought under ArgoCD on 2026-05-25 — see
|
||||||
|
# apps/selenium/selenium-grid.yaml. This file only restores GitOps repeatability for the NetworkPolicy.
|
||||||
|
#
|
||||||
|
# Rules captured from the live cluster's `kubectl get netpol -n selenium
|
||||||
|
# selenium-netpol -o yaml` on 2026-05-07. Originally applied 2026-03-15
|
||||||
|
# (from `metadata.creationTimestamp` before the field was stripped).
|
||||||
|
#
|
||||||
|
# Allows:
|
||||||
|
# - Egress: CoreDNS, intra-namespace pod-to-pod (4442/4443/4444/5555),
|
||||||
|
# Traefik VIP for `*.iamworkin.lan` AAT runs, every namespace (`namespaceSelector: {}`) on
|
||||||
|
# 80/443 and the standard FC service ports (5100/5200/5300/5400/8080), pod CIDR
|
||||||
|
# (10.42.0.0/16) + service CIDR (10.43.0.0/16) on 80/443/8443/8080 and 5100/5200/5300/5400,
|
||||||
|
# LAN gateway range (10.0.56.0/24) on 80/443/8443, edge2 CUPS print
|
||||||
|
# (10.0.57.16:5200), public internet 80/443 (0.0.0.0/0 except 172.16.0.0/12 and 192.168.0.0/16; 10.0.0.0/8 is not excluded), and
|
||||||
|
# fc-signage:5190 for the signage AAT lane.
|
||||||
|
# - Ingress: Traefik (4444 + 8089 ACME-solver-style), intra-namespace pod-to-pod (4442/4443/4444/5555),
|
||||||
|
# telephony / gitea / fc-system / fc-signage / github-runner namespaces
|
||||||
|
# on 4444.
|
||||||
|
#
|
||||||
|
# 2026-05-25: added github-runner ingress on 4444 so CI jobs running in
|
||||||
|
# self-hosted runner pods (e.g. FlowerCore.Print.Web `help-screenshots`)
|
||||||
|
# can reach the grid. Without this allow, the session POST to
|
||||||
|
# `selenium-hub.selenium.svc.cluster.local:4444` was DNAT'd to the hub
|
||||||
|
# pod IP and then dropped at the Calico ingress hook — Selenium UI showed
|
||||||
|
# 0/4 sessions while the .NET HTTP client timed out at 60s. Same family
|
||||||
|
# as `feedback_netpol_dnat_backend_port`, wrong-source-namespace flavor.
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: NetworkPolicy
|
||||||
|
metadata:
|
||||||
|
name: selenium-netpol
|
||||||
|
namespace: selenium
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/part-of: selenium
|
||||||
|
app.kubernetes.io/component: isolation
|
||||||
|
spec:
|
||||||
|
egress:
|
||||||
|
- ports:
|
||||||
|
- port: 53
|
||||||
|
protocol: UDP
|
||||||
|
- port: 53
|
||||||
|
protocol: TCP
|
||||||
|
to:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: kube-system
|
||||||
|
- ports:
|
||||||
|
- port: 4442
|
||||||
|
protocol: TCP
|
||||||
|
- port: 4443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 4444
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5555
|
||||||
|
protocol: TCP
|
||||||
|
to:
|
||||||
|
- podSelector: {}
|
||||||
|
- ports:
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 80
|
||||||
|
protocol: TCP
|
||||||
|
to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.56.200/32
|
||||||
|
- ports:
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 80
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5200
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5300
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5400
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5100
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8080
|
||||||
|
protocol: TCP
|
||||||
|
to:
|
||||||
|
- namespaceSelector: {}
|
||||||
|
- ports:
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 80
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8080
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5200
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5300
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5400
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5100
|
||||||
|
protocol: TCP
|
||||||
|
to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.43.0.0/16
|
||||||
|
- ports:
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 80
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8080
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5200
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5300
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5400
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5100
|
||||||
|
protocol: TCP
|
||||||
|
to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.42.0.0/16
|
||||||
|
- ports:
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 80
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8443
|
||||||
|
protocol: TCP
|
||||||
|
to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.56.0/24
|
||||||
|
- ports:
|
||||||
|
- port: 5200
|
||||||
|
protocol: TCP
|
||||||
|
to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.57.16/32
|
||||||
|
- ports:
|
||||||
|
- port: 80
|
||||||
|
protocol: TCP
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 0.0.0.0/0
|
||||||
|
except:
|
||||||
|
- 172.16.0.0/12
|
||||||
|
- 192.168.0.0/16
|
||||||
|
- ports:
|
||||||
|
- port: 5190
|
||||||
|
protocol: TCP
|
||||||
|
to:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: fc-signage
|
||||||
|
ingress:
|
||||||
|
- from:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: traefik-system
|
||||||
|
ports:
|
||||||
|
- port: 4444
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8089
|
||||||
|
protocol: TCP
|
||||||
|
- from:
|
||||||
|
- podSelector: {}
|
||||||
|
ports:
|
||||||
|
- port: 4442
|
||||||
|
protocol: TCP
|
||||||
|
- port: 4443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 4444
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5555
|
||||||
|
protocol: TCP
|
||||||
|
- from:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: telephony
|
||||||
|
ports:
|
||||||
|
- port: 4444
|
||||||
|
protocol: TCP
|
||||||
|
- from:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: gitea
|
||||||
|
ports:
|
||||||
|
- port: 4444
|
||||||
|
protocol: TCP
|
||||||
|
- from:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: fc-system
|
||||||
|
ports:
|
||||||
|
- port: 4444
|
||||||
|
protocol: TCP
|
||||||
|
- from:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: fc-signage
|
||||||
|
ports:
|
||||||
|
- port: 4444
|
||||||
|
protocol: TCP
|
||||||
|
- from:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: github-runner
|
||||||
|
ports:
|
||||||
|
- port: 4444
|
||||||
|
protocol: TCP
|
||||||
|
podSelector: {}
|
||||||
|
policyTypes:
|
||||||
|
- Ingress
|
||||||
|
- Egress
|
||||||
|
|
||||||
427
apps/selenium/selenium-grid.yaml
Normal file
427
apps/selenium/selenium-grid.yaml
Normal file
@@ -0,0 +1,427 @@
|
|||||||
|
# Selenium Grid 4 — RKE2 deployment
|
||||||
|
#
|
||||||
|
# Hub + chrome + firefox + edge browser nodes serving fleet-wide AAT runs from
|
||||||
|
# the GitHub Actions self-hosted runners. ArgoCD owns this namespace from
|
||||||
|
# 2026-05-25 (`infra-selenium` Application; previously these resources were
|
||||||
|
# orphan kubectl-applied since 2026-03-15).
|
||||||
|
#
|
||||||
|
# Endpoints:
|
||||||
|
# - Internal cluster: http://selenium-hub.selenium.svc.cluster.local:4444
|
||||||
|
# - LAN LoadBalancer (MetalLB): http://10.0.56.208:4444
|
||||||
|
# - Traefik public: https://selenium.iamworkin.lan
|
||||||
|
#
|
||||||
|
# Browser maxSessions:
|
||||||
|
# - chrome 2 (bumped from 1 on 2026-05-25 morning-routine — AAT-heavy
|
||||||
|
# Print.Web help-screenshots was the global bottleneck;
|
||||||
|
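# see commit history for ops/runner-replica-rightsize)
# NOTE(review): the chrome node sets SE_NODE_OVERRIDE_MAX_SESSIONS='false'
# alongside a 1-CPU limit; Selenium caps max sessions at the detected CPU
# count unless the override is 'true' — confirm the node really advertises 2 slots.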
|
||||||
|
# - firefox 1
|
||||||
|
# - edge 1
|
||||||
|
#
|
||||||
|
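# Screenshots + video recording are intended to land on NFS via the chrome video sidecar. NOTE(review): the sidecar's /videos mount in this manifest is an emptyDir (5Gi), not an NFS volume — confirm where recordings are persisted.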
|
||||||
|
# See: CLAUDE.md "Selenium Grid & Visual AAT Testing" + bluejay-infra ADR notes.
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: selenium-hub
|
||||||
|
app.kubernetes.io/name: selenium-hub
|
||||||
|
app.kubernetes.io/part-of: selenium-grid
|
||||||
|
name: selenium-hub
|
||||||
|
namespace: selenium
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: web
|
||||||
|
port: 4444
|
||||||
|
targetPort: 4444
|
||||||
|
- name: publish
|
||||||
|
port: 4442
|
||||||
|
targetPort: 4442
|
||||||
|
- name: subscribe
|
||||||
|
port: 4443
|
||||||
|
targetPort: 4443
|
||||||
|
selector:
|
||||||
|
app: selenium-hub
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
metallb.io/ip-allocated-from-pool: bluejay-pool
|
||||||
|
metallb.universe.tf/loadBalancerIPs: 10.0.56.208
|
||||||
|
labels:
|
||||||
|
app: selenium-hub
|
||||||
|
component: external-access
|
||||||
|
name: selenium-hub-external
|
||||||
|
namespace: selenium
|
||||||
|
spec:
|
||||||
|
clusterIP: 10.43.90.147
|
||||||
|
clusterIPs:
|
||||||
|
- 10.43.90.147
|
||||||
|
externalTrafficPolicy: Local
|
||||||
|
healthCheckNodePort: 32213
|
||||||
|
ports:
|
||||||
|
- name: web
|
||||||
|
nodePort: 32411
|
||||||
|
port: 4444
|
||||||
|
targetPort: 4444
|
||||||
|
- name: publish
|
||||||
|
nodePort: 32068
|
||||||
|
port: 4442
|
||||||
|
targetPort: 4442
|
||||||
|
- name: subscribe
|
||||||
|
nodePort: 31000
|
||||||
|
port: 4443
|
||||||
|
targetPort: 4443
|
||||||
|
selector:
|
||||||
|
app: selenium-hub
|
||||||
|
type: LoadBalancer
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: selenium-hub
|
||||||
|
app.kubernetes.io/name: selenium-hub
|
||||||
|
app.kubernetes.io/part-of: selenium-grid
|
||||||
|
name: selenium-hub
|
||||||
|
namespace: selenium
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: selenium-hub
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: selenium-hub
|
||||||
|
app.kubernetes.io/name: selenium-hub
|
||||||
|
app.kubernetes.io/part-of: selenium-grid
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- env:
|
||||||
|
- name: SE_NODE_SESSION_TIMEOUT
|
||||||
|
value: '300'
|
||||||
|
- name: SE_SESSION_REQUEST_TIMEOUT
|
||||||
|
value: '300'
|
||||||
|
- name: SE_SESSION_RETRY_INTERVAL
|
||||||
|
value: '5'
|
||||||
|
- name: JAVA_OPTS
|
||||||
|
value: -Xmx512m
|
||||||
|
image: selenium/hub:4.27.0
|
||||||
|
livenessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /wd/hub/status
|
||||||
|
port: 4444
|
||||||
|
initialDelaySeconds: 30
|
||||||
|
periodSeconds: 15
|
||||||
|
timeoutSeconds: 5
|
||||||
|
name: selenium-hub
|
||||||
|
ports:
|
||||||
|
- containerPort: 4444
|
||||||
|
name: web
|
||||||
|
- containerPort: 4442
|
||||||
|
name: publish
|
||||||
|
- containerPort: 4443
|
||||||
|
name: subscribe
|
||||||
|
readinessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /wd/hub/status
|
||||||
|
port: 4444
|
||||||
|
initialDelaySeconds: 10
|
||||||
|
periodSeconds: 5
|
||||||
|
timeoutSeconds: 5
|
||||||
|
# Hub baseline working set ~766Mi on 2026-05-25 (75% of prior 1Gi
|
||||||
|
# limit). Bump to 1.5Gi / 1Gi to keep ~50% headroom; matches the
|
||||||
|
# stampede-buffer pattern documented for multus
|
||||||
|
# (feedback_k8s_cni_multus_sizing). CPU left alone — observed 54m
|
||||||
|
# against a 500m limit, no contention.
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 500m
|
||||||
|
memory: 1536Mi
|
||||||
|
requests:
|
||||||
|
cpu: 250m
|
||||||
|
memory: 1Gi
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: selenium-node-chrome
|
||||||
|
app.kubernetes.io/name: selenium-node-chrome
|
||||||
|
app.kubernetes.io/part-of: selenium-grid
|
||||||
|
name: selenium-node-chrome
|
||||||
|
namespace: selenium
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: selenium-node-chrome
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: selenium-node-chrome
|
||||||
|
app.kubernetes.io/name: selenium-node-chrome
|
||||||
|
app.kubernetes.io/part-of: selenium-grid
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- env:
|
||||||
|
- name: SE_EVENT_BUS_HOST
|
||||||
|
value: selenium-hub
|
||||||
|
- name: SE_EVENT_BUS_PUBLISH_PORT
|
||||||
|
value: '4442'
|
||||||
|
- name: SE_EVENT_BUS_SUBSCRIBE_PORT
|
||||||
|
value: '4443'
|
||||||
|
- name: SE_NODE_MAX_SESSIONS
|
||||||
|
value: '2'
|
||||||
|
- name: SE_NODE_OVERRIDE_MAX_SESSIONS
|
||||||
|
value: 'false'
|
||||||
|
- name: SE_VNC_NO_PASSWORD
|
||||||
|
value: '1'
|
||||||
|
- name: SE_SCREEN_WIDTH
|
||||||
|
value: '1920'
|
||||||
|
- name: SE_SCREEN_HEIGHT
|
||||||
|
value: '1080'
|
||||||
|
- name: SE_NODE_SESSION_TIMEOUT
|
||||||
|
value: '300'
|
||||||
|
image: selenium/node-chrome:4.27.0
|
||||||
|
livenessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /status
|
||||||
|
port: 5555
|
||||||
|
initialDelaySeconds: 30
|
||||||
|
periodSeconds: 15
|
||||||
|
name: selenium-chrome
|
||||||
|
ports:
|
||||||
|
- containerPort: 5555
|
||||||
|
name: node
|
||||||
|
readinessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /status
|
||||||
|
port: 5555
|
||||||
|
initialDelaySeconds: 15
|
||||||
|
periodSeconds: 5
|
||||||
|
# Chromium-based browser node. Bumped from 1Gi -> 2Gi (req 512Mi
|
||||||
|
# -> 1Gi) on 2026-05-25 — Edge had 51 OOMKills in 5d on the
|
||||||
|
# original 1Gi cap (~1 OOM every 2.4h), and Chrome at maxSessions=2
|
||||||
|
# was running 684Mi idle on the same cap. Matches the Firefox node's
|
||||||
|
# tested-stable 2Gi limit. CPU unchanged.
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: '1'
|
||||||
|
memory: 2Gi
|
||||||
|
requests:
|
||||||
|
cpu: 500m
|
||||||
|
memory: 1Gi
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- env:
|
||||||
|
- name: DISPLAY_CONTAINER_NAME
|
||||||
|
value: localhost
|
||||||
|
- name: SE_SCREEN_WIDTH
|
||||||
|
value: '1920'
|
||||||
|
- name: SE_SCREEN_HEIGHT
|
||||||
|
value: '1080'
|
||||||
|
- name: SE_VIDEO_FILE_NAME
|
||||||
|
value: auto
|
||||||
|
- name: SE_VIDEO_UPLOAD_ENABLED
|
||||||
|
value: 'false'
|
||||||
|
image: selenium/video:ffmpeg-7.1-20250101
|
||||||
|
name: video
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 500m
|
||||||
|
memory: 768Mi
|
||||||
|
requests:
|
||||||
|
cpu: 250m
|
||||||
|
memory: 384Mi
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /videos
|
||||||
|
name: selenium-videos
|
||||||
|
volumes:
|
||||||
|
- emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 2Gi
|
||||||
|
name: dshm
|
||||||
|
- emptyDir:
|
||||||
|
sizeLimit: 5Gi
|
||||||
|
name: selenium-videos
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: selenium-node-firefox
|
||||||
|
app.kubernetes.io/name: selenium-node-firefox
|
||||||
|
app.kubernetes.io/part-of: selenium-grid
|
||||||
|
name: selenium-node-firefox
|
||||||
|
namespace: selenium
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: selenium-node-firefox
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: selenium-node-firefox
|
||||||
|
app.kubernetes.io/name: selenium-node-firefox
|
||||||
|
app.kubernetes.io/part-of: selenium-grid
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- env:
|
||||||
|
- name: SE_EVENT_BUS_HOST
|
||||||
|
value: selenium-hub
|
||||||
|
- name: SE_EVENT_BUS_PUBLISH_PORT
|
||||||
|
value: '4442'
|
||||||
|
- name: SE_EVENT_BUS_SUBSCRIBE_PORT
|
||||||
|
value: '4443'
|
||||||
|
- name: SE_NODE_MAX_SESSIONS
|
||||||
|
value: '1'
|
||||||
|
- name: SE_NODE_OVERRIDE_MAX_SESSIONS
|
||||||
|
value: 'true'
|
||||||
|
- name: SE_VNC_NO_PASSWORD
|
||||||
|
value: '1'
|
||||||
|
- name: SE_START_VNC
|
||||||
|
value: 'false'
|
||||||
|
- name: SE_SCREEN_WIDTH
|
||||||
|
value: '1920'
|
||||||
|
- name: SE_SCREEN_HEIGHT
|
||||||
|
value: '1080'
|
||||||
|
- name: SE_NODE_SESSION_TIMEOUT
|
||||||
|
value: '300'
|
||||||
|
image: selenium/node-firefox:4.27.0
|
||||||
|
livenessProbe:
|
||||||
|
failureThreshold: 5
|
||||||
|
httpGet:
|
||||||
|
path: /status
|
||||||
|
port: 5555
|
||||||
|
initialDelaySeconds: 30
|
||||||
|
periodSeconds: 15
|
||||||
|
timeoutSeconds: 5
|
||||||
|
name: selenium-firefox
|
||||||
|
ports:
|
||||||
|
- containerPort: 5555
|
||||||
|
name: node
|
||||||
|
readinessProbe:
|
||||||
|
failureThreshold: 5
|
||||||
|
httpGet:
|
||||||
|
path: /status
|
||||||
|
port: 5555
|
||||||
|
initialDelaySeconds: 15
|
||||||
|
periodSeconds: 5
|
||||||
|
timeoutSeconds: 5
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: '1'
|
||||||
|
memory: 2Gi
|
||||||
|
requests:
|
||||||
|
cpu: 500m
|
||||||
|
memory: 1Gi
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
volumes:
|
||||||
|
- emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 2Gi
|
||||||
|
name: dshm
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: selenium-node-edge
|
||||||
|
app.kubernetes.io/name: selenium-node-edge
|
||||||
|
app.kubernetes.io/part-of: selenium-grid
|
||||||
|
name: selenium-node-edge
|
||||||
|
namespace: selenium
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: selenium-node-edge
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: selenium-node-edge
|
||||||
|
app.kubernetes.io/name: selenium-node-edge
|
||||||
|
app.kubernetes.io/part-of: selenium-grid
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- env:
|
||||||
|
- name: SE_EVENT_BUS_HOST
|
||||||
|
value: selenium-hub
|
||||||
|
- name: SE_EVENT_BUS_PUBLISH_PORT
|
||||||
|
value: '4442'
|
||||||
|
- name: SE_EVENT_BUS_SUBSCRIBE_PORT
|
||||||
|
value: '4443'
|
||||||
|
- name: SE_NODE_MAX_SESSIONS
|
||||||
|
value: '1'
|
||||||
|
- name: SE_NODE_OVERRIDE_MAX_SESSIONS
|
||||||
|
value: 'true'
|
||||||
|
- name: SE_VNC_NO_PASSWORD
|
||||||
|
value: '1'
|
||||||
|
- name: SE_SCREEN_WIDTH
|
||||||
|
value: '1920'
|
||||||
|
- name: SE_SCREEN_HEIGHT
|
||||||
|
value: '1080'
|
||||||
|
- name: SE_NODE_SESSION_TIMEOUT
|
||||||
|
value: '300'
|
||||||
|
image: selenium/node-edge:4.27.0
|
||||||
|
livenessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /status
|
||||||
|
port: 5555
|
||||||
|
initialDelaySeconds: 30
|
||||||
|
periodSeconds: 15
|
||||||
|
name: selenium-edge
|
||||||
|
ports:
|
||||||
|
- containerPort: 5555
|
||||||
|
name: node
|
||||||
|
readinessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /status
|
||||||
|
port: 5555
|
||||||
|
initialDelaySeconds: 15
|
||||||
|
periodSeconds: 5
|
||||||
|
# Chromium-based browser node. Bumped from 1Gi -> 2Gi (req 512Mi
|
||||||
|
# -> 1Gi) on 2026-05-25 — Edge had 51 OOMKills in 5d on the
|
||||||
|
# original 1Gi cap (~1 OOM every 2.4h), and Chrome at maxSessions=2
|
||||||
|
# was running 684Mi idle on the same cap. Matches the Firefox node's
|
||||||
|
# tested-stable 2Gi limit. CPU unchanged.
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: '1'
|
||||||
|
memory: 2Gi
|
||||||
|
requests:
|
||||||
|
cpu: 500m
|
||||||
|
memory: 1Gi
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
volumes:
|
||||||
|
- emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 2Gi
|
||||||
|
name: dshm
|
||||||
|
---
|
||||||
|
apiVersion: traefik.io/v1alpha1
|
||||||
|
kind: IngressRoute
|
||||||
|
metadata:
|
||||||
|
name: selenium-hub
|
||||||
|
namespace: selenium
|
||||||
|
spec:
|
||||||
|
entryPoints:
|
||||||
|
- websecure
|
||||||
|
routes:
|
||||||
|
- kind: Rule
|
||||||
|
match: Host(`selenium.iamworkin.lan`)
|
||||||
|
services:
|
||||||
|
- name: selenium-hub
|
||||||
|
port: 4444
|
||||||
|
tls:
|
||||||
|
secretName: selenium-tls
|
||||||
@@ -127,10 +127,13 @@ spec:
|
|||||||
initContainers:
|
initContainers:
|
||||||
- name: fix-data-perms
|
- name: fix-data-perms
|
||||||
image: busybox:latest
|
image: busybox:latest
|
||||||
# Also chown /shared-tts (hostPath /tmp/tts-audio) so the non-root
|
# Must run as root to chown the hostPath /tmp/tts-audio that may be
|
||||||
# app user (uid 1654) can write Piper .sln16 files that Asterisk
|
# root-owned after node reboot. Pod-level runAsNonRoot:true would
|
||||||
# reads at /var/lib/asterisk/sounds/tts. World-readable (755) is
|
# otherwise inherit and chown would fail with EPERM (see Notes memory
|
||||||
# fine — Asterisk runs as a different uid in the other pod.
|
# feedback_hostpath_initcontainer_chown_perms).
|
||||||
|
securityContext:
|
||||||
|
runAsUser: 0
|
||||||
|
runAsNonRoot: false
|
||||||
command: ["sh", "-c", "chown -R 1654:1654 /data && chown 1654:1654 /shared-tts && chmod 0755 /shared-tts"]
|
command: ["sh", "-c", "chown -R 1654:1654 /data && chown 1654:1654 /shared-tts && chmod 0755 /shared-tts"]
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
- name: telephony-data
|
- name: telephony-data
|
||||||
|
|||||||
68
apps/worldbuilder/README.md
Normal file
68
apps/worldbuilder/README.md
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
# FlowerCore.WorldBuilder
|
||||||
|
|
||||||
|
ArgoCD-managed manifest for FlowerCore.WorldBuilder.Web — comic / storyboard
|
||||||
|
authoring service that drives ComfyUI for panel image generation and
|
||||||
|
QuestPDF for letter / A4 export.
|
||||||
|
|
||||||
|
Source: `D:\git\FlowerCore\FlowerCore.WorldBuilder` (master)
|
||||||
|
|
||||||
|
## Deployment order
|
||||||
|
|
||||||
|
1. **DNS preflight** — `worldbuilder.iamworkin.lan -> 10.0.56.200` MUST exist
|
||||||
|
in pfSense Unbound before this manifest is applied, or cert-manager
|
||||||
|
HTTP-01 challenges silently back off exponentially for ~2h.
|
||||||
|
Memory: `feedback_pfsense_dns_required_for_acme`.
|
||||||
|
2. **Image import to ALL RKE2 nodes** — pod can schedule to any of
|
||||||
|
`rke2-server` (10.0.56.11), `rke2-agent1` (10.0.56.12),
|
||||||
|
`rke2-agent2` (10.0.56.13). Build with:
|
||||||
|
```bash
|
||||||
|
bash deploy/build.sh # in FlowerCore.WorldBuilder repo
|
||||||
|
podman save localhost/fc-worldbuilder:v<TAG> -o /tmp/fc-worldbuilder-v<TAG>.tar
|
||||||
|
for h in 10.0.56.11 10.0.56.12 10.0.56.13; do
|
||||||
|
scp /tmp/fc-worldbuilder-v<TAG>.tar fcadmin@$h:/tmp/
|
||||||
|
ssh fcadmin@$h \
|
||||||
|
"sudo /var/lib/rancher/rke2/bin/ctr -a /run/k3s/containerd/containerd.sock \
|
||||||
|
-n k8s.io images import /tmp/fc-worldbuilder-v<TAG>.tar"
|
||||||
|
done
|
||||||
|
```
|
||||||
|
Memory: `feedback_rke2_image_import_per_node_scp`.
|
||||||
|
3. **Bump image tag** in `worldbuilder.yaml` and git push.
|
||||||
|
ArgoCD ApplicationSet picks up within ~3 minutes.
|
||||||
|
4. **First production render** — open
|
||||||
|
`https://worldbuilder.iamworkin.lan/studio/c32e0000-0000-4000-8000-000000000004`
|
||||||
|
and confirm the Cyberpunk Blue Jay demo prompt loads with five seeded fake
|
||||||
|
generated images. This Sprint 32 visitor-safe profile uses
|
||||||
|
`ClientMode=fake`; switch the image-generation env vars back to ComfyUI only
|
||||||
|
for an operator-owned GPU render lane.
|
||||||
|
|
||||||
|
## Health probes
|
||||||
|
|
||||||
|
- `startupProbe` + `readinessProbe`: `httpGet /healthz` (registered explicitly
|
||||||
|
in Program.cs — anonymous, no DB or OpenAPI dependency).
|
||||||
|
- `livenessProbe`: `tcpSocket` as a cheap fallback.
|
||||||
|
Memory: `feedback_k8s_probes_must_not_hit_openapi`,
|
||||||
|
`feedback_k8s_probes_behind_auth_middleware`.
|
||||||
|
|
||||||
|
## Storage
|
||||||
|
|
||||||
|
- Longhorn RWO PVC `worldbuilder-data` (5Gi) mounted at `/data`. SQLite DB
|
||||||
|
lives at `/data/worldbuilder.db`, generated images under `/data/gallery/`,
|
||||||
|
PDF/PNG exports under `/data/exports/`.
|
||||||
|
- DataProtection keys persist to the same SQLite via
|
||||||
|
`AddFlowerCoreDataProtection<WorldBuilderDbContext>` — explicit migration
|
||||||
|
`20260429133417_Initial` already creates `fc_dp_keys`.
|
||||||
|
Memory: `feedback_dataprotection_keys_persist_to_app_dbcontext`,
|
||||||
|
`feedback_intranet_dataprotection_table_must_have_explicit_migration`.
|
||||||
|
|
||||||
|
## Image generation backend
|
||||||
|
|
||||||
|
Sprint 32 pins the Kubernetes profile to
|
||||||
|
`FlowerCore:WorldBuilder:ImageGeneration:ClientMode=fake` with
|
||||||
|
`BaseUrl=http://127.0.0.1:1`. That keeps the public/internal visitor demo
|
||||||
|
deterministic, avoids GPU exposure, and still exercises the studio/gallery
|
||||||
|
surface with persisted generated-image metadata.
|
||||||
|
|
||||||
|
The previous ComfyUI backend target was `http://10.0.56.20:8188` on
|
||||||
|
BLUEJAY-WS (R9700 / gfx1201 / ROCm 7.2.1). Re-enable it only in an
|
||||||
|
operator-owned follow-up that also verifies workstation reachability and image
|
||||||
|
import freshness.
|
||||||
256
apps/worldbuilder/worldbuilder.yaml
Normal file
256
apps/worldbuilder/worldbuilder.yaml
Normal file
@@ -0,0 +1,256 @@
|
|||||||
|
# FlowerCore.WorldBuilder — comic / storyboard authoring service.
|
||||||
|
#
|
||||||
|
# Deployment + Service + PVC + Certificate + IngressRoute. ArgoCD-managed
|
||||||
|
# end-to-end. See apps/worldbuilder/README.md for the per-deploy runbook.
|
||||||
|
#
|
||||||
|
# Image build (BLUEJAY-WS):
|
||||||
|
# bash deploy/build.sh # in FlowerCore.WorldBuilder repo
|
||||||
|
# podman save localhost/fc-worldbuilder:v<TAG> -o /tmp/fc-worldbuilder-v<TAG>.tar
|
||||||
|
# for h in 10.0.56.11 10.0.56.12 10.0.56.13; do
|
||||||
|
# scp /tmp/fc-worldbuilder-v<TAG>.tar fcadmin@$h:/tmp/
|
||||||
|
# ssh fcadmin@$h "sudo /var/lib/rancher/rke2/bin/ctr -a /run/k3s/containerd/containerd.sock -n k8s.io images import /tmp/fc-worldbuilder-v<TAG>.tar"
|
||||||
|
# done
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Namespace
|
||||||
|
metadata:
|
||||||
|
name: fc-worldbuilder
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: fc-worldbuilder
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
---
|
||||||
|
# SQLite DB + generated image gallery + PDF/PNG exports.
|
||||||
|
# Longhorn RWO — single replica with `Recreate` rollout strategy keeps it safe.
|
||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: worldbuilder-data
|
||||||
|
namespace: fc-worldbuilder
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: worldbuilder-data
|
||||||
|
app.kubernetes.io/component: storage
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteOnce
|
||||||
|
storageClassName: longhorn
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 5Gi
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: worldbuilder-web
|
||||||
|
namespace: fc-worldbuilder
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: worldbuilder-web
|
||||||
|
app.kubernetes.io/component: web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
annotations:
|
||||||
|
flowercore.io/traceability-standard: k8s-pod-ownership-and-traceability-standard
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
revisionHistoryLimit: 3
|
||||||
|
strategy:
|
||||||
|
# RWO PVC + single replica. Recreate avoids multi-attach overlap.
|
||||||
|
type: Recreate
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: worldbuilder-web
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: worldbuilder-web
|
||||||
|
app.kubernetes.io/component: web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
annotations:
|
||||||
|
prometheus.io/scrape: "true"
|
||||||
|
prometheus.io/port: "8080"
|
||||||
|
prometheus.io/path: "/metrics/prometheus"
|
||||||
|
flowercore.io/audit-trace-id: "worldbuilder-runtime-demo"
|
||||||
|
spec:
|
||||||
|
securityContext:
|
||||||
|
fsGroup: 1654
|
||||||
|
fsGroupChangePolicy: OnRootMismatch
|
||||||
|
containers:
|
||||||
|
- name: web
|
||||||
|
# Bump tag for each rebuild. Initial deploy: v202605062048
|
||||||
|
image: localhost/fc-worldbuilder:v202605062048
|
||||||
|
imagePullPolicy: Never
|
||||||
|
ports:
|
||||||
|
- containerPort: 8080
|
||||||
|
name: http
|
||||||
|
env:
|
||||||
|
- name: ASPNETCORE_URLS
|
||||||
|
value: "http://+:8080"
|
||||||
|
- name: ASPNETCORE_ENVIRONMENT
|
||||||
|
value: "Production"
|
||||||
|
- name: DOTNET_RUNNING_IN_CONTAINER
|
||||||
|
value: "true"
|
||||||
|
- name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
|
||||||
|
value: "false"
|
||||||
|
# SQLite path overrides (default appsettings uses relative paths).
|
||||||
|
- name: ConnectionStrings__DefaultConnection
|
||||||
|
value: "Data Source=/data/worldbuilder.db"
|
||||||
|
- name: FlowerCore__Database__Provider
|
||||||
|
value: "Sqlite"
|
||||||
|
- name: FlowerCore__Database__ConnectionStrings__Sqlite
|
||||||
|
value: "Data Source=/data/worldbuilder.db"
|
||||||
|
# Generated image gallery + exports persist on /data.
|
||||||
|
- name: FlowerCore__WorldBuilder__ImageStore__RootPath
|
||||||
|
value: "/data/gallery"
|
||||||
|
- name: FlowerCore__WorldBuilder__Export__RootPath
|
||||||
|
value: "/data/exports"
|
||||||
|
# Visitor-safe Sprint 32 profile: fake backend keeps public demo
|
||||||
|
# rendering deterministic and avoids exposing BLUEJAY-WS GPU.
|
||||||
|
- name: FlowerCore__WorldBuilder__ImageGeneration__BaseUrl
|
||||||
|
value: "http://127.0.0.1:1"
|
||||||
|
- name: FlowerCore__WorldBuilder__ImageGeneration__ClientMode
|
||||||
|
value: "fake"
|
||||||
|
- name: FlowerCore__WorldBuilder__ImageGeneration__BackendId
|
||||||
|
value: "fake"
|
||||||
|
resources:
|
||||||
|
# Cluster CPU-request budget runs hot (99% on all 3 nodes at deploy
|
||||||
|
# time) while actual CPU usage is well below capacity. Idle Blazor
|
||||||
|
# Server + SignalR + a single ComfyUI poller uses ~5m, so 25m is
|
||||||
|
# generous. Re-evaluate if active rendering/export workers ever
|
||||||
|
# push past the limit.
|
||||||
|
requests:
|
||||||
|
cpu: 25m
|
||||||
|
memory: 256Mi
|
||||||
|
limits:
|
||||||
|
cpu: 1000m
|
||||||
|
memory: 768Mi
|
||||||
|
# /healthz is registered explicitly in Program.cs (anonymous, no DB
|
||||||
|
# or OpenAPI dependency). Liveness uses tcpSocket as a cheap fallback
|
||||||
|
# in case future middleware changes accidentally gate /healthz.
|
||||||
|
# Memory: feedback_k8s_probes_must_not_hit_openapi,
|
||||||
|
# feedback_k8s_probes_behind_auth_middleware.
|
||||||
|
startupProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /healthz
|
||||||
|
port: 8080
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 5
|
||||||
|
failureThreshold: 30
|
||||||
|
readinessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /healthz
|
||||||
|
port: 8080
|
||||||
|
periodSeconds: 10
|
||||||
|
failureThreshold: 3
|
||||||
|
livenessProbe:
|
||||||
|
tcpSocket:
|
||||||
|
port: 8080
|
||||||
|
initialDelaySeconds: 30
|
||||||
|
periodSeconds: 30
|
||||||
|
failureThreshold: 3
|
||||||
|
securityContext:
|
||||||
|
runAsNonRoot: true
|
||||||
|
runAsUser: 1654
|
||||||
|
runAsGroup: 1654
|
||||||
|
allowPrivilegeEscalation: false
|
||||||
|
readOnlyRootFilesystem: true
|
||||||
|
capabilities:
|
||||||
|
drop:
|
||||||
|
- ALL
|
||||||
|
volumeMounts:
|
||||||
|
- name: data
|
||||||
|
mountPath: /data
|
||||||
|
- name: tmp
|
||||||
|
mountPath: /tmp
|
||||||
|
- name: logs
|
||||||
|
mountPath: /app/logs
|
||||||
|
volumes:
|
||||||
|
- name: data
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: worldbuilder-data
|
||||||
|
- name: tmp
|
||||||
|
emptyDir: {}
|
||||||
|
- name: logs
|
||||||
|
emptyDir: {}
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: worldbuilder-web
|
||||||
|
namespace: fc-worldbuilder
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: worldbuilder-web
|
||||||
|
app.kubernetes.io/component: web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app.kubernetes.io/name: worldbuilder-web
|
||||||
|
ports:
|
||||||
|
- name: http
|
||||||
|
port: 80
|
||||||
|
targetPort: 8080
|
||||||
|
---
|
||||||
|
apiVersion: cert-manager.io/v1
|
||||||
|
kind: Certificate
|
||||||
|
metadata:
|
||||||
|
name: worldbuilder-web-tls
|
||||||
|
namespace: fc-worldbuilder
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: worldbuilder-web-tls
|
||||||
|
app.kubernetes.io/component: ingress
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
spec:
|
||||||
|
secretName: worldbuilder-web-tls
|
||||||
|
issuerRef:
|
||||||
|
name: step-ca-acme
|
||||||
|
kind: ClusterIssuer
|
||||||
|
dnsNames:
|
||||||
|
- worldbuilder.iamworkin.lan
|
||||||
|
# step-ca ACME provisioner caps lifetime at 30d. A 90d request was
|
||||||
|
# silently capped to 30d, making renewBefore 720h (30d) equal to the
|
||||||
|
# actual cert lifetime — triggered a perpetual renewal loop that
|
||||||
|
# generated 2365+ CertificateRequest objects in 18h. Match the working
|
||||||
|
# 720h/240h pattern used by every other FC service cert.
|
||||||
|
duration: 720h # 30d (step-ca cap)
|
||||||
|
renewBefore: 240h # 10d
|
||||||
|
---
|
||||||
|
apiVersion: traefik.io/v1alpha1
|
||||||
|
kind: IngressRoute
|
||||||
|
metadata:
|
||||||
|
name: worldbuilder-web
|
||||||
|
namespace: fc-worldbuilder
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: worldbuilder-web
|
||||||
|
app.kubernetes.io/component: ingress
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
spec:
|
||||||
|
entryPoints:
|
||||||
|
- websecure
|
||||||
|
routes:
|
||||||
|
- match: Host(`worldbuilder.iamworkin.lan`)
|
||||||
|
kind: Rule
|
||||||
|
services:
|
||||||
|
- name: worldbuilder-web
|
||||||
|
port: 80
|
||||||
|
tls:
|
||||||
|
secretName: worldbuilder-web-tls
|
||||||
@@ -305,15 +305,17 @@ spec:
|
|||||||
path: /
|
path: /
|
||||||
port: 8080
|
port: 8080
|
||||||
initialDelaySeconds: 60
|
initialDelaySeconds: 60
|
||||||
timeoutSeconds: 5
|
timeoutSeconds: 15
|
||||||
periodSeconds: 10
|
periodSeconds: 10
|
||||||
|
failureThreshold: 3
|
||||||
readinessProbe:
|
readinessProbe:
|
||||||
httpGet:
|
httpGet:
|
||||||
path: /
|
path: /
|
||||||
port: 8080
|
port: 8080
|
||||||
initialDelaySeconds: 30
|
initialDelaySeconds: 30
|
||||||
periodSeconds: 5
|
periodSeconds: 5
|
||||||
timeoutSeconds: 5
|
timeoutSeconds: 15
|
||||||
|
failureThreshold: 3
|
||||||
---
|
---
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: Service
|
kind: Service
|
||||||
|
|||||||
84
docs/runbooks/openvoxserver-quadlet-durability.md
Normal file
84
docs/runbooks/openvoxserver-quadlet-durability.md
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
# openvoxserver Quadlet Durability
|
||||||
|
|
||||||
|
This runbook documents the noc1 `openvoxserver` durability fix for the Puppet control-repo deploy path. The service is a noc1 host artifact, not an ArgoCD application, so discovery always starts on noc1 rather than in `apps/*`.
|
||||||
|
|
||||||
|
## Current State
|
||||||
|
|
||||||
|
As of the Sprint 32 Cx-12 apply on 2026-05-17:
|
||||||
|
|
||||||
|
- `/etc/containers/systemd/openvoxserver.container` has a `GIT_SSH_COMMAND` environment entry that points at the persisted serverdata deploy key.
|
||||||
|
- `/etc/systemd/system/openvoxserver-safeconfig.service` is enabled and active, and reapplies `git config --global --add safe.directory *` inside the running container.
|
||||||
|
- `/opt/puppet/r10k-deploy.sh` self-heals before each fetch by setting `safe.directory`, the repo-local `core.sshCommand`, and the persisted `known_hosts` file when needed.
|
||||||
|
- `puppet-deploy.service` exits `0/SUCCESS` after the apply and the control repo reports `HEAD == origin/master`.
|
||||||
|
- `systemctl cat openvoxserver` does not currently resolve to a generated unit on noc1. The container is running through Podman with `restart=always`, so the destructive recreate smoke test must not be run until the generated unit is present.
|
||||||
|
|
||||||
|
## Discovery
|
||||||
|
|
||||||
|
Run every command through noc1 as `fcadmin`; do not assume BLUEJAY-WS can reach container-local surfaces directly.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "hostname && sudo -n true"
|
||||||
|
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo find /etc/containers/systemd /usr/share/containers/systemd /etc/systemd/system -name 'openvoxserver*' 2>/dev/null"
|
||||||
|
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo sed -n '1,220p' /etc/containers/systemd/openvoxserver.container"
|
||||||
|
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo systemctl cat puppet-deploy.service"
|
||||||
|
```
|
||||||
|
|
||||||
|
If a future noc1 profile manages these files, update the Puppet control repo and let `puppet-deploy.service` apply the change. On 2026-05-17, host `puppet` was not installed, so Cx-12 used a direct noc1 host edit.
|
||||||
|
|
||||||
|
## Durable Fix Shape
|
||||||
|
|
||||||
|
The Quadlet keeps the deploy key as a path reference only:
|
||||||
|
|
||||||
|
```ini
|
||||||
|
Environment=GIT_SSH_COMMAND=ssh -i /opt/puppetlabs/server/data/puppetserver/.puppet-deploy-key -o StrictHostKeyChecking=yes -o IdentitiesOnly=yes -o UserKnownHostsFile=/opt/puppetlabs/server/data/puppetserver/.known_hosts
|
||||||
|
```
|
||||||
|
|
||||||
|
The safeconfig service is intentionally independent of `openvoxserver.service` until the generated unit exists. It waits for the `openvoxserver` container name and then runs:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
/usr/bin/podman exec openvoxserver git config --global --add safe.directory '*'
|
||||||
|
```
|
||||||
|
|
||||||
|
The deploy script self-heals inside the container before it fetches the control repo:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git config --global --add safe.directory "*" 2>/dev/null || true
|
||||||
|
DEPLOY_KEY="/opt/puppetlabs/server/data/puppetserver/.puppet-deploy-key"
|
||||||
|
KNOWN_HOSTS="/opt/puppetlabs/server/data/puppetserver/.known_hosts"
|
||||||
|
REPO="/etc/puppetlabs/code/environments/production"
|
||||||
|
export GIT_SSH_COMMAND="ssh -i $DEPLOY_KEY -o StrictHostKeyChecking=yes -o IdentitiesOnly=yes -o UserKnownHostsFile=$KNOWN_HOSTS"
|
||||||
|
git -C "$REPO" config core.sshCommand "$GIT_SSH_COMMAND" 2>/dev/null || true
|
||||||
|
```
|
||||||
|
|
||||||
|
## Validation
|
||||||
|
|
||||||
|
Non-destructive validation:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo grep -n 'GIT_SSH_COMMAND' /etc/containers/systemd/openvoxserver.container"
|
||||||
|
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo systemctl status openvoxserver-safeconfig.service --no-pager -l"
|
||||||
|
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo systemctl start puppet-deploy.service && sudo systemctl status puppet-deploy.service --no-pager -l"
|
||||||
|
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo podman exec openvoxserver git -C /etc/puppetlabs/code/environments/production config --get core.sshCommand"
|
||||||
|
```
|
||||||
|
|
||||||
|
Destructive recreate smoke is opt-in only:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
scp scripts/monitoring/openvox-recreate-smoke.sh fcadmin@10.0.56.10:/tmp/openvox-recreate-smoke.sh
|
||||||
|
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "chmod +x /tmp/openvox-recreate-smoke.sh && sudo OPENVOX_RECREATE_SMOKE=1 /tmp/openvox-recreate-smoke.sh"
|
||||||
|
```
|
||||||
|
|
||||||
|
Do not run the smoke test during normal sprint work. It stops and removes the production container before starting it again through systemd, and it now refuses to continue unless `systemctl cat openvoxserver` succeeds.
|
||||||
|
|
||||||
|
## Credential Rotation Note
|
||||||
|
|
||||||
|
When rotating the Puppet deploy key, update the persisted serverdata copy on noc1:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo install -m 0600 -o root -g root <new-deploy-key> /opt/puppet/serverdata/.puppet-deploy-key
|
||||||
|
sudo podman exec openvoxserver sh -c "ssh-keyscan github.com > /opt/puppetlabs/server/data/puppetserver/.known_hosts"
|
||||||
|
sudo systemctl start openvoxserver-safeconfig.service
|
||||||
|
sudo systemctl start puppet-deploy.service
|
||||||
|
```
|
||||||
|
|
||||||
|
Never commit the deploy key or print it in logs.
|
||||||
48
scripts/monitoring/openvox-recreate-smoke.sh
Executable file
48
scripts/monitoring/openvox-recreate-smoke.sh
Executable file
@@ -0,0 +1,48 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Destructive recreate smoke for the noc1 `openvoxserver` container.
#
# Stops and removes the container, starts it again through systemd, runs
# puppet-deploy.service, and then checks that the control repo is still
# usable and that its core.sshCommand references the persisted deploy key.
#
# Environment:
#   OPENVOX_RECREATE_SMOKE         must be "1" or the script refuses to run.
#   SUDO                           privilege wrapper (default: sudo).
#   OPENVOX_RECREATE_WAIT_SECONDS  settle time after the container start
#                                  (default: 50).
#
# Exit codes:
#   64  opt-in variable not set
#   65  no systemd unit resolves for openvoxserver (no verified recreate path)
#   1   a post-recreate check failed
set -euo pipefail

if [ "${OPENVOX_RECREATE_SMOKE:-}" != "1" ]; then
  echo "SKIP: set OPENVOX_RECREATE_SMOKE=1 to run the destructive openvoxserver recreate smoke." >&2
  exit 64
fi

# $SUDO is expanded unquoted on purpose so it may carry arguments (e.g. "sudo -n").
SUDO="${SUDO:-sudo}"
REPO="/etc/puppetlabs/code/environments/production"
CORE_SSH_COMMAND_FRAGMENT=".puppet-deploy-key"
WAIT_SECONDS="${OPENVOX_RECREATE_WAIT_SECONDS:-50}"

# Never remove the container unless systemd can demonstrably bring it back.
if ! $SUDO systemctl cat openvoxserver >/dev/null 2>&1; then
  echo "SKIP: systemctl cat openvoxserver failed; refusing to remove a container without a verified systemd recreate path." >&2
  exit 65
fi

before="$($SUDO podman exec openvoxserver git -C "$REPO" rev-parse --short HEAD)"
echo "Before recreate: $before"

$SUDO systemctl stop openvoxserver
# The container may already be gone after the stop; a failed rm is not an error.
$SUDO podman rm openvoxserver 2>/dev/null || true
$SUDO systemctl start openvoxserver

# Give the recreated container time to come up before deploying.
sleep "$WAIT_SECONDS"

# Capture the start result instead of letting `set -e` abort here, so the unit
# status below is still printed when the deploy fails.
deploy_rc=0
$SUDO systemctl start puppet-deploy.service || deploy_rc=$?
sleep 5

# Diagnostic output only: `systemctl status` exits non-zero (3) for a unit that
# is inactive, which is the normal end state of a run-to-completion unit, so
# its exit code must not decide the outcome.
$SUDO systemctl status puppet-deploy.service --no-pager -l || true

if [ "$deploy_rc" -ne 0 ] || $SUDO systemctl is-failed --quiet puppet-deploy.service; then
  echo "FAIL: puppet-deploy.service did not complete successfully after the recreate." >&2
  exit 1
fi

# Note: this reports origin/master, while "before" reported HEAD.
after="$($SUDO podman exec openvoxserver git -C "$REPO" rev-parse --short origin/master)"
echo "After recreate origin/master: $after"

$SUDO test -d /opt/puppet/code/environments/production/site-modules/profile/manifests

# `git config --get` exits 1 when the key is unset; tolerate that so the
# explicit FAIL message below is reached instead of a silent `set -e` abort.
core_ssh="$($SUDO podman exec openvoxserver git -C "$REPO" config --get core.sshCommand || true)"
case "$core_ssh" in
  *"$CORE_SSH_COMMAND_FRAGMENT"*) ;;
  *)
    echo "FAIL: core.sshCommand does not reference the persisted deploy key." >&2
    exit 1
    ;;
esac

$SUDO podman exec openvoxserver git -C "$REPO" status --short --branch

echo "PASS: openvoxserver recreate smoke completed without git safety or deploy-key failure."
|
||||||
24
tests/bluejay-infra-lint/BluejayInfraLint.Tests.csproj
Normal file
24
tests/bluejay-infra-lint/BluejayInfraLint.Tests.csproj
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk">
|
||||||
|
<PropertyGroup>
|
||||||
|
<TargetFramework>net10.0</TargetFramework>
|
||||||
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
|
<Nullable>enable</Nullable>
|
||||||
|
<IsPackable>false</IsPackable>
|
||||||
|
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
|
||||||
|
</PropertyGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<PackageReference Include="coverlet.collector" Version="6.0.2">
|
||||||
|
<PrivateAssets>all</PrivateAssets>
|
||||||
|
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
|
||||||
|
</PackageReference>
|
||||||
|
<PackageReference Include="FluentAssertions" Version="6.12.1" />
|
||||||
|
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.12.0" />
|
||||||
|
<PackageReference Include="xunit" Version="2.9.2" />
|
||||||
|
<PackageReference Include="xunit.runner.visualstudio" Version="2.8.2">
|
||||||
|
<PrivateAssets>all</PrivateAssets>
|
||||||
|
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
|
||||||
|
</PackageReference>
|
||||||
|
<PackageReference Include="YamlDotNet" Version="16.2.0" />
|
||||||
|
</ItemGroup>
|
||||||
|
</Project>
|
||||||
206
tests/bluejay-infra-lint/DivoomPiDeployArtifactTests.cs
Normal file
206
tests/bluejay-infra-lint/DivoomPiDeployArtifactTests.cs
Normal file
@@ -0,0 +1,206 @@
|
|||||||
|
using FluentAssertions;
|
||||||
|
using Xunit;
|
||||||
|
|
||||||
|
namespace BluejayInfraLint.Tests;
|
||||||
|
|
||||||
|
/// <summary>
/// Lint tests over the Divoom Pi deploy artifacts stored under
/// <c>apps/fc-divoom-dm-pi-device</c> and <c>apps/fc-divoom-tv-pi</c>.
/// Every test reads files from the repository working tree and asserts on
/// their presence or on literal text they must (or must not) contain.
/// </summary>
[Trait("Category", "Unit")]
public sealed class DivoomPiDeployArtifactTests
{
    // Field order matters: Root must be initialized before the paths derived from it.
    private static readonly string Root = FindRepoRoot();
    private static readonly string DmRoot = Path.Combine(Root, "apps", "fc-divoom-dm-pi-device");
    private static readonly string TvRoot = Path.Combine(Root, "apps", "fc-divoom-tv-pi");

    /// <summary>Manifest markers that must not appear anywhere in the Divoom artifact trees.</summary>
    private static readonly string[] ForbiddenKubernetesKinds =
    {
        "kind: Deployment",
        "kind: IngressRoute",
        "kind: Certificate",
        "kind: OnePasswordItem",
    };

    /// <summary>Repo-relative (forward-slash) paths that must exist under the DM device root.</summary>
    public static TheoryData<string> DmRequiredArtifacts => new()
    {
        "README.md",
        "hiera/edge2-divoom-dm-device.overlay.yaml",
        "puppet/profile/pi/service/divoom_dm_device.pp",
        "puppet/templates/divoom-device-registration.json.epp",
        "puppet/templates/flowercore-divoom-dm-agent.service.epp",
    };

    /// <summary>Repo-relative (forward-slash) paths that must exist under the TV Pi root.</summary>
    public static TheoryData<string> TvRequiredArtifacts => new()
    {
        "README.md",
        "hiera/example-divoom-tv-pi.iamworkin.lan.yaml",
        "puppet/profile/pi/service/divoom_tv.pp",
        "systemd/flowercore-divoom-tv.service",
        "systemd/flowercore-divoom-tv-hdmi.service",
        "systemd/99-flowercore-divoom-tv-hdmi.rules",
        "scripts/flowercore-divoom-tv-prelaunch.sh",
        "scripts/flowercore-divoom-tv-launch.sh",
        "scripts/flowercore-divoom-tv-hdmi-respond.sh",
    };

    [Theory]
    [MemberData(nameof(DmRequiredArtifacts))]
    public void DmDeviceArtifacts_ArePresent(string relativePath)
    {
        // The path is passed as a format argument so it is never parsed as a format string.
        File.Exists(Resolve(DmRoot, relativePath))
            .Should().BeTrue("the DM device artifact {0} must exist", relativePath);
    }

    [Theory]
    [MemberData(nameof(TvRequiredArtifacts))]
    public void TvPiArtifacts_ArePresent(string relativePath)
    {
        File.Exists(Resolve(TvRoot, relativePath))
            .Should().BeTrue("the TV Pi artifact {0} must exist", relativePath);
    }

    [Fact]
    public void DmDeviceReadme_DeclaresPuppetSystemdNotKubernetes()
    {
        var readme = ReadDm("README.md");

        readme.Should().Contain("not a Kubernetes application");
        readme.Should().Contain("profile::pi::service::divoom");
        readme.Should().Contain("no K8s surface");
    }

    [Fact]
    public void DmHieraOverlay_PreservesExistingEdge2DivoomService()
    {
        var hiera = ReadDm("hiera/edge2-divoom-dm-device.overlay.yaml");

        hiera.Should().Contain("fc-pimanager:");
        hiera.Should().Contain("fc-divoom:");
        hiera.Should().Contain("enabled: true");
        hiera.Should().Contain("profile::pi::service::divoom_dm_device::service_enabled: false");
        hiera.Should().Contain("profile::pi::service::divoom_dm_device::service_ensure: 'stopped'");
    }

    [Fact]
    public void DmPuppetProfile_DefaultsToStoppedDisabledService()
    {
        var profile = ReadDm("puppet/profile/pi/service/divoom_dm_device.pp");

        profile.Should().Contain("Boolean $service_enabled = false");
        profile.Should().Contain("Enum['running', 'stopped'] $service_ensure = 'stopped'");
        profile.Should().Contain("service { $service_name:");
        profile.Should().Contain("ensure => $service_ensure");
        profile.Should().Contain("enable => $service_enabled");
    }

    [Fact]
    public void DmPuppetProfile_DoesNotManageLiveDivoomWebUnit()
    {
        var profile = ReadDm("puppet/profile/pi/service/divoom_dm_device.pp");

        profile.Should().NotContain("Service['flowercore-divoom.service']");
        profile.Should().NotContain("service { 'flowercore-divoom.service'");
        profile.Should().NotContain("notify => Service");
    }

    [Fact]
    public void DmAgentUnit_IsSeparateAndGatedByExistingWrappers()
    {
        var unit = ReadDm("puppet/templates/flowercore-divoom-dm-agent.service.epp");

        unit.Should().Contain("ConditionPathExists=<%= $divoom_install_dir %>/bt-link.sh");
        unit.Should().Contain("ConditionPathExists=<%= $divoom_install_dir %>/bt-reset.sh");
        unit.Should().Contain("ConditionPathExists=<%= $divoom_install_dir %>/audio-link.sh");
        unit.Should().Contain("ExecStart=<%= $agent_binary_path %> --mode=Pi");
        unit.Should().NotContain("flowercore-divoom.service");
    }

    [Fact]
    public void DmRegistration_CarriesRenderProofAndSafetyPolicy()
    {
        var registration = ReadDm("puppet/templates/divoom-device-registration.json.epp");

        registration.Should().Contain("\"candidateChannels\": <%= $bt_channels_json %>");
        registration.Should().Contain("\"deviceInfoIsRenderProof\": false");
        registration.Should().Contain("\"visibleRenderProofRequired\": <%= $visible_render_proof_required %>");
        registration.Should().Contain("\"preserveExistingService\": \"flowercore-divoom.service\"");
        registration.Should().Contain("\"doNotEnableFmRadio\": true");
    }

    [Fact]
    public void TvService_UsesAvaloniaHdmiSafetyGates()
    {
        var unit = ReadTv("systemd/flowercore-divoom-tv.service");

        unit.Should().Contain("ConditionPathExists=/opt/flowercore/divoom-tv/FlowerCore.Divoom.Tv");
        unit.Should().Contain("Environment=XDG_RUNTIME_DIR=/run/fc-divoom-tv");
        unit.Should().Contain("RuntimeDirectoryMode=0700");
        unit.Should().Contain("ExecStartPre=/usr/local/bin/flowercore-divoom-tv-prelaunch.sh");
        unit.Should().Contain("ExecStart=/usr/local/bin/flowercore-divoom-tv-launch.sh");
        unit.Should().Contain("MemoryMax=2G");
        unit.Should().Contain("PrivateTmp=true");
        unit.Should().NotContain("/tmp");
    }

    [Fact]
    public void TvLauncher_PrefersCageAndFallsBackToDirectLaunch()
    {
        var script = ReadTv("scripts/flowercore-divoom-tv-launch.sh");

        script.Should().Contain("command -v cage");
        script.Should().Contain("exec cage --");
        script.Should().Contain("launching FlowerCore.Divoom.Tv directly");
        script.Should().Contain("--target=hdmi");
        script.Should().Contain("--presentation-mode=${PRESENTATION_MODE}");
    }

    [Fact]
    public void TvHotplugRule_SettlesAndRestartsRenderer()
    {
        var rule = ReadTv("systemd/99-flowercore-divoom-tv-hdmi.rules");
        var responder = ReadTv("scripts/flowercore-divoom-tv-hdmi-respond.sh");

        rule.Should().Contain("KERNEL==\"card?-HDMI-A-?\"");
        rule.Should().Contain("start flowercore-divoom-tv-hdmi.service");
        responder.Should().Contain("sleep 2");
        responder.Should().Contain("systemctl restart flowercore-divoom-tv.service");
    }

    [Fact]
    public void TvPuppetProfile_InstallsCageAndStaticArtifacts()
    {
        var profile = ReadTv("puppet/profile/pi/service/divoom_tv.pp");

        profile.Should().Contain("package { ['cage', 'libgbm1', 'libdrm2', 'libxkbcommon0', 'fonts-dejavu-core']");
        profile.Should().Contain("'profile/pi/fc_divoom_tv/flowercore-divoom-tv.service'");
        profile.Should().Contain("'profile/pi/fc_divoom_tv/flowercore-divoom-tv-launch.sh'");
        profile.Should().Contain("profile/pi/fc_divoom_tv/99-flowercore-divoom-tv-hdmi.rules");
        profile.Should().Contain("Boolean $service_enabled = false");
    }

    [Fact]
    public void DivoomArtifacts_DoNotAddKubernetesWorkloads()
    {
        var files = Directory.GetFiles(DmRoot, "*", SearchOption.AllDirectories)
            .Concat(Directory.GetFiles(TvRoot, "*", SearchOption.AllDirectories));

        foreach (var file in files)
        {
            var text = File.ReadAllText(file);

            // Name the offending file in the failure message; the raw file
            // content alone does not tell a maintainer which artifact to fix.
            var displayPath = Path.GetRelativePath(Root, file);

            foreach (var kind in ForbiddenKubernetesKinds)
            {
                text.Should().NotContain(kind, "{0} must not declare Kubernetes workloads", displayPath);
            }
        }
    }

    /// <summary>Reads a file below the DM device artifact root.</summary>
    /// <param name="relativePath">Forward-slash path relative to <see cref="DmRoot"/>.</param>
    private static string ReadDm(string relativePath)
        => File.ReadAllText(Resolve(DmRoot, relativePath));

    /// <summary>Reads a file below the TV Pi artifact root.</summary>
    /// <param name="relativePath">Forward-slash path relative to <see cref="TvRoot"/>.</param>
    private static string ReadTv(string relativePath)
        => File.ReadAllText(Resolve(TvRoot, relativePath));

    /// <summary>
    /// Combines <paramref name="root"/> with a forward-slash relative path,
    /// converting the separators to the current platform's separator.
    /// </summary>
    private static string Resolve(string root, string relativePath)
        => Path.Combine(root, relativePath.Replace('/', Path.DirectorySeparatorChar));

    /// <summary>
    /// Walks upward from the test binary directory until it finds a directory
    /// that contains both an <c>apps</c> folder and a <c>README.md</c> file.
    /// </summary>
    /// <returns>The full path of the first matching ancestor directory.</returns>
    /// <exception cref="DirectoryNotFoundException">No ancestor directory matches.</exception>
    private static string FindRepoRoot()
    {
        var current = new DirectoryInfo(AppContext.BaseDirectory);
        while (current is not null)
        {
            if (Directory.Exists(Path.Combine(current.FullName, "apps"))
                && File.Exists(Path.Combine(current.FullName, "README.md")))
            {
                return current.FullName;
            }

            current = current.Parent;
        }

        throw new DirectoryNotFoundException("Could not find bluejay-infra root.");
    }
}
|
||||||
1115
tests/bluejay-infra-lint/FleetManifestLintTests.cs
Normal file
1115
tests/bluejay-infra-lint/FleetManifestLintTests.cs
Normal file
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user