Compare commits
191 Commits
0f9d56ee16
...
codex/s57-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
308235e4fe | ||
|
|
404d884863 | ||
| f4bd90f805 | |||
|
|
67d67ab73d | ||
|
|
f7d41cdc60 | ||
|
|
2c0afc28e4 | ||
|
|
ba5f5dd0fb | ||
|
|
dc699da7b3 | ||
|
|
1e8bf54c6e | ||
|
|
e2e93d482c | ||
| 4319cc2b51 | |||
|
|
2bf339ce51 | ||
|
|
5bdedfc5ae | ||
|
|
0307ae16ae | ||
|
|
6c18f69cf2 | ||
|
|
47e2256556 | ||
|
|
9d77f8ba0e | ||
|
|
2f4be19c85 | ||
|
|
2a62c40990 | ||
|
|
7be98e5efc | ||
|
|
a65b356c9d | ||
|
|
08c17ef1b4 | ||
|
|
06f2f002b7 | ||
|
|
7ac4a8b4b7 | ||
|
|
90f2a86819 | ||
|
|
cbdefb2b23 | ||
|
|
1c36fe3a0a | ||
|
|
2b420ce8a4 | ||
|
|
5cbc1a06b1 | ||
|
|
9e7ee39b3a | ||
|
|
ae030a5f33 | ||
| bc8c35896f | |||
|
|
2cc91b6df0 | ||
| 0d2090fe81 | |||
|
|
bc3548e715 | ||
| 74333cc26b | |||
|
|
7310fb88c2 | ||
| 148bc87b9a | |||
|
|
2a1e842100 | ||
| bc28430d24 | |||
|
|
cc92272217 | ||
| d6f4468a9c | |||
|
|
2f796a2ebd | ||
| 1f1f6823db | |||
|
|
b92f74b63a | ||
|
|
cb7f7dbc4d | ||
|
|
03126d5584 | ||
|
|
495e884c41 | ||
|
|
65aa1e6104 | ||
|
|
7f2a3b76b4 | ||
| ea73f00461 | |||
|
|
25ace30a03 | ||
|
|
ca574c2280 | ||
|
|
09387f90e1 | ||
|
|
e641ceab48 | ||
|
|
c263426ea5 | ||
|
|
bacac067cf | ||
| 914fed08d8 | |||
|
|
200aeab032 | ||
|
|
8182616d4c | ||
|
|
f0862ac03c | ||
|
|
46c392605e | ||
| 89b147bbdd | |||
| d7238a5e3b | |||
| fc444a02a1 | |||
| 83d4883d55 | |||
| f8fe3b2688 | |||
| f2ab892ebc | |||
| fef68a9560 | |||
|
|
6fe77225ae | ||
| 634b9c4169 | |||
| b8c7e59005 | |||
| 65ac8d6f01 | |||
| 35844e0dbd | |||
| b1e307151e | |||
| 12b07219c7 | |||
| 9fd32c4415 | |||
| ad670fb344 | |||
|
|
6f6ca50987 | ||
|
|
c7be58c1f7 | ||
|
|
a1f5a393cd | ||
|
|
710340d8be | ||
|
|
7d2daaa4f8 | ||
|
|
e50e103ba0 | ||
|
|
e8094eb0bd | ||
| 8d87d9172c | |||
|
|
cfd9743afa | ||
|
|
5029e209cd | ||
|
|
f298339152 | ||
|
|
6e7d88db49 | ||
|
|
5ae50bd491 | ||
|
|
653d4472f5 | ||
|
|
eb8693e1ce | ||
|
|
667777a653 | ||
|
|
84c9feb893 | ||
|
|
427dbfcef2 | ||
|
|
b651a4e2d0 | ||
|
|
b998f50f48 | ||
|
|
8fd9ae1cd3 | ||
|
|
fc2aca0e9e | ||
|
|
ba18c52130 | ||
|
|
9f6dc1a9d5 | ||
|
|
0bf47dfa33 | ||
|
|
87a7d7c70a | ||
|
|
1c4145a581 | ||
|
|
c50a403f74 | ||
|
|
fb7bd10528 | ||
|
|
6c21d14a98 | ||
|
|
b3529f8e96 | ||
|
|
00c11b4eaa | ||
|
|
04881f46f0 | ||
|
|
c0038e4859 | ||
|
|
dee48831c6 | ||
|
|
0f1dc5f871 | ||
|
|
11c5f6e6cc | ||
|
|
d637fe9b30 | ||
|
|
5bfe41beca | ||
|
|
df22774674 | ||
|
|
c4065b15a3 | ||
|
|
a4aa612373 | ||
|
|
c2eb37dee9 | ||
|
|
bf6f542569 | ||
|
|
e150b2102f | ||
|
|
33a765b0bc | ||
|
|
5484ed7db6 | ||
|
|
2aa84349ea | ||
|
|
851f8e673b | ||
|
|
f78f8c8192 | ||
|
|
9b255fefc1 | ||
|
|
6a89a76e39 | ||
|
|
2489464d4f | ||
|
|
4b777b16ac | ||
|
|
8c60e3a4d3 | ||
|
|
df02b4c3c3 | ||
|
|
c0dceafffd | ||
|
|
490db8f9e6 | ||
|
|
1926bdaf3b | ||
|
|
ca8d062826 | ||
|
|
1889462fc4 | ||
|
|
523ba61232 | ||
|
|
53f67c8713 | ||
|
|
6b9cf3d12c | ||
|
|
0b52093b36 | ||
|
|
7a9098d3bd | ||
|
|
57d7ba46a7 | ||
|
|
9ec2e2d52e | ||
|
|
b4d62a8a50 | ||
|
|
fbbc07023b | ||
|
|
4b0eef0fb0 | ||
|
|
bb09a3786f | ||
|
|
006dbcf671 | ||
|
|
1be71d6ba7 | ||
|
|
0c8026c912 | ||
|
|
621ae47e00 | ||
|
|
ae6b8c0142 | ||
|
|
da55220218 | ||
|
|
b1ad253dd6 | ||
|
|
ee935f6e07 | ||
|
|
2853ee2024 | ||
|
|
b4a34e16ca | ||
|
|
0d5a1fd530 | ||
|
|
1b633f57b2 | ||
|
|
ee8afd0a08 | ||
|
|
cf35884eae | ||
|
|
9881767b11 | ||
|
|
c9bf23834b | ||
|
|
174002023d | ||
|
|
b71f9e4ec9 | ||
|
|
f1431f7324 | ||
|
|
35bd055cb4 | ||
|
|
f604ab419e | ||
|
|
b2786252b0 | ||
|
|
45ee40920d | ||
|
|
8ad7eb714b | ||
|
|
3cb44c3104 | ||
|
|
2400329acd | ||
|
|
c17af882cc | ||
|
|
76b1938afa | ||
|
|
ced04a6148 | ||
|
|
f2258b92a2 | ||
|
|
979a7c7b25 | ||
|
|
0df8f7b936 | ||
|
|
38558641c1 | ||
|
|
63d905b4df | ||
|
|
d95f4e0caf | ||
|
|
7bc565d17e | ||
|
|
dfe9c3b67e | ||
|
|
37f8db89e4 | ||
|
|
00c7d8df24 | ||
|
|
c6811eadd8 | ||
|
|
4d9d537d83 |
4
.gitattributes
vendored
Normal file
4
.gitattributes
vendored
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
/.gitattributes text eol=lf
|
||||||
|
*.yaml text eol=lf
|
||||||
|
*.yml text eol=lf
|
||||||
|
*.sh text eol=lf
|
||||||
7
.gitignore
vendored
Normal file
7
.gitignore
vendored
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
# .NET build outputs (lint test project)
|
||||||
|
**/bin/
|
||||||
|
**/obj/
|
||||||
|
|
||||||
|
# Editor / temp
|
||||||
|
.DS_Store
|
||||||
|
*.swp
|
||||||
26
README.md
26
README.md
@@ -99,10 +99,36 @@ curl -sk -X DELETE https://dns.iamworkin.lan/api/v1/servers/<serverId>/zones/iam
|
|||||||
- **CoreDNS template + ndots:5 collision**: inside pods, `<svc>.<ns>.svc.cluster.local` with <5 dots gets search-expanded through `iamworkin.lan` FIRST and hits the wildcard template → resolves to Traefik VIP, not the real ClusterIP. Use short service names (`<svc>`) in K8s manifests. See memory `feedback_coredns_ndots_template_collision.md`.
|
- **CoreDNS template + ndots:5 collision**: inside pods, `<svc>.<ns>.svc.cluster.local` with <5 dots gets search-expanded through `iamworkin.lan` FIRST and hits the wildcard template → resolves to Traefik VIP, not the real ClusterIP. Use short service names (`<svc>`) in K8s manifests. See memory `feedback_coredns_ndots_template_collision.md`.
|
||||||
- **Image not on node**: pods stuck `ErrImageNeverPull` means the image wasn't imported to the node Kubernetes scheduled the pod onto. `ctr images import` on all of rke2-server, rke2-agent1, rke2-agent2.
|
- **Image not on node**: pods stuck `ErrImageNeverPull` means the image wasn't imported to the node Kubernetes scheduled the pod onto. `ctr images import` on all of rke2-server, rke2-agent1, rke2-agent2.
|
||||||
- **StatefulSet PVC drift**: `volumeClaimTemplates` needs explicit `volumeMode: Filesystem` or ArgoCD SSA self-heals forever. See memory `feedback_argocd_statefulset_pvc_drift.md`.
|
- **StatefulSet PVC drift**: `volumeClaimTemplates` needs explicit `volumeMode: Filesystem` or ArgoCD SSA self-heals forever. See memory `feedback_argocd_statefulset_pvc_drift.md`.
|
||||||
|
- **IngressRoute namespace split**: this RKE2 Traefik install does not allow cross-namespace service refs. Keep the `IngressRoute`, backend `Service`, and TLS secret in the same namespace; if one host is shared across namespaces, duplicate the `Certificate` and move the route next to the destination service.
|
||||||
|
- **Public read-only hosts**: if a public host fronts a service that also exposes admin writes internally, add a Traefik route match like `Host(...) && (Method(GET) || Method(HEAD))` on the public edge instead of trusting the app to reject unsafe methods.
|
||||||
|
- **Public read-write allowlist hosts**: if a public host accepts a tightly bounded write surface (e.g. bootstrap-JWT POST), pin the allowlist as `(Method(GET) || Method(HEAD) || Method(POST) || Method(OPTIONS))`. PUT/PATCH/DELETE must still 404 at the route. Track A's `updatecenter.iamworkin.lan` / `updates.iamworkin.lan` are the canonical examples. The lint test enforces this invariant.
|
||||||
|
- **Traefik VIP netpols**: when a `NetworkPolicy` allows `10.0.56.200`, also allow the post-DNAT backend ports (`8443` for TLS plus `8080` or `8000` for HTTP) or Calico will drop the rewritten flow.
|
||||||
|
- **Auth-safe probes**: services behind API-key or global auth middleware should prefer `tcpSocket` probes unless `/health` is explicitly exempted before the middleware runs.
|
||||||
- **ArgoCD must use internal Gitea URL**: `http://gitea-clusterip.gitea.svc.cluster.local:3000/bluejay/bluejay-infra.git`, not the external HTTPS URL (step-ca cert isn't trusted by ArgoCD). The `ApplicationSet` and any hand-created `Application` must both use the internal URL.
|
- **ArgoCD must use internal Gitea URL**: `http://gitea-clusterip.gitea.svc.cluster.local:3000/bluejay/bluejay-infra.git`, not the external HTTPS URL (step-ca cert isn't trusted by ArgoCD). The `ApplicationSet` and any hand-created `Application` must both use the internal URL.
|
||||||
|
|
||||||
|
## Local manifest lint
|
||||||
|
|
||||||
|
The repo now carries a local-first lint pass for the recurring K8s gotchas that have burned the fleet:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
dotnet test tests/bluejay-infra-lint/BluejayInfraLint.Tests.csproj -c Release
|
||||||
|
```
|
||||||
|
|
||||||
|
That test project sweeps `bluejay-infra/apps/**` plus the canonical sibling `FlowerCore.*/k8s` manifests that share the same workspace. Matching `conftest.dev` policy files live under `tests/bluejay-infra-lint/conftest.dev/` for environments that also have `conftest` or `opa`.
|
||||||
|
|
||||||
|
## Non-K8s Pi Artifacts
|
||||||
|
|
||||||
|
Some `apps/*` directories are deployment artifact bundles consumed by Puppet
|
||||||
|
instead of Kubernetes workloads. `apps/fc-signage-pi-player/` carries the
|
||||||
|
Chromium signage Pi player, `apps/fc-divoom-dm-pi-device/` carries the additive
|
||||||
|
edge2 Divoom-as-DeviceManagement-device profile/Hiera contract, and
|
||||||
|
`apps/fc-divoom-tv-pi/` carries the Divoom TV Pi HDMI systemd/Puppet shape.
|
||||||
|
These bundles intentionally avoid Deployment, IngressRoute, Certificate, and
|
||||||
|
OnePasswordItem resources.
|
||||||
|
|
||||||
## References
|
## References
|
||||||
|
|
||||||
|
- OpenVox noc1 durability runbook: `docs/runbooks/openvoxserver-quadlet-durability.md`
|
||||||
- Cert-manager recovery playbook: `FlowerCore.Notes/memory/project_cert_manager_recovery_2026_04_22.md`
|
- Cert-manager recovery playbook: `FlowerCore.Notes/memory/project_cert_manager_recovery_2026_04_22.md`
|
||||||
- Why pfSense DNS is required: `FlowerCore.Notes/memory/feedback_pfsense_dns_required_for_acme.md`
|
- Why pfSense DNS is required: `FlowerCore.Notes/memory/feedback_pfsense_dns_required_for_acme.md`
|
||||||
- Public DNS operator host: `https://dns.iamworkin.lan`
|
- Public DNS operator host: `https://dns.iamworkin.lan`
|
||||||
|
|||||||
@@ -92,14 +92,17 @@ subjects:
|
|||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Agent Zero — AI Agent Web UI (NUC Edition, Blue Jay Profile)
|
# Agent Zero — AI Agent Web UI (NUC Edition, Blue Jay Profile)
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Connects to a local nginx proxy that routes to edge1 Pi 5 + AI HAT+ Ollama only
|
# Connects directly to fc-llm-bridge for chat + internal util/embed + browser.
|
||||||
# Blue Jay profile with 21 tools, 3 prompts, 4 extensions
|
# Agent Zero's internal util/embed slots stay on the bridge's OpenAI-compatible
|
||||||
|
# /v1 surface, while browser + corpus-search use the Ollama-compatible /api/*
|
||||||
|
# surface through OLLAMA_HOST.
|
||||||
|
# Blue Jay profile with 21 tools, 3 prompts, 4 extensions.
|
||||||
|
|
||||||
---
|
---
|
||||||
# FC LLM Bridge API key for Agent Zero (ADR-088 chat_model routing).
|
# FC LLM Bridge API key for Agent Zero (ADR-088 chat/util/embed/browser routing).
|
||||||
# Syncs from 1Password item "FC LLM Bridge API Keys" (field: agent-zero-k8s).
|
# Syncs from 1Password item "FC LLM Bridge API Keys" (field: agent-zero-k8s).
|
||||||
# Consumed by the chat_model only; util / embedding / browser stay on local
|
# Consumed by chat, internal util/embed, browser, and corpus-search requests
|
||||||
# Ollama via the 127.0.0.1 sidecar proxy.
|
# that traverse fc-llm-bridge.
|
||||||
apiVersion: onepassword.com/v1
|
apiVersion: onepassword.com/v1
|
||||||
kind: OnePasswordItem
|
kind: OnePasswordItem
|
||||||
metadata:
|
metadata:
|
||||||
@@ -108,6 +111,34 @@ metadata:
|
|||||||
spec:
|
spec:
|
||||||
itemPath: "vaults/IAmWorkin/items/FC LLM Bridge API Keys"
|
itemPath: "vaults/IAmWorkin/items/FC LLM Bridge API Keys"
|
||||||
|
|
||||||
|
---
|
||||||
|
# Print.Web API key for Agent Zero's print_web.py Python tool.
|
||||||
|
# Syncs from 1Password item "Print.Web API Keys" (password field = API key).
|
||||||
|
# The print_web.py tool reads PRINT_WEB_API_KEY env var for all HTTP requests
|
||||||
|
# to the thermal print service (GET /api/mcp/tools, POST /api/print/*, etc.).
|
||||||
|
# Note: Print.Web uses the legacy REST MCP shape (/api/mcp/tools/*), not the
|
||||||
|
# streamable-http MCP protocol. The print_web Python tool bridges this gap
|
||||||
|
# and is already present in bluejay-tools ConfigMaps.
|
||||||
|
apiVersion: onepassword.com/v1
|
||||||
|
kind: OnePasswordItem
|
||||||
|
metadata:
|
||||||
|
name: print-web-api-keys
|
||||||
|
namespace: agent-zero
|
||||||
|
spec:
|
||||||
|
itemPath: "vaults/IAmWorkin/items/Print.Web API Keys"
|
||||||
|
|
||||||
|
---
|
||||||
|
# Knowledge MCP bearer token for the direct Agent Zero -> Knowledge.Web path.
|
||||||
|
# The 1Password item currently stores the raw token in its concealed PASSWORD
|
||||||
|
# field, which the operator syncs to Secret key `password`.
|
||||||
|
apiVersion: onepassword.com/v1
|
||||||
|
kind: OnePasswordItem
|
||||||
|
metadata:
|
||||||
|
name: knowledge-mcp-tokens
|
||||||
|
namespace: agent-zero
|
||||||
|
spec:
|
||||||
|
itemPath: "vaults/IAmWorkin/items/FlowerCore Knowledge MCP Tokens"
|
||||||
|
|
||||||
---
|
---
|
||||||
apiVersion: apps/v1
|
apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
@@ -119,7 +150,7 @@ metadata:
|
|||||||
annotations:
|
annotations:
|
||||||
agent-zero/deployment: "nuc"
|
agent-zero/deployment: "nuc"
|
||||||
agent-zero/profile: "bluejay"
|
agent-zero/profile: "bluejay"
|
||||||
agent-zero/ollama: "edge1 Pi 5 + AI HAT+ only (10.0.57.17:11434) — workstation Ollama is private dev hardware, not a cluster dependency"
|
agent-zero/ollama: "fc-llm-bridge fronts edge1 Pi 5 + AI HAT+ Ollama for cluster browser/corpus-search traffic; internal chat/util/embed route through the bridge's authenticated OpenAI surface"
|
||||||
spec:
|
spec:
|
||||||
replicas: 1
|
replicas: 1
|
||||||
selector:
|
selector:
|
||||||
@@ -134,19 +165,18 @@ spec:
|
|||||||
spec:
|
spec:
|
||||||
serviceAccountName: agent-zero
|
serviceAccountName: agent-zero
|
||||||
initContainers:
|
initContainers:
|
||||||
# Wait for edge1 Ollama to be reachable before starting Agent Zero.
|
# Wait for fc-llm-bridge to be reachable before starting Agent Zero.
|
||||||
# (Workstation Ollama is intentionally NOT in the cluster path.)
|
- name: wait-for-llm-bridge
|
||||||
- name: wait-for-ollama
|
|
||||||
image: busybox:1.37
|
image: busybox:1.37
|
||||||
command: ["sh", "-c"]
|
command: ["sh", "-c"]
|
||||||
args:
|
args:
|
||||||
- |
|
- |
|
||||||
echo "Waiting for edge1 Ollama (10.0.57.17:11434)..."
|
echo "Waiting for fc-llm-bridge..."
|
||||||
until wget -qO- --timeout=2 http://10.0.57.17:11434/api/tags >/dev/null 2>&1; do
|
until wget -qO- --timeout=2 http://fc-llm-bridge.fc-llm-bridge.svc:8080/healthz >/dev/null 2>&1; do
|
||||||
echo "edge1 Ollama not ready yet, retrying in 5s..."
|
echo "fc-llm-bridge not ready yet, retrying in 5s..."
|
||||||
sleep 5
|
sleep 5
|
||||||
done
|
done
|
||||||
echo "edge1 Ollama is reachable."
|
echo "fc-llm-bridge is reachable."
|
||||||
# Assemble the Blue Jay profile directory structure from ConfigMaps.
|
# Assemble the Blue Jay profile directory structure from ConfigMaps.
|
||||||
# ConfigMaps can't create nested dirs, so we copy into the workspace PVC.
|
# ConfigMaps can't create nested dirs, so we copy into the workspace PVC.
|
||||||
- name: setup-bluejay
|
- name: setup-bluejay
|
||||||
@@ -193,73 +223,6 @@ spec:
|
|||||||
- name: bluejay-theme
|
- name: bluejay-theme
|
||||||
mountPath: /tmp/bluejay-theme
|
mountPath: /tmp/bluejay-theme
|
||||||
containers:
|
containers:
|
||||||
- name: ollama-proxy
|
|
||||||
image: nginx:1.27-alpine
|
|
||||||
command: ["/bin/sh", "-c"]
|
|
||||||
args:
|
|
||||||
- |
|
|
||||||
cat > /etc/nginx/nginx.conf <<'NGINX'
|
|
||||||
worker_processes 1;
|
|
||||||
events { worker_connections 1024; }
|
|
||||||
http {
|
|
||||||
upstream ollama_upstream {
|
|
||||||
# edge1 Pi 5 + AI HAT+ is the SOLE upstream.
|
|
||||||
# Workstation Ollama (BLUEJAY-WS) is private dev hardware and
|
|
||||||
# MUST NOT be added back here without explicit operator decision —
|
|
||||||
# adding it would expose the workstation to cluster traffic.
|
|
||||||
server 10.0.57.17:11434 max_fails=2 fail_timeout=10s;
|
|
||||||
keepalive 16;
|
|
||||||
}
|
|
||||||
server {
|
|
||||||
listen 11434;
|
|
||||||
# Local healthcheck — proves nginx itself is alive.
|
|
||||||
# Must NOT depend on upstream so liveness doesn't restart
|
|
||||||
# the container when edge1 is slow/offline.
|
|
||||||
location = /healthz {
|
|
||||||
access_log off;
|
|
||||||
return 200 'ok\n';
|
|
||||||
default_type text/plain;
|
|
||||||
}
|
|
||||||
location / {
|
|
||||||
proxy_http_version 1.1;
|
|
||||||
proxy_set_header Connection "";
|
|
||||||
proxy_set_header Host $host;
|
|
||||||
proxy_connect_timeout 5s;
|
|
||||||
proxy_read_timeout 600s;
|
|
||||||
proxy_send_timeout 600s;
|
|
||||||
proxy_next_upstream error timeout invalid_header http_502 http_503 http_504;
|
|
||||||
proxy_pass http://ollama_upstream;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
NGINX
|
|
||||||
exec nginx -g 'daemon off;'
|
|
||||||
ports:
|
|
||||||
- containerPort: 11434
|
|
||||||
# Readiness probe DOES check upstream so K8s only routes traffic
|
|
||||||
# when edge1 Ollama is reachable. timeoutSeconds=5 absorbs the Pi's
|
|
||||||
# slower TCP handshake under load (was timeoutSeconds=1 default →
|
|
||||||
# 172 historic restarts when the workstation primary path went down,
|
|
||||||
# before the cluster was repointed to edge1-only on 2026-04-27).
|
|
||||||
readinessProbe:
|
|
||||||
httpGet:
|
|
||||||
path: /api/tags
|
|
||||||
port: 11434
|
|
||||||
initialDelaySeconds: 5
|
|
||||||
periodSeconds: 15
|
|
||||||
timeoutSeconds: 5
|
|
||||||
failureThreshold: 3
|
|
||||||
# Liveness probe hits ONLY local healthz — restarts the container
|
|
||||||
# only when nginx itself is dead. Decoupling liveness from upstream
|
|
||||||
# eliminates restart-loops caused by transient upstream outages.
|
|
||||||
livenessProbe:
|
|
||||||
httpGet:
|
|
||||||
path: /healthz
|
|
||||||
port: 11434
|
|
||||||
initialDelaySeconds: 10
|
|
||||||
periodSeconds: 30
|
|
||||||
timeoutSeconds: 3
|
|
||||||
failureThreshold: 3
|
|
||||||
- name: agent-zero
|
- name: agent-zero
|
||||||
image: agent0ai/agent-zero:latest
|
image: agent0ai/agent-zero:latest
|
||||||
command: ["/bin/bash", "-c"]
|
command: ["/bin/bash", "-c"]
|
||||||
@@ -280,24 +243,41 @@ spec:
|
|||||||
# chat_model: FlowerCore LLM Bridge (ADR-088) — OpenAI-compat,
|
# chat_model: FlowerCore LLM Bridge (ADR-088) — OpenAI-compat,
|
||||||
# spend-tracked, tier-aliased (fc:balanced → Claude Sonnet).
|
# spend-tracked, tier-aliased (fc:balanced → Claude Sonnet).
|
||||||
# api_key comes from A0_SET_chat_model_api_key env var (overrides
|
# api_key comes from A0_SET_chat_model_api_key env var (overrides
|
||||||
# config.json). util + embedding go to local 127.0.0.1 nginx
|
# config.json). Utility + embedding stay on the authenticated
|
||||||
# proxy which routes to edge1 Pi 5 + AI HAT+ ONLY (workstation
|
# OpenAI-compatible /v1 surface; browser and direct tool traffic
|
||||||
# is private dev hardware, intentionally not in the cluster path).
|
# use the bridge's Ollama-compatible root via OLLAMA_HOST.
|
||||||
mkdir -p /a0/usr/plugins/_model_config
|
mkdir -p /a0/usr/plugins/_model_config
|
||||||
cat > /a0/usr/plugins/_model_config/config.json << 'MODELCFG'
|
cat > /a0/usr/plugins/_model_config/config.json << 'MODELCFG'
|
||||||
{"allow_chat_override":true,"chat_model":{"provider":"openai","name":"fc:balanced","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_history":0.7,"vision":false,"kwargs":{"temperature":0,"num_ctx":8192}},"utility_model":{"provider":"ollama","name":"qwen2.5:1.5b","api_base":"http://127.0.0.1:11434","ctx_length":8192,"ctx_input":0.7,"kwargs":{"num_ctx":8192}},"embedding_model":{"provider":"ollama","name":"nomic-embed-text","api_base":"http://127.0.0.1:11434","kwargs":{}}}
|
{"allow_chat_override":true,"chat_model":{"provider":"openai","name":"fc:balanced","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_history":0.7,"vision":false,"kwargs":{"temperature":0,"num_ctx":8192}},"utility_model":{"provider":"openai","name":"fc:cheap","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_input":0.7,"kwargs":{"num_ctx":8192}},"embedding_model":{"provider":"openai","name":"openai/fc:embedding","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","kwargs":{}}}
|
||||||
MODELCFG
|
MODELCFG
|
||||||
# Strip heredoc indentation
|
# Strip heredoc indentation
|
||||||
sed -i 's/^ //' /a0/usr/plugins/_model_config/config.json
|
sed -i 's/^ //' /a0/usr/plugins/_model_config/config.json
|
||||||
# Phase 0 Chat MCP pilot: Agent Zero does not interpolate env vars
|
# Phase 0 Chat MCP pilot: Agent Zero does not interpolate env vars
|
||||||
# inside A0_SET_mcp_servers JSON, so build the final JSON here from
|
# inside A0_SET_mcp_servers JSON, so build the final JSON here from
|
||||||
# the secret-backed CHAT_MCP_API_KEY env var before initialize.sh.
|
# the secret-backed env vars before initialize.sh. Keep the local
|
||||||
# Use the in-cluster Chat service URL rather than the public
|
# corpus_search.py tool mounted either way so outage fallback
|
||||||
# Traefik hostname so the pod stays off the private VIP lane that
|
# remains available even when fc_knowledge is not advertised.
|
||||||
# the default egress rule blocks.
|
export KNOWLEDGE_MCP_ENABLED=false
|
||||||
if [ -n "${CHAT_MCP_API_KEY:-}" ]; then
|
if [ -n "${KNOWLEDGE_MCP_BEARER_TOKEN:-}" ]; then
|
||||||
export A0_SET_mcp_servers="{\"mcpServers\":{\"fc-chat\":{\"type\":\"streamable-http\",\"url\":\"http://chat-web.fc-chat.svc/mcp\",\"headers\":{\"X-Api-Key\":\"${CHAT_MCP_API_KEY}\"}}}}"
|
if curl -sf --connect-timeout 3 "${KNOWLEDGE_MCP_HEALTH_URL}" > /dev/null && \
|
||||||
|
curl -sf --connect-timeout 5 \
|
||||||
|
-H "Authorization: Bearer ${KNOWLEDGE_MCP_BEARER_TOKEN}" \
|
||||||
|
-H "Accept: application/json, text/event-stream" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"jsonrpc":"2.0","id":"fc-knowledge-bootstrap","method":"initialize","params":{"protocolVersion":"2025-03-26","capabilities":{},"clientInfo":{"name":"agent-zero-bootstrap","version":"1.0"}}}' \
|
||||||
|
"${KNOWLEDGE_MCP_URL}" > /dev/null; then
|
||||||
|
export KNOWLEDGE_MCP_ENABLED=true
|
||||||
|
echo "fc_knowledge enabled from ${KNOWLEDGE_MCP_URL}."
|
||||||
|
else
|
||||||
|
echo "fc_knowledge unavailable or unauthorized; keeping local corpus_search.py as the fallback path."
|
||||||
fi
|
fi
|
||||||
|
else
|
||||||
|
echo "fc_knowledge token missing; keeping local corpus_search.py as the fallback path."
|
||||||
|
fi
|
||||||
|
|
||||||
|
export A0_SET_mcp_servers="$(
|
||||||
|
python3 -c 'import json, os; servers = {}; chat_key = os.getenv("CHAT_MCP_API_KEY"); knowledge_enabled = os.getenv("KNOWLEDGE_MCP_ENABLED", "false").lower() == "true"; token = os.getenv("KNOWLEDGE_MCP_BEARER_TOKEN", "") if knowledge_enabled else ""; chat_key and servers.setdefault("fc_chat", {"type": "streamable-http", "url": "http://chat-web.fc-chat.svc/mcp", "headers": {"X-Api-Key": chat_key}}); token and servers.setdefault("fc_knowledge", {"type": "streamable-http", "url": os.getenv("KNOWLEDGE_MCP_URL", "http://knowledge-web.knowledge.svc/mcp"), "headers": {"Authorization": f"Bearer {token}"}}); print(json.dumps({"mcpServers": servers}, separators=(",", ":")))'
|
||||||
|
)"
|
||||||
# Run the original entrypoint
|
# Run the original entrypoint
|
||||||
exec /exe/initialize.sh $BRANCH
|
exec /exe/initialize.sh $BRANCH
|
||||||
ports:
|
ports:
|
||||||
@@ -309,8 +289,9 @@ spec:
|
|||||||
# Chat model — routed through FlowerCore LLM Bridge (ADR-088)
|
# Chat model — routed through FlowerCore LLM Bridge (ADR-088)
|
||||||
# so spend is tracked and tier aliases (fc:cheap/fc:balanced/fc:deep)
|
# so spend is tracked and tier aliases (fc:cheap/fc:balanced/fc:deep)
|
||||||
# dispatch to Ollama or Anthropic via a single OpenAI-compat endpoint.
|
# dispatch to Ollama or Anthropic via a single OpenAI-compat endpoint.
|
||||||
# Util / embedding / browser stay on local Ollama via 127.0.0.1 proxy
|
# Internal utility + embedding use the authenticated OpenAI surface,
|
||||||
# for zero-latency, zero-cost small-model traffic.
|
# while browser/corpus-search use the bridge's Ollama-compatible
|
||||||
|
# endpoints so Agent Zero no longer needs a local proxy sidecar.
|
||||||
- name: A0_SET_chat_model_provider
|
- name: A0_SET_chat_model_provider
|
||||||
value: "openai"
|
value: "openai"
|
||||||
- name: A0_SET_chat_model_name
|
- name: A0_SET_chat_model_name
|
||||||
@@ -332,35 +313,51 @@ spec:
|
|||||||
secretKeyRef:
|
secretKeyRef:
|
||||||
name: fc-llm-bridge-api-keys
|
name: fc-llm-bridge-api-keys
|
||||||
key: agent-zero-k8s
|
key: agent-zero-k8s
|
||||||
|
- name: FC_LLM_BRIDGE_API_KEY
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: fc-llm-bridge-api-keys
|
||||||
|
key: agent-zero-k8s
|
||||||
- name: A0_SET_chat_model_ctx_length
|
- name: A0_SET_chat_model_ctx_length
|
||||||
value: "8192"
|
value: "8192"
|
||||||
- name: A0_SET_chat_model_kwargs
|
- name: A0_SET_chat_model_kwargs
|
||||||
value: '{"temperature": 0, "num_ctx": 8192}'
|
value: '{"temperature": 0, "num_ctx": 8192}'
|
||||||
# Utility model — fast small helper tier through the same proxy
|
# Utility model — fast small helper tier through the OpenAI surface
|
||||||
- name: A0_SET_util_model_provider
|
- name: A0_SET_util_model_provider
|
||||||
value: "ollama"
|
value: "openai"
|
||||||
- name: A0_SET_util_model_name
|
- name: A0_SET_util_model_name
|
||||||
value: "qwen2.5:1.5b"
|
value: "fc:cheap"
|
||||||
- name: A0_SET_util_model_api_base
|
- name: A0_SET_util_model_api_base
|
||||||
value: "http://127.0.0.1:11434"
|
value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1"
|
||||||
- name: A0_SET_util_model_kwargs
|
- name: A0_SET_util_model_kwargs
|
||||||
value: '{"num_ctx": 2048}'
|
value: '{"num_ctx": 2048}'
|
||||||
# Embedding model — nomic through the same proxy
|
# Embedding model — authenticated bridge alias to nomic-embed-text.
|
||||||
|
# LiteLLM's embedding() path needs an explicit provider prefix here
|
||||||
|
# even though the chat slot can use bare fc:* aliases.
|
||||||
- name: A0_SET_embed_model_provider
|
- name: A0_SET_embed_model_provider
|
||||||
value: "ollama"
|
value: "openai"
|
||||||
- name: A0_SET_embed_model_name
|
- name: A0_SET_embed_model_name
|
||||||
value: "nomic-embed-text"
|
value: "openai/fc:embedding"
|
||||||
- name: A0_SET_embed_model_api_base
|
- name: A0_SET_embed_model_api_base
|
||||||
value: "http://127.0.0.1:11434"
|
value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1"
|
||||||
# Browser model — small Gemma candidate through the same proxy
|
# Browser model — small Gemma candidate through the bridge's Ollama-compatible root (no local proxy sidecar)
|
||||||
- name: A0_SET_browser_model_provider
|
- name: A0_SET_browser_model_provider
|
||||||
value: "ollama"
|
value: "ollama"
|
||||||
- name: A0_SET_browser_model_name
|
- name: A0_SET_browser_model_name
|
||||||
value: "gemma3:4b"
|
value: "gemma3:4b"
|
||||||
- name: A0_SET_browser_model_api_base
|
- name: A0_SET_browser_model_api_base
|
||||||
value: "http://127.0.0.1:11434"
|
value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080"
|
||||||
|
- name: A0_SET_browser_model_api_key
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: fc-llm-bridge-api-keys
|
||||||
|
key: agent-zero-k8s
|
||||||
- name: A0_SET_browser_model_vision
|
- name: A0_SET_browser_model_vision
|
||||||
value: "true"
|
value: "true"
|
||||||
|
- name: OLLAMA_HOST
|
||||||
|
value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080"
|
||||||
|
- name: FLOWERCORE_AGENTZERO_OLLAMA_URL
|
||||||
|
value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080"
|
||||||
# Agent profile — Blue Jay personality, tools, and system prompt
|
# Agent profile — Blue Jay personality, tools, and system prompt
|
||||||
- name: A0_SET_agent_profile
|
- name: A0_SET_agent_profile
|
||||||
value: "bluejay"
|
value: "bluejay"
|
||||||
@@ -383,9 +380,38 @@ spec:
|
|||||||
name: chat-mcp-api-key
|
name: chat-mcp-api-key
|
||||||
key: api-key
|
key: api-key
|
||||||
optional: true
|
optional: true
|
||||||
# Print.Web — Thermal printer service on edge2
|
# FlowerCore.Knowledge MCP Phase 1 — direct Agent Zero client path.
|
||||||
|
# Probe /healthz first, then try an authenticated initialize call.
|
||||||
|
# If either fails, Agent Zero boots without fc_knowledge and keeps
|
||||||
|
# the local corpus_search.py tool as the outage-safe path.
|
||||||
|
- name: KNOWLEDGE_MCP_URL
|
||||||
|
value: "http://knowledge-web.knowledge.svc/mcp"
|
||||||
|
- name: KNOWLEDGE_MCP_HEALTH_URL
|
||||||
|
value: "http://knowledge-web.knowledge.svc/healthz"
|
||||||
|
- name: KNOWLEDGE_MCP_BEARER_TOKEN
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: knowledge-mcp-tokens
|
||||||
|
key: password
|
||||||
|
# Print.Web — Thermal printer service on edge2.
|
||||||
|
# PRINT_WEB_URL: internal HTTP (bypasses Traefik TLS — print_web.py
|
||||||
|
# runs in-cluster and can reach edge2 directly on the PROD VLAN).
|
||||||
|
# PRINT_WEB_API_KEY: from 1Password "Print.Web API Keys" password field,
|
||||||
|
# synced by the print-web-api-keys OnePasswordItem CRD above.
|
||||||
|
# The print_web.py Python tool reads both env vars for all HTTP calls.
|
||||||
- name: PRINT_WEB_URL
|
- name: PRINT_WEB_URL
|
||||||
value: "http://10.0.57.16:5200"
|
value: "http://10.0.57.16:5200"
|
||||||
|
- name: PRINT_WEB_API_KEY
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: print-web-api-keys
|
||||||
|
key: password
|
||||||
|
# Intranet search — use in-cluster HTTP (no step-ca TLS needed)
|
||||||
|
# corpus_search.py reads FLOWERCORE_FLEET_VECTOR_DIR but that mount is not
|
||||||
|
# on the cluster yet (BLUEJAY-WS only). The tool gracefully returns a
|
||||||
|
# "no DB found" message with rebuild instructions rather than crashing.
|
||||||
|
- name: FLOWERCORE_INTRANET_URL
|
||||||
|
value: "http://intranet-web.intranet.svc:5300"
|
||||||
# Kubernetes
|
# Kubernetes
|
||||||
- name: KUBERNETES_SERVICE_HOST
|
- name: KUBERNETES_SERVICE_HOST
|
||||||
value: "kubernetes.default.svc"
|
value: "kubernetes.default.svc"
|
||||||
@@ -420,7 +446,7 @@ spec:
|
|||||||
command:
|
command:
|
||||||
- /bin/bash
|
- /bin/bash
|
||||||
- -c
|
- -c
|
||||||
- "curl -sf http://localhost:80/ > /dev/null && curl -sf --connect-timeout 3 http://127.0.0.1:11434/api/tags > /dev/null"
|
- "curl -sf http://localhost:80/ > /dev/null && curl -sf --connect-timeout 3 http://fc-llm-bridge.fc-llm-bridge.svc:8080/healthz > /dev/null"
|
||||||
periodSeconds: 30
|
periodSeconds: 30
|
||||||
failureThreshold: 2
|
failureThreshold: 2
|
||||||
resources:
|
resources:
|
||||||
@@ -558,13 +584,6 @@ spec:
|
|||||||
protocol: UDP
|
protocol: UDP
|
||||||
- port: 53
|
- port: 53
|
||||||
protocol: TCP
|
protocol: TCP
|
||||||
# Ollama on edge1 Pi 5 + AI HAT+ (sole upstream — workstation
|
|
||||||
# is private dev hardware and intentionally not allowlisted)
|
|
||||||
- to:
|
|
||||||
- ipBlock:
|
|
||||||
cidr: 10.0.57.17/32
|
|
||||||
ports:
|
|
||||||
- port: 11434
|
|
||||||
# Print.Web on edge2
|
# Print.Web on edge2
|
||||||
- to:
|
- to:
|
||||||
- ipBlock:
|
- ipBlock:
|
||||||
@@ -598,6 +617,26 @@ spec:
|
|||||||
protocol: TCP
|
protocol: TCP
|
||||||
- port: 8080
|
- port: 8080
|
||||||
protocol: TCP
|
protocol: TCP
|
||||||
|
# FlowerCore.Knowledge MCP (Phase 1) — in-cluster direct route with
|
||||||
|
# anonymous /healthz probe plus authenticated /mcp initialize/tool calls.
|
||||||
|
- to:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: knowledge
|
||||||
|
ports:
|
||||||
|
- port: 80
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8080
|
||||||
|
protocol: TCP
|
||||||
|
# Intranet search API — use in-cluster svc so traffic stays inside
|
||||||
|
# the cluster and is not blocked by the private-range egress denylist.
|
||||||
|
- to:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: intranet
|
||||||
|
ports:
|
||||||
|
- port: 5300
|
||||||
|
protocol: TCP
|
||||||
# Allow internet (for kubectl image pull, etc)
|
# Allow internet (for kubectl image pull, etc)
|
||||||
- to:
|
- to:
|
||||||
- ipBlock:
|
- ipBlock:
|
||||||
|
|||||||
@@ -7209,6 +7209,9 @@ data:
|
|||||||
"keep_alive": keep_alive,
|
"keep_alive": keep_alive,
|
||||||
"stream": False,
|
"stream": False,
|
||||||
})
|
})
|
||||||
|
curl_headers = ["-H", "Content-Type: application/json"]
|
||||||
|
if os.environ.get("FC_LLM_BRIDGE_API_KEY"):
|
||||||
|
curl_headers.extend(["-H", f"X-Api-Key: {os.environ['FC_LLM_BRIDGE_API_KEY']}"])
|
||||||
|
|
||||||
try:
|
try:
|
||||||
result = subprocess.run(
|
result = subprocess.run(
|
||||||
@@ -7216,7 +7219,7 @@ data:
|
|||||||
"curl", "-s", "--max-time", "120",
|
"curl", "-s", "--max-time", "120",
|
||||||
"-X", "POST",
|
"-X", "POST",
|
||||||
f"{api_base}/api/generate",
|
f"{api_base}/api/generate",
|
||||||
"-H", "Content-Type: application/json",
|
*curl_headers,
|
||||||
"-d", payload,
|
"-d", payload,
|
||||||
],
|
],
|
||||||
capture_output=True,
|
capture_output=True,
|
||||||
@@ -13150,6 +13153,451 @@ data:
|
|||||||
- PowerShell 5.1 compatibility is assumed (no PowerShell 7+ features).
|
- PowerShell 5.1 compatibility is assumed (no PowerShell 7+ features).
|
||||||
- All commands run with `-NoProfile -NonInteractive` flags for clean execution.
|
- All commands run with `-NoProfile -NonInteractive` flags for clean execution.
|
||||||
"""
|
"""
|
||||||
|
corpus_search.py: |
|
||||||
|
# FlowerCore Fleet Corpus Vector Search Tool
|
||||||
|
#
|
||||||
|
# Queries the AiStation-built SqliteVecVectorStore DB at /a0/usr/vectors/fleet.db
|
||||||
|
# (bind-mounted read-only from /var/lib/flowercore/vector-stores/ on the host).
|
||||||
|
# Embeds the query through Ollama's nomic-embed-text model, computes cosine
|
||||||
|
# similarity against every stored chunk in pure Python (no numpy — not present
|
||||||
|
# in the container), and returns the top-K nearest neighbors with source metadata.
|
||||||
|
#
|
||||||
|
# This is the offline-friendly counterpart to `intranet_search` (which hits the
|
||||||
|
# Intranet's live REST API). Use it for Bible/Greek/Hebrew/Strong's lookups and
|
||||||
|
# anywhere the workstation has a newer DB than the Intranet one. The store is
|
||||||
|
# refreshed by `aistation-indexer build <edition>` — see the FlowerCore.Knowledge
|
||||||
|
# ADR at docs/ai-agents/flowercore-knowledge-service-plan.md.
|
||||||
|
|
||||||
|
import json
|
||||||
|
import math
|
||||||
|
import os
|
||||||
|
import sqlite3
|
||||||
|
import urllib.request
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from python.helpers.tool import Tool, Response
|
||||||
|
|
||||||
|
|
||||||
|
# Directory that holds the fleet vector DBs inside the container.
DEFAULT_VECTORS_DIR = os.getenv("FLOWERCORE_FLEET_VECTOR_DIR", "/a0/usr/vectors")

# Lookup order used when the caller does not name a DB: an explicit
# FLOWERCORE_FLEET_VECTOR_DB override first (empty string when unset), then
# the fleet tiers from largest to smallest: workstation, pi-edge, bmo-bot.
PREFERRED_DB_ORDER = [
    os.getenv("FLOWERCORE_FLEET_VECTOR_DB", ""),
    "fleet-workstation-full.db",
    "fleet-pi-edge.db",
    "fleet-bmo-bot.db",
]

# Base URL of the Ollama-compatible endpoint used to embed queries.
OLLAMA_BASE_URL = os.getenv(
    "FLOWERCORE_AGENTZERO_OLLAMA_URL", "http://host.containers.internal:11434"
)

# Optional API key; sent as X-Api-Key on embedding requests when non-empty.
BRIDGE_API_KEY = os.getenv("FC_LLM_BRIDGE_API_KEY", "").strip()

# Embedding model name passed to the embeddings endpoint.
EMBEDDING_MODEL = os.getenv("FLOWERCORE_FLEET_EMBEDDING_MODEL", "nomic-embed-text")
|
||||||
|
|
||||||
|
|
||||||
|
class CorpusSearch(Tool):
    """Agent tool: semantic search over the pre-indexed FlowerCore fleet corpus."""

    async def execute(self, **kwargs) -> Response:
        """
        Semantic search over the FlowerCore fleet corpus (Bible texts, lexicons,
        dictionaries, morphology) pre-indexed by aistation-indexer.

        Args (via self.args):
            query (str): Search query text. Required unless action=stats.
            limit (int): Max results. Default 8. Must be a positive integer.
            index (str): Optional index name filter ("bible-texts", "lexicons",
                         "dictionaries", "morphology"). Default: all indexes.
            repo  (str): Optional repo filter (e.g. "world-english-bible").
            db    (str): Override DB path OR file name inside FLOWERCORE_FLEET_VECTOR_DIR
                         (defaults to /a0/usr/vectors). If omitted, the largest
                         fleet tier present on disk is picked automatically.
            action (str): Optional. "stats" returns an inventory of all fleet DBs
                         visible to the tool (names, sizes, index counts, chunk
                         counts, last-built timestamps). No embedding call.

        Returns:
            Response with ranked chunks (score, source, text preview) OR
            (when action=stats) a markdown inventory of available fleet DBs.
            Every failure, including an invalid `limit`, is reported as a
            Response message rather than raised.
        """
        query = (self.args.get("query") or "").strip()
        raw_limit = self.args.get("limit")
        index_filter = (self.args.get("index") or "").strip()
        repo_filter = (self.args.get("repo") or "").strip()
        db_override = (self.args.get("db") or "").strip()
        action = (self.args.get("action") or "").strip().lower()

        # The inventory needs neither a query nor a limit, so answer it first.
        if action == "stats":
            return Response(message=_render_stats(), break_loop=False)

        if not query:
            return Response(
                message=(
                    "Error: 'query' is required unless action=stats.\n"
                    "Example: query=\"what does Genesis 1:1 say\" limit=5\n"
                    "Inventory: action=stats"
                ),
                break_loop=False,
            )

        # Tool arguments are caller-supplied and may be junk ("five", [], -3).
        # Report that like every other failure instead of letting int() raise,
        # and reject negatives so they cannot silently mis-slice the results.
        try:
            limit = int(raw_limit or 8)
        except (TypeError, ValueError):
            limit = 0
        if limit < 1:
            return Response(
                message=f"Error: 'limit' must be a positive integer (got {raw_limit!r}).",
                break_loop=False,
            )

        db = _resolve_db(db_override)
        if db is None:
            return Response(
                message=(
                    f"Error: no fleet vector DB found under {DEFAULT_VECTORS_DIR}.\n"
                    "Host side: run `aistation-indexer build fleet-workstation-full`\n"
                    "(or `fleet-pi-edge`/`fleet-bmo-bot`) to produce\n"
                    "`/var/lib/flowercore/vector-stores/<slug>.db`, then confirm the\n"
                    "Podman unit mounts that directory into `/a0/usr/vectors:ro`."
                ),
                break_loop=False,
            )

        try:
            query_vec = _embed(query)
        except Exception as e:
            return Response(
                message=f"Error: failed to embed query via Ollama at {OLLAMA_BASE_URL}: {e}",
                break_loop=False,
            )

        try:
            hits = _search(db, query_vec, index_filter, repo_filter, limit)
        except Exception as e:
            return Response(
                message=f"Error: corpus search failed: {e}",
                break_loop=False,
            )

        if not hits:
            return Response(
                message=(
                    f"No matches for '{query}' in {db.name}.\n"
                    "Indexes available: " + _list_indexes_summary(db)
                ),
                break_loop=False,
            )

        lines = [f"**Corpus search: `{query}`** (top {len(hits)} of {limit} requested, DB={db.name})", ""]
        for rank, h in enumerate(hits, 1):
            passage = h.get("passage") or ""
            lang = h.get("language") or ""
            # Drop empty metadata fields so the separator never doubles up.
            meta_bits = [x for x in (h["index"], h["repo"], passage, lang) if x]
            meta = " · ".join(meta_bits)
            preview = h["text"]
            if len(preview) > 320:
                preview = preview[:320].rstrip() + "…"
            lines.append(f"{rank}. **{h['score']:.3f}** {meta}")
            lines.append(f" `{h['source']}`")
            lines.append(f" {preview}")
            lines.append("")

        return Response(message="\n".join(lines).rstrip() + "\n", break_loop=False)
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_db(override: str) -> "Path | None":
    """Pick a fleet DB by explicit path, explicit filename, or preferred order.

    Args:
        override: Caller-supplied DB selector; empty string means "auto".
            An absolute path is used as-is; anything else is looked up as a
            name inside DEFAULT_VECTORS_DIR.

    Returns:
        Path to an existing regular file, or None when nothing suitable exists.
        Directories are never returned: sqlite cannot open them, and returning
        None lets the caller show its "no DB found" guidance instead of an
        opaque "unable to open database file" error.
    """
    vectors_dir = Path(DEFAULT_VECTORS_DIR)
    if override:
        # Only an absolute path bypasses the vectors dir.
        p = Path(override)
        if p.is_absolute() and p.is_file():
            return p
        # Otherwise treat it as a filename within the vectors dir.
        candidate = vectors_dir / override
        if candidate.is_file():
            return candidate
        # An explicit override that does not resolve never falls back to auto.
        return None

    for name in PREFERRED_DB_ORDER:
        if not name:
            # Unset FLOWERCORE_FLEET_VECTOR_DB shows up as an empty entry.
            continue
        p = Path(name) if Path(name).is_absolute() else vectors_dir / name
        if p.is_file():
            return p

    # Fallback: any *.db file in the dir, largest first.
    if vectors_dir.is_dir():
        candidates = [p for p in vectors_dir.glob("*.db") if p.is_file()]
        if candidates:
            return max(candidates, key=lambda p: p.stat().st_size)
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _embed(text: str) -> list:
    """Return the embedding vector for `text` as a list of floats.

    POSTs {"model": EMBEDDING_MODEL, "prompt": text} as JSON to
    `<OLLAMA_BASE_URL>/api/embeddings`, adding an X-Api-Key header when
    BRIDGE_API_KEY is set, and reads the single vector from the "embedding" key.

    Raises:
        RuntimeError: the reply has no non-empty list under "embedding".
        Errors raised by urllib or JSON decoding propagate unchanged.
    """
    payload = json.dumps({"model": EMBEDDING_MODEL, "prompt": text}).encode("utf-8")

    request_headers = {"Content-Type": "application/json"}
    if BRIDGE_API_KEY:
        request_headers["X-Api-Key"] = BRIDGE_API_KEY

    endpoint = f"{OLLAMA_BASE_URL.rstrip('/')}/api/embeddings"
    request = urllib.request.Request(endpoint, data=payload, headers=request_headers)
    with urllib.request.urlopen(request, timeout=60) as reply:
        data = json.loads(reply.read().decode("utf-8"))

    embedding = data.get("embedding")
    if isinstance(embedding, list) and embedding:
        return [float(component) for component in embedding]
    raise RuntimeError(f"Ollama returned no embedding: {data}")
|
||||||
|
|
||||||
|
|
||||||
|
def _cosine(a: list, b: list) -> float:
|
||||||
|
"""Cosine similarity in pure Python — no numpy in the A0 container."""
|
||||||
|
# zip() stops at the shorter — AiStation DB guarantees same dim per index.
|
||||||
|
dot = 0.0
|
||||||
|
na = 0.0
|
||||||
|
nb = 0.0
|
||||||
|
for x, y in zip(a, b):
|
||||||
|
dot += x * y
|
||||||
|
na += x * x
|
||||||
|
nb += y * y
|
||||||
|
if na == 0.0 or nb == 0.0:
|
||||||
|
return 0.0
|
||||||
|
return dot / (math.sqrt(na) * math.sqrt(nb))
|
||||||
|
|
||||||
|
|
||||||
|
def _search(db_path: Path, query_vec: list, index_filter: str, repo_filter: str, limit: int) -> list:
    """Score every stored chunk against `query_vec` and return the top `limit`.

    SqliteVecVectorStore schema:
      VectorIndexes(IndexName, Dimensions, UpdatedAtUtc)
      VectorEntries(IndexName, ChunkId, TextContent, SourceRepo, SourceFile,
                    Book, Chapter, VerseRange, Language, ContentType, License,
                    EstimatedTokens, EmbeddingJson)

    Embeddings are stored as JSON arrays in EmbeddingJson; similarity is
    computed in Python via _cosine.

    Args:
        db_path: SQLite file to read; opened read-only.
        query_vec: Embedding of the query.
        index_filter: Exact IndexName to restrict to, or "" for all indexes.
        repo_filter: Substring matched against SourceRepo (SQL LIKE), or "".
        limit: Maximum number of hits; values <= 0 yield an empty list.

    Returns:
        List of dicts sorted by descending score with keys: score, index,
        chunk_id, text, repo, source, passage, language.

    Raises:
        sqlite3.Error: when the DB cannot be opened or queried.
    """
    # Function-scope import keeps this block independent of the module header.
    import heapq

    if limit <= 0:
        return []

    conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True)
    try:
        sql = [
            "SELECT IndexName, ChunkId, TextContent, SourceRepo, SourceFile, ",
            " Book, Chapter, VerseRange, Language, EmbeddingJson ",
            "FROM VectorEntries",
        ]
        where = []
        params = []
        if index_filter:
            where.append("IndexName = ?")
            params.append(index_filter)
        if repo_filter:
            where.append("SourceRepo LIKE ?")
            params.append(f"%{repo_filter}%")
        if where:
            sql.append(" WHERE " + " AND ".join(where))
        sql.append(";")

        cursor = conn.execute("".join(sql), params)

        def _scored_rows():
            """Yield (score, metadata) per usable row, skipping bad embeddings."""
            for row in cursor:
                idx, chunk_id, text, repo, source_file, book, chapter, verses, lang, emb_json = row
                try:
                    vec = json.loads(emb_json)
                    # Valid JSON that is not a numeric list (null, a string,
                    # nested lists) makes _cosine raise TypeError; skip that
                    # row like an undecodable one instead of failing the search.
                    score = _cosine(query_vec, vec)
                except (json.JSONDecodeError, TypeError):
                    continue
                passage = None
                if book and chapter:
                    passage = f"{book} {chapter}"
                    if verses:
                        passage += f":{verses}"
                yield score, {
                    "index": idx,
                    "chunk_id": chunk_id,
                    # NULL TextContent becomes "" so callers can len()/slice it.
                    "text": text or "",
                    "repo": repo or "",
                    "source": source_file or "",
                    "passage": passage or "",
                    "language": lang or "",
                }

        # nlargest keeps at most `limit` rows in memory and is documented as
        # equivalent to sorted(..., key=key, reverse=True)[:limit].
        best = heapq.nlargest(limit, _scored_rows(), key=lambda t: t[0])
        return [{"score": s, **meta} for s, meta in best]
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def _render_stats() -> str:
    """Build a markdown inventory of every *.db file in DEFAULT_VECTORS_DIR.

    For each DB: file size, then one bullet per registered index with its
    chunk count, dimension and UpdatedAtUtc value. A DB that cannot be
    inspected gets an "inspect failed" bullet instead of aborting the report.
    """
    root = Path(DEFAULT_VECTORS_DIR)
    if not root.is_dir():
        return f"No fleet vector dir mounted at {root}. Ask the host operator to build an index with scripts/agent-zero/build-fleet-index.sh."

    db_files = sorted(root.glob("*.db"))
    if len(db_files) == 0:
        return f"No fleet DBs present under {root}. Run `scripts/agent-zero/build-fleet-index.sh fleet-workstation-full` on the host."

    report = [f"**Fleet vector DB inventory** ({root})", ""]
    for db_file in db_files:
        megabytes = db_file.stat().st_size / (1024 * 1024)
        report.append(f"### `{db_file.name}` ({megabytes:.1f} MB)")
        try:
            handle = sqlite3.connect(f"file:{db_file}?mode=ro", uri=True)
            try:
                registered = handle.execute(
                    "SELECT IndexName, Dimensions, UpdatedAtUtc FROM VectorIndexes ORDER BY IndexName;"
                ).fetchall()
                if registered:
                    # One grouped query gives the chunk count for every index.
                    chunk_counts = dict(handle.execute(
                        "SELECT IndexName, COUNT(*) FROM VectorEntries GROUP BY IndexName;"
                    ).fetchall())
                    for index_name, dims, built_at in registered:
                        total = chunk_counts.get(index_name, 0)
                        report.append(f"- **{index_name}** — {total:,} chunks × {dims}d (built {built_at})")
                else:
                    report.append("- (no indexes registered)")
            finally:
                handle.close()
        except Exception as err:
            report.append(f"- (inspect failed: {err})")
        report.append("")

    report.append(f"**Tool defaults:** embedding model `{EMBEDDING_MODEL}`, Ollama at `{OLLAMA_BASE_URL}`. Pick a DB with `db=<filename>`; filter by `index=<name>`/`repo=<substring>`.")
    return "\n".join(report).rstrip() + "\n"
|
||||||
|
|
||||||
|
|
||||||
|
def _list_indexes_summary(db_path: Path) -> str:
|
||||||
|
try:
|
||||||
|
conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True)
|
||||||
|
try:
|
||||||
|
rows = conn.execute(
|
||||||
|
"SELECT IndexName, Dimensions, "
|
||||||
|
" (SELECT COUNT(*) FROM VectorEntries WHERE VectorEntries.IndexName = VectorIndexes.IndexName) "
|
||||||
|
"FROM VectorIndexes ORDER BY IndexName;"
|
||||||
|
).fetchall()
|
||||||
|
if not rows:
|
||||||
|
return "(no indexes)"
|
||||||
|
return ", ".join(f"{r[0]}({r[2]}×{r[1]}d)" for r in rows)
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
except Exception as e:
|
||||||
|
return f"(couldn't list: {e})"
|
||||||
|
|
||||||
|
intranet_search.py: |
|
||||||
|
# Intranet Vector Search Tool
|
||||||
|
# Queries the Blue Jay Lab Intranet's Shared.Indexing RAG corpus over its
|
||||||
|
# live REST API (GET <FLOWERCORE_INTRANET_URL>/api/search; default base https://intranet.iamworkin.lan). Returns ranked chunks
|
||||||
|
# with source file paths and scores.
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import ssl
|
||||||
|
import urllib.parse
|
||||||
|
import urllib.request
|
||||||
|
|
||||||
|
from python.helpers.tool import Tool, Response
|
||||||
|
|
||||||
|
|
||||||
|
INTRANET_BASE_URL = os.environ.get(
|
||||||
|
"FLOWERCORE_INTRANET_URL",
|
||||||
|
"https://intranet.iamworkin.lan",
|
||||||
|
)
|
||||||
|
STEPCA_ROOT_CRT = "/a0/usr/ca/stepca-root.crt"
|
||||||
|
|
||||||
|
|
||||||
|
def _ssl_ctx() -> ssl.SSLContext:
|
||||||
|
ctx = ssl.create_default_context()
|
||||||
|
if os.path.exists(STEPCA_ROOT_CRT):
|
||||||
|
ctx.load_verify_locations(cafile=STEPCA_ROOT_CRT)
|
||||||
|
return ctx
|
||||||
|
|
||||||
|
|
||||||
|
class IntranetSearch(Tool):
    """Agent tool: query the intranet search API and render the ranked hits."""

    async def execute(self, **kwargs) -> Response:
        """
        Search the Blue Jay Lab intranet corpus (docs, project notes, dashboards).

        Args (via self.args):
            query (str): Search query. Required.
            limit (int): Max chunks to return. Default 8. Must be a positive integer.
            corpus (str): Optional corpus filter (e.g. "notes", "docs").

        Returns:
            Response with ranked chunk text, source path, and score. Invalid
            arguments, transport errors and malformed replies are reported in
            the Response message rather than raised.
        """
        # `or ""` / str() tolerate explicit None and non-string argument values.
        query = str(self.args.get("query") or "").strip()
        corpus = str(self.args.get("corpus") or "").strip()
        raw_limit = self.args.get("limit")

        if not query:
            return Response(
                message="Error: 'query' is required.",
                break_loop=False,
            )

        # Report a junk limit like any other failure instead of raising.
        try:
            limit = int(raw_limit or 8)
        except (TypeError, ValueError):
            limit = 0
        if limit < 1:
            return Response(
                message=f"Error: 'limit' must be a positive integer (got {raw_limit!r}).",
                break_loop=False,
            )

        params = {"q": query, "topK": str(limit)}
        if corpus:
            params["indexName"] = corpus
        # rstrip avoids "//api/search" when the configured base ends with "/".
        url = f"{INTRANET_BASE_URL.rstrip('/')}/api/search?{urllib.parse.urlencode(params)}"

        try:
            req = urllib.request.Request(url, headers={"Accept": "application/json"})
            with urllib.request.urlopen(req, timeout=20, context=_ssl_ctx()) as resp:
                raw = resp.read().decode("utf-8", errors="replace")
        except Exception as exc:
            return Response(
                message=f"Intranet search failed: {exc}\nURL: {url}",
                break_loop=False,
            )

        try:
            data = json.loads(raw)
        except json.JSONDecodeError:
            return Response(
                message=f"Intranet returned non-JSON response:\n{raw[:500]}",
                break_loop=False,
            )

        # Accept either a bare list or an envelope carrying the list under one
        # of several keys; any other JSON shape counts as "no results".
        if isinstance(data, list):
            hits = data
        elif isinstance(data, dict):
            hits = data.get("results") or data.get("hits") or data.get("chunks") or []
        else:
            hits = []
        if not isinstance(hits, list):
            hits = []
        # Entries are read with .get() below, so keep only dict entries.
        hits = [hit for hit in hits if isinstance(hit, dict)]

        if not hits:
            return Response(
                message=f"No intranet results for query: {query!r}",
                break_loop=False,
            )

        lines = [f"# Intranet search: {query} ({len(hits)} hits)\n"]
        for i, hit in enumerate(hits[:limit], 1):
            src = (
                hit.get("sourceFile")
                or hit.get("source")
                or hit.get("path")
                or hit.get("file")
                or "?"
            )
            repo = hit.get("sourceRepo") or ""
            idx = hit.get("indexName") or ""
            # Test for None rather than truthiness so a score of 0 is still shown.
            score = hit.get("score")
            if score is None:
                score = hit.get("similarity")
            text = str(
                hit.get("snippet")
                or hit.get("text")
                or hit.get("content")
                or hit.get("chunk")
                or ""
            ).strip()
            if len(text) > 600:
                text = text[:600] + "..."
            header = f"## [{i}] {repo}/{src}" if repo else f"## [{i}] {src}"
            if idx:
                header += f" ({idx})"
            if score is not None and score != "":
                header += f" score={score:.3f}" if isinstance(score, float) else f" score={score}"
            lines.append(header)
            lines.append(text)
            lines.append("")

        return Response(message="\n".join(lines), break_loop=False)
|
||||||
|
|
||||||
kind: ConfigMap
|
kind: ConfigMap
|
||||||
metadata:
|
metadata:
|
||||||
name: bluejay-tools-c
|
name: bluejay-tools-c
|
||||||
|
|||||||
@@ -20,7 +20,19 @@ spec:
|
|||||||
nodeSelector:
|
nodeSelector:
|
||||||
kubernetes.io/hostname: rke2-agent1
|
kubernetes.io/hostname: rke2-agent1
|
||||||
hostNetwork: true
|
hostNetwork: true
|
||||||
dnsPolicy: ClusterFirstWithHostNet
|
# Keep the search list free of iamworkin.lan so CoreDNS's wildcard
|
||||||
|
# template cannot hijack public egress like downloads.asterisk.org.
|
||||||
|
dnsPolicy: None
|
||||||
|
dnsConfig:
|
||||||
|
nameservers:
|
||||||
|
- 10.43.0.10
|
||||||
|
searches:
|
||||||
|
- telephony.svc.cluster.local
|
||||||
|
- svc.cluster.local
|
||||||
|
- cluster.local
|
||||||
|
options:
|
||||||
|
- name: ndots
|
||||||
|
value: "2"
|
||||||
securityContext:
|
securityContext:
|
||||||
fsGroup: 0
|
fsGroup: 0
|
||||||
# CoreDNS in this cluster has an iamworkin.lan wildcard that catches
|
# CoreDNS in this cluster has an iamworkin.lan wildcard that catches
|
||||||
|
|||||||
448
apps/authentik/authentik.yaml
Normal file
448
apps/authentik/authentik.yaml
Normal file
@@ -0,0 +1,448 @@
|
|||||||
|
# Authentik OIDC backend
|
||||||
|
# ArgoCD-managed. BlueJay Lab.
|
||||||
|
#
|
||||||
|
# Stack:
|
||||||
|
# - PostgreSQL 16 StatefulSet (single replica, Longhorn RWO 5Gi)
|
||||||
|
# - Redis 7 Deployment (no persistence — session/cache only)
|
||||||
|
# - Authentik server + worker Deployments (image ghcr.io/goauthentik/server:2024.12.3)
|
||||||
|
# - Media PVC shared between server + worker (Longhorn RWO 2Gi)
|
||||||
|
# - Certificate via step-ca-acme ClusterIssuer
|
||||||
|
# - Traefik IngressRoute at id.iamworkin.lan
|
||||||
|
#
|
||||||
|
# Secrets come from 1Password item "authentik-credentials" (IAmWorkin vault, id y6i74ch22q5wvm7znquq4nhhcu)
|
||||||
|
# via the OnePasswordItem CRD, materialized into k8s Secret authentik/authentik-credentials.
|
||||||
|
#
|
||||||
|
# Why the discovery URL is /application/o/pimanager/ : Authentik issues per-application OIDC providers.
|
||||||
|
# The pimanager OIDC application/provider is created after the cluster pods are healthy (manual or
|
||||||
|
# via API once the bootstrap token is available — see Notes substrate).
|
||||||
|
|
||||||
|
---
|
||||||
|
# Namespace that holds every Authentik resource in this file.
kind: Namespace
apiVersion: v1
metadata:
  labels:
    app.kubernetes.io/part-of: bluejay-infra
  name: authentik
|
||||||
|
|
||||||
|
---
|
||||||
|
# The 1Password operator materializes the "authentik-credentials" item as a
# k8s Secret with the same name. Each 1P field label becomes a Secret key:
#   AUTHENTIK_SECRET_KEY, POSTGRES_PASSWORD, REDIS_PASSWORD,
#   BOOTSTRAP_ADMIN_PASSWORD, BOOTSTRAP_ADMIN_TOKEN, BOOTSTRAP_ADMIN_EMAIL
kind: OnePasswordItem
apiVersion: onepassword.com/v1
metadata:
  namespace: authentik
  name: authentik-credentials
spec:
  itemPath: 'vaults/IAmWorkin/items/authentik-credentials'
|
||||||
|
|
||||||
|
---
|
||||||
|
# Media volume mounted at /media by both the server and the worker pods.
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  namespace: authentik
  name: authentik-media
spec:
  storageClassName: longhorn
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 2Gi
|
||||||
|
|
||||||
|
---
|
||||||
|
# Authentik's primary datastore: a single-replica PostgreSQL 16 StatefulSet
# with a 5Gi Longhorn volume claimed per pod.
kind: StatefulSet
apiVersion: apps/v1
metadata:
  namespace: authentik
  name: authentik-postgres
  labels:
    app: authentik-postgres
    argocd.argoproj.io/instance: infra-authentik
spec:
  serviceName: authentik-postgres
  replicas: 1
  revisionHistoryLimit: 10
  podManagementPolicy: OrderedReady
  # Keep the data volume when the StatefulSet is deleted or scaled down.
  persistentVolumeClaimRetentionPolicy:
    whenDeleted: Retain
    whenScaled: Retain
  selector:
    matchLabels:
      app: authentik-postgres
  template:
    metadata:
      labels:
        app: authentik-postgres
    spec:
      containers:
        - name: postgres
          image: postgres:16-alpine
          ports:
            - name: postgres
              containerPort: 5432
          env:
            - name: POSTGRES_USER
              value: authentik
            - name: POSTGRES_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: authentik-credentials
                  key: POSTGRES_PASSWORD
            - name: POSTGRES_DB
              value: authentik
            - name: POSTGRES_INITDB_ARGS
              value: '--encoding=UTF-8 --lc-collate=C --lc-ctype=C'
            # Data lives in a subdirectory of the volume mount point.
            - name: PGDATA
              value: /var/lib/postgresql/data/pgdata
          readinessProbe:
            exec:
              command:
                - pg_isready
                - -U
                - authentik
            initialDelaySeconds: 5
            periodSeconds: 5
          livenessProbe:
            exec:
              command:
                - pg_isready
                - -U
                - authentik
            initialDelaySeconds: 30
            periodSeconds: 30
          resources:
            requests:
              cpu: 100m
              memory: 256Mi
            limits:
              cpu: 1000m
              memory: 1Gi
          volumeMounts:
            - name: pgdata
              mountPath: /var/lib/postgresql/data
  volumeClaimTemplates:
    - metadata:
        name: pgdata
      spec:
        storageClassName: longhorn
        volumeMode: Filesystem
        accessModes:
          - ReadWriteOnce
        resources:
          requests:
            storage: 5Gi
|
||||||
|
|
||||||
|
---
|
||||||
|
# Headless Service (clusterIP: None) matching the StatefulSet's serviceName.
kind: Service
apiVersion: v1
metadata:
  namespace: authentik
  name: authentik-postgres
spec:
  clusterIP: None
  ports:
    - name: postgres
      port: 5432
      targetPort: 5432
  selector:
    app: authentik-postgres
|
||||||
|
|
||||||
|
---
|
||||||
|
# Redis 7 for session storage and the Celery broker. Used purely as a cache,
# so both RDB snapshots and the append-only file are switched off.
kind: Deployment
apiVersion: apps/v1
metadata:
  namespace: authentik
  name: authentik-redis
  labels:
    app: authentik-redis
    argocd.argoproj.io/instance: infra-authentik
spec:
  replicas: 1
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: authentik-redis
  template:
    metadata:
      labels:
        app: authentik-redis
    spec:
      containers:
        - name: redis
          image: redis:7-alpine
          env:
            - name: REDIS_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: authentik-credentials
                  key: REDIS_PASSWORD
          # $(REDIS_PASSWORD) is expanded by Kubernetes from the env entry above.
          args:
            - '--save'
            - ''
            - '--appendonly'
            - 'no'
            - '--requirepass'
            - '$(REDIS_PASSWORD)'
          ports:
            - name: redis
              containerPort: 6379
          readinessProbe:
            tcpSocket:
              port: 6379
            initialDelaySeconds: 5
            periodSeconds: 5
          livenessProbe:
            tcpSocket:
              port: 6379
            initialDelaySeconds: 30
            periodSeconds: 30
          resources:
            requests:
              cpu: 50m
              memory: 64Mi
            limits:
              cpu: 500m
              memory: 256Mi
|
||||||
|
|
||||||
|
---
|
||||||
|
# ClusterIP Service in front of the Redis pod.
kind: Service
apiVersion: v1
metadata:
  namespace: authentik
  name: authentik-redis
spec:
  ports:
    - name: redis
      port: 6379
      targetPort: 6379
  selector:
    app: authentik-redis
|
||||||
|
|
||||||
|
---
|
||||||
|
# Authentik server: the HTTP frontend, listening on :9000 (and :9443).
kind: Deployment
apiVersion: apps/v1
metadata:
  namespace: authentik
  name: authentik-server
  labels:
    app: authentik-server
    argocd.argoproj.io/instance: infra-authentik
spec:
  replicas: 1
  strategy:
    # Recreate because the /media RWO PVC is shared with the worker.
    type: Recreate
  selector:
    matchLabels:
      app: authentik-server
  template:
    metadata:
      labels:
        app: authentik-server
    spec:
      securityContext:
        # The image runs as non-root uid 1000 ("authentik"), while the Longhorn
        # PVC is mounted root:root by default. fsGroup gives the container
        # group write access so it can mkdir /media/public during the
        # tenant_files migration.
        fsGroup: 1000
      volumes:
        - name: media
          persistentVolumeClaim:
            claimName: authentik-media
      containers:
        - name: server
          image: ghcr.io/goauthentik/server:2024.12.3
          args:
            - server
          ports:
            - name: http
              containerPort: 9000
            - name: https
              containerPort: 9443
          env:
            - name: AUTHENTIK_SECRET_KEY
              valueFrom:
                secretKeyRef:
                  name: authentik-credentials
                  key: AUTHENTIK_SECRET_KEY
            - name: AUTHENTIK_REDIS__HOST
              value: authentik-redis
            - name: AUTHENTIK_REDIS__PASSWORD
              valueFrom:
                secretKeyRef:
                  name: authentik-credentials
                  key: REDIS_PASSWORD
            - name: AUTHENTIK_POSTGRESQL__HOST
              value: authentik-postgres
            - name: AUTHENTIK_POSTGRESQL__NAME
              value: authentik
            - name: AUTHENTIK_POSTGRESQL__USER
              value: authentik
            - name: AUTHENTIK_POSTGRESQL__PASSWORD
              valueFrom:
                secretKeyRef:
                  name: authentik-credentials
                  key: POSTGRES_PASSWORD
            - name: AUTHENTIK_BOOTSTRAP_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: authentik-credentials
                  key: BOOTSTRAP_ADMIN_PASSWORD
            - name: AUTHENTIK_BOOTSTRAP_TOKEN
              valueFrom:
                secretKeyRef:
                  name: authentik-credentials
                  key: BOOTSTRAP_ADMIN_TOKEN
            - name: AUTHENTIK_BOOTSTRAP_EMAIL
              valueFrom:
                secretKeyRef:
                  name: authentik-credentials
                  key: BOOTSTRAP_ADMIN_EMAIL
            - name: AUTHENTIK_DISABLE_UPDATE_CHECK
              value: 'true'
            - name: AUTHENTIK_ERROR_REPORTING__ENABLED
              value: 'false'
            - name: AUTHENTIK_LOG_LEVEL
              value: info
          # First boot can spend 3+ minutes in migrations (waiting on a DB
          # lock while the worker migrates too). The initial delays are
          # generous so the kubelet does not kill the pod mid-migration, and
          # periodSeconds keeps probing responsive once startup is done.
          startupProbe:
            httpGet:
              path: /-/health/live/
              port: 9000
            initialDelaySeconds: 30
            periodSeconds: 15
            timeoutSeconds: 10
            # Budget: 30s + 40 * 15s = 10.5 min.
            failureThreshold: 40
          readinessProbe:
            httpGet:
              path: /-/health/ready/
              port: 9000
            initialDelaySeconds: 60
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 12
          livenessProbe:
            httpGet:
              path: /-/health/live/
              port: 9000
            initialDelaySeconds: 300
            periodSeconds: 30
            timeoutSeconds: 10
            failureThreshold: 3
          resources:
            requests:
              cpu: 150m
              memory: 512Mi
            limits:
              cpu: 1500m
              memory: 1Gi
          volumeMounts:
            - name: media
              mountPath: /media
|
||||||
|
|
||||||
|
---
|
||||||
|
# Authentik worker Deployment — runs Celery background tasks.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: authentik-worker
  namespace: authentik
  labels:
    app: authentik-worker
    argocd.argoproj.io/instance: infra-authentik
spec:
  replicas: 1
  strategy:
    type: Recreate  # shares /media RWO PVC with server
  selector:
    matchLabels:
      app: authentik-worker
  template:
    metadata:
      labels:
        app: authentik-worker
    spec:
      # authentik-media is ReadWriteOnce, which only allows mounting from a
      # single node. Pin the worker to whichever node runs the server pod so
      # both can attach the volume. This is one-way on purpose: a mutual
      # required affinity would leave both pods unschedulable on first deploy.
      affinity:
        podAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            - labelSelector:
                matchLabels:
                  app: authentik-server
              topologyKey: kubernetes.io/hostname
      securityContext:
        # Same as server pod — non-root uid 1000 needs PVC group write.
        fsGroup: 1000
      containers:
        - name: worker
          image: ghcr.io/goauthentik/server:2024.12.3
          args: ["worker"]
          env:
            - name: AUTHENTIK_SECRET_KEY
              valueFrom:
                secretKeyRef:
                  name: authentik-credentials
                  key: AUTHENTIK_SECRET_KEY
            - name: AUTHENTIK_REDIS__HOST
              value: authentik-redis
            - name: AUTHENTIK_REDIS__PASSWORD
              valueFrom:
                secretKeyRef:
                  name: authentik-credentials
                  key: REDIS_PASSWORD
            - name: AUTHENTIK_POSTGRESQL__HOST
              value: authentik-postgres
            - name: AUTHENTIK_POSTGRESQL__NAME
              value: authentik
            - name: AUTHENTIK_POSTGRESQL__USER
              value: authentik
            - name: AUTHENTIK_POSTGRESQL__PASSWORD
              valueFrom:
                secretKeyRef:
                  name: authentik-credentials
                  key: POSTGRES_PASSWORD
            # authentik's automated-install docs say the AUTHENTIK_BOOTSTRAP_*
            # variables must be set on the worker container, so they are
            # provided here from the same Secret keys the server uses.
            - name: AUTHENTIK_BOOTSTRAP_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: authentik-credentials
                  key: BOOTSTRAP_ADMIN_PASSWORD
            - name: AUTHENTIK_BOOTSTRAP_TOKEN
              valueFrom:
                secretKeyRef:
                  name: authentik-credentials
                  key: BOOTSTRAP_ADMIN_TOKEN
            - name: AUTHENTIK_BOOTSTRAP_EMAIL
              valueFrom:
                secretKeyRef:
                  name: authentik-credentials
                  key: BOOTSTRAP_ADMIN_EMAIL
            - name: AUTHENTIK_DISABLE_UPDATE_CHECK
              value: "true"
            - name: AUTHENTIK_ERROR_REPORTING__ENABLED
              value: "false"
            - name: AUTHENTIK_LOG_LEVEL
              value: info
          resources:
            requests: { cpu: 100m, memory: 256Mi }
            limits: { cpu: 1000m, memory: 768Mi }
          volumeMounts:
            - name: media
              mountPath: /media
      volumes:
        - name: media
          persistentVolumeClaim:
            claimName: authentik-media
|
||||||
|
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: authentik-server
|
||||||
|
namespace: authentik
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
app: authentik-server
|
||||||
|
ports:
|
||||||
|
- name: http
|
||||||
|
port: 9000
|
||||||
|
targetPort: 9000
|
||||||
|
- name: https
|
||||||
|
port: 9443
|
||||||
|
targetPort: 9443
|
||||||
|
|
||||||
|
---
|
||||||
|
# step-ca leaf certificate for id.iamworkin.lan.
|
||||||
|
# step-ca container resolver uses pfSense Unbound, so the public A record for id.iamworkin.lan
|
||||||
|
# MUST exist before this Certificate is applied (cert-manager HTTP-01 will silently 2h-backoff
|
||||||
|
# otherwise). Added 2026-05-25 via scripts/pfsense-add-id-host.py.
|
||||||
|
apiVersion: cert-manager.io/v1
|
||||||
|
kind: Certificate
|
||||||
|
metadata:
|
||||||
|
name: authentik-tls
|
||||||
|
namespace: authentik
|
||||||
|
spec:
|
||||||
|
secretName: authentik-tls
|
||||||
|
dnsNames:
|
||||||
|
- id.iamworkin.lan
|
||||||
|
issuerRef:
|
||||||
|
name: step-ca-acme
|
||||||
|
kind: ClusterIssuer
|
||||||
|
|
||||||
|
---
|
||||||
|
apiVersion: traefik.io/v1alpha1
|
||||||
|
kind: IngressRoute
|
||||||
|
metadata:
|
||||||
|
name: authentik
|
||||||
|
namespace: authentik
|
||||||
|
spec:
|
||||||
|
entryPoints: [websecure]
|
||||||
|
routes:
|
||||||
|
- match: Host(`id.iamworkin.lan`)
|
||||||
|
kind: Rule
|
||||||
|
services:
|
||||||
|
- name: authentik-server
|
||||||
|
port: 9000
|
||||||
|
tls:
|
||||||
|
secretName: authentik-tls
|
||||||
69
apps/cdi/README.md
Normal file
69
apps/cdi/README.md
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
# CDI — Containerized Data Importer
|
||||||
|
|
||||||
|
KubeVirt's `containerized-data-importer` for populating PVCs from external
|
||||||
|
sources (HTTP, HTTPS, container registry, S3, virtctl upload). Required to
|
||||||
|
import the Windows Server 2025 ISO into the `windows-server-2025-iso` PVC
|
||||||
|
that `apps/kubevirt-vms/ci1.yaml` mounts as a CDROM.
|
||||||
|
|
||||||
|
## Files
|
||||||
|
|
||||||
|
| File | Source | Purpose |
|
||||||
|
| ----------------- | ----------------------------------------------------------------------------------------------------------------- | -------------------------------------------------- |
|
||||||
|
| `cdi-operator.yaml` | [`v1.65.0`](https://github.com/kubevirt/containerized-data-importer/releases/tag/v1.65.0) — verbatim copy | Installs operator + CRDs (5779 lines, large) |
|
||||||
|
| `cdi-cr.yaml` | [`v1.65.0`](https://github.com/kubevirt/containerized-data-importer/releases/tag/v1.65.0) — annotated + commented | Tells operator to deploy CDI components |
|
||||||
|
|
||||||
|
`cdi-operator.yaml` is **vendored verbatim** from the upstream release for
|
||||||
|
air-gap reproducibility (no internet fetch at deploy time, ArgoCD prune
|
||||||
|
contracts hold). To bump versions:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
CDI_VER=v1.66.0 # for example
|
||||||
|
curl -sL "https://github.com/kubevirt/containerized-data-importer/releases/download/${CDI_VER}/cdi-operator.yaml" \
|
||||||
|
-o apps/cdi/cdi-operator.yaml
|
||||||
|
curl -sL "https://github.com/kubevirt/containerized-data-importer/releases/download/${CDI_VER}/cdi-cr.yaml" \
|
||||||
|
-o /tmp/cdi-cr-new.yaml # then merge upstream changes into apps/cdi/cdi-cr.yaml, keeping the project header comment + annotation
|
||||||
|
git diff apps/cdi/ # review
|
||||||
|
git commit -am "cdi: bump to ${CDI_VER}" && git push
|
||||||
|
```
|
||||||
|
|
||||||
|
## Verify after deploy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
kubectl -n cdi get pods # operator + apiserver + deployment + uploadproxy
|
||||||
|
kubectl get cdis cdi -o jsonpath='{.status.phase}' # "Deployed"
|
||||||
|
kubectl get crd | grep cdi.kubevirt.io
|
||||||
|
# Expected CRDs: datavolumes.cdi.kubevirt.io, cdiconfigs.cdi.kubevirt.io,
|
||||||
|
# storageprofiles.cdi.kubevirt.io, dataimportcrons.cdi.kubevirt.io,
|
||||||
|
# datasources.cdi.kubevirt.io, objecttransfers.cdi.kubevirt.io
|
||||||
|
```
|
||||||
|
|
||||||
|
## Use after install
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# Example DataVolume that imports from HTTP
|
||||||
|
apiVersion: cdi.kubevirt.io/v1beta1
|
||||||
|
kind: DataVolume
|
||||||
|
metadata:
|
||||||
|
name: my-iso
|
||||||
|
spec:
|
||||||
|
source:
|
||||||
|
http:
|
||||||
|
url: "https://server/path/to.iso"
|
||||||
|
pvc:
|
||||||
|
accessModes: [ReadWriteOnce]
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 10Gi
|
||||||
|
storageClassName: longhorn
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Or upload from local disk via virtctl
|
||||||
|
virtctl image-upload pvc my-iso \
|
||||||
|
--image-path ./my.iso \
|
||||||
|
--size 10Gi \
|
||||||
|
--storage-class longhorn \
|
||||||
|
--access-mode ReadWriteOnce \
|
||||||
|
--uploadproxy-url https://cdi-uploadproxy.cdi.svc:443 \
|
||||||
|
--insecure
|
||||||
|
```
|
||||||
36
apps/cdi/cdi-cr.yaml
Normal file
36
apps/cdi/cdi-cr.yaml
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
# =============================================================================
|
||||||
|
# CDI CR — Tells the CDI operator to install CDI components into the cluster.
|
||||||
|
# =============================================================================
|
||||||
|
# After cdi-operator.yaml is applied, the operator watches for THIS resource
|
||||||
|
# (CDI named "cdi"). When found, it deploys cdi-apiserver, cdi-deployment,
|
||||||
|
# cdi-uploadproxy, cdi-cronjob, and the importer/uploadserver/cloner pods.
|
||||||
|
#
|
||||||
|
# Configuration:
|
||||||
|
# - HonorWaitForFirstConsumer: PVCs created by DataVolumes wait for first
|
||||||
|
# pod to schedule before binding (lets storage class pick best node).
|
||||||
|
# - WebhookPvcRendering: PVC mutating webhook that fills in PVC spec defaults from the matching StorageProfile.
|
||||||
|
# - imagePullPolicy IfNotPresent: pull an image only when it is not already cached on the node.
|
||||||
|
# - nodeSelector linux: pin to Linux nodes (no Windows worker support).
|
||||||
|
#
|
||||||
|
# Andrew may want to add a `uploadProxyURLOverride` later to expose the
|
||||||
|
# uploadproxy via Traefik IngressRoute for `virtctl image-upload` from
|
||||||
|
# BLUEJAY-WS without `kubectl port-forward`. Phase 2 enhancement.
|
||||||
|
# =============================================================================
|
||||||
|
apiVersion: cdi.kubevirt.io/v1beta1
|
||||||
|
kind: CDI
|
||||||
|
metadata:
|
||||||
|
name: cdi
|
||||||
|
annotations:
|
||||||
|
bluejay.iamworkin.lan/source: "kubevirt/containerized-data-importer v1.65.0"
|
||||||
|
spec:
|
||||||
|
config:
|
||||||
|
featureGates:
|
||||||
|
- HonorWaitForFirstConsumer
|
||||||
|
- WebhookPvcRendering
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
infra:
|
||||||
|
nodeSelector:
|
||||||
|
kubernetes.io/os: linux
|
||||||
|
workload:
|
||||||
|
nodeSelector:
|
||||||
|
kubernetes.io/os: linux
|
||||||
5779
apps/cdi/cdi-operator.yaml
Normal file
5779
apps/cdi/cdi-operator.yaml
Normal file
File diff suppressed because it is too large
Load Diff
169
apps/fc-aistation/fc-aistation.yaml
Normal file
169
apps/fc-aistation/fc-aistation.yaml
Normal file
@@ -0,0 +1,169 @@
|
|||||||
|
# FlowerCore.AiStation.Web GitOps adoption manifest.
|
||||||
|
#
|
||||||
|
# Authored from the already-live fc-aistation resources on 2026-06-04.
|
||||||
|
# Keep the live image tag, Service ClusterIP, and PVC volumeName unchanged so
|
||||||
|
# ArgoCD adopts in place instead of replacing the workload or data volume.
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: aistation-web-data
|
||||||
|
namespace: fc-aistation
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: aistation-web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
argocd.argoproj.io/instance: infra-fc-aistation
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteOnce
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 1Gi
|
||||||
|
storageClassName: longhorn
|
||||||
|
volumeMode: Filesystem
|
||||||
|
volumeName: pvc-27448d6f-6e66-42a7-a293-73dd8bbd6b3e
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: aistation-web
|
||||||
|
namespace: fc-aistation
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: aistation-web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
argocd.argoproj.io/instance: infra-fc-aistation
|
||||||
|
spec:
|
||||||
|
progressDeadlineSeconds: 600
|
||||||
|
replicas: 1
|
||||||
|
revisionHistoryLimit: 3
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: aistation-web
|
||||||
|
strategy:
|
||||||
|
type: Recreate
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
prometheus.io/path: /metrics/prometheus
|
||||||
|
prometheus.io/port: "5000"
|
||||||
|
prometheus.io/scrape: "true"
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: aistation-web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: aistation-web-config
|
||||||
|
image: localhost/fc-aistation-web:v20260602-aistation-owned-deploy-fix2
|
||||||
|
imagePullPolicy: Never
|
||||||
|
livenessProbe:
|
||||||
|
failureThreshold: 3
|
||||||
|
httpGet:
|
||||||
|
path: /healthz
|
||||||
|
port: 5000
|
||||||
|
scheme: HTTP
|
||||||
|
initialDelaySeconds: 30
|
||||||
|
periodSeconds: 30
|
||||||
|
successThreshold: 1
|
||||||
|
timeoutSeconds: 5
|
||||||
|
name: aistation-web
|
||||||
|
ports:
|
||||||
|
- containerPort: 5000
|
||||||
|
name: http
|
||||||
|
protocol: TCP
|
||||||
|
readinessProbe:
|
||||||
|
failureThreshold: 6
|
||||||
|
httpGet:
|
||||||
|
path: /healthz
|
||||||
|
port: 5000
|
||||||
|
scheme: HTTP
|
||||||
|
initialDelaySeconds: 10
|
||||||
|
periodSeconds: 10
|
||||||
|
successThreshold: 1
|
||||||
|
timeoutSeconds: 5
|
||||||
|
resources: {}
|
||||||
|
terminationMessagePath: /dev/termination-log
|
||||||
|
terminationMessagePolicy: File
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /data
|
||||||
|
name: data
|
||||||
|
dnsPolicy: ClusterFirst
|
||||||
|
restartPolicy: Always
|
||||||
|
schedulerName: default-scheduler
|
||||||
|
securityContext: {}
|
||||||
|
terminationGracePeriodSeconds: 30
|
||||||
|
volumes:
|
||||||
|
- name: data
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: aistation-web-data
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: aistation-web
|
||||||
|
namespace: fc-aistation
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: aistation-web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
argocd.argoproj.io/instance: infra-fc-aistation
|
||||||
|
spec:
|
||||||
|
clusterIP: 10.43.211.127
|
||||||
|
clusterIPs:
|
||||||
|
- 10.43.211.127
|
||||||
|
internalTrafficPolicy: Cluster
|
||||||
|
ipFamilies:
|
||||||
|
- IPv4
|
||||||
|
ipFamilyPolicy: SingleStack
|
||||||
|
ports:
|
||||||
|
- name: http
|
||||||
|
port: 80
|
||||||
|
protocol: TCP
|
||||||
|
targetPort: 5000
|
||||||
|
selector:
|
||||||
|
app.kubernetes.io/name: aistation-web
|
||||||
|
sessionAffinity: None
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: cert-manager.io/v1
|
||||||
|
kind: Certificate
|
||||||
|
metadata:
|
||||||
|
name: aistation-web-tls
|
||||||
|
namespace: fc-aistation
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: aistation-web-tls
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
argocd.argoproj.io/instance: infra-fc-aistation
|
||||||
|
spec:
|
||||||
|
dnsNames:
|
||||||
|
- aistation.iamworkin.lan
|
||||||
|
issuerRef:
|
||||||
|
kind: ClusterIssuer
|
||||||
|
name: step-ca-acme
|
||||||
|
secretName: aistation-web-tls
|
||||||
|
---
|
||||||
|
apiVersion: traefik.io/v1alpha1
|
||||||
|
kind: IngressRoute
|
||||||
|
metadata:
|
||||||
|
name: aistation-web
|
||||||
|
namespace: fc-aistation
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: aistation-web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
argocd.argoproj.io/instance: infra-fc-aistation
|
||||||
|
spec:
|
||||||
|
entryPoints:
|
||||||
|
- websecure
|
||||||
|
routes:
|
||||||
|
- kind: Rule
|
||||||
|
match: Host(`aistation.iamworkin.lan`)
|
||||||
|
services:
|
||||||
|
- name: aistation-web
|
||||||
|
port: 80
|
||||||
|
tls:
|
||||||
|
secretName: aistation-web-tls
|
||||||
@@ -1,5 +1,206 @@
|
|||||||
# FlowerCore Chat — TLS + Ingress
|
# FlowerCore Chat
|
||||||
# Deployment and Service managed by deploy script (not ArgoCD)
|
#
|
||||||
|
# ArgoCD-managed workload plus TLS/Ingress. The chat-web-secret remains an
|
||||||
|
# out-of-band Secret until the values are moved into a 1Password-backed item;
|
||||||
|
# the Deployment references it as optional so GitOps can own the workload
|
||||||
|
# without storing secret material in this repo.
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Namespace
|
||||||
|
metadata:
|
||||||
|
name: fc-chat
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: chat-web-config
|
||||||
|
namespace: fc-chat
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: chat-web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
data:
|
||||||
|
ASPNETCORE_ENVIRONMENT: Production
|
||||||
|
ASPNETCORE_URLS: "http://+:8080"
|
||||||
|
ASPNETCORE_FORWARDEDHEADERS_ENABLED: "true"
|
||||||
|
FlowerCore__Auth__Enabled: "false"
|
||||||
|
FlowerCore__Auth__Oidc__Enabled: "true"
|
||||||
|
FlowerCore__Auth__Oidc__Authority: "https://id.iamworkin.lan/application/o/chat/"
|
||||||
|
FlowerCore__Auth__Oidc__Audience: "chat"
|
||||||
|
FlowerCore__Auth__Oidc__ClientId: "chat"
|
||||||
|
FlowerCore__Database__ConnectionStrings__Sqlite: "Data Source=/data/chat.db"
|
||||||
|
# Ollama target. Switched 2026-04-25 from edge1 Pi5 (10.0.57.17) to BLUEJAY-WS
|
||||||
|
# workstation (10.0.56.20, RX 9070 XT 16GB, OLLAMA_HOST=0.0.0.0:11434, Vulkan
|
||||||
|
# backend per feedback_rdna4_vulkan_broken). The Pi5 was timing out every team-
|
||||||
|
# round speaker at the 300s per-turn cap (live-proven 2026-04-25 03:53 UTC,
|
||||||
|
# see feedback_chat_team_round_edge1_too_slow). Workstation has gemma3:4b for
|
||||||
|
# the Cheap tier, plus gemma3:27b/phi4:14b/qwen3:14b for Default/Balanced/Deep.
|
||||||
|
# Piper TTS stays on edge1 below (different service, Pi handles TTS fine).
|
||||||
|
FlowerCore__AI__OllamaBaseUrl: "http://10.0.56.20:11434"
|
||||||
|
FlowerCore__AI__DefaultModelName: "phi4:14b"
|
||||||
|
ChatOptions__BehaviorRuleEngine__OllamaBaseUrl: "http://10.0.56.20:11434"
|
||||||
|
ChatOptions__BehaviorRuleEngine__FallbackOllamaBaseUrl: "http://10.0.57.17:11434"
|
||||||
|
ChatOptions__BehaviorRuleEngine__ModelName: "gemma3:12b"
|
||||||
|
FlowerCore__AI__Memory__UseSharedIndexingAdapter: "true"
|
||||||
|
FlowerCore__AI__Memory__UseOllamaEmbeddings: "true"
|
||||||
|
FlowerCore__AI__Memory__EmbeddingModel: "nomic-embed-text"
|
||||||
|
FlowerCore__AI__Memory__EnableSharedIndexingBackfill: "true"
|
||||||
|
FlowerCore__AI__Memory__SharedIndexingDatabasePath: "/data/chat-memory-index.db"
|
||||||
|
FlowerCore__AI__Skills__Library__LibraryApiUrl: "http://library-web.fc-library.svc.cluster.local"
|
||||||
|
FlowerCore__AI__Skills__Retail__RetailApiUrl: "http://retail-web.fc-retail.svc.cluster.local"
|
||||||
|
FlowerCore__AI__Skills__Intranet__IntranetBaseUrl: "http://intranet-web.intranet.svc.cluster.local"
|
||||||
|
FlowerCore__AI__Skills__Print__PrintMcpBaseUrl: "http://10.0.57.16:5200"
|
||||||
|
FlowerCore__AI__IrcBridge__Enabled: "true"
|
||||||
|
FlowerCore__AI__IrcBridge__DefaultProfileSlug: "it-helpdesk"
|
||||||
|
FlowerCore__AI__IrcBridge__MentionProfileSlug: "it-helpdesk"
|
||||||
|
FlowerCore__AI__IrcBridge__MentionReactiveMode: "mentions-only"
|
||||||
|
FlowerCore__AI__IrcBridge__AllowActionExecution: "false"
|
||||||
|
FlowerCore__AI__Voice__Piper__Host: "10.0.57.17"
|
||||||
|
FlowerCore__AI__Voice__Piper__Port: "10400"
|
||||||
|
FlowerCore__AI__Voice__OutputRoot: "/data/audio"
|
||||||
|
FlowerCore__AI__Voice__RetentionDays: "30"
|
||||||
|
# LLM provider abstraction (ADR-088). Anthropic stays disabled here -- when
|
||||||
|
# an operator wants to enable Claude, they flip Enabled=true and mount
|
||||||
|
# FlowerCore__Anthropic__ApiKey from the onepassword-synced Secret (see
|
||||||
|
# docs/ai-agents/anthropic-integration.md).
|
||||||
|
FlowerCore__Anthropic__Enabled: "false"
|
||||||
|
FlowerCore__Anthropic__BaseUrl: "https://api.anthropic.com"
|
||||||
|
FlowerCore__Anthropic__DefaultModel: "claude-sonnet-4-6"
|
||||||
|
FlowerCore__Anthropic__CheapModel: "claude-haiku-4-5-20251001"
|
||||||
|
FlowerCore__Anthropic__DeepModel: "claude-opus-4-7"
|
||||||
|
FlowerCore__Budget__ResponseCacheEnabled: "true"
|
||||||
|
OTEL_SERVICE_NAME: FlowerCore.Chat
|
||||||
|
OTEL_EXPORTER_OTLP_ENDPOINT: "http://otel-collector.monitoring.svc.cluster.local:4317"
|
||||||
|
OTEL_EXPORTER_OTLP_PROTOCOL: grpc
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: chat-web-data
|
||||||
|
namespace: fc-chat
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: chat-web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteOnce
|
||||||
|
storageClassName: longhorn
|
||||||
|
volumeMode: Filesystem
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 1Gi
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: chat-web
|
||||||
|
namespace: fc-chat
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: chat-web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
strategy:
|
||||||
|
type: Recreate
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: chat-web
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: chat-web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
annotations:
|
||||||
|
prometheus.io/scrape: "true"
|
||||||
|
prometheus.io/port: "8080"
|
||||||
|
prometheus.io/path: "/metrics/prometheus"
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
kubernetes.io/hostname: rke2-server
|
||||||
|
securityContext:
|
||||||
|
fsGroup: 1654
|
||||||
|
fsGroupChangePolicy: OnRootMismatch
|
||||||
|
containers:
|
||||||
|
- name: chat-web
|
||||||
|
image: localhost/fc-chat-web:v20260603-oidc-authentik
|
||||||
|
imagePullPolicy: Never
|
||||||
|
ports:
|
||||||
|
- name: http
|
||||||
|
containerPort: 8080
|
||||||
|
envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: chat-web-config
|
||||||
|
- secretRef:
|
||||||
|
name: chat-web-secret
|
||||||
|
optional: true
|
||||||
|
env:
|
||||||
|
- name: FlowerCore__Auth__Oidc__Authority
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: chat-oidc-client
|
||||||
|
key: issuer_url
|
||||||
|
optional: true
|
||||||
|
- name: FlowerCore__Auth__Oidc__ClientId
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: chat-oidc-client
|
||||||
|
key: client_id
|
||||||
|
optional: true
|
||||||
|
- name: FlowerCore__Auth__Oidc__ClientSecret
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: chat-oidc-client
|
||||||
|
key: client_secret
|
||||||
|
optional: true
|
||||||
|
volumeMounts:
|
||||||
|
- name: data
|
||||||
|
mountPath: /data
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
memory: "128Mi"
|
||||||
|
cpu: "100m"
|
||||||
|
limits:
|
||||||
|
memory: "512Mi"
|
||||||
|
cpu: "500m"
|
||||||
|
readinessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /healthz
|
||||||
|
port: 8080
|
||||||
|
initialDelaySeconds: 10
|
||||||
|
periodSeconds: 10
|
||||||
|
timeoutSeconds: 5
|
||||||
|
failureThreshold: 6
|
||||||
|
livenessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /healthz
|
||||||
|
port: 8080
|
||||||
|
initialDelaySeconds: 30
|
||||||
|
periodSeconds: 30
|
||||||
|
timeoutSeconds: 5
|
||||||
|
failureThreshold: 3
|
||||||
|
volumes:
|
||||||
|
- name: data
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: chat-web-data
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: chat-web
|
||||||
|
namespace: fc-chat
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: chat-web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app.kubernetes.io/name: chat-web
|
||||||
|
ports:
|
||||||
|
- name: http
|
||||||
|
port: 80
|
||||||
|
targetPort: 8080
|
||||||
|
protocol: TCP
|
||||||
---
|
---
|
||||||
apiVersion: cert-manager.io/v1
|
apiVersion: cert-manager.io/v1
|
||||||
kind: Certificate
|
kind: Certificate
|
||||||
@@ -30,3 +231,41 @@ spec:
|
|||||||
port: 80
|
port: 80
|
||||||
tls:
|
tls:
|
||||||
secretName: chat-web-tls
|
secretName: chat-web-tls
|
||||||
|
---
|
||||||
|
# Public host profile marker. The app treats this header as authoritative for
|
||||||
|
# the public twin, while the internal chat.iamworkin.lan route does not attach
|
||||||
|
# it and keeps the operator-oriented UI.
|
||||||
|
apiVersion: traefik.io/v1alpha1
|
||||||
|
kind: Middleware
|
||||||
|
metadata:
|
||||||
|
name: chat-public-profile-header
|
||||||
|
namespace: fc-chat
|
||||||
|
spec:
|
||||||
|
headers:
|
||||||
|
customRequestHeaders:
|
||||||
|
X-FC-Chat-Host-Profile: "public"
|
||||||
|
---
|
||||||
|
# Public Cloudflare-fronted twin for the anonymous chat surface. Operator
|
||||||
|
# paths are intentionally absent from the allowlist below, so /admin,
|
||||||
|
# /operator, /console, /ops, /api/operator, and /operatorhub miss this route
|
||||||
|
# and return Traefik 404 before reaching the pod. Operator action still needed:
|
||||||
|
# create/verify Cloudflare DNS chat.flowercore.io -> public Traefik endpoint
|
||||||
|
# and mirror the cf-origin-flowercore-io TLS secret into namespace fc-chat.
|
||||||
|
apiVersion: traefik.io/v1alpha1
|
||||||
|
kind: IngressRoute
|
||||||
|
metadata:
|
||||||
|
name: chat-web-public
|
||||||
|
namespace: fc-chat
|
||||||
|
spec:
|
||||||
|
entryPoints:
|
||||||
|
- websecure
|
||||||
|
routes:
|
||||||
|
- match: Host(`chat.flowercore.io`) && (Path(`/`) || Path(`/chat`) || PathPrefix(`/_blazor`) || PathPrefix(`/_framework`) || PathPrefix(`/_content`) || PathPrefix(`/avatars`) || PathPrefix(`/css`) || PathPrefix(`/js`) || PathPrefix(`/favicon`) || PathPrefix(`/chathub`)) && (Method(`GET`) || Method(`HEAD`) || Method(`POST`) || Method(`OPTIONS`))
|
||||||
|
kind: Rule
|
||||||
|
middlewares:
|
||||||
|
- name: chat-public-profile-header
|
||||||
|
services:
|
||||||
|
- name: chat-web
|
||||||
|
port: 80
|
||||||
|
tls:
|
||||||
|
secretName: cf-origin-flowercore-io
|
||||||
|
|||||||
@@ -1,5 +1,18 @@
|
|||||||
# FlowerCore Remote Desktop — TLS + Ingress
|
# FlowerCore Remote Desktop — TLS + Ingress
|
||||||
# Deployment and Service managed by deploy script (not ArgoCD)
|
#
|
||||||
|
# Source-of-truth split:
|
||||||
|
# - bluejay-infra OWNS: Certificate, IngressRoute, all NetworkPolicies
|
||||||
|
# (see network-policies.yaml in this directory).
|
||||||
|
# - FlowerCore.RemoteDesktop scripts/deploy-web.sh OWNS: Deployment +
|
||||||
|
# Service. Reason: image refs like `localhost/fc-desktop:linux-xfce`
|
||||||
|
# only exist on each node's containerd after a manual import, so a
|
||||||
|
# Deployment manifest in bluejay-infra would race the image-import
|
||||||
|
# step and crash-loop.
|
||||||
|
#
|
||||||
|
# NetworkPolicies moved into bluejay-infra 2026-05-07 — previously they
|
||||||
|
# were applied via the deploy script's kubectl apply calls, which broke
|
||||||
|
# cluster-rebuild repeatability. See
|
||||||
|
# feedback_networkpolicies_belong_in_bluejay_infra.md.
|
||||||
---
|
---
|
||||||
apiVersion: cert-manager.io/v1
|
apiVersion: cert-manager.io/v1
|
||||||
kind: Certificate
|
kind: Certificate
|
||||||
|
|||||||
332
apps/fc-desktop/network-policies.yaml
Normal file
332
apps/fc-desktop/network-policies.yaml
Normal file
@@ -0,0 +1,332 @@
|
|||||||
|
# FlowerCore Remote Desktop — NetworkPolicies (GitOps-managed)
|
||||||
|
#
|
||||||
|
# Moved into bluejay-infra 2026-05-07 as part of the regroup audit. These
|
||||||
|
# four policies were previously applied via FlowerCore.RemoteDesktop's
|
||||||
|
# scripts/deploy-web.sh `kubectl apply` calls, which meant a fresh cluster
|
||||||
|
# rebuild from bluejay-infra alone would miss them — Browser Lab session
|
||||||
|
# isolation, control-plane allow-list, and HTTP-01 cert renewal would all
|
||||||
|
# silently fail to come up.
|
||||||
|
#
|
||||||
|
# Source-of-truth contract:
|
||||||
|
# - bluejay-infra OWNS all NetworkPolicy + Certificate + IngressRoute
|
||||||
|
# resources for fc-desktop.
|
||||||
|
# - FlowerCore.RemoteDesktop's scripts/deploy-web.sh continues to own
|
||||||
|
# the Deployment + Service apply (because the image ref
|
||||||
|
# `localhost/fc-desktop:linux-xfce` only exists on each node's
|
||||||
|
# containerd after a manual import — it can't be pulled from a
|
||||||
|
# registry, so a Deployment manifest in bluejay-infra would race the
|
||||||
|
# image-import step and crash-loop).
|
||||||
|
---
|
||||||
|
# 1) desktop-isolation — Browser Lab session pods.
|
||||||
|
#
|
||||||
|
# Locks down pods labeled `app.kubernetes.io/name=remote-desktop` (every
|
||||||
|
# session pod regardless of template). Allows guacd ingress for the VNC/RDP
|
||||||
|
# display lane and remotedesktop-web's pre-handoff probing. Egress: NFS to
|
||||||
|
# Synology, DNS, Traefik (cluster + LB VIP), Intranet (Browser Lab home).
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: NetworkPolicy
|
||||||
|
metadata:
|
||||||
|
name: desktop-isolation
|
||||||
|
namespace: fc-desktop
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/part-of: remotedesktop
|
||||||
|
app.kubernetes.io/component: isolation
|
||||||
|
spec:
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: remote-desktop
|
||||||
|
policyTypes:
|
||||||
|
- Ingress
|
||||||
|
- Egress
|
||||||
|
ingress:
|
||||||
|
- from:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: guacamole
|
||||||
|
ports:
|
||||||
|
- port: 3000
|
||||||
|
protocol: TCP
|
||||||
|
- port: 3001
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5901
|
||||||
|
protocol: TCP
|
||||||
|
- port: 3389
|
||||||
|
protocol: TCP
|
||||||
|
- from:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: fc-desktop
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: remotedesktop-web
|
||||||
|
ports:
|
||||||
|
- port: 3000
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5901
|
||||||
|
protocol: TCP
|
||||||
|
egress:
|
||||||
|
# NFS to Synology
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.58.3/32
|
||||||
|
ports:
|
||||||
|
- port: 2049
|
||||||
|
protocol: TCP
|
||||||
|
- port: 2049
|
||||||
|
protocol: UDP
|
||||||
|
- port: 111
|
||||||
|
protocol: TCP
|
||||||
|
- port: 111
|
||||||
|
protocol: UDP
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.58.3/32
|
||||||
|
ports:
|
||||||
|
- port: 445
|
||||||
|
protocol: TCP
|
||||||
|
- to: []
|
||||||
|
ports:
|
||||||
|
- port: 53
|
||||||
|
protocol: UDP
|
||||||
|
- port: 53
|
||||||
|
protocol: TCP
|
||||||
|
- to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.56.200/32
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.43.33.87/32
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: traefik-system
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: traefik
|
||||||
|
ports:
|
||||||
|
- port: 80
|
||||||
|
protocol: TCP
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8000
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8443
|
||||||
|
protocol: TCP
|
||||||
|
- to:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: intranet
|
||||||
|
podSelector:
|
||||||
|
matchLabels:
|
||||||
|
app: intranet-web
|
||||||
|
ports:
|
||||||
|
- port: 5300
|
||||||
|
protocol: TCP
|
||||||
|
---
|
||||||
|
# 2) fc-desktop-default-deny — namespace-wide catch-all.
|
||||||
|
#
|
||||||
|
# Selects every pod EXCEPT remotedesktop-web (the public-surface control
|
||||||
|
# plane) and applies default-deny semantics for both Ingress and Egress.
|
||||||
|
# Closes the gap where session pods land WITHOUT the desktop-isolation
|
||||||
|
# policy's `app.kubernetes.io/name=remote-desktop` label, plus prevents
|
||||||
|
# arbitrary debug sidecars / kubectl debug images from getting cluster
|
||||||
|
# access.
|
||||||
|
#
|
||||||
|
# CRITICAL: also catches transient cm-acme-http-solver pods (that's the
|
||||||
|
# bug this whole regroup chased). The cm-acme-http-solver-allow policy
|
||||||
|
# below is the explicit carve-out.
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: NetworkPolicy
|
||||||
|
metadata:
|
||||||
|
name: fc-desktop-default-deny
|
||||||
|
namespace: fc-desktop
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/part-of: remotedesktop
|
||||||
|
app.kubernetes.io/component: isolation
|
||||||
|
spec:
|
||||||
|
podSelector:
|
||||||
|
matchExpressions:
|
||||||
|
- key: app.kubernetes.io/name
|
||||||
|
operator: NotIn
|
||||||
|
values:
|
||||||
|
- remotedesktop-web
|
||||||
|
policyTypes:
|
||||||
|
- Ingress
|
||||||
|
- Egress
|
||||||
|
---
|
||||||
|
# 3) remotedesktop-web-isolation — explicit allow-list for the control plane.
#
# fc-desktop-default-deny leaves out exactly one pod label, remotedesktop-web,
# so without this policy the control plane would have wide-open Ingress AND
# Egress. This policy re-introduces a tight allow-list:
#   - Ingress: Traefik only, on TCP/8080.
#   - Egress:  CoreDNS, K8s API, Guacamole admin, NFS, Intranet,
#              Traefik (cluster + LB), and the fc-desktop namespace itself
#              (for session pod readiness probing).
kind: NetworkPolicy
apiVersion: networking.k8s.io/v1
metadata:
  namespace: fc-desktop
  name: remotedesktop-web-isolation
  labels:
    app.kubernetes.io/component: isolation
    app.kubernetes.io/part-of: remotedesktop
spec:
  policyTypes: [Ingress, Egress]
  podSelector:
    matchLabels:
      app.kubernetes.io/name: remotedesktop-web
  ingress:
    # Only Traefik pods in traefik-system may call the control plane.
    - ports:
        - {protocol: TCP, port: 8080}
      from:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: traefik-system
          podSelector:
            matchLabels:
              app.kubernetes.io/name: traefik
  egress:
    # CoreDNS
    - ports:
        - {protocol: UDP, port: 53}
        - {protocol: TCP, port: 53}
      to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: kube-system
          podSelector:
            matchLabels:
              k8s-app: kube-dns
    # K8s API server. The peer list is empty, which places no restriction on
    # the destination: TCP/443 and TCP/6443 are allowed towards any address.
    - ports:
        - {protocol: TCP, port: 443}
        - {protocol: TCP, port: 6443}
      to: []
    # Guacamole admin (any pod in the guacamole namespace)
    - ports:
        - {protocol: TCP, port: 8080}
      to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: guacamole
    # NFS to Synology (nfsd 2049 + portmapper 111, TCP and UDP)
    - ports:
        - {protocol: TCP, port: 2049}
        - {protocol: UDP, port: 2049}
        - {protocol: TCP, port: 111}
        - {protocol: UDP, port: 111}
      to:
        - ipBlock:
            cidr: 10.0.58.3/32
    # Intranet web
    - ports:
        - {protocol: TCP, port: 5300}
      to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: intranet
          podSelector:
            matchLabels:
              app: intranet-web
    # Cluster Traefik pods (in-cluster service resolution + Guacamole
    # routing handoff where web app builds URLs against the public host
    # but resolves internally).
    - ports:
        - {protocol: TCP, port: 80}
        - {protocol: TCP, port: 443}
        - {protocol: TCP, port: 8080}
        - {protocol: TCP, port: 8443}
      to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: traefik-system
          podSelector:
            matchLabels:
              app.kubernetes.io/name: traefik
    # fc-desktop namespace — session pod probing during browser-access
    # readiness checks.
    - ports:
        - {protocol: TCP, port: 3000}
        - {protocol: TCP, port: 3001}
        - {protocol: TCP, port: 5901}
        - {protocol: TCP, port: 3389}
      to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: fc-desktop
|
||||||
|
---
|
||||||
|
# 4) cm-acme-http-solver-allow — cert-manager HTTP-01 carve-out.
#
# fc-desktop-default-deny also selects the transient solver pods that
# cert-manager creates for each renewal (they don't carry the
# remotedesktop-web label). Without this policy that caused an 8-day silent
# renewal failure on desktop.iamworkin.lan in 2026-04-28..2026-05-07 (see
# feedback_certmanager_renewal_stuck_when_solver_blocked_by_namespace_default_deny.md).
#
# Authorizes:
#   - Ingress on TCP/8089 from cluster Traefik (which proxies the external
#     HTTP-01 GET on port 80 through to the solver).
#   - Egress for cluster DNS (defensive — newer cert-manager probes from
#     inside the solver too).
#
# The `acme.cert-manager.io/http01-solver=true` label is set by cert-manager
# itself on every solver pod automatically.
kind: NetworkPolicy
apiVersion: networking.k8s.io/v1
metadata:
  namespace: fc-desktop
  name: cm-acme-http-solver-allow
  labels:
    app.kubernetes.io/component: cert-renewal
    app.kubernetes.io/part-of: remotedesktop
spec:
  policyTypes: [Ingress, Egress]
  podSelector:
    matchLabels:
      # Quoted on purpose: label values are strings, not booleans.
      acme.cert-manager.io/http01-solver: "true"
  ingress:
    - ports:
        - {protocol: TCP, port: 8089}
      from:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: traefik-system
          podSelector:
            matchLabels:
              app.kubernetes.io/name: traefik
  egress:
    - ports:
        - {protocol: UDP, port: 53}
        - {protocol: TCP, port: 53}
      to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: kube-system
          podSelector:
            matchLabels:
              k8s-app: kube-dns
|
||||||
26
apps/fc-devicemgmt/1password-item.yaml
Normal file
26
apps/fc-devicemgmt/1password-item.yaml
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
# Runtime secrets for FlowerCore.DeviceManagement.
#
# The OnePasswordItem operator syncs the referenced vault item into a
# Kubernetes Secret with the same name as this resource. Expected fields:
#   DB-Password
#   mtls-ca.pem
#   mtls-client.crt
#   mtls-client.key
#   mtls-chain.pem
#
# Do not add literal secret values to this repo. Runtime pods consume the
# synced Secret through env vars and read-only mounts.
kind: OnePasswordItem
apiVersion: onepassword.com/v1
metadata:
  namespace: fc-devicemgmt
  name: fc-devicemgmt-runtime
  labels:
    app.kubernetes.io/name: fc-devicemgmt
    app.kubernetes.io/component: secrets
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
spec:
  # Vault path of the source item; the value contains spaces, hence quoted.
  itemPath: "vaults/IAmWorkin/items/FlowerCore DeviceManagement Runtime"
|
||||||
30
apps/fc-devicemgmt/certificate-web.yaml
Normal file
30
apps/fc-devicemgmt/certificate-web.yaml
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
# Certificate for devices.iamworkin.lan.
#
# Preflight gate: before this Certificate is synced, FlowerCore.DNS / pfSense
# must contain an explicit A record:
#   devices.iamworkin.lan -> 10.0.56.200
# step-ca ACME cannot see the CoreDNS wildcard, so missing pfSense DNS
# produces cert-manager HTTP-01 backoff
# (feedback_pfsense_dns_required_for_acme).
kind: Certificate
apiVersion: cert-manager.io/v1
metadata:
  namespace: fc-devicemgmt
  name: fc-devicemgmt-web-tls
  labels:
    app.kubernetes.io/name: fc-devicemgmt-web
    app.kubernetes.io/component: web
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
  annotations:
    flowercore.io/dns-preflight: "devices.iamworkin.lan must resolve to 10.0.56.200 before ACME sync"
spec:
  dnsNames: [devices.iamworkin.lan]
  # The issued key pair lands in this Secret.
  secretName: fc-devicemgmt-web-tls
  issuerRef:
    kind: ClusterIssuer
    name: step-ca-acme
  # 720h = 30 days of validity; renewal starts 240h = 10 days before expiry.
  duration: 720h
  renewBefore: 240h
|
||||||
81
apps/fc-devicemgmt/clusterrole-operator.yaml
Normal file
81
apps/fc-devicemgmt/clusterrole-operator.yaml
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
# Cluster-scoped permissions for the FlowerCore.DeviceManagement operator.
# The ClusterRoleBinding of the same name grants them to ServiceAccount
# fc-devicemgmt/fc-devicemgmt-operator.
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: fc-devicemgmt-operator
  labels:
    app.kubernetes.io/name: fc-devicemgmt-operator
    app.kubernetes.io/component: operator
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
rules:
  # Full lifecycle control over every resource in the operator's own API group.
  - apiGroups: [devices.flowercore.io]
    resources: ['*']
    verbs: [get, list, watch, create, update, patch, delete]
  # Status and finalizer subresources of the four custom kinds, listed
  # explicitly.
  # NOTE(review): presumably already covered by the wildcard rule above —
  # confirm before removing either rule.
  - apiGroups: [devices.flowercore.io]
    resources:
      - devices/status
      - devices/finalizers
      - devicegroups/status
      - devicegroups/finalizers
      - devicepolicies/status
      - devicepolicies/finalizers
      - remotecommands/status
      - remotecommands/finalizers
    verbs: [get, update, patch]
  # Read-only Deployment lookup (the operator Deployment manifest explains it
  # as resolving the process's own Deployment UID).
  - apiGroups: [apps]
    resources: [deployments]
    verbs: [get]
  # Core-group objects, full CRUD. This rule is cluster-wide and includes
  # Secrets.
  - apiGroups: [""]
    resources: [pods, services, configmaps, secrets, events]
    verbs: [get, list, watch, create, update, patch, delete]
  # Jobs, full CRUD.
  - apiGroups: [batch]
    resources: [jobs]
    verbs: [get, list, watch, create, update, patch, delete]
  # NetworkPolicies are read-only for the operator.
  - apiGroups: [networking.k8s.io]
    resources: [networkpolicies]
    verbs: [get, list, watch]
|
||||||
19
apps/fc-devicemgmt/clusterrolebinding-operator.yaml
Normal file
19
apps/fc-devicemgmt/clusterrolebinding-operator.yaml
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
# Grants ClusterRole fc-devicemgmt-operator to the operator's ServiceAccount
# in the fc-devicemgmt namespace.
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: fc-devicemgmt-operator
  labels:
    app.kubernetes.io/name: fc-devicemgmt-operator
    app.kubernetes.io/component: operator
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
subjects:
  - kind: ServiceAccount
    namespace: fc-devicemgmt
    name: fc-devicemgmt-operator
roleRef:
  kind: ClusterRole
  name: fc-devicemgmt-operator
  apiGroup: rbac.authorization.k8s.io
|
||||||
109
apps/fc-devicemgmt/deployment-operator.yaml
Normal file
109
apps/fc-devicemgmt/deployment-operator.yaml
Normal file
@@ -0,0 +1,109 @@
|
|||||||
|
# FlowerCore.DeviceManagement Operator.
#
# KubeOps controller for devices.flowercore.io resources. Operator-created
# children must set OwnerReferences + traceability labels/annotations per
# k8s-pod-ownership-and-traceability-standard.md. The operator ClusterRole
# grants apps/deployments/get so the process can resolve its own Deployment
# UID.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: fc-devicemgmt-operator
  namespace: fc-devicemgmt
  labels:
    app: fc-devicemgmt-operator
    app.kubernetes.io/name: fc-devicemgmt-operator
    app.kubernetes.io/component: operator
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
  annotations:
    flowercore.io/traceability-standard: k8s-pod-ownership-and-traceability-standard
spec:
  replicas: 1
  revisionHistoryLimit: 3
  selector:
    matchLabels:
      app: fc-devicemgmt-operator
  template:
    metadata:
      labels:
        app: fc-devicemgmt-operator
        app.kubernetes.io/name: fc-devicemgmt-operator
        app.kubernetes.io/component: operator
        app.kubernetes.io/part-of: flowercore
        app.kubernetes.io/managed-by: argocd
        flowercore.io/tenant-id: system
        flowercore.io/created-by: bluejay-infra
      annotations:
        # Annotation values must be strings, hence the quoted numbers/booleans.
        prometheus.io/scrape: "true"
        prometheus.io/port: "8080"
        prometheus.io/path: "/metrics"
        flowercore.io/audit-trace-id: "runtime-activity-trace"
    spec:
      serviceAccountName: fc-devicemgmt-operator
      securityContext:
        fsGroup: 1654
        fsGroupChangePolicy: OnRootMismatch
        # The container below already sets runAsNonRoot, no privilege
        # escalation and drop-ALL capabilities; the runtime's default seccomp
        # filter is the remaining control of the "restricted" Pod Security
        # Standard.
        seccompProfile:
          type: RuntimeDefault
      containers:
        - name: operator
          # localhost/ image with pull policy Never: the image must already
          # be present on the node, it is never pulled from a registry.
          image: localhost/fc-devicemgmt-operator:v20260519-sp34cl3-fix
          imagePullPolicy: Never
          ports:
            - name: metrics
              containerPort: 8080
          env:
            - name: ASPNETCORE_ENVIRONMENT
              value: "Production"
            - name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
              value: "false"
            # Downward API: the pod's own name and namespace.
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            # Same value as this Deployment's metadata.name.
            - name: FLOWERCORE_KUBERNETES_OWNER_DEPLOYMENT
              value: "fc-devicemgmt-operator"
            - name: FlowerCore__Service__Name
              value: "FlowerCore.DeviceManagement.Operator"
            - name: FlowerCore__DeviceManagement__DefaultTenantId
              value: "system"
          resources:
            requests:
              cpu: 50m
              memory: 128Mi
            limits:
              cpu: 500m
              memory: 512Mi
          # Both probes only check that TCP/8080 (the metrics port) accepts
          # connections.
          readinessProbe:
            tcpSocket:
              port: 8080
            initialDelaySeconds: 5
            periodSeconds: 10
          livenessProbe:
            tcpSocket:
              port: 8080
            initialDelaySeconds: 20
            periodSeconds: 30
          securityContext:
            runAsNonRoot: true
            runAsUser: 1654
            runAsGroup: 1654
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
            capabilities:
              drop:
                - ALL
          # The root filesystem is read-only, so the writable paths are
          # provided as emptyDir volumes.
          volumeMounts:
            - name: tmp
              mountPath: /tmp
            - name: logs
              mountPath: /app/logs
      volumes:
        - name: tmp
          emptyDir: {}
        - name: logs
          emptyDir: {}
|
||||||
151
apps/fc-devicemgmt/deployment-web.yaml
Normal file
151
apps/fc-devicemgmt/deployment-web.yaml
Normal file
@@ -0,0 +1,151 @@
|
|||||||
|
# FlowerCore.DeviceManagement Web.
#
# Source repo is expected to ship FlowerCore.DeviceManagement.Web in a later
# Sprint 9+ lane. This manifest is static-valid without requiring the image to
# exist yet; import localhost/fc-devicemgmt-web:<tag> to all schedulable RKE2
# nodes before letting ArgoCD sync a live rollout.
#
# SCALED TO 0 — 2026-05-19 morning-routine cleanup.
# The Web pod cannot start until TWO upstream gaps close:
#   1. MySQL DB instance `flowercore_devicemgmt` (user `fc_devicemgmt`) is
#      provisioned via fc-mysql Manager. The cluster currently has ZERO
#      MySqlInstanceCrds and no `mysql.fc-mysql.svc:3306` Service, so the
#      deployment-web container env `FlowerCore__Database__Host=mysql.fc-mysql.svc`
#      points at nothing. Provision via the fc-mysql Manager UI/REST/MCP.
#   2. 1Password vault item `IAmWorkin/FlowerCore DeviceManagement Runtime`
#      with 5 fields (DB-Password, mtls-ca.pem, mtls-client.crt, mtls-client.key,
#      mtls-chain.pem) — see apps/fc-devicemgmt/1password-item.yaml. Mint mTLS
#      from step-ca-agent ClusterIssuer per ADR-126; DB-Password must match the
#      password configured for the MySQL user.
# Re-enable: change replicas back to 2 after both gaps close. The image tag
# in this file (v20260512-cx5) MAY also need a refresh — it predates the
# Sprint 34 Cl-3 operator fix; Web may have an analogous bug.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: fc-devicemgmt-web
  namespace: fc-devicemgmt
  labels:
    app: fc-devicemgmt-web
    app.kubernetes.io/name: fc-devicemgmt-web
    app.kubernetes.io/component: web
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
  annotations:
    flowercore.io/traceability-standard: k8s-pod-ownership-and-traceability-standard
spec:
  # Intentionally 0 — see the header for the re-enable conditions.
  replicas: 0
  revisionHistoryLimit: 3
  selector:
    matchLabels:
      app: fc-devicemgmt-web
  template:
    metadata:
      labels:
        app: fc-devicemgmt-web
        app.kubernetes.io/name: fc-devicemgmt-web
        app.kubernetes.io/component: web
        app.kubernetes.io/part-of: flowercore
        app.kubernetes.io/managed-by: argocd
        flowercore.io/tenant-id: system
        flowercore.io/created-by: bluejay-infra
      annotations:
        # Annotation values must be strings, hence the quoted numbers/booleans.
        prometheus.io/scrape: "true"
        prometheus.io/port: "8080"
        prometheus.io/path: "/metrics"
        flowercore.io/audit-trace-id: "runtime-activity-trace"
    spec:
      securityContext:
        fsGroup: 1654
        fsGroupChangePolicy: OnRootMismatch
        # The container below already sets runAsNonRoot, no privilege
        # escalation and drop-ALL capabilities; the runtime's default seccomp
        # filter is the remaining control of the "restricted" Pod Security
        # Standard.
        seccompProfile:
          type: RuntimeDefault
      containers:
        - name: web
          # localhost/ image with pull policy Never: the image must already
          # be present on the node, it is never pulled from a registry.
          image: localhost/fc-devicemgmt-web:v20260512-cx5
          imagePullPolicy: Never
          ports:
            - name: http
              containerPort: 8080
          env:
            - name: ASPNETCORE_URLS
              value: "http://+:8080"
            - name: ASPNETCORE_ENVIRONMENT
              value: "Production"
            - name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
              value: "false"
            - name: FlowerCore__Service__Name
              value: "FlowerCore.DeviceManagement.Web"
            - name: FlowerCore__DeviceManagement__DefaultTenantId
              value: "system"
            # Database connection settings; only the password comes from the
            # synced Secret.
            - name: FlowerCore__Database__Provider
              value: "MySql"
            - name: FlowerCore__Database__Host
              value: "mysql.fc-mysql.svc"
            - name: FlowerCore__Database__Database
              value: "flowercore_devicemgmt"
            - name: FlowerCore__Database__User
              value: "fc_devicemgmt"
            - name: FlowerCore__Database__Password
              valueFrom:
                secretKeyRef:
                  name: fc-devicemgmt-runtime
                  key: DB-Password
            # Agent mTLS material, read from the devicemgmt-mtls volume
            # mounted below.
            - name: FlowerCore__DeviceManagement__AgentMtls__CaPath
              value: "/secrets/devicemgmt-mtls/mtls-ca.pem"
            - name: FlowerCore__DeviceManagement__AgentMtls__ClientCertificatePath
              value: "/secrets/devicemgmt-mtls/mtls-client.crt"
            - name: FlowerCore__DeviceManagement__AgentMtls__ClientKeyPath
              value: "/secrets/devicemgmt-mtls/mtls-client.key"
            - name: FlowerCore__EventBus__Redis__Configuration
              value: "redis.fc-redis.svc:6379"
          resources:
            requests:
              cpu: 100m
              memory: 256Mi
            limits:
              cpu: 1000m
              memory: 768Mi
          # All three probes only check that TCP/8080 accepts connections.
          # Startup budget: 5s initial delay + 30 failures x 5s period.
          startupProbe:
            tcpSocket:
              port: 8080
            initialDelaySeconds: 5
            periodSeconds: 5
            failureThreshold: 30
          readinessProbe:
            tcpSocket:
              port: 8080
            periodSeconds: 10
            failureThreshold: 3
          livenessProbe:
            tcpSocket:
              port: 8080
            initialDelaySeconds: 30
            periodSeconds: 30
            failureThreshold: 3
          securityContext:
            runAsNonRoot: true
            runAsUser: 1654
            runAsGroup: 1654
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
            capabilities:
              drop:
                - ALL
          # The root filesystem is read-only, so the writable paths are
          # provided as emptyDir volumes.
          volumeMounts:
            - name: tmp
              mountPath: /tmp
            - name: logs
              mountPath: /app/logs
            - name: devicemgmt-mtls
              mountPath: /secrets/devicemgmt-mtls
              readOnly: true
      volumes:
        - name: tmp
          emptyDir: {}
        - name: logs
          emptyDir: {}
        - name: devicemgmt-mtls
          secret:
            # NOTE(review): no `items` filter, so every key of the Secret is
            # projected as a file — including DB-Password, which is already
            # injected via env above. Consider listing only the mtls-* keys
            # once the Secret's final key set is confirmed.
            secretName: fc-devicemgmt-runtime
            # Octal file mode (owner read-only); must stay an unquoted
            # integer.
            defaultMode: 0400
|
||||||
55
apps/fc-devicemgmt/ingressroute-web.yaml
Normal file
55
apps/fc-devicemgmt/ingressroute-web.yaml
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
# LAN ingress for FlowerCore.DeviceManagement Web.
#
# RKE2 Traefik has no built-in ACME resolver configured, so TLS certificate
# ownership stays with cert-manager: Certificate/fc-devicemgmt-web-tls
# provides the Secret referenced under `tls`.
kind: IngressRoute
apiVersion: traefik.io/v1alpha1
metadata:
  namespace: fc-devicemgmt
  name: fc-devicemgmt-web
  labels:
    app.kubernetes.io/name: fc-devicemgmt-web
    app.kubernetes.io/component: web
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
spec:
  entryPoints: [websecure]
  tls:
    secretName: fc-devicemgmt-web-tls
  routes:
    # Every request for the LAN host goes to the Web Service on port 80.
    - kind: Rule
      match: Host(`devices.iamworkin.lan`)
      services:
        - name: fc-devicemgmt-web
          port: 80

# Future public agent/update host gate (OFF by default):
#
# Do not enable `update.flowercore.io` here until Authentik OIDC Q-OIDC-1
# resolves the public-device-management auth model and route ownership with
# UpdateCenter. When enabled, use a separate public IngressRoute with an
# explicit Method allowlist, public-host auth middleware, and public TLS
# certificate strategy. Leaving this as comments keeps ArgoCD from stealing
# live UpdateCenter traffic.
#
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
#   name: fc-devicemgmt-web-public
#   namespace: fc-devicemgmt
#   annotations:
#     flowercore.io/public-host-gate: "disabled-until-Q-OIDC-1"
# spec:
#   entryPoints:
#     - websecure
#   routes:
#     - match: Host(`update.flowercore.io`) && (Method(`GET`) || Method(`HEAD`) || Method(`POST`) || Method(`OPTIONS`))
#       kind: Rule
#       services:
#         - name: fc-devicemgmt-web
#           port: 80
#   tls:
#     secretName: fc-devicemgmt-public-tls
|
||||||
13
apps/fc-devicemgmt/namespace.yaml
Normal file
13
apps/fc-devicemgmt/namespace.yaml
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
# FlowerCore.DeviceManagement namespace.
#
# ArgoCD discovers this directory as Application `infra-fc-devicemgmt`.
kind: Namespace
apiVersion: v1
metadata:
  name: fc-devicemgmt
  labels:
    app.kubernetes.io/name: fc-devicemgmt
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
|
||||||
224
apps/fc-devicemgmt/network-policy.yaml
Normal file
224
apps/fc-devicemgmt/network-policy.yaml
Normal file
@@ -0,0 +1,224 @@
|
|||||||
|
# FlowerCore.DeviceManagement NetworkPolicies.
|
||||||
|
#
|
||||||
|
# NetworkPolicies belong in bluejay-infra so ArgoCD owns rebuild state.
|
||||||
|
# Rules include Traefik post-DNAT backend ports per
|
||||||
|
# feedback_netpol_dnat_backend_port and Synology NFS egress for the requested
|
||||||
|
# cold-tier / future artifact path.
|
||||||
|
---
|
||||||
|
# Ingress/egress allow-list for the Web pods (label app=fc-devicemgmt-web).
# Both policy types are declared, so anything not listed below is denied for
# the selected pods.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: fc-devicemgmt-web-isolation
  namespace: fc-devicemgmt
  labels:
    app.kubernetes.io/name: fc-devicemgmt-web
    app.kubernetes.io/component: web
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
spec:
  podSelector:
    matchLabels:
      app: fc-devicemgmt-web
  policyTypes:
    - Ingress
    - Egress
  ingress:
    # LAN edge: only cluster Traefik should reach the Web pod for
    # devices.iamworkin.lan.
    - from:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: traefik-system
          podSelector:
            matchLabels:
              app.kubernetes.io/name: traefik
      ports:
        - {port: 8080, protocol: TCP}
    # Direct LAN diagnostics are allowed only from FlowerCore LAN/VPN ranges.
    - from:
        - ipBlock:
            cidr: 10.0.56.0/24
        - ipBlock:
            cidr: 10.0.57.0/24
        - ipBlock:
            cidr: 10.0.58.0/24
        - ipBlock:
            cidr: 10.0.68.0/27
      ports:
        - {port: 8080, protocol: TCP}
    # Metrics scraping: the Web pod template in deployment-web.yaml carries
    # prometheus.io/scrape annotations for port 8080, so admit the monitoring
    # namespace exactly as fc-devicemgmt-operator-isolation does. Without
    # this rule scrapes coming from pods in that namespace are dropped.
    - from:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: monitoring
      ports:
        - {port: 8080, protocol: TCP}
  egress:
    # CoreDNS.
    - to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: kube-system
          podSelector:
            matchLabels:
              k8s-app: kube-dns
      ports:
        - {port: 53, protocol: UDP}
        - {port: 53, protocol: TCP}
    # Database namespace.
    - to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: fc-mysql
      ports:
        - {port: 3306, protocol: TCP}
    # Redis backplane for multi-replica SignalR / live-status fan-out.
    - to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: fc-redis
      ports:
        - {port: 6379, protocol: TCP}
    # Traefik VIP / in-cluster Traefik for self-callbacks and public URL
    # generation tests. Include post-DNAT backend ports 8443 + 8080.
    # The two peers are alternatives (OR): the VIP address, or Traefik pods.
    - to:
        - ipBlock:
            cidr: 10.0.56.200/32
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: traefik-system
          podSelector:
            matchLabels:
              app.kubernetes.io/name: traefik
      ports:
        - {port: 80, protocol: TCP}
        - {port: 443, protocol: TCP}
        - {port: 8080, protocol: TCP}
        - {port: 8443, protocol: TCP}
    # Agent egress: LAN/VPN devices may run DM Agent in Generic, Kiosk, Pi,
    # ThinClient, or Server mode. Keep this private-range only.
    - to:
        - ipBlock:
            cidr: 10.0.56.0/24
        - ipBlock:
            cidr: 10.0.57.0/24
        - ipBlock:
            cidr: 10.0.58.0/24
        - ipBlock:
            cidr: 10.0.68.0/27
      ports:
        - {port: 80, protocol: TCP}
        - {port: 443, protocol: TCP}
        - {port: 8080, protocol: TCP}
        - {port: 8443, protocol: TCP}
        - {port: 5000, protocol: TCP}
        - {port: 5001, protocol: TCP}
    # Synology NFS cold-tier / artifact mount allowance.
    - to:
        - ipBlock:
            cidr: 10.0.58.3/32
      ports:
        - {port: 2049, protocol: TCP}
        - {port: 2049, protocol: UDP}
        - {port: 111, protocol: TCP}
        - {port: 111, protocol: UDP}
|
||||||
|
---
|
||||||
|
# Ingress/egress allow-list for the operator pods (label
# app=fc-devicemgmt-operator). Both policy types are declared, so anything
# not listed below is denied for the selected pods.
kind: NetworkPolicy
apiVersion: networking.k8s.io/v1
metadata:
  namespace: fc-devicemgmt
  name: fc-devicemgmt-operator-isolation
  labels:
    app.kubernetes.io/name: fc-devicemgmt-operator
    app.kubernetes.io/component: operator
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
spec:
  policyTypes: [Ingress, Egress]
  podSelector:
    matchLabels:
      app: fc-devicemgmt-operator
  ingress:
    # Any pod in the monitoring namespace may reach TCP/8080 (the port the
    # operator Deployment names `metrics`).
    - ports:
        - {protocol: TCP, port: 8080}
      from:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: monitoring
  egress:
    # CoreDNS.
    - ports:
        - {protocol: UDP, port: 53}
        - {protocol: TCP, port: 53}
      to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: kube-system
          podSelector:
            matchLabels:
              k8s-app: kube-dns
    # Kubernetes API for KubeOps reconciliation and Deployment UID lookup.
    # The peer list is empty, which places no restriction on the destination:
    # TCP/443 and TCP/6443 are allowed towards any address.
    - ports:
        - {protocol: TCP, port: 443}
        - {protocol: TCP, port: 6443}
      to: []
    # Agent egress for operator-initiated probes / fallback command dispatch.
    - ports:
        - {protocol: TCP, port: 80}
        - {protocol: TCP, port: 443}
        - {protocol: TCP, port: 8080}
        - {protocol: TCP, port: 8443}
        - {protocol: TCP, port: 5000}
        - {protocol: TCP, port: 5001}
      to:
        - ipBlock:
            cidr: 10.0.56.0/24
        - ipBlock:
            cidr: 10.0.57.0/24
        - ipBlock:
            cidr: 10.0.58.0/24
        - ipBlock:
            cidr: 10.0.68.0/27
    # Synology NFS allowance for future cold-tier/audit archival jobs.
    - ports:
        - {protocol: TCP, port: 2049}
        - {protocol: UDP, port: 2049}
        - {protocol: TCP, port: 111}
        - {protocol: UDP, port: 111}
      to:
        - ipBlock:
            cidr: 10.0.58.3/32
|
||||||
22
apps/fc-devicemgmt/service-web.yaml
Normal file
22
apps/fc-devicemgmt/service-web.yaml
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
# ClusterIP Service in front of the Web pods: Service port 80 forwards to
# container port 8080 on pods labelled app=fc-devicemgmt-web.
kind: Service
apiVersion: v1
metadata:
  namespace: fc-devicemgmt
  name: fc-devicemgmt-web
  labels:
    app: fc-devicemgmt-web
    app.kubernetes.io/name: fc-devicemgmt-web
    app.kubernetes.io/component: web
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
spec:
  type: ClusterIP
  ports:
    - name: http
      protocol: TCP
      port: 80
      targetPort: 8080
  selector:
    app: fc-devicemgmt-web
|
||||||
12
apps/fc-devicemgmt/serviceaccount-operator.yaml
Normal file
12
apps/fc-devicemgmt/serviceaccount-operator.yaml
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
# ServiceAccount named fc-devicemgmt-operator in the fc-devicemgmt namespace.
# This manifest only declares the account and its labels; any RBAC bindings
# are defined elsewhere.
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: fc-devicemgmt
  name: fc-devicemgmt-operator
  labels:
    # Standard Kubernetes recommended labels.
    app.kubernetes.io/name: fc-devicemgmt-operator
    app.kubernetes.io/component: operator
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    # FlowerCore ownership labels.
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
|
||||||
@@ -74,6 +74,14 @@ metadata:
|
|||||||
spec:
|
spec:
|
||||||
itemPath: "vaults/IAmWorkin/items/FlowerCore Edition Signing Key - edition:aistation-field"
|
itemPath: "vaults/IAmWorkin/items/FlowerCore Edition Signing Key - edition:aistation-field"
|
||||||
---
|
---
|
||||||
|
apiVersion: onepassword.com/v1
|
||||||
|
kind: OnePasswordItem
|
||||||
|
metadata:
|
||||||
|
name: distribution-oidc-client
|
||||||
|
namespace: fc-distribution
|
||||||
|
spec:
|
||||||
|
itemPath: "vaults/IAmWorkin/items/distribution-oidc-client"
|
||||||
|
---
|
||||||
apiVersion: apps/v1
|
apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
metadata:
|
metadata:
|
||||||
@@ -118,7 +126,7 @@ spec:
|
|||||||
# dotnet.exe publish -c Release -o deploy/app \
|
# dotnet.exe publish -c Release -o deploy/app \
|
||||||
# src/FlowerCore.Distribution.Web/FlowerCore.Distribution.Web.csproj
|
# src/FlowerCore.Distribution.Web/FlowerCore.Distribution.Web.csproj
|
||||||
# podman build -t localhost/fc-distribution:v<tag> -f deploy/Dockerfile.deploy deploy
|
# podman build -t localhost/fc-distribution:v<tag> -f deploy/Dockerfile.deploy deploy
|
||||||
image: localhost/fc-distribution:v202604240010
|
image: localhost/fc-distribution:v202605061948
|
||||||
imagePullPolicy: Never
|
imagePullPolicy: Never
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 8080
|
- containerPort: 8080
|
||||||
@@ -130,6 +138,30 @@ spec:
|
|||||||
value: "Production"
|
value: "Production"
|
||||||
- name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
|
- name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
|
||||||
value: "false"
|
value: "false"
|
||||||
|
- name: FlowerCore__Auth__Enabled
|
||||||
|
value: "true"
|
||||||
|
- name: FlowerCore__Auth__Oidc__Enabled
|
||||||
|
value: "true"
|
||||||
|
- name: FlowerCore__Auth__Oidc__Authority
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: distribution-oidc-client
|
||||||
|
key: issuer_url
|
||||||
|
optional: true
|
||||||
|
- name: FlowerCore__Auth__Oidc__Audience
|
||||||
|
value: "distribution"
|
||||||
|
- name: FlowerCore__Auth__Oidc__ClientId
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: distribution-oidc-client
|
||||||
|
key: client_id
|
||||||
|
optional: true
|
||||||
|
- name: FlowerCore__Auth__Oidc__ClientSecret
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: distribution-oidc-client
|
||||||
|
key: client_secret
|
||||||
|
optional: true
|
||||||
# SQLite connection (catalog + data-protection keys via FlowerCoreDbContext).
|
# SQLite connection (catalog + data-protection keys via FlowerCoreDbContext).
|
||||||
# Read by Data/DatabaseProviderExtensions.cs in precedence order; Sqlite key wins.
|
# Read by Data/DatabaseProviderExtensions.cs in precedence order; Sqlite key wins.
|
||||||
- name: FlowerCore__Database__Provider
|
- name: FlowerCore__Database__Provider
|
||||||
@@ -151,6 +183,10 @@ spec:
|
|||||||
value: "/signing/aistation-field/chain.pem"
|
value: "/signing/aistation-field/chain.pem"
|
||||||
- name: FlowerCore__Distribution__Signing__EditionCerts__aistation-field__KeyPath
|
- name: FlowerCore__Distribution__Signing__EditionCerts__aistation-field__KeyPath
|
||||||
value: "/signing/aistation-field/private-key.pem"
|
value: "/signing/aistation-field/private-key.pem"
|
||||||
|
# Public distribution host is GET/HEAD-only at Traefik; this
|
||||||
|
# entitlement list controls which editions are readable there.
|
||||||
|
- name: FlowerCore__Distribution__EntitlementPublic__PublicEditions__0
|
||||||
|
value: "*"
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 100m
|
cpu: 100m
|
||||||
@@ -262,8 +298,12 @@ spec:
|
|||||||
kind: ClusterIssuer
|
kind: ClusterIssuer
|
||||||
dnsNames:
|
dnsNames:
|
||||||
- dist.iamworkin.lan
|
- dist.iamworkin.lan
|
||||||
duration: 2160h # 90d
|
# step-ca ACME caps lifetime at 30d; requesting 90d silently capped
|
||||||
renewBefore: 720h # 30d
|
# made renewBefore=cert-lifetime → perpetual renewal loop (10880+ CRs
|
||||||
|
# in 18h on 2026-05-07). Match working 720h/240h pattern from other
|
||||||
|
# FC services.
|
||||||
|
duration: 720h # 30d (step-ca cap)
|
||||||
|
renewBefore: 240h # 10d
|
||||||
---
|
---
|
||||||
apiVersion: traefik.io/v1alpha1
|
apiVersion: traefik.io/v1alpha1
|
||||||
kind: IngressRoute
|
kind: IngressRoute
|
||||||
|
|||||||
45
apps/fc-divoom-dm-pi-device/README.md
Normal file
45
apps/fc-divoom-dm-pi-device/README.md
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
# FlowerCore Divoom DM Pi Device
|
||||||
|
|
||||||
|
Source-controlled Puppet/Hiera deployment contract for registering the edge2
|
||||||
|
Divoom MiniToo panel as a FlowerCore DeviceManagement-managed Pi device.
|
||||||
|
|
||||||
|
This is not a Kubernetes application. The live panel remains the existing
|
||||||
|
edge2 `flowercore-divoom.service` managed by `FlowerCore.Puppet`
|
||||||
|
`profile::pi::service::divoom`, with the .NET payload deployed out of band
|
||||||
|
and `/opt/flowercore/divoom/data` plus the Bluetooth shell wrappers preserved.
|
||||||
|
Because edge2 is already Hiera-driven through `profile::pi::service::apps`,
|
||||||
|
the deploy home is additive `profile::pi::service` data/profile source, not
|
||||||
|
`profile::edge::service::apps` and not an ArgoCD/K8s app.
|
||||||
|
|
||||||
|
## Scope
|
||||||
|
|
||||||
|
- Stage DeviceManagement registration metadata for the edge2 Divoom MiniToo.
|
||||||
|
- Stage a separate, disabled-by-default DM Agent executor unit for privileged
|
||||||
|
Bluetooth operations once the DM-RPC lane lands.
|
||||||
|
- Keep `flowercore-divoom.service` and `flowercore-divoom-bt.service`
|
||||||
|
untouched: no service replacement, no restart subscription, no K8s surface.
|
||||||
|
- Preserve the current wrapper contract:
|
||||||
|
`/opt/flowercore/divoom/bt-link.sh`,
|
||||||
|
`/opt/flowercore/divoom/bt-reset.sh`, and
|
||||||
|
`/opt/flowercore/divoom/audio-link.sh`.
|
||||||
|
- Keep FM radio disabled and require visible render proof; device-info echo is
|
||||||
|
not render proof.
|
||||||
|
|
||||||
|
## Artifact Map
|
||||||
|
|
||||||
|
| Path | Use |
|
||||||
|
| --- | --- |
|
||||||
|
| `hiera/edge2-divoom-dm-device.overlay.yaml` | Additive Hiera overlay for edge2. Merge into the existing node YAML without removing `fc-pimanager` or `fc-divoom`. |
|
||||||
|
| `puppet/profile/pi/service/divoom_dm_device.pp` | Puppet profile shape to vendor into `FlowerCore.Puppet` after the DM-RPC executor binary exists. |
|
||||||
|
| `puppet/templates/divoom-device-registration.json.epp` | DM device registration metadata rendered on edge2. |
|
||||||
|
| `puppet/templates/flowercore-divoom-dm-agent.service.epp` | Separate DM Agent systemd unit. Defaults are stopped and disabled until a later cutover. |
|
||||||
|
|
||||||
|
## Rollout Notes
|
||||||
|
|
||||||
|
1. Land these artifacts in bluejay-infra as the deploy contract.
|
||||||
|
2. Vendor the Puppet profile and EPP templates into `FlowerCore.Puppet`.
|
||||||
|
3. Merge the Hiera overlay into `data/nodes/edge2.iamworkin.lan.yaml`.
|
||||||
|
4. Run Puppet in noop first, preferably with a node-local validation directory
|
||||||
|
under `~/.fcv` rather than `/tmp`.
|
||||||
|
5. Only enable the DM Agent service after the DeviceManagement BT executor has
|
||||||
|
landed and passed operator-eyeball render proof.
|
||||||
@@ -0,0 +1,32 @@
|
|||||||
|
---
# Hiera overlay for edge2: merge into FlowerCore.Puppet
# data/nodes/edge2.iamworkin.lan.yaml.
#
# The overlay is additive only:
#   - the existing fc-pimanager version/tarball entry stays,
#   - fc-divoom stays enabled,
#   - Divoom is NOT moved into Kubernetes.

profile::pi::service::apps:
  fc-pimanager:
    binary: "FlowerCore.PiManager.Web"
    install_dir: "/opt/fc-pimanager"
    port: 5000
    environment: "edge2"
    version: "2026.05.28.1646"
    tarball_source: "puppet:///modules/profile/pi/builds/fc-pimanager.tar.gz"
  fc-divoom:
    enabled: true

# DM device registration is staged; the DM Agent unit is installed but kept
# stopped and disabled.
profile::pi::service::divoom_dm_device::ensure: "present"
profile::pi::service::divoom_dm_device::service_enabled: false
profile::pi::service::divoom_dm_device::service_ensure: "stopped"

# Device identity and endpoints.
profile::pi::service::divoom_dm_device::device_id: "edge2-divoom-minitoo"
profile::pi::service::divoom_dm_device::display_name: "edge2 Divoom MiniToo"
profile::pi::service::divoom_dm_device::host_fqdn: "edge2.iamworkin.lan"
profile::pi::service::divoom_dm_device::dm_web_url: "https://devicemgmt.iamworkin.lan"

# Install locations.
profile::pi::service::divoom_dm_device::divoom_install_dir: "/opt/flowercore/divoom"
profile::pi::service::divoom_dm_device::agent_install_dir: "/opt/flowercore/devicemanagement-agent"

# Bluetooth transport settings (channels are strings, not integers).
profile::pi::service::divoom_dm_device::bt_candidate_channels: ["1", "10"]
profile::pi::service::divoom_dm_device::default_bt_channel: "1"
profile::pi::service::divoom_dm_device::a2dp_default_state: "off"

# Safety switches.
profile::pi::service::divoom_dm_device::fm_radio_enabled: false
profile::pi::service::divoom_dm_device::visible_render_proof_required: true
|
||||||
@@ -0,0 +1,140 @@
|
|||||||
|
# Drop into FlowerCore.Puppet site-modules/profile/manifests/pi/service/divoom_dm_device.pp.
#
# @summary Stages the DeviceManagement registration JSON and a separate DM Agent
#   systemd unit for the Divoom panel.
#
# This profile is additive to profile::pi::service::divoom. It must not manage,
# restart, replace, or subscribe the existing flowercore-divoom.service.
#
# Behaviour by branch:
#   - safe-account host: emits a notify and (on non-Windows) an audit marker file;
#     nothing else is managed.
#   - ensure => 'absent': stops/disables the agent, removes its unit and the
#     registration file, then reloads systemd.
#   - ensure => 'present': Debian-family only; creates the config/state/log
#     directories, renders the registration JSON and the unit, reloads systemd
#     when the unit changes, and manages the service state.
#
# NOTE(review): Puppet does not create missing parent directories. The parents
# of $config_dir, $state_dir and $log_dir (/etc/flowercore/device-management,
# /var/lib/flowercore, /var/log/flowercore) are assumed to be managed by another
# profile - confirm before first apply on a fresh node.
#
# @param ensure 'present' installs the artifacts, 'absent' removes them.
# @param service_enabled Whether the agent unit is enabled at boot.
# @param service_ensure Desired run state of the agent unit.
# @param service_name systemd unit name (without the .service suffix).
# @param device_id Device identifier; also the registration file basename.
# @param display_name Human-readable device name written to the registration.
# @param host_fqdn FQDN written to the registration.
# @param dm_web_url DeviceManagement URL passed to the agent unit template.
# @param divoom_install_dir Install dir of the existing Divoom payload/wrappers.
# @param agent_install_dir Install dir of the DM Agent payload.
# @param agent_binary Agent executable name inside $agent_install_dir.
# @param bt_candidate_channels Candidate Bluetooth channels (strings).
# @param default_bt_channel Default Bluetooth channel (string).
# @param a2dp_default_state Default A2DP state written to the registration.
# @param fm_radio_enabled Written to the registration as fmRadioEnabled.
# @param visible_render_proof_required Written to the registration.
# @param service_user Owner of the agent state/log directories. Must match the
#   User= of the agent unit template, which currently hard-codes 'stoltz'.
# @param service_group Group of the agent state/log directories. Must match the
#   Group= of the agent unit template, which currently hard-codes 'stoltz'.
class profile::pi::service::divoom_dm_device (
  Enum['present', 'absent']  $ensure                        = 'present',
  Boolean                    $service_enabled               = false,
  Enum['running', 'stopped'] $service_ensure                = 'stopped',
  String                     $service_name                  = 'flowercore-divoom-dm-agent',
  String                     $device_id                     = 'edge2-divoom-minitoo',
  String                     $display_name                  = 'edge2 Divoom MiniToo',
  String                     $host_fqdn                     = 'edge2.iamworkin.lan',
  String                     $dm_web_url                    = 'https://devicemgmt.iamworkin.lan',
  String                     $divoom_install_dir            = '/opt/flowercore/divoom',
  String                     $agent_install_dir             = '/opt/flowercore/devicemanagement-agent',
  String                     $agent_binary                  = 'FlowerCore.DeviceManagement.Agent',
  Array[String]              $bt_candidate_channels         = ['1', '10'],
  String                     $default_bt_channel            = '1',
  Enum['on', 'off']          $a2dp_default_state            = 'off',
  Boolean                    $fm_radio_enabled              = false,
  Boolean                    $visible_render_proof_required = true,
  String                     $service_user                  = 'stoltz',
  String                     $service_group                 = 'stoltz',
) {
  include profile::workstation::safe_account_exclusion

  $safe_account      = $profile::workstation::safe_account_exclusion::safe_account
  $config_dir        = '/etc/flowercore/device-management/devices'
  $state_dir         = '/var/lib/flowercore/divoom-dm-agent'
  $log_dir           = '/var/log/flowercore/divoom-dm-agent'
  $registration_path = "${config_dir}/${device_id}.json"
  $agent_binary_path = "${agent_install_dir}/${agent_binary}"
  $unit_path         = "/etc/systemd/system/${service_name}.service"
  # Pre-rendered JSON array of quoted channel strings, e.g. ["1", "10"].
  $bt_channels_json  = inline_template('[<%= @bt_candidate_channels.map { |c| "\"#{c}\"" }.join(", ") %>]')

  if $safe_account {
    notify { 'fc-divoom-dm-device safe-account exclusion':
      message => 'SAFE-ACCOUNT-EXCLUSION: Divoom DM Pi device profile refused to apply on operator workstation',
    }

    if $facts['os']['family'] != 'windows' {
      # ensure_resource: the audit directory may already be declared elsewhere.
      ensure_resource('file', '/var/log/flowercore-audit', {
          'ensure' => 'directory',
          'owner'  => 'root',
          'group'  => 'root',
          'mode'   => '0755',
      })

      file { '/var/log/flowercore-audit/safe-account-noop-fc-divoom-dm-device.log':
        ensure  => file,
        owner   => 'root',
        group   => 'root',
        mode    => '0644',
        content => "noop: divoom dm pi device profile refused to apply on safe-account host\n",
        require => File['/var/log/flowercore-audit'],
      }
    }
  } elsif $ensure == 'absent' {
    service { $service_name:
      ensure => stopped,
      enable => false,
    }

    # Stop the agent before its unit disappears, and notify the refreshonly
    # daemon-reload so systemd actually forgets the removed unit. Without the
    # notify the exec below never runs.
    file { $unit_path:
      ensure  => absent,
      require => Service[$service_name],
      notify  => Exec['fc-divoom-dm-agent-systemd-reload'],
    }

    file { $registration_path:
      ensure  => absent,
      require => Service[$service_name],
    }

    exec { 'fc-divoom-dm-agent-systemd-reload':
      command     => '/usr/bin/systemctl daemon-reload',
      refreshonly => true,
      path        => ['/usr/bin', '/bin'],
    }
  } else {
    case $facts['os']['family'] {
      'Debian': {}
      default: { fail("profile::pi::service::divoom_dm_device only supports Debian-family OS, got ${facts['os']['family']}") }
    }

    file { $config_dir:
      ensure => directory,
      owner  => 'root',
      group  => 'root',
      mode   => '0755',
    }

    # The unit lists these two directories in ReadWritePaths= and runs the
    # agent unprivileged; ReadWritePaths= only lifts the read-only mount, so
    # the service account must own them to be able to write state and logs.
    file { [$state_dir, $log_dir]:
      ensure => directory,
      owner  => $service_user,
      group  => $service_group,
      mode   => '0755',
    }

    file { $registration_path:
      ensure  => file,
      owner   => 'root',
      group   => 'root',
      mode    => '0644',
      content => epp('profile/pi/fc_divoom_dm/divoom-device-registration.json.epp', {
          'device_id'                     => $device_id,
          'display_name'                  => $display_name,
          'host_fqdn'                     => $host_fqdn,
          'divoom_install_dir'            => $divoom_install_dir,
          'bt_channels_json'              => $bt_channels_json,
          'default_bt_channel'            => $default_bt_channel,
          'a2dp_default_state'            => $a2dp_default_state,
          'fm_radio_enabled'              => $fm_radio_enabled,
          'visible_render_proof_required' => $visible_render_proof_required,
      }),
      require => File[$config_dir],
    }

    file { $unit_path:
      ensure  => file,
      owner   => 'root',
      group   => 'root',
      mode    => '0644',
      content => epp('profile/pi/fc_divoom_dm/flowercore-divoom-dm-agent.service.epp', {
          'service_name'       => $service_name,
          'device_id'          => $device_id,
          'dm_web_url'         => $dm_web_url,
          'registration_path'  => $registration_path,
          'divoom_install_dir' => $divoom_install_dir,
          'agent_install_dir'  => $agent_install_dir,
          'agent_binary_path'  => $agent_binary_path,
          'state_dir'          => $state_dir,
          'log_dir'            => $log_dir,
      }),
      notify  => Exec['fc-divoom-dm-agent-systemd-reload'],
      require => File[$registration_path],
    }

    exec { 'fc-divoom-dm-agent-systemd-reload':
      command     => '/usr/bin/systemctl daemon-reload',
      refreshonly => true,
      path        => ['/usr/bin', '/bin'],
    }

    # Only "require", never "subscribe": the agent is not restarted by Puppet
    # when its unit or registration changes.
    service { $service_name:
      ensure  => $service_ensure,
      enable  => $service_enabled,
      require => [
        File[$unit_path],
        File[$registration_path],
        File[$state_dir],
        File[$log_dir],
        Exec['fc-divoom-dm-agent-systemd-reload'],
      ],
    }
  }
}
|
||||||
@@ -0,0 +1,34 @@
|
|||||||
|
{
|
||||||
|
"deviceId": "<%= $device_id %>",
|
||||||
|
"displayName": "<%= $display_name %>",
|
||||||
|
"hostFqdn": "<%= $host_fqdn %>",
|
||||||
|
"kind": "DivoomMiniToo",
|
||||||
|
"managedBy": "FlowerCore.DeviceManagement",
|
||||||
|
"executionMode": "Pi",
|
||||||
|
"transport": {
|
||||||
|
"kind": "BluetoothSerial",
|
||||||
|
"candidateChannels": <%= $bt_channels_json %>,
|
||||||
|
"defaultChannel": "<%= $default_bt_channel %>",
|
||||||
|
"deviceInfoIsRenderProof": false,
|
||||||
|
"visibleRenderProofRequired": <%= $visible_render_proof_required %>
|
||||||
|
},
|
||||||
|
"paths": {
|
||||||
|
"divoomInstallDir": "<%= $divoom_install_dir %>",
|
||||||
|
"btLink": "<%= $divoom_install_dir %>/bt-link.sh",
|
||||||
|
"btReset": "<%= $divoom_install_dir %>/bt-reset.sh",
|
||||||
|
"audioLink": "<%= $divoom_install_dir %>/audio-link.sh"
|
||||||
|
},
|
||||||
|
"capabilities": {
|
||||||
|
"supportsBluetoothSerial": true,
|
||||||
|
"supportsBtChannelRedetect": true,
|
||||||
|
"supportsBtHardReset": true,
|
||||||
|
"supportsBtAudioProfileSwitch": true,
|
||||||
|
"a2dpDefaultState": "<%= $a2dp_default_state %>",
|
||||||
|
"fmRadioEnabled": <%= $fm_radio_enabled %>
|
||||||
|
},
|
||||||
|
"safety": {
|
||||||
|
"preserveExistingService": "flowercore-divoom.service",
|
||||||
|
"preserveDataDirectory": "<%= $divoom_install_dir %>/data",
|
||||||
|
"doNotEnableFmRadio": true
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,36 @@
|
|||||||
|
[Unit]
Description=FlowerCore Divoom DM Agent Bluetooth executor
Documentation=https://github.com/astoltz/FlowerCore.Notes/blob/master/docs/standards/divoom-tv-hdmi-multitarget-render-substrate.md
Wants=network-online.target
After=network-online.target bluetooth.service
Requires=bluetooth.service
# The unit is skipped (not failed) unless the agent binary, the registration
# file and all three Bluetooth wrapper scripts are present.
ConditionPathExists=<%= $agent_binary_path %>
ConditionPathExists=<%= $registration_path %>
ConditionPathExists=<%= $divoom_install_dir %>/bt-link.sh
ConditionPathExists=<%= $divoom_install_dir %>/bt-reset.sh
ConditionPathExists=<%= $divoom_install_dir %>/audio-link.sh
# Start rate limiting belongs in [Unit]: StartLimitIntervalSec= is not accepted
# in [Service] (systemd logs "Unknown key name" and ignores it), which would
# leave the default interval in force and the 3-per-300s limit unenforced.
StartLimitIntervalSec=300s
StartLimitBurst=3

[Service]
Type=simple
User=stoltz
Group=stoltz
WorkingDirectory=<%= $agent_install_dir %>
Environment=DOTNET_CLI_TELEMETRY_OPTOUT=1
Environment=FLOWERCORE_DM_DEVICE_REGISTRATION=<%= $registration_path %>
Environment=Divoom__Bluetooth__DeviceInfoIsRenderProof=false
Environment=Divoom__Bluetooth__VisibleRenderProofRequired=true
Environment=Divoom__Bluetooth__A2dpDefaultState=off
ExecStart=<%= $agent_binary_path %> --mode=Pi --device-id=<%= $device_id %> --dm-web-url=<%= $dm_web_url %> --registration=<%= $registration_path %>
Restart=on-failure
RestartSec=10s
SupplementaryGroups=bluetooth audio dialout
# Sandboxing: the file system is read-only except for the state and log
# directories. ReadWritePaths= does not change ownership, so both directories
# must be writable by the User= above.
NoNewPrivileges=true
PrivateTmp=true
ProtectSystem=strict
ProtectHome=true
ReadWritePaths=<%= $state_dir %> <%= $log_dir %>

[Install]
WantedBy=multi-user.target
|
||||||
44
apps/fc-divoom-tv-pi/README.md
Normal file
44
apps/fc-divoom-tv-pi/README.md
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
# FlowerCore Divoom TV Pi HDMI
|
||||||
|
|
||||||
|
Source-controlled deploy shape for the native `FlowerCore.Divoom.Tv`
|
||||||
|
Avalonia HDMI renderer on a Raspberry Pi connected to a TV.
|
||||||
|
|
||||||
|
This is a Puppet/systemd appliance bundle, not a Kubernetes application. It
|
||||||
|
mirrors the existing `fc-signage-pi-player` pattern: bluejay-infra carries the
|
||||||
|
systemd units, scripts, Hiera shape, and Puppet profile source that
|
||||||
|
`FlowerCore.Puppet` vendors and installs.
|
||||||
|
|
||||||
|
## Scope
|
||||||
|
|
||||||
|
- Launch the future `FlowerCore.Divoom.Tv` linux-arm64 self-contained payload
|
||||||
|
from `/opt/flowercore/divoom-tv/FlowerCore.Divoom.Tv`.
|
||||||
|
- Prefer `cage` as the Wayland fullscreen compositor, with direct app launch as
|
||||||
|
a fallback for development images.
|
||||||
|
- Restart the app after HDMI hotplug with a 2 second DRM settle delay.
|
||||||
|
- Keep all runtime state local: `/var/lib/fc-divoom-tv` and
|
||||||
|
`/var/log/fc-divoom-tv`.
|
||||||
|
- Avoid CDN/runtime fetches; the app renders the in-house Divoom scene catalog
|
||||||
|
locally.
|
||||||
|
|
||||||
|
## Artifact Map
|
||||||
|
|
||||||
|
| Path | Use |
|
||||||
|
| --- | --- |
|
||||||
|
| `systemd/flowercore-divoom-tv.service` | Fullscreen Avalonia HDMI app service. |
|
||||||
|
| `systemd/flowercore-divoom-tv-hdmi.service` | HDMI hotplug responder service. |
|
||||||
|
| `systemd/99-flowercore-divoom-tv-hdmi.rules` | DRM udev hotplug rule. |
|
||||||
|
| `scripts/flowercore-divoom-tv-prelaunch.sh` | Preflight checks and local directory creation. |
|
||||||
|
| `scripts/flowercore-divoom-tv-launch.sh` | Cage-first fullscreen launcher. |
|
||||||
|
| `scripts/flowercore-divoom-tv-hdmi-respond.sh` | Hotplug settle and restart script. |
|
||||||
|
| `puppet/profile/pi/service/divoom_tv.pp` | Puppet profile shape to vendor into `FlowerCore.Puppet`. |
|
||||||
|
| `hiera/example-divoom-tv-pi.iamworkin.lan.yaml` | Example node Hiera for a Divoom TV Pi. |
|
||||||
|
|
||||||
|
## Rollout Notes
|
||||||
|
|
||||||
|
1. Build `FlowerCore.Divoom.Tv` with `dotnet.exe publish -c Release -r linux-arm64 --self-contained`.
|
||||||
|
2. Stage the payload to `/opt/flowercore/divoom-tv/` through the standard noc1
|
||||||
|
jump path and avoid `/tmp` for unprivileged Pi scratch.
|
||||||
|
3. Vendor the profile and static files into `FlowerCore.Puppet`.
|
||||||
|
4. Run Puppet noop, then apply on the target Pi.
|
||||||
|
5. Prove deployment with `systemctl is-active flowercore-divoom-tv.service`,
|
||||||
|
journal lines showing frames presented, and a visible HDMI display check.
|
||||||
@@ -0,0 +1,19 @@
|
|||||||
|
---
# Example node data for a dedicated Pi -> HDMI -> TV Divoom renderer.
#
# Copy into FlowerCore.Puppet data/nodes/<hostname>.iamworkin.lan.yaml, but only
# once the Pi has a static DHCP/DNS entry and the linux-arm64 payload exists.

facts:
  role: pi_prototype

profile::motd::role: "Divoom TV HDMI Renderer"

# Install the renderer and keep it enabled and running.
profile::pi::service::divoom_tv::ensure: "present"
profile::pi::service::divoom_tv::service_enabled: true
profile::pi::service::divoom_tv::service_ensure: "running"

# Payload, state and log locations.
profile::pi::service::divoom_tv::install_dir: "/opt/flowercore/divoom-tv"
profile::pi::service::divoom_tv::state_dir: "/var/lib/fc-divoom-tv"
profile::pi::service::divoom_tv::log_dir: "/var/log/fc-divoom-tv"

# Renderer presentation settings.
profile::pi::service::divoom_tv::presentation_mode: "PillarboxSquare"
profile::pi::service::divoom_tv::startup_scene: "bluejay-clock"
profile::pi::service::divoom_tv::reduced_motion: false
|
||||||
149
apps/fc-divoom-tv-pi/puppet/profile/pi/service/divoom_tv.pp
Normal file
149
apps/fc-divoom-tv-pi/puppet/profile/pi/service/divoom_tv.pp
Normal file
@@ -0,0 +1,149 @@
|
|||||||
|
# Drop into FlowerCore.Puppet site-modules/profile/manifests/pi/service/divoom_tv.pp.
# Static files come from profile/pi/fc_divoom_tv/ after this bluejay-infra
# bundle is vendored into the Puppet control repo.
#
# @summary Installs the FlowerCore Divoom TV HDMI renderer units, scripts, udev
#   rule and environment file, and manages the renderer service.
#
# Behaviour by branch:
#   - safe-account host: emits a notify only.
#   - ensure => 'absent': stops/disables the renderer, removes units, scripts,
#     udev rule and env file, then reloads systemd and udev.
#   - ensure => 'present': Debian-family only; installs packages, the service
#     account, directories, env file, scripts, units and udev rule, and manages
#     the service. The renderer is restarted when its unit, env file or launch
#     script changes (only if it is currently running).
#
# NOTE(review): the unit files are static, so User=/Group=, ConditionPathExists=
# and ReadWritePaths= in flowercore-divoom-tv.service keep their built-in
# values. $user, $group, $install_dir, $state_dir and $log_dir should therefore
# stay at their defaults unless the unit is overridden as well.
#
# @param ensure 'present' installs the appliance, 'absent' removes it.
# @param service_enabled Whether the renderer unit is enabled at boot.
# @param service_ensure Desired run state of the renderer unit.
# @param service_name systemd unit name (without the .service suffix).
# @param user System account that owns the payload/state/log directories.
# @param group Primary group of $user.
# @param install_dir Payload directory; exported to the scripts as
#   FC_DIVOOM_TV_BIN=<install_dir>/FlowerCore.Divoom.Tv.
# @param state_dir Runtime state directory; exported as FC_DIVOOM_TV_STATE_DIR.
# @param log_dir Log directory; exported as FC_DIVOOM_TV_LOG_DIR.
# @param presentation_mode Exported as FC_DIVOOM_TV_PRESENTATION_MODE.
# @param startup_scene Exported as FC_DIVOOM_TV_START_SCENE.
# @param reduced_motion Exported as FC_DIVOOM_TV_REDUCED_MOTION.
class profile::pi::service::divoom_tv (
  Enum['present', 'absent']  $ensure            = 'present',
  Boolean                    $service_enabled   = false,
  Enum['running', 'stopped'] $service_ensure    = 'stopped',
  String                     $service_name      = 'flowercore-divoom-tv',
  String                     $user              = 'fc-divoom-tv',
  String                     $group             = 'fc-divoom-tv',
  String                     $install_dir       = '/opt/flowercore/divoom-tv',
  String                     $state_dir         = '/var/lib/fc-divoom-tv',
  String                     $log_dir           = '/var/log/fc-divoom-tv',
  String                     $presentation_mode = 'PillarboxSquare',
  String                     $startup_scene     = 'bluejay-clock',
  Boolean                    $reduced_motion    = false,
) {
  include profile::workstation::safe_account_exclusion

  $safe_account = $profile::workstation::safe_account_exclusion::safe_account

  $unit_path        = "/etc/systemd/system/${service_name}.service"
  $hdmi_unit_path   = "/etc/systemd/system/${service_name}-hdmi.service"
  $udev_rule_path   = '/etc/udev/rules.d/99-flowercore-divoom-tv-hdmi.rules'
  $env_path         = '/etc/flowercore/divoom-tv.env'
  $prelaunch_script = '/usr/local/bin/flowercore-divoom-tv-prelaunch.sh'
  $launch_script    = '/usr/local/bin/flowercore-divoom-tv-launch.sh'
  $hdmi_script      = '/usr/local/bin/flowercore-divoom-tv-hdmi-respond.sh'

  if $safe_account {
    notify { 'fc-divoom-tv safe-account exclusion':
      message => 'SAFE-ACCOUNT-EXCLUSION: Divoom TV Pi profile refused to apply on operator workstation',
    }
  } elsif $ensure == 'absent' {
    service { $service_name:
      ensure => stopped,
      enable => false,
    }

    # Stop the renderer first, then drop the units and tell systemd to forget
    # them; removing unit files without a daemon-reload leaves stale units.
    file { [$unit_path, $hdmi_unit_path]:
      ensure  => absent,
      require => Service[$service_name],
      notify  => Exec['fc-divoom-tv-systemd-reload'],
    }

    # Reload udev so the removed hotplug rule stops firing.
    file { $udev_rule_path:
      ensure => absent,
      notify => Exec['fc-divoom-tv-udev-reload'],
    }

    file { [$prelaunch_script, $launch_script, $hdmi_script, $env_path]:
      ensure  => absent,
      require => Service[$service_name],
    }

    exec { 'fc-divoom-tv-systemd-reload':
      command     => '/usr/bin/systemctl daemon-reload',
      refreshonly => true,
      path        => ['/usr/bin', '/bin'],
    }

    exec { 'fc-divoom-tv-udev-reload':
      command     => '/usr/bin/udevadm control --reload-rules',
      refreshonly => true,
      path        => ['/usr/bin', '/bin'],
    }
  } else {
    case $facts['os']['family'] {
      'Debian': {}
      default: { fail("profile::pi::service::divoom_tv only supports Debian-family OS, got ${facts['os']['family']}") }
    }

    package { ['cage', 'libgbm1', 'libdrm2', 'libxkbcommon0', 'fonts-dejavu-core']:
      ensure => installed,
    }

    group { $group:
      ensure => present,
      system => true,
    }

    user { $user:
      ensure     => present,
      system     => true,
      gid        => $group,
      home       => $state_dir,
      managehome => false,
      shell      => '/usr/sbin/nologin',
      require    => Group[$group],
    }

    file { [$install_dir, $state_dir, $log_dir]:
      ensure => directory,
      owner  => $user,
      group  => $group,
      mode   => '0755',
    }

    # /etc/flowercore is a shared configuration directory holding a root-owned
    # env file; keep it root-owned instead of handing it to the app account.
    file { '/etc/flowercore':
      ensure => directory,
      owner  => 'root',
      group  => 'root',
      mode   => '0755',
    }

    # The prelaunch/launch scripts read FC_DIVOOM_TV_BIN, _STATE_DIR and
    # _LOG_DIR; export them so the class parameters actually reach the scripts.
    $env_lines = [
      "FC_DIVOOM_TV_BIN=${install_dir}/FlowerCore.Divoom.Tv",
      "FC_DIVOOM_TV_STATE_DIR=${state_dir}",
      "FC_DIVOOM_TV_LOG_DIR=${log_dir}",
      "FC_DIVOOM_TV_PRESENTATION_MODE=${presentation_mode}",
      "FC_DIVOOM_TV_START_SCENE=${startup_scene}",
      "FC_DIVOOM_TV_REDUCED_MOTION=${reduced_motion}",
    ]
    $env_body = join($env_lines, "\n")

    file { $env_path:
      ensure  => file,
      owner   => 'root',
      group   => 'root',
      mode    => '0644',
      content => "${env_body}\n",
      require => File['/etc/flowercore'],
    }

    # Destination path => module file source for the helper scripts.
    $script_map = {
      $prelaunch_script => 'profile/pi/fc_divoom_tv/flowercore-divoom-tv-prelaunch.sh',
      $launch_script    => 'profile/pi/fc_divoom_tv/flowercore-divoom-tv-launch.sh',
      $hdmi_script      => 'profile/pi/fc_divoom_tv/flowercore-divoom-tv-hdmi-respond.sh',
    }

    $script_map.each |$dest, $src| {
      file { $dest:
        ensure => file,
        owner  => 'root',
        group  => 'root',
        mode   => '0755',
        source => "puppet:///modules/${src}",
      }
    }

    # Destination path => module file source for the systemd units.
    $unit_map = {
      $unit_path      => 'profile/pi/fc_divoom_tv/flowercore-divoom-tv.service',
      $hdmi_unit_path => 'profile/pi/fc_divoom_tv/flowercore-divoom-tv-hdmi.service',
    }

    $unit_map.each |$dest, $src| {
      file { $dest:
        ensure => file,
        owner  => 'root',
        group  => 'root',
        mode   => '0644',
        source => "puppet:///modules/${src}",
        notify => Exec['fc-divoom-tv-systemd-reload'],
      }
    }

    file { $udev_rule_path:
      ensure => file,
      owner  => 'root',
      group  => 'root',
      mode   => '0644',
      source => 'puppet:///modules/profile/pi/fc_divoom_tv/99-flowercore-divoom-tv-hdmi.rules',
      notify => Exec['fc-divoom-tv-udev-reload'],
    }

    exec { 'fc-divoom-tv-systemd-reload':
      command     => '/usr/bin/systemctl daemon-reload',
      refreshonly => true,
      path        => ['/usr/bin', '/bin'],
    }

    exec { 'fc-divoom-tv-udev-reload':
      command     => '/usr/bin/udevadm control --reload-rules',
      refreshonly => true,
      path        => ['/usr/bin', '/bin'],
    }

    # subscribe: pick up a changed unit, env file or launcher by restarting the
    # renderer (Puppet only refreshes a service that is currently running).
    # The daemon-reload exec is required so it runs before any (re)start.
    service { $service_name:
      ensure    => $service_ensure,
      enable    => $service_enabled,
      require   => [
        User[$user],
        File[$state_dir],
        File[$log_dir],
        File[$prelaunch_script],
        Exec['fc-divoom-tv-systemd-reload'],
      ],
      subscribe => [
        File[$unit_path],
        File[$env_path],
        File[$launch_script],
      ],
    }
  }
}
|
||||||
@@ -0,0 +1,5 @@
|
|||||||
|
#!/usr/bin/env bash
# HDMI hotplug responder: waits 2 seconds, then restarts the renderer service.
#
# `systemctl restart` also STARTS a unit that is stopped, so a hotplug event
# would bring up a renderer that was deliberately left stopped and disabled.
# The restart is therefore skipped unless the renderer is enabled or already
# running; an enabled unit that has failed is still revived.
set -euo pipefail

readonly RENDERER_UNIT='flowercore-divoom-tv.service'

if ! systemctl is-enabled --quiet "${RENDERER_UNIT}" \
  && ! systemctl is-active --quiet "${RENDERER_UNIT}"; then
  echo "[$(date -Is)] ${RENDERER_UNIT} is disabled and not running; ignoring HDMI hotplug" >&2
  exit 0
fi

# Settle delay before the renderer is restarted.
sleep 2
systemctl restart "${RENDERER_UNIT}"
|
||||||
25
apps/fc-divoom-tv-pi/scripts/flowercore-divoom-tv-launch.sh
Normal file
25
apps/fc-divoom-tv-pi/scripts/flowercore-divoom-tv-launch.sh
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
#!/usr/bin/env bash
# Fullscreen launcher for FlowerCore.Divoom.Tv.
#
# Runs the renderer inside the `cage` compositor when cage is on PATH and
# otherwise executes the renderer directly. Every setting can be overridden
# through the FC_DIVOOM_TV_* environment variables; extra command-line
# arguments are appended after the built-in ones.
set -euo pipefail

renderer_bin="${FC_DIVOOM_TV_BIN:-/opt/flowercore/divoom-tv/FlowerCore.Divoom.Tv}"
state_path="${FC_DIVOOM_TV_STATE_DIR:-/var/lib/fc-divoom-tv}"
log_path="${FC_DIVOOM_TV_LOG_DIR:-/var/log/fc-divoom-tv}"
mode="${FC_DIVOOM_TV_PRESENTATION_MODE:-PillarboxSquare}"
scene="${FC_DIVOOM_TV_START_SCENE:-bluejay-clock}"
reduced="${FC_DIVOOM_TV_REDUCED_MOTION:-false}"

# Arguments shared by both launch paths.
renderer_args=(
  "--target=hdmi"
  "--presentation-mode=${mode}"
  "--startup-scene=${scene}"
  "--reduced-motion=${reduced}"
  "--state-dir=${state_path}"
  "--log-dir=${log_path}"
)

# Fallback path first: no compositor available, run the renderer directly.
if ! command -v cage >/dev/null 2>&1; then
  echo "[$(date -Is)] cage not found; launching FlowerCore.Divoom.Tv directly" >&2
  exec "${renderer_bin}" "${renderer_args[@]}" "$@"
fi

# Preferred path: cage hosts the renderer.
exec cage -- "${renderer_bin}" "${renderer_args[@]}" "$@"
|
||||||
@@ -0,0 +1,23 @@
|
|||||||
|
#!/usr/bin/env bash
# Preflight for the Divoom TV renderer.
#
# Creates the state and log directories, fails when the renderer binary is not
# executable, and logs (without failing) when no HDMI connector is visible or
# cage is not installed.
set -euo pipefail

renderer_bin="${FC_DIVOOM_TV_BIN:-/opt/flowercore/divoom-tv/FlowerCore.Divoom.Tv}"
state_path="${FC_DIVOOM_TV_STATE_DIR:-/var/lib/fc-divoom-tv}"
log_path="${FC_DIVOOM_TV_LOG_DIR:-/var/log/fc-divoom-tv}"

mkdir -p "${state_path}" "${log_path}"

# Hard requirement: the payload must exist and be executable.
if [[ ! -x "${renderer_bin}" ]]; then
  echo "[$(date -Is)] missing executable ${renderer_bin}" >&2
  exit 1
fi

# Soft check: warn when /sys/class/drm exists but lists no HDMI-A connector.
if [[ -d /sys/class/drm ]]; then
  if ! find /sys/class/drm -maxdepth 1 -name 'card*-HDMI-A-*' -print -quit | grep -q .; then
    echo "[$(date -Is)] no HDMI connector visible yet; continuing so the app can wait for display" >&2
  fi
fi

# Informational: report which launch path the launcher will take.
if ! command -v cage >/dev/null 2>&1; then
  echo "[$(date -Is)] cage not installed; direct launch fallback will be used" >&2
else
  echo "[$(date -Is)] cage available for fullscreen Wayland launch"
fi
|
||||||
@@ -0,0 +1,2 @@
|
|||||||
|
# Start the HDMI hotplug responder (which settles DRM for 2s and then restarts
# the fullscreen Avalonia renderer) whenever a DRM hotplug event arrives.
#
# The kernel emits DRM hotplug "change" uevents (HOTPLUG=1) for the card device
# (e.g. card0), not for the per-connector sysfs nodes (card0-HDMI-A-1), so the
# match must accept the card name; "card[0-9]*" covers both forms.
# --no-block keeps the udev worker from waiting on the responder's sleep.
SUBSYSTEM=="drm", KERNEL=="card[0-9]*", ACTION=="change", ENV{HOTPLUG}=="1", RUN+="/usr/bin/systemctl --no-block start flowercore-divoom-tv-hdmi.service"
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
# One-shot unit that runs the HDMI hotplug responder script once per start
# request. It has no [Install] section, so it only runs when started explicitly.
[Unit]
Description=FlowerCore Divoom TV HDMI hotplug responder
# No implicit dependencies on basic.target / shutdown ordering.
DefaultDependencies=no

[Service]
# oneshot: the unit is "active" only while the script runs, so it can be
# started again for the next event.
Type=oneshot
ExecStart=/usr/local/bin/flowercore-divoom-tv-hdmi-respond.sh
|
||||||
40
apps/fc-divoom-tv-pi/systemd/flowercore-divoom-tv.service
Normal file
40
apps/fc-divoom-tv-pi/systemd/flowercore-divoom-tv.service
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
# FlowerCore Divoom TV HDMI renderer: runs the launch script as the
# fc-divoom-tv user on tty1, restarts it on exit, and sandboxes it with
# ProtectSystem=strict plus an explicit list of writable paths.

[Unit]
Description=FlowerCore Divoom TV HDMI Renderer (Avalonia fullscreen)
Documentation=https://github.com/astoltz/FlowerCore.Notes/blob/master/docs/standards/divoom-tv-hdmi-multitarget-render-substrate.md
Wants=network-online.target
After=network-online.target systemd-user-sessions.service
# Skip the unit (without failing) when the renderer is not installed.
ConditionPathExists=/opt/flowercore/divoom-tv/FlowerCore.Divoom.Tv
# Start-rate limiting is a [Unit] setting. StartLimitIntervalSec= is not
# recognised in [Service], so it was previously ignored and Restart=always
# could loop forever. Here it caps the unit at 5 starts per 300s.
StartLimitIntervalSec=300s
StartLimitBurst=5

[Service]
Type=simple
User=fc-divoom-tv
Group=fc-divoom-tv
WorkingDirectory=/opt/flowercore/divoom-tv
# Leading "-" makes the environment file optional.
EnvironmentFile=-/etc/flowercore/divoom-tv.env
Environment=DOTNET_CLI_TELEMETRY_OPTOUT=1
# XDG_RUNTIME_DIR points at the RuntimeDirectory created below.
Environment=XDG_RUNTIME_DIR=/run/fc-divoom-tv
RuntimeDirectory=fc-divoom-tv
RuntimeDirectoryMode=0700
ExecStartPre=/usr/local/bin/flowercore-divoom-tv-prelaunch.sh
ExecStart=/usr/local/bin/flowercore-divoom-tv-launch.sh
Restart=always
RestartSec=10s
MemoryMax=2G
MemoryHigh=1500M
PrivateTmp=true
NoNewPrivileges=true
ProtectSystem=strict
ProtectHome=true
# With ProtectSystem=strict the file system is read-only except these paths.
ReadWritePaths=/var/lib/fc-divoom-tv /var/log/fc-divoom-tv /run/fc-divoom-tv
# Attach stdin to tty1; output goes to the journal.
TTYPath=/dev/tty1
StandardInput=tty
StandardOutput=journal
StandardError=journal
TTYReset=yes
TTYVHangup=yes
TTYVTDisallocate=yes

[Install]
WantedBy=graphical.target
|
||||||
169
apps/fc-library/fc-library.yaml
Normal file
169
apps/fc-library/fc-library.yaml
Normal file
@@ -0,0 +1,169 @@
|
|||||||
|
# FlowerCore.Library.Web GitOps adoption manifest.
|
||||||
|
#
|
||||||
|
# Authored from the already-live fc-library resources on 2026-06-04.
|
||||||
|
# Keep the live image tag, Service ClusterIP, and PVC volumeName unchanged so
|
||||||
|
# ArgoCD adopts in place instead of replacing the workload or data volume.
|
||||||
|
---
|
||||||
|
# Data volume claim for library-web, adopted from the live cluster.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  namespace: fc-library
  name: library-web-data
  labels:
    app.kubernetes.io/managed-by: argocd
    app.kubernetes.io/name: library-web
    app.kubernetes.io/part-of: flowercore
    argocd.argoproj.io/instance: infra-fc-library
spec:
  storageClassName: longhorn
  volumeMode: Filesystem
  # Keep the live volumeName so the claim stays bound to the existing volume.
  volumeName: pvc-2690bae2-4ee0-417a-b95f-50ec5c632b63
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
|
||||||
|
---
|
||||||
|
# library-web workload, adopted from the live cluster. Field values mirror the
# live object; only the key order differs.
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: fc-library
  name: library-web
  labels:
    app.kubernetes.io/managed-by: argocd
    app.kubernetes.io/name: library-web
    app.kubernetes.io/part-of: flowercore
    argocd.argoproj.io/instance: infra-fc-library
spec:
  replicas: 1
  revisionHistoryLimit: 3
  progressDeadlineSeconds: 600
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app.kubernetes.io/name: library-web
  template:
    metadata:
      labels:
        app.kubernetes.io/name: library-web
        app.kubernetes.io/part-of: flowercore
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "5000"
        prometheus.io/path: /metrics/prometheus
    spec:
      dnsPolicy: ClusterFirst
      restartPolicy: Always
      schedulerName: default-scheduler
      securityContext: {}
      terminationGracePeriodSeconds: 30
      containers:
        - name: library-web
          # Keep the live image tag unchanged so adoption does not roll the pod.
          image: localhost/fc-library-web:v20260602-library-owned-deploy-fix1
          imagePullPolicy: Never
          envFrom:
            - configMapRef:
                name: library-web-config
          ports:
            - name: http
              containerPort: 5000
              protocol: TCP
          resources: {}
          readinessProbe:
            httpGet:
              scheme: HTTP
              path: /health
              port: 5000
            initialDelaySeconds: 10
            periodSeconds: 10
            timeoutSeconds: 5
            successThreshold: 1
            failureThreshold: 6
          livenessProbe:
            httpGet:
              scheme: HTTP
              path: /health
              port: 5000
            initialDelaySeconds: 30
            periodSeconds: 30
            timeoutSeconds: 5
            successThreshold: 1
            failureThreshold: 3
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: File
          volumeMounts:
            - name: data
              mountPath: /data
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: library-web-data
|
||||||
|
---
|
||||||
|
# ClusterIP Service for library-web: port 80 forwards to container port 5000.
apiVersion: v1
kind: Service
metadata:
  namespace: fc-library
  name: library-web
  labels:
    app.kubernetes.io/managed-by: argocd
    app.kubernetes.io/name: library-web
    app.kubernetes.io/part-of: flowercore
    argocd.argoproj.io/instance: infra-fc-library
spec:
  type: ClusterIP
  # Keep the live ClusterIP so the Service is adopted in place.
  clusterIP: 10.43.179.63
  clusterIPs:
    - 10.43.179.63
  ipFamilyPolicy: SingleStack
  ipFamilies:
    - IPv4
  internalTrafficPolicy: Cluster
  sessionAffinity: None
  selector:
    app.kubernetes.io/name: library-web
  ports:
    - name: http
      protocol: TCP
      port: 80
      targetPort: 5000
|
||||||
|
---
|
||||||
|
# TLS certificate for library.iamworkin.lan, issued by the step-ca-acme
# ClusterIssuer and stored in the library-web-tls Secret.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  namespace: fc-library
  name: library-web-tls
  labels:
    app.kubernetes.io/managed-by: argocd
    app.kubernetes.io/name: library-web-tls
    app.kubernetes.io/part-of: flowercore
    argocd.argoproj.io/instance: infra-fc-library
spec:
  secretName: library-web-tls
  issuerRef:
    name: step-ca-acme
    kind: ClusterIssuer
  dnsNames:
    - library.iamworkin.lan
|
||||||
|
---
|
||||||
|
# Traefik route: library.iamworkin.lan on the websecure entry point goes to the
# library-web Service on port 80, using the library-web-tls Secret for TLS.
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
  namespace: fc-library
  name: library-web
  labels:
    app.kubernetes.io/managed-by: argocd
    app.kubernetes.io/name: library-web
    app.kubernetes.io/part-of: flowercore
    argocd.argoproj.io/instance: infra-fc-library
spec:
  entryPoints:
    - websecure
  tls:
    secretName: library-web-tls
  routes:
    - match: Host(`library.iamworkin.lan`)
      kind: Rule
      services:
        - name: library-web
          port: 80
|
||||||
@@ -87,6 +87,20 @@ spec:
|
|||||||
prometheus.io/port: "8080"
|
prometheus.io/port: "8080"
|
||||||
prometheus.io/path: "/metrics"
|
prometheus.io/path: "/metrics"
|
||||||
spec:
|
spec:
|
||||||
|
# Use an explicit DNS policy so external FQDNs like api.anthropic.com are
|
||||||
|
# resolved directly instead of being expanded through the cluster search
|
||||||
|
# path that includes iamworkin.lan.
|
||||||
|
dnsPolicy: None
|
||||||
|
dnsConfig:
|
||||||
|
nameservers:
|
||||||
|
- 10.43.0.10
|
||||||
|
searches:
|
||||||
|
- fc-llm-bridge.svc.cluster.local
|
||||||
|
- svc.cluster.local
|
||||||
|
- cluster.local
|
||||||
|
options:
|
||||||
|
- name: ndots
|
||||||
|
value: "2"
|
||||||
securityContext:
|
securityContext:
|
||||||
fsGroup: 1654
|
fsGroup: 1654
|
||||||
fsGroupChangePolicy: OnRootMismatch
|
fsGroupChangePolicy: OnRootMismatch
|
||||||
@@ -97,7 +111,7 @@ spec:
|
|||||||
# dotnet.exe publish -c Release -o deploy/app \
|
# dotnet.exe publish -c Release -o deploy/app \
|
||||||
# src/FlowerCore.LlmBridge.Web/FlowerCore.LlmBridge.Web.csproj
|
# src/FlowerCore.LlmBridge.Web/FlowerCore.LlmBridge.Web.csproj
|
||||||
# podman build -t localhost/fc-llm-bridge:v<tag> -f deploy/Dockerfile.deploy deploy
|
# podman build -t localhost/fc-llm-bridge:v<tag> -f deploy/Dockerfile.deploy deploy
|
||||||
image: localhost/fc-llm-bridge:v202604231520
|
image: localhost/fc-llm-bridge:v202604300022
|
||||||
imagePullPolicy: Never
|
imagePullPolicy: Never
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 8080
|
- containerPort: 8080
|
||||||
@@ -116,6 +130,10 @@ spec:
|
|||||||
value: "default"
|
value: "default"
|
||||||
- name: FlowerCore__LlmBridge__DefaultAppName
|
- name: FlowerCore__LlmBridge__DefaultAppName
|
||||||
value: "agent-zero"
|
value: "agent-zero"
|
||||||
|
- name: FlowerCore__LlmBridge__UtilModel
|
||||||
|
value: "qwen2.5:1.5b"
|
||||||
|
- name: FlowerCore__LlmBridge__EmbedModel
|
||||||
|
value: "nomic-embed-text"
|
||||||
# Per-consumer API keys — from OnePasswordItem fc-llm-bridge-api-keys.
|
# Per-consumer API keys — from OnePasswordItem fc-llm-bridge-api-keys.
|
||||||
# Each field becomes a Secret key of the same name. The key-name
|
# Each field becomes a Secret key of the same name. The key-name
|
||||||
# lands in the auth principal's `fc.app` claim for ledger scoping.
|
# lands in the auth principal's `fc.app` claim for ledger scoping.
|
||||||
@@ -207,17 +225,6 @@ spec:
|
|||||||
port: 8080
|
port: 8080
|
||||||
initialDelaySeconds: 15
|
initialDelaySeconds: 15
|
||||||
periodSeconds: 30
|
periodSeconds: 30
|
||||||
# Lower ndots so external FQDNs like api.anthropic.com are tried BEFORE
|
|
||||||
# the ndots:5 default expands them through the cluster search path, which
|
|
||||||
# includes iamworkin.lan. CoreDNS has a `template IN A iamworkin.lan`
|
|
||||||
# wildcard that answers `api.anthropic.com.iamworkin.lan` with the
|
|
||||||
# Traefik VIP, which then serves a TRAEFIK-DEFAULT-CERT TLS cert and
|
|
||||||
# breaks egress to the real Anthropic API (memory:
|
|
||||||
# feedback_coredns_ndots_template_collision, generalized to external DNS).
|
|
||||||
dnsConfig:
|
|
||||||
options:
|
|
||||||
- name: ndots
|
|
||||||
value: "2"
|
|
||||||
volumes:
|
volumes:
|
||||||
- name: data
|
- name: data
|
||||||
persistentVolumeClaim:
|
persistentVolumeClaim:
|
||||||
|
|||||||
@@ -69,16 +69,14 @@ spec:
|
|||||||
memory: "512Mi"
|
memory: "512Mi"
|
||||||
cpu: "500m"
|
cpu: "500m"
|
||||||
livenessProbe:
|
livenessProbe:
|
||||||
httpGet:
|
tcpSocket:
|
||||||
path: /health
|
|
||||||
port: 8080
|
port: 8080
|
||||||
initialDelaySeconds: 10
|
initialDelaySeconds: 10
|
||||||
periodSeconds: 30
|
periodSeconds: 30
|
||||||
timeoutSeconds: 5
|
timeoutSeconds: 5
|
||||||
failureThreshold: 3
|
failureThreshold: 3
|
||||||
readinessProbe:
|
readinessProbe:
|
||||||
httpGet:
|
tcpSocket:
|
||||||
path: /health
|
|
||||||
port: 8080
|
port: 8080
|
||||||
initialDelaySeconds: 10
|
initialDelaySeconds: 10
|
||||||
periodSeconds: 10
|
periodSeconds: 10
|
||||||
|
|||||||
171
apps/fc-redis/fc-redis.yaml
Normal file
171
apps/fc-redis/fc-redis.yaml
Normal file
@@ -0,0 +1,171 @@
|
|||||||
|
# fc-redis — SignalR backplane for cross-product event bus
|
||||||
|
#
|
||||||
|
# Lands per Q-SO-1 resolution (2026-05-11 PM): SignalR backplane in Phase A,
|
||||||
|
# not Phase C as originally drafted. Operator directive: "Redis can be
|
||||||
|
# deployed just fine as it's another FlowerCore technology we'll want to
|
||||||
|
# manage."
|
||||||
|
#
|
||||||
|
# Phase A scope (this file):
|
||||||
|
# - Single Redis 7.x Alpine pod
|
||||||
|
# - 1Gi Longhorn RWO PVC for AOF persistence
|
||||||
|
# - ClusterIP Service at `redis.fc-redis.svc.cluster.local:6379`
|
||||||
|
# - No AUTH (in-cluster only; not exposed externally)
|
||||||
|
# - No IngressRoute (backplane is server-to-server only)
|
||||||
|
#
|
||||||
|
# Consumers (Phase A IMPL across FC services):
|
||||||
|
# - FlowerCore.Signage.Web (OpsConsoleHub)
|
||||||
|
# - FlowerCore.Scoreboard.Web (ScoreboardHub)
|
||||||
|
# - FlowerCore.SignalControl.Web
|
||||||
|
# - FlowerCore.DMS.Web
|
||||||
|
# - Any other product joining the cross-product event bus
|
||||||
|
#
|
||||||
|
# Each consumer adds:
|
||||||
|
# services.AddSignalR()
|
||||||
|
# .AddStackExchangeRedis(
|
||||||
|
# "redis.fc-redis.svc.cluster.local:6379",
|
||||||
|
# opts => opts.Configuration.ChannelPrefix =
|
||||||
|
# StackExchange.Redis.RedisChannel.Literal("fc-opsconsole"));
|
||||||
|
#
|
||||||
|
# Phase B / C follow-ons (out of scope here):
|
||||||
|
# - Redis Sentinel for HA (3-node)
|
||||||
|
# - AUTH password from 1Password Connect (rotate via /rotate-password)
|
||||||
|
# - redis_exporter sidecar for Prometheus scrape
|
||||||
|
# - Network policies restricting which namespaces can dial 6379
|
||||||
|
#
|
||||||
|
# Design: docs/signage/operations-console-phase-2-design.md §3.5
|
||||||
|
# Decision: Q-SO-1 (RESOLVED 2026-05-11 PM)
|
||||||
|
# Memory: feedback_blooming_ui_pattern_no_iframes
|
||||||
|
---
|
||||||
|
# Namespace that holds the fc-redis resources declared in this file.
apiVersion: v1
kind: Namespace
metadata:
  name: fc-redis
  labels:
    app.kubernetes.io/managed-by: argocd
    app.kubernetes.io/part-of: flowercore
|
||||||
|
---
|
||||||
|
# 1Gi ReadWriteOnce Longhorn claim mounted by the Redis pod at /data.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  namespace: fc-redis
  name: fc-redis-data
spec:
  storageClassName: longhorn
  resources:
    requests:
      storage: 1Gi
  accessModes:
    - ReadWriteOnce
|
||||||
|
---
|
||||||
|
# redis.conf for the fc-redis pod; the Deployment mounts this ConfigMap at
# /etc/redis and starts redis-server with /etc/redis/redis.conf.
apiVersion: v1
kind: ConfigMap
metadata:
  namespace: fc-redis
  name: fc-redis-config
data:
  redis.conf: |
    # Phase A — minimal config; no AUTH, no replication.
    bind 0.0.0.0
    protected-mode no
    port 6379
    tcp-backlog 511
    timeout 0
    tcp-keepalive 300

    # Persistence: AOF (fsync every second is the standard SignalR-backplane
    # durability sweet spot — the backplane only needs to survive Redis
    # restarts, not absolute zero loss).
    appendonly yes
    appendfsync everysec
    auto-aof-rewrite-percentage 100
    auto-aof-rewrite-min-size 64mb

    # Reasonable defaults — let Redis pick most things.
    maxmemory-policy allkeys-lru
    maxmemory 256mb

    # Logging
    loglevel notice
|
||||||
|
---
|
||||||
|
# Single-replica Redis pod: config from the fc-redis-config ConfigMap, data on
# the fc-redis-data claim.
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: fc-redis
  name: fc-redis
  labels:
    app: fc-redis
spec:
  replicas: 1
  selector:
    matchLabels:
      app: fc-redis
  # The data claim is ReadWriteOnce, so the old pod is removed before the new
  # one starts instead of rolling.
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: fc-redis
    spec:
      securityContext:
        runAsNonRoot: true
        # 999 is the uid this manifest documents as the redis:7-alpine default.
        runAsUser: 999
        runAsGroup: 999
        fsGroup: 999
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: fc-redis-data
        - name: config
          configMap:
            name: fc-redis-config
      containers:
        - name: redis
          image: redis:7-alpine
          imagePullPolicy: IfNotPresent
          command:
            - "redis-server"
            - "/etc/redis/redis.conf"
          ports:
            - containerPort: 6379
              name: redis
          securityContext:
            readOnlyRootFilesystem: true
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
          resources:
            limits:
              cpu: "500m"
              memory: "384Mi"
            requests:
              cpu: "50m"
              memory: "128Mi"
          volumeMounts:
            - mountPath: /data
              name: data
            - mountPath: /etc/redis
              name: config
              readOnly: true
          # Readiness runs redis-cli ping; liveness only checks the TCP port.
          readinessProbe:
            exec:
              command:
                - "redis-cli"
                - "ping"
            initialDelaySeconds: 2
            periodSeconds: 5
          livenessProbe:
            tcpSocket:
              port: 6379
            initialDelaySeconds: 5
            periodSeconds: 10
|
||||||
|
---
|
||||||
|
# In-cluster endpoint for the Redis pod on port 6379.
apiVersion: v1
kind: Service
metadata:
  namespace: fc-redis
  name: redis
spec:
  type: ClusterIP
  ports:
    - name: redis
      protocol: TCP
      port: 6379
      targetPort: 6379
  selector:
    app: fc-redis
|
||||||
170
apps/fc-retail/fc-retail.yaml
Normal file
170
apps/fc-retail/fc-retail.yaml
Normal file
@@ -0,0 +1,170 @@
|
|||||||
|
# FlowerCore.Retail.Web GitOps adoption manifest.
|
||||||
|
#
|
||||||
|
# Authored from the already-live fc-retail resources on 2026-06-04.
|
||||||
|
# Keep the live image tag, Service ClusterIP, and PVC volumeName unchanged so
|
||||||
|
# ArgoCD adopts in place instead of replacing the workload or data volume.
|
||||||
|
---
|
||||||
|
# Data volume claim for retail-web, adopted from the live cluster.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  namespace: fc-retail
  name: retail-web-data
  labels:
    app.kubernetes.io/managed-by: argocd
    app.kubernetes.io/name: retail-web
    app.kubernetes.io/part-of: flowercore
    argocd.argoproj.io/instance: infra-fc-retail
spec:
  storageClassName: longhorn
  volumeMode: Filesystem
  # Keep the live volumeName so the claim stays bound to the existing volume.
  volumeName: pvc-3d40b336-eab4-41b3-812c-d5e9413ce0ab
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
|
||||||
|
---
|
||||||
|
# retail-web workload, adopted from the live cluster. Field values mirror the
# live object; only the key order differs.
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: fc-retail
  name: retail-web
  labels:
    app.kubernetes.io/managed-by: argocd
    app.kubernetes.io/name: retail-web
    app.kubernetes.io/part-of: flowercore
    argocd.argoproj.io/instance: infra-fc-retail
spec:
  replicas: 1
  revisionHistoryLimit: 3
  progressDeadlineSeconds: 600
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app.kubernetes.io/name: retail-web
  template:
    metadata:
      labels:
        app.kubernetes.io/name: retail-web
        app.kubernetes.io/part-of: flowercore
      annotations:
        # Carried over from the live pod template; changing or removing it
        # alters the template and so triggers a new rollout.
        kubectl.kubernetes.io/restartedAt: "2026-06-02T01:34:08-05:00"
        prometheus.io/scrape: "true"
        prometheus.io/port: "5000"
        prometheus.io/path: /metrics/prometheus
    spec:
      dnsPolicy: ClusterFirst
      restartPolicy: Always
      schedulerName: default-scheduler
      securityContext: {}
      terminationGracePeriodSeconds: 30
      containers:
        - name: retail-web
          # Keep the live image tag unchanged so adoption does not roll the pod.
          image: localhost/fc-retail-web:v20260602-retail-owned-deploy-fix5
          imagePullPolicy: Never
          envFrom:
            - configMapRef:
                name: retail-web-config
          ports:
            - name: http
              containerPort: 5000
              protocol: TCP
          resources: {}
          readinessProbe:
            httpGet:
              scheme: HTTP
              path: /health
              port: 5000
            initialDelaySeconds: 10
            periodSeconds: 10
            timeoutSeconds: 5
            successThreshold: 1
            failureThreshold: 6
          livenessProbe:
            httpGet:
              scheme: HTTP
              path: /health
              port: 5000
            initialDelaySeconds: 30
            periodSeconds: 30
            timeoutSeconds: 5
            successThreshold: 1
            failureThreshold: 3
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: File
          volumeMounts:
            - name: data
              mountPath: /data
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: retail-web-data
|
||||||
|
---
|
||||||
|
# ClusterIP Service for retail-web: port 80 forwards to container port 5000.
apiVersion: v1
kind: Service
metadata:
  namespace: fc-retail
  name: retail-web
  labels:
    app.kubernetes.io/managed-by: argocd
    app.kubernetes.io/name: retail-web
    app.kubernetes.io/part-of: flowercore
    argocd.argoproj.io/instance: infra-fc-retail
spec:
  type: ClusterIP
  # Keep the live ClusterIP so the Service is adopted in place.
  clusterIP: 10.43.239.8
  clusterIPs:
    - 10.43.239.8
  ipFamilyPolicy: SingleStack
  ipFamilies:
    - IPv4
  internalTrafficPolicy: Cluster
  sessionAffinity: None
  selector:
    app.kubernetes.io/name: retail-web
  ports:
    - name: http
      protocol: TCP
      port: 80
      targetPort: 5000
|
||||||
|
---
|
||||||
|
# TLS certificate for retail.iamworkin.lan, issued by the step-ca-acme
# ClusterIssuer and stored in the retail-web-tls Secret.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  namespace: fc-retail
  name: retail-web-tls
  labels:
    app.kubernetes.io/managed-by: argocd
    app.kubernetes.io/name: retail-web-tls
    app.kubernetes.io/part-of: flowercore
    argocd.argoproj.io/instance: infra-fc-retail
spec:
  secretName: retail-web-tls
  issuerRef:
    name: step-ca-acme
    kind: ClusterIssuer
  dnsNames:
    - retail.iamworkin.lan
|
||||||
|
---
|
||||||
|
# Traefik route: retail.iamworkin.lan on the websecure entry point goes to the
# retail-web Service on port 80, using the retail-web-tls Secret for TLS.
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
  namespace: fc-retail
  name: retail-web
  labels:
    app.kubernetes.io/managed-by: argocd
    app.kubernetes.io/name: retail-web
    app.kubernetes.io/part-of: flowercore
    argocd.argoproj.io/instance: infra-fc-retail
spec:
  entryPoints:
    - websecure
  tls:
    secretName: retail-web-tls
  routes:
    - match: Host(`retail.iamworkin.lan`)
      kind: Rule
      services:
        - name: retail-web
          port: 80
|
||||||
14
apps/fc-signage-appletv/README.md
Normal file
14
apps/fc-signage-appletv/README.md
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
# fc-signage-appletv
|
||||||
|
|
||||||
|
Apple TV signage is a sealed appliance running the `FlowerCore.Signage.Agent.AppleTv` tvOS app per ADR-134.
|
||||||
|
|
||||||
|
This ApplicationSet entry is documentation and inventory metadata only. It intentionally creates no `Deployment`, `Service`, or `Pod`.
|
||||||
|
|
||||||
|
The Apple TV app connects outbound to existing FC.Signage.Web surfaces:
|
||||||
|
|
||||||
|
- `https://signage.iamworkin.lan/hub/signage` for SignalR live status.
|
||||||
|
- `GET /api/v1/nodes/{nodeId}/state` for the 30-second polling fallback.
|
||||||
|
- `POST /api/v1/nodes/register` and `POST /api/v1/nodes/{nodeId}/enroll` for pairing and mTLS enrollment.
|
||||||
|
- `POST /api/v1/nodes/{nodeId}/heartbeat` for metrics, current content identity, and local audit excerpts.
|
||||||
|
|
||||||
|
Distribution is via Apple Developer Enterprise Program or TestFlight plus FC.Distribution / UpdateCenter publishing once Apple credentials are available.
|
||||||
5
apps/fc-signage-appletv/kustomization.yaml
Normal file
5
apps/fc-signage-appletv/kustomization.yaml
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
# Kustomize entry point for the docs-only Apple TV signage app; it pulls in a
# single manifest.
kind: Kustomization
apiVersion: kustomize.config.k8s.io/v1beta1
resources:
  - manifest.yaml
|
||||||
26
apps/fc-signage-appletv/manifest.yaml
Normal file
26
apps/fc-signage-appletv/manifest.yaml
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
# Apple TV signage is a sealed tvOS appliance. This ArgoCD app intentionally
|
||||||
|
# carries documentation metadata only; no Deployment, Service, or Pod resources
|
||||||
|
# are created for the player.
|
||||||
|
---
|
||||||
|
# Docs-only ConfigMap: it carries a README describing the Apple TV signage app
# and defines no workload fields.
apiVersion: v1
kind: ConfigMap
metadata:
  name: fc-signage-appletv-docs
  namespace: fc-signage
  labels:
    app.kubernetes.io/name: fc-signage-appletv
    app.kubernetes.io/part-of: flowercore-signage
    flowercore.io/manifest-kind: docs-only
data:
  # The endpoint list is kept in step with the app's README.md, which also
  # lists the registration call used for pairing.
  README: |
    FlowerCore.Signage.Agent.AppleTv is distributed through Apple Developer
    Enterprise Program or TestFlight, not Kubernetes.

    The app connects outbound to FC.Signage.Web:
    - SignalR: https://signage.iamworkin.lan/hub/signage
    - Polling fallback: GET /api/v1/nodes/{nodeId}/state
    - Registration: POST /api/v1/nodes/register
    - Enrollment: POST /api/v1/nodes/{nodeId}/enroll
    - Heartbeat: POST /api/v1/nodes/{nodeId}/heartbeat

    This placeholder gives ArgoCD and inventory dashboards a first-class
    Apple TV signage app entry without creating runtime pods.
|
||||||
17
apps/fc-signage-pi-player/README.md
Normal file
17
apps/fc-signage-pi-player/README.md
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
# FlowerCore Signage Pi Player
|
||||||
|
|
||||||
|
Phase 1 Raspberry Pi signage player packaging for Chromium kiosk deployments.
|
||||||
|
This bundle is intentionally air-gap friendly: systemd units, shell scripts,
|
||||||
|
udev rules, and Chromium managed policy are all checked into the repo and are
|
||||||
|
installed by `FlowerCore.Puppet`.
|
||||||
|
|
||||||
|
## Scope
|
||||||
|
|
||||||
|
- Bootstrap a stable node identity and mTLS client certificate.
|
||||||
|
- Launch Chromium in kiosk mode against `FC.Signage.Web` player routes.
|
||||||
|
- Restart the kiosk on HDMI hotplug.
|
||||||
|
- Check mTLS certificates daily and renew them when fewer than 30 days of validity remain.
|
||||||
|
- Detect display capabilities at boot, daily, and on HDMI hotplug.
|
||||||
|
|
||||||
|
Phase 2 native Avalonia rendering is documented separately in Notes and remains
|
||||||
|
deferred.
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
{
  "AutofillAddressEnabled": false,
  "AutofillCreditCardEnabled": false,
  "BackgroundModeEnabled": false,
  "BrowserSignin": 0,
  "CommandLineFlagSecurityWarningsEnabled": false,
  "DefaultBrowserSettingEnabled": false,
  "DefaultNotificationsSetting": 2,
  "DefaultPopupsSetting": 2,
  "ExtensionInstallBlocklist": [
    "*"
  ],
  "MetricsReportingEnabled": false,
  "PasswordManagerEnabled": false,
  "PromotionalTabsEnabled": false,
  "SafeBrowsingProtectionLevel": 0
}
|
||||||
132
apps/fc-signage-pi-player/scripts/fc-signage-detect-display
Normal file
132
apps/fc-signage-pi-player/scripts/fc-signage-detect-display
Normal file
@@ -0,0 +1,132 @@
|
|||||||
|
#!/usr/bin/env bash
# fc-signage-detect-display: probe the primary HDMI display and declare its
# capability profile to FC.Signage.Web.
#
# Steps:
#   1. Read the node id from $NODE_JSON.
#   2. Collect connected HDMI connectors from /sys/class/drm.
#   3. Parse the first connector's EDID with edid-decode (when installed) and
#      fall back to kmsprint for the resolution.
#   4. POST the profile to each candidate endpoint until one answers
#      200/201/204.
#   5. Append the outcome to capabilities.log and print the profile on stdout.
#
# Environment:
#   FC_SIGNAGE_URL        Base URL of FC.Signage.Web
#                         (default https://signage.iamworkin.lan).
#   FC_SIGNAGE_CA_BUNDLE  Optional CA bundle. When set, the server certificate
#                         is verified against it; when unset the previous
#                         behaviour (no server verification) is kept.
#
# Exit status: non-zero when the node id cannot be read; otherwise 0 even if no
# endpoint accepted the profile (that case is only logged).
set -euo pipefail

NODE_JSON="/etc/flowercore/signage-node.json"
CERT_DIR="/etc/fc-signage-player"
SIGNAGE_URL="${FC_SIGNAGE_URL:-https://signage.iamworkin.lan}"
CA_BUNDLE="${FC_SIGNAGE_CA_BUNDLE:-}"
LOG_DIR="/var/log/fc-signage-player"
LOG_FILE="${LOG_DIR}/capabilities.log"

# `jq -r '.nodeId'` prints the literal string "null" when the key is absent,
# which would be posted to .../nodes/null/...; treat a missing id as an error.
NODE_ID=$(jq -r '.nodeId // empty' "$NODE_JSON")
if [[ -z "$NODE_ID" ]]; then
  echo "[$(date -Is)] capability declare: no nodeId in ${NODE_JSON}" >&2
  exit 1
fi

# Connected HDMI connectors, in glob order. When the glob matches nothing the
# literal pattern is iterated once and skipped by the -e test.
CONNECTORS=()
for dir in /sys/class/drm/card*-HDMI-A-*; do
  [[ -e "$dir/status" ]] || continue
  if [[ "$(cat "$dir/status")" == "connected" ]]; then
    CONNECTORS+=("$(basename "$dir")")
  fi
done

if [[ ${#CONNECTORS[@]} -eq 0 ]]; then
  CAPABILITIES_JSON=$(jq -n --arg id "$NODE_ID" '{
    nodeId: $id,
    platform: "linux-arm64-pi",
    displayConnected: false,
    detectedAt: (now | todate),
    note: "No HDMI display detected"
  }')
else
  PRIMARY="${CONNECTORS[0]}"
  EDID_PATH="/sys/class/drm/${PRIMARY}/edid"
  WIDTH=0
  HEIGHT=0
  REFRESH=60
  HDR=false
  AUDIO_HDMI=false
  MFG=""
  MODEL=""
  PHYSICAL_SIZE=null

  if [[ -s "$EDID_PATH" ]] && command -v edid-decode >/dev/null 2>&1; then
    EDID_INFO=$(edid-decode < "$EDID_PATH" 2>/dev/null || true)
    MFG=$(echo "$EDID_INFO" | grep -m1 -oP 'Manufacturer:\s*\K\S+' || true)
    MODEL=$(echo "$EDID_INFO" | grep -m1 -oP 'Model:\s*\K\S+' || true)

    # First "<W>x<H> @ <rate> Hz" mode line is used as the preferred mode.
    PREF=$(echo "$EDID_INFO" | grep -m1 -oP '\d+x\d+\s*@\s*\d+(?:\.\d+)?\s*Hz' || true)
    if [[ -n "$PREF" ]]; then
      WIDTH=$(echo "$PREF" | grep -oP '^\d+')
      HEIGHT=$(echo "$PREF" | grep -oP 'x\K\d+')
      # Integer part only, so the value is valid for --argjson below.
      REFRESH=$(echo "$PREF" | grep -oP '@\s*\K[\d.]+' | cut -d. -f1)
    fi

    # NOTE(review): current edid-decode builds appear to label these blocks
    # "HDR Static Metadata Data Block" / "Audio Data Block"; the patterns
    # accept those spellings in addition to the original ones. Confirm
    # against the edid-decode version installed on the players.
    if echo "$EDID_INFO" | grep -qiE 'HDR (Static|Dynamic) Metadata( Data)? Block'; then HDR=true; fi
    if echo "$EDID_INFO" | grep -qiE 'CEA Audio Block|Audio Data Block|Audio Format Descriptor'; then AUDIO_HDMI=true; fi

    # "Maximum image size: <W> cm x <H>" -> diagonal in inches, one decimal.
    PH_W=$(echo "$EDID_INFO" | grep -m1 -oP 'Maximum image size:\s*\K\d+\s*cm\s*x\s*\d+' || true)
    if [[ -n "$PH_W" ]]; then
      PH_CM_W=$(echo "$PH_W" | grep -oP '^\d+')
      PH_CM_H=$(echo "$PH_W" | grep -oP 'x\s*\K\d+')
      if (( PH_CM_W > 0 && PH_CM_H > 0 )); then
        PHYSICAL_SIZE=$(awk -v w="$PH_CM_W" -v h="$PH_CM_H" 'BEGIN { printf "%.1f", sqrt(w*w + h*h)/2.54 }')
      fi
    fi
  fi

  # Resolution fallback when EDID gave nothing.
  if [[ "$WIDTH" == "0" ]] && command -v kmsprint >/dev/null 2>&1; then
    # PRIMARY is the sysfs name (e.g. card0-HDMI-A-1). kmsprint appears to
    # list connectors without the "cardN-" prefix (e.g. HDMI-A-1), so match on
    # the bare connector name -- TODO confirm against the installed kmsxx.
    KMS_NAME="${PRIMARY#card*-}"
    KMS=$(kmsprint 2>/dev/null | grep -F -A2 -- "$KMS_NAME" | grep -oP '\d+x\d+' | head -1 || true)
    if [[ -n "$KMS" ]]; then
      WIDTH=$(echo "$KMS" | grep -oP '^\d+')
      HEIGHT=$(echo "$KMS" | grep -oP 'x\K\d+')
    fi
  fi

  # Audio is reported only when both the EDID and ALSA show an HDMI sink.
  AUDIO_ALSA=false
  if aplay -l 2>/dev/null | grep -qi 'card.*HDMI'; then AUDIO_ALSA=true; fi
  HAS_AUDIO=false
  if [[ "$AUDIO_HDMI" == "true" && "$AUDIO_ALSA" == "true" ]]; then HAS_AUDIO=true; fi

  CAPABILITIES_JSON=$(jq -n \
    --arg id "$NODE_ID" \
    --argjson w "$WIDTH" \
    --argjson h "$HEIGHT" \
    --argjson r "$REFRESH" \
    --argjson hdr "$HDR" \
    --argjson audio "$HAS_AUDIO" \
    --arg connector "$PRIMARY" \
    --arg mfg "$MFG" \
    --arg model "$MODEL" \
    --argjson size "$PHYSICAL_SIZE" \
    '{
      nodeId: $id,
      platform: "linux-arm64-pi",
      displayConnected: true,
      detectedAt: (now | todate),
      hardware: {
        maxResolution: { width: $w, height: $h },
        nativeResolution: { width: $w, height: $h },
        refreshRateHz: $r,
        colorDepth: ($hdr | if . then "Color30Hdr" else "Color24" end),
        hasAudioOutput: $audio,
        audioChannelCount: ($audio | if . then 2 else 0 end),
        physicalSizeInches: $size,
        connector: $connector,
        manufacturer: $mfg,
        modelName: $model
      },
      render: { codecs: ["h264", "vp9", "mp4"] }
    }')
fi

# Endpoints are tried in order; the first 200/201/204 wins.
ENDPOINT_CANDIDATES=(
  "${SIGNAGE_URL}/api/v1/nodes/${NODE_ID}/capabilities"
  "${SIGNAGE_URL}/api/v1/displays/${NODE_ID}/capability-profile"
)

# NOTE(review): without FC_SIGNAGE_CA_BUNDLE the server certificate is not
# verified (same as the previous `curl -k`), so only the client side of mTLS
# is enforced. Set the bundle to enable verification.
TLS_ARGS=(--insecure)
if [[ -n "$CA_BUNDLE" ]]; then
  TLS_ARGS=(--cacert "$CA_BUNDLE")
fi

SUCCESS=false
ACCEPTED_URL=""
for url in "${ENDPOINT_CANDIDATES[@]}"; do
  # The response body is never read, so it is discarded instead of being
  # written to a fixed, predictable path under /tmp. On a transport failure
  # curl's own -w output is already "000"; `|| true` keeps set -e from
  # aborting, and the default covers an empty result.
  HTTP_STATUS=$(curl -s "${TLS_ARGS[@]}" -o /dev/null -w "%{http_code}" \
    --max-time 10 \
    --cert "$CERT_DIR/client.crt" --key "$CERT_DIR/client.key" \
    -X POST "$url" \
    -H "Content-Type: application/json" \
    -d "$CAPABILITIES_JSON" || true)
  HTTP_STATUS="${HTTP_STATUS:-000}"
  case "$HTTP_STATUS" in
    200|201|204)
      SUCCESS=true
      ACCEPTED_URL="$url"
      break
      ;;
  esac
done

mkdir -p "$LOG_DIR"
if [[ "$SUCCESS" != "true" ]]; then
  # Keep the profile locally so it is not lost when the server is unreachable.
  echo "[$(date -Is)] capability declare: no endpoint accepted the profile; logging locally" \
    | tee -a "$LOG_FILE"
  echo "$CAPABILITIES_JSON" | tee -a "$LOG_FILE"
else
  echo "[$(date -Is)] capability declare: ok ($ACCEPTED_URL)" | tee -a "$LOG_FILE"
fi

# Always emit the profile on stdout for the caller.
echo "$CAPABILITIES_JSON"
|
||||||
@@ -0,0 +1,144 @@
|
|||||||
|
#!/usr/bin/env bash
# First-boot bootstrap for a FlowerCore signage Pi node.
#
# Flow: create the local identity file, register with the signage server,
# optionally self-approve with a setup code, wait up to 30 minutes for
# approval, enroll an ECDSA P-256 client certificate, bundle it as PKCS#12,
# then start the display-detection and kiosk units.
#
# Exit codes: 0 enrolled (or already enrolled), 2 registration failed,
#             3 approval not granted in time, 4 enrollment failed.
#
# NOTE(review): every curl call uses -k, so the server certificate is not
# verified during bootstrap — confirm this is intentional.
set -euo pipefail

NODE_JSON="/etc/flowercore/signage-node.json"
CERT_DIR="/etc/fc-signage-player"
SIGNAGE_URL="${FC_SIGNAGE_URL:-https://signage.iamworkin.lan}"
SETUP_CODE_FILE="/etc/flowercore/signage-setup-code"

mkdir -p /etc/flowercore "$CERT_DIR" /var/log/fc-signage-player
chown fc-signage:fc-signage /etc/flowercore "$CERT_DIR" /var/log/fc-signage-player
chmod 0750 "$CERT_DIR"

# Idempotency guard: identity file, PKCS#12 bundle and an enrolledAt stamp
# together mean a previous run already finished.
if [[ -s "$NODE_JSON" && -s "$CERT_DIR/client.p12" ]]; then
  ENROLLED=$(jq -r '.enrolledAt // empty' "$NODE_JSON")
  if [[ -n "$ENROLLED" ]]; then
    echo "[$(date -Is)] bootstrap: already enrolled at $ENROLLED; skipping"
    exit 0
  fi
fi

# Server responses go to a private scratch directory instead of fixed names
# in /tmp, which another local user could pre-create or symlink.
WORK_DIR=$(mktemp -d)
trap 'rm -rf "$WORK_DIR"' EXIT
REGISTER_RESPONSE="$WORK_DIR/register-response.json"
ENROLL_RESPONSE="$WORK_DIR/enroll-response.json"

# Reuse the stored identity when present; otherwise mint one and persist it.
if [[ -s "$NODE_JSON" ]]; then
  NODE_UUID=$(jq -r '.nodeUuid // empty' "$NODE_JSON")
  MACHINE_ID=$(jq -r '.machineId // empty' "$NODE_JSON")
else
  NODE_UUID=$(uuidgen)
  # machineId is the first 16 hex characters of the UUID with dashes removed.
  MACHINE_ID=$(echo "$NODE_UUID" | tr -d '-' | cut -c1-16)
  jq -n --arg uuid "$NODE_UUID" --arg machine "$MACHINE_ID" --arg host "$(hostname -f)" --arg ts "$(date -Is)" \
    '{nodeUuid: $uuid, machineId: $machine, hostname: $host, platform: "linux-arm64-pi", createdAt: $ts}' \
    > "$NODE_JSON"
  chmod 0640 "$NODE_JSON"
  chown fc-signage:fc-signage "$NODE_JSON"
fi

# Optional setup code; all whitespace is removed so a trailing newline in the
# file does not end up in the request.
SETUP_CODE=""
if [[ -s "$SETUP_CODE_FILE" ]]; then
  SETUP_CODE=$(tr -d '\r\n\t ' < "$SETUP_CODE_FILE")
fi

MODEL=$(tr -d '\0' < /sys/firmware/devicetree/base/model 2>/dev/null || echo Unknown)
REG_PAYLOAD=$(jq -n \
  --arg machine "$MACHINE_ID" \
  --arg name "$(hostname -f)" \
  --arg setup "$SETUP_CODE" \
  --arg resolution "1920x1080" \
  --arg model "$MODEL" \
  '{
    machineId: $machine,
    name: $name,
    setupCode: ($setup | if . == "" then null else . end),
    resolution: $resolution,
    hardwareModel: $model,
    platform: "linux-arm64-pi"
  }')

# Register, retrying once. `|| echo "000"` keeps a transport failure from
# tripping `set -e` so the status check below can report it.
for attempt in 1 2; do
  HTTP_STATUS=$(curl -sk -o "$REGISTER_RESPONSE" -w "%{http_code}" \
    --max-time 15 \
    -X POST "${SIGNAGE_URL}/api/v1/nodes/register" \
    -H "Content-Type: application/json" \
    -d "$REG_PAYLOAD" || echo "000")
  if [[ "$HTTP_STATUS" == "200" || "$HTTP_STATUS" == "201" ]]; then
    break
  fi
  echo "[$(date -Is)] bootstrap: register attempt $attempt returned $HTTP_STATUS" >&2
  sleep 5
done

if [[ "$HTTP_STATUS" != "200" && "$HTTP_STATUS" != "201" ]]; then
  echo "[$(date -Is)] bootstrap: register failed after 2 attempts" >&2
  exit 2
fi

# `|| true` lets a non-JSON body fall through to the explicit error below
# instead of aborting on jq's exit status.
NODE_ID=$(jq -r '.nodeId // empty' "$REGISTER_RESPONSE" || true)
if [[ -z "$NODE_ID" ]]; then
  echo "[$(date -Is)] bootstrap: register response did not include nodeId" >&2
  exit 2
fi
jq --arg id "$NODE_ID" '.nodeId = $id' "$NODE_JSON" > "${NODE_JSON}.tmp" && mv "${NODE_JSON}.tmp" "$NODE_JSON"

# Best-effort self-approval. The body is built with jq so a quote or backslash
# in the setup code cannot produce malformed JSON.
if [[ -s "$SETUP_CODE_FILE" ]]; then
  APPROVE_PAYLOAD=$(jq -cn --arg setup "$SETUP_CODE" '{setupCode: $setup}')
  curl -sk --max-time 15 -X POST "${SIGNAGE_URL}/api/v1/nodes/${NODE_ID}/approve-via-setup-code" \
    -H "Content-Type: application/json" \
    -d "$APPROVE_PAYLOAD" \
    -o /dev/null || true
fi

# Poll for approval every 15s for up to 30 minutes. A failed poll (network
# blip, non-JSON body) must count as "not approved yet"; without `|| true`
# pipefail + errexit would abort the whole bootstrap on the first hiccup.
STATUS=""
DEADLINE=$(( $(date +%s) + 1800 ))
while (( $(date +%s) < DEADLINE )); do
  STATUS=$(curl -sk --max-time 5 "${SIGNAGE_URL}/api/v1/nodes/${NODE_ID}/status" \
    | jq -r '.status // empty' 2>/dev/null || true)
  if [[ "$STATUS" == "Approved" || "$STATUS" == "Enrolled" || "$STATUS" == "Online" ]]; then
    break
  fi
  sleep 15
done

if [[ "$STATUS" != "Approved" && "$STATUS" != "Enrolled" && "$STATUS" != "Online" ]]; then
  echo "[$(date -Is)] bootstrap: approval not granted within 30min budget" >&2
  exit 3
fi

# Generate the private key under a restrictive umask so it is never created
# with wider permissions than it ends up with.
KEY_PATH="${CERT_DIR}/client.key"
CSR_PATH="${CERT_DIR}/client.csr"
( umask 077; openssl ecparam -genkey -name prime256v1 -out "$KEY_PATH" )
openssl req -new -key "$KEY_PATH" -out "$CSR_PATH" \
  -subj "/CN=${NODE_ID}/O=FlowerCore/OU=SignagePlayer-Pi"

ENROLL_PAYLOAD=$(jq -n --arg csr "$(cat "$CSR_PATH")" '{certificateSigningRequest: $csr}')
HTTP_STATUS=$(curl -sk -o "$ENROLL_RESPONSE" -w "%{http_code}" \
  --max-time 15 \
  -X POST "${SIGNAGE_URL}/api/v1/nodes/${NODE_ID}/enroll" \
  -H "Content-Type: application/json" \
  -d "$ENROLL_PAYLOAD" || echo "000")

if [[ "$HTTP_STATUS" != "200" && "$HTTP_STATUS" != "201" ]]; then
  echo "[$(date -Is)] bootstrap: enroll failed with HTTP $HTTP_STATUS" >&2
  exit 4
fi

# Refuse to continue if either PEM is absent; writing jq's literal "null" to
# disk would only fail later with a less helpful openssl error.
CLIENT_CERT_PEM=$(jq -r '.clientCertificatePem // .signedCertificatePem // empty' "$ENROLL_RESPONSE" || true)
CA_CHAIN_PEM=$(jq -r '.caCertificatePem // empty' "$ENROLL_RESPONSE" || true)
if [[ -z "$CLIENT_CERT_PEM" || -z "$CA_CHAIN_PEM" ]]; then
  echo "[$(date -Is)] bootstrap: enroll response did not include certificate material" >&2
  exit 4
fi
printf '%s\n' "$CLIENT_CERT_PEM" > "${CERT_DIR}/client.crt"
printf '%s\n' "$CA_CHAIN_PEM" > "${CERT_DIR}/ca-chain.pem"

# Random PKCS#12 passphrase, stored next to the bundle and readable only by
# its owner.
P12_PASS=$(openssl rand -hex 24)
( umask 077; printf '%s' "$P12_PASS" > "${CERT_DIR}/client.p12.pass" )
chmod 0600 "${CERT_DIR}/client.p12.pass"

openssl pkcs12 -export \
  -inkey "$KEY_PATH" \
  -in "${CERT_DIR}/client.crt" \
  -certfile "${CERT_DIR}/ca-chain.pem" \
  -out "${CERT_DIR}/client.p12" \
  -password "pass:${P12_PASS}"

chown fc-signage:fc-signage "${CERT_DIR}"/* "$NODE_JSON"
chmod 0640 "${CERT_DIR}/client.p12" "${CERT_DIR}/client.crt" "${CERT_DIR}/ca-chain.pem" "$KEY_PATH"
chmod 0600 "${CERT_DIR}/client.p12.pass"

# Stamp the identity file; enrolledAt is what the idempotency guard checks.
EXPIRY=$(openssl x509 -in "${CERT_DIR}/client.crt" -enddate -noout | sed 's/notAfter=//')
jq --arg ts "$(date -Is)" --arg exp "$EXPIRY" \
  '.enrolledAt = $ts | .certExpiry = $exp' "$NODE_JSON" > "${NODE_JSON}.tmp" \
  && mv "${NODE_JSON}.tmp" "$NODE_JSON"

# Starting the dependent units is best-effort; enrollment itself succeeded.
systemctl start flowercore-signage-detect-display.service || true
systemctl start flowercore-signage-player-pi.service || true
echo "[$(date -Is)] bootstrap: enrolled and kiosk started (NodeId=${NODE_ID})"
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
#!/usr/bin/env bash
# HDMI hotplug responder: wait briefly, redeclare display capabilities, then
# restart the kiosk player.
set -euo pipefail

# Pause two seconds before acting on the hotplug event.
sleep 2

# Capability redeclaration is best-effort; its failure must not stop the
# player restart below.
if ! systemctl start flowercore-signage-detect-display.service; then
  :
fi

systemctl restart flowercore-signage-player-pi.service
|
||||||
@@ -0,0 +1,44 @@
|
|||||||
|
#!/usr/bin/env bash
# Kiosk launcher: derives the player URL from the node id and the SHA-256
# fingerprint of the client certificate, probes the /embed route with the
# client certificate, falls back to the bare /player route when the probe
# fails, and finally replaces this process with Chromium in kiosk mode.
set -euo pipefail

NODE_JSON="/etc/flowercore/signage-node.json"
SIGNAGE_URL="${FC_SIGNAGE_URL:-https://signage.iamworkin.lan}"
CERT_DIR="/etc/fc-signage-player"

# `// empty` keeps a missing nodeId from becoming the literal string "null",
# which would otherwise be baked into the URL as /player/null/...
NODE_ID=$(jq -r '.nodeId // empty' "$NODE_JSON")
if [[ -z "$NODE_ID" ]]; then
  echo "[$(date -Is)] launch: $NODE_JSON has no nodeId; node is not registered" >&2
  exit 1
fi

# Certificate fingerprint with the "...=" prefix and the colons removed.
# openssl's stderr is discarded, so report the failure explicitly instead of
# letting errexit end the script without any message.
if ! CERT_THUMB=$(openssl pkcs12 -in "$CERT_DIR/client.p12" -passin file:"$CERT_DIR/client.p12.pass" -nodes -nokeys 2>/dev/null \
    | openssl x509 -fingerprint -sha256 -noout \
    | sed 's/.*=//' \
    | tr -d ':'); then
  echo "[$(date -Is)] launch: unable to read client certificate from $CERT_DIR/client.p12" >&2
  exit 1
fi
if [[ -z "$CERT_THUMB" ]]; then
  echo "[$(date -Is)] launch: client certificate fingerprint is empty" >&2
  exit 1
fi

PLAYER_URL="${SIGNAGE_URL}/player/${NODE_ID}/embed?token=${CERT_THUMB}"

# Probe the embed route; "000" stands in for a transport-level failure.
HTTP_STATUS=$(curl -sk -o /dev/null -w "%{http_code}" --max-time 5 \
  --cert-type P12 --cert "$CERT_DIR/client.p12:$(cat "$CERT_DIR/client.p12.pass")" \
  "$PLAYER_URL" || echo "000")

mkdir -p /var/log/fc-signage-player
if [[ "$HTTP_STATUS" != "200" && "$HTTP_STATUS" != "301" && "$HTTP_STATUS" != "302" ]]; then
  echo "[$(date -Is)] /embed returned $HTTP_STATUS; falling back to /player/${NODE_ID}" \
    >> /var/log/fc-signage-player/url-divergence.log
  PLAYER_URL="${SIGNAGE_URL}/player/${NODE_ID}?token=${CERT_THUMB}"
fi

# exec so Chromium becomes the service's main process.
exec chromium-browser \
  --kiosk \
  --noerrdialogs \
  --disable-infobars \
  --disable-translate \
  --disable-features=TranslateUI,InfiniteSessionRestore \
  --autoplay-policy=no-user-gesture-required \
  --password-store=basic \
  --user-data-dir=/var/lib/fc-signage-player/profile \
  --disk-cache-dir=/var/lib/fc-signage-player/cache \
  --disk-cache-size=104857600 \
  --no-first-run \
  --no-default-browser-check \
  --check-for-update-interval=2592000 \
  --enable-features=OverlayScrollbar \
  --start-fullscreen \
  --window-position=0,0 \
  --window-size=1920,1080 \
  "$PLAYER_URL"
|
||||||
@@ -0,0 +1,20 @@
|
|||||||
|
#!/usr/bin/env bash
# Pre-launch gate for the kiosk player: fails when identity or certificate
# files are missing or unreadable, and warns when the client certificate does
# not pass the 7-day -checkend test.
set -euo pipefail

LOG_DIR=/var/log/fc-signage-player
P12_FILE=/etc/fc-signage-player/client.p12
P12_PASS_FILE=/etc/fc-signage-player/client.p12.pass

mkdir -p "$LOG_DIR"

REQUIRED_FILES=(
  /etc/flowercore/signage-node.json
  /etc/fc-signage-player/client.p12
  /etc/fc-signage-player/client.p12.pass
)
for required in "${REQUIRED_FILES[@]}"; do
  [[ -r "$required" ]] && continue
  echo "[$(date -Is)] prelaunch: missing or unreadable $required" >&2
  exit 1
done

# Warning only: a failing check is reported on stderr but does not block the
# launch.
if ! openssl pkcs12 -in "$P12_FILE" -passin file:"$P12_PASS_FILE" -nokeys -clcerts 2>/dev/null \
    | openssl x509 -checkend $((7*24*3600)) -noout; then
  echo "[$(date -Is)] prelaunch: client cert expires within 7 days" >&2
fi

echo "[$(date -Is)] prelaunch: ok" | tee -a "$LOG_DIR/prelaunch.log"
|
||||||
@@ -0,0 +1,46 @@
|
|||||||
|
#!/usr/bin/env bash
# Renews the node's mTLS client certificate once it is within 30 days of
# expiry. A fresh ECDSA P-256 key and CSR are generated, the renew endpoint is
# called using the current certificate, and the new key/cert/chain/PKCS#12 are
# staged as *.new files and moved into place only after every step succeeded.
#
# Exit codes: 0 nothing to do or renewed, 5 renewal failed (old cert kept).
set -euo pipefail

CERT_DIR="/etc/fc-signage-player"
NODE_JSON="/etc/flowercore/signage-node.json"
SIGNAGE_URL="${FC_SIGNAGE_URL:-https://signage.iamworkin.lan}"

[[ -s "$CERT_DIR/client.crt" ]] || { echo "no cert to renew"; exit 0; }

# -checkend exits 0 while the certificate is still valid beyond the window.
if openssl x509 -in "$CERT_DIR/client.crt" -checkend $((30*24*3600)) -noout; then
  exit 0
fi

# `// empty` keeps a missing nodeId from becoming the literal string "null".
NODE_ID=$(jq -r '.nodeId // empty' "$NODE_JSON")
if [[ -z "$NODE_ID" ]]; then
  echo "[$(date -Is)] renew: $NODE_JSON has no nodeId; leaving old cert in place" >&2
  exit 5
fi

NEW_KEY="$CERT_DIR/client.key.new"
NEW_CSR="$CERT_DIR/client.csr.new"
WORK_DIR=$(mktemp -d)
RENEW_RESPONSE="$WORK_DIR/renew-response.json"

# Remove staged files and the scratch directory on every exit path so a failed
# renewal never leaves an orphaned private key behind. After a successful swap
# the staged names no longer exist and `rm -f` is a no-op for them.
cleanup() {
  rm -f "$NEW_KEY" "$NEW_CSR" \
    "$CERT_DIR/client.crt.new" "$CERT_DIR/ca-chain.pem.new" "$CERT_DIR/client.p12.new"
  rm -rf "$WORK_DIR"
}
trap cleanup EXIT

# Create the new private key under a restrictive umask.
( umask 077; openssl ecparam -genkey -name prime256v1 -out "$NEW_KEY" )
openssl req -new -key "$NEW_KEY" -out "$NEW_CSR" \
  -subj "/CN=${NODE_ID}/O=FlowerCore/OU=SignagePlayer-Pi"

# --max-time bounds the request; `|| echo "000"` turns a transport failure
# into a reportable status instead of an errexit abort.
HTTP_STATUS=$(curl -sk -o "$RENEW_RESPONSE" -w "%{http_code}" \
  --max-time 15 \
  --cert "$CERT_DIR/client.crt" --key "$CERT_DIR/client.key" \
  -X POST "${SIGNAGE_URL}/api/v1/nodes/${NODE_ID}/renew" \
  -H "Content-Type: application/json" \
  -d "$(jq -n --arg csr "$(cat "$NEW_CSR")" '{certificateSigningRequest: $csr}')" || echo "000")

if [[ "$HTTP_STATUS" != "200" && "$HTTP_STATUS" != "201" ]]; then
  echo "[$(date -Is)] renew: failed HTTP $HTTP_STATUS; leaving old cert in place" >&2
  exit 5
fi

# Reject a response without PEM material before anything is staged.
CLIENT_CERT_PEM=$(jq -r '.clientCertificatePem // .signedCertificatePem // empty' "$RENEW_RESPONSE" || true)
CA_CHAIN_PEM=$(jq -r '.caCertificatePem // empty' "$RENEW_RESPONSE" || true)
if [[ -z "$CLIENT_CERT_PEM" || -z "$CA_CHAIN_PEM" ]]; then
  echo "[$(date -Is)] renew: response did not include certificate material; leaving old cert in place" >&2
  exit 5
fi
printf '%s\n' "$CLIENT_CERT_PEM" > "$CERT_DIR/client.crt.new"
printf '%s\n' "$CA_CHAIN_PEM" > "$CERT_DIR/ca-chain.pem.new"

# The existing PKCS#12 passphrase is reused for the new bundle.
P12_PASS=$(cat "$CERT_DIR/client.p12.pass")
openssl pkcs12 -export -inkey "$NEW_KEY" -in "$CERT_DIR/client.crt.new" \
  -certfile "$CERT_DIR/ca-chain.pem.new" \
  -out "$CERT_DIR/client.p12.new" -password "pass:${P12_PASS}"

# Everything succeeded: move the staged files over the live ones.
mv "$CERT_DIR/client.key.new" "$CERT_DIR/client.key"
mv "$CERT_DIR/client.crt.new" "$CERT_DIR/client.crt"
mv "$CERT_DIR/ca-chain.pem.new" "$CERT_DIR/ca-chain.pem"
mv "$CERT_DIR/client.p12.new" "$CERT_DIR/client.p12"

# Same ownership and modes the bootstrap script applies after enrollment.
chown fc-signage:fc-signage "$CERT_DIR"/client.* "$CERT_DIR/ca-chain.pem"
chmod 0640 "$CERT_DIR/client.p12" "$CERT_DIR/client.crt" "$CERT_DIR/ca-chain.pem" "$CERT_DIR/client.key"
chmod 0600 "$CERT_DIR/client.p12.pass"

systemctl restart flowercore-signage-player-pi.service
|
||||||
@@ -0,0 +1,2 @@
|
|||||||
|
# On HDMI hotplug, start the responder service: it waits 2s for DRM to settle, redeclares display capabilities, then restarts Chromium.
|
||||||
|
SUBSYSTEM=="drm", KERNEL=="card?-HDMI-A-?", ACTION=="change", RUN+="/usr/bin/systemctl start flowercore-signage-player-pi-hdmi.service"
|
||||||
@@ -0,0 +1,16 @@
|
|||||||
|
[Unit]
|
||||||
|
Description=FlowerCore Signage Pi: first-boot identity + mTLS enrollment
|
||||||
|
Wants=network-online.target
|
||||||
|
After=network-online.target
|
||||||
|
Before=flowercore-signage-player-pi.service
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=oneshot
|
||||||
|
ExecStart=/usr/local/bin/flowercore-signage-bootstrap.sh
|
||||||
|
RemainAfterExit=yes
|
||||||
|
StandardOutput=journal
|
||||||
|
StandardError=journal
|
||||||
|
TimeoutStartSec=2100
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
||||||
@@ -0,0 +1,8 @@
|
|||||||
|
[Unit]
|
||||||
|
Description=FlowerCore Signage Pi: detect connected display + declare capabilities
|
||||||
|
After=flowercore-signage-bootstrap.service
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=oneshot
|
||||||
|
User=fc-signage
|
||||||
|
ExecStart=/usr/local/bin/fc-signage-detect-display
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
[Unit]
|
||||||
|
Description=Daily FlowerCore Signage Pi display capability redeclaration
|
||||||
|
|
||||||
|
[Timer]
|
||||||
|
OnCalendar=daily
|
||||||
|
RandomizedDelaySec=1h
|
||||||
|
Persistent=true
|
||||||
|
OnBootSec=30s
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=timers.target
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
[Unit]
|
||||||
|
Description=FlowerCore Signage Pi Player HDMI hotplug responder
|
||||||
|
DefaultDependencies=no
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=oneshot
|
||||||
|
ExecStart=/usr/local/bin/flowercore-signage-hdmi-respond.sh
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
# Chromium kiosk player unit. Runs as fc-signage, is condition-gated on the
# node identity file and the PKCS#12 client bundle, and restarts on exit.

[Unit]
Description=FlowerCore Digital Signage Pi Player (Chromium kiosk)
Documentation=https://github.com/astoltz/FlowerCore.Notes/blob/master/docs/standards/appletv-pi-signage-agents-design.md
Wants=network-online.target
After=network-online.target graphical.target
ConditionPathExists=/etc/flowercore/signage-node.json
ConditionPathExists=/etc/fc-signage-player/client.p12
# Start rate limiting is configured in [Unit]; systemd does not recognise
# StartLimitIntervalSec= inside [Service], so there the 300s window would be
# ignored and the default interval used instead.
StartLimitBurst=5
StartLimitIntervalSec=300s

[Service]
Type=simple
User=fc-signage
Group=fc-signage
WorkingDirectory=/var/lib/fc-signage-player
# Leading "-" makes the environment file optional.
EnvironmentFile=-/etc/flowercore/signage-player.env
ExecStartPre=/usr/local/bin/flowercore-signage-prelaunch.sh
ExecStart=/usr/local/bin/flowercore-signage-launch.sh
Restart=always
RestartSec=10s
MemoryMax=2G
MemoryHigh=1500M
# Filesystem is read-only for the service except the two paths below.
ProtectSystem=strict
ProtectHome=true
ReadWritePaths=/var/lib/fc-signage-player /var/log/fc-signage-player
PrivateTmp=true
NoNewPrivileges=true

[Install]
WantedBy=graphical.target
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
[Unit]
|
||||||
|
Description=FlowerCore Signage Pi: cert renewal worker
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Type=oneshot
|
||||||
|
ExecStart=/usr/local/bin/flowercore-signage-renew-cert.sh
|
||||||
@@ -0,0 +1,10 @@
|
|||||||
|
[Unit]
|
||||||
|
Description=Daily check for FlowerCore Signage Pi cert renewal
|
||||||
|
|
||||||
|
[Timer]
|
||||||
|
OnCalendar=daily
|
||||||
|
RandomizedDelaySec=2h
|
||||||
|
Persistent=true
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=timers.target
|
||||||
22
apps/fc-signage-pi-player/tests/display_capability.bats
Normal file
22
apps/fc-signage-pi-player/tests/display_capability.bats
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
#!/usr/bin/env bats
|
||||||
|
|
||||||
|
setup() {
|
||||||
|
APP_ROOT="$(cd "$BATS_TEST_DIRNAME/.." && pwd)"
|
||||||
|
DETECT="$APP_ROOT/scripts/fc-signage-detect-display"
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "display detection emits graceful disconnected profile when no hdmi connector is present" {
|
||||||
|
script="$(cat "$DETECT")"
|
||||||
|
[[ "$script" == *"displayConnected: false"* ]]
|
||||||
|
[[ "$script" == *"No HDMI display detected"* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "display detection parses edid, falls back to kmsprint, and logs endpoint failures locally" {
|
||||||
|
script="$(cat "$DETECT")"
|
||||||
|
[[ "$script" == *"edid-decode"* ]]
|
||||||
|
[[ "$script" == *"HDR (Static|Dynamic) Metadata Block"* ]]
|
||||||
|
[[ "$script" == *"kmsprint"* ]]
|
||||||
|
[[ "$script" == *"/api/v1/nodes/\${NODE_ID}/capabilities"* ]]
|
||||||
|
[[ "$script" == *"/api/v1/displays/\${NODE_ID}/capability-profile"* ]]
|
||||||
|
[[ "$script" == *"capabilities.log"* ]]
|
||||||
|
}
|
||||||
64
apps/fc-signage-pi-player/tests/identity_bootstrap.bats
Normal file
64
apps/fc-signage-pi-player/tests/identity_bootstrap.bats
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
#!/usr/bin/env bats
|
||||||
|
|
||||||
|
setup() {
|
||||||
|
APP_ROOT="$(cd "$BATS_TEST_DIRNAME/.." && pwd)"
|
||||||
|
BOOTSTRAP="$APP_ROOT/scripts/flowercore-signage-bootstrap.sh"
|
||||||
|
RENEW="$APP_ROOT/scripts/flowercore-signage-renew-cert.sh"
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "bootstrap is idempotent when node is already enrolled" {
|
||||||
|
script="$(cat "$BOOTSTRAP")"
|
||||||
|
[[ "$script" == *'[[ -s "$NODE_JSON" && -s "$CERT_DIR/client.p12" ]]'* ]]
|
||||||
|
[[ "$script" == *"already enrolled"* ]]
|
||||||
|
[[ "$script" == *"exit 0"* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "bootstrap generates a stable node uuid and machine id" {
|
||||||
|
script="$(cat "$BOOTSTRAP")"
|
||||||
|
[[ "$script" == *"uuidgen"* ]]
|
||||||
|
[[ "$script" == *"nodeUuid"* ]]
|
||||||
|
[[ "$script" == *"machineId"* ]]
|
||||||
|
[[ "$script" == *"cut -c1-16"* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "bootstrap posts to the canonical register endpoint" {
|
||||||
|
grep -q '/api/v1/nodes/register' "$BOOTSTRAP"
|
||||||
|
grep -q '"linux-arm64-pi"' "$BOOTSTRAP"
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "bootstrap retries registration once for first-call races" {
|
||||||
|
script="$(cat "$BOOTSTRAP")"
|
||||||
|
[[ "$script" == *"for attempt in 1 2"* ]]
|
||||||
|
[[ "$script" == *"register attempt \$attempt returned"* ]]
|
||||||
|
[[ "$script" == *"sleep 5"* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "bootstrap supports setup-code approval with manual polling fallback" {
|
||||||
|
script="$(cat "$BOOTSTRAP")"
|
||||||
|
[[ "$script" == *"signage-setup-code"* ]]
|
||||||
|
[[ "$script" == *"approve-via-setup-code"* ]]
|
||||||
|
[[ "$script" == *"+ 1800"* ]]
|
||||||
|
[[ "$script" == *"sleep 15"* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "bootstrap generates an ecdsa p256 csr for the signage pi subject" {
|
||||||
|
script="$(cat "$BOOTSTRAP")"
|
||||||
|
[[ "$script" == *"ecparam -genkey -name prime256v1"* ]]
|
||||||
|
[[ "$script" == *'/CN=${NODE_ID}/O=FlowerCore/OU=SignagePlayer-Pi'* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "bootstrap writes pkcs12 bundle with restrictive permissions" {
|
||||||
|
script="$(cat "$BOOTSTRAP")"
|
||||||
|
[[ "$script" == *"openssl pkcs12 -export"* ]]
|
||||||
|
[[ "$script" == *"client.p12.pass"* ]]
|
||||||
|
[[ "$script" == *"chmod 0640"* ]]
|
||||||
|
[[ "$script" == *"chmod 0600"* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "renewal only calls renew endpoint inside the thirty-day window and swaps atomically" {
|
||||||
|
script="$(cat "$RENEW")"
|
||||||
|
[[ "$script" == *'-checkend $((30*24*3600))'* ]]
|
||||||
|
[[ "$script" == *"/api/v1/nodes/\${NODE_ID}/renew"* ]]
|
||||||
|
[[ "$script" == *"client.key.new"* ]]
|
||||||
|
[[ "$script" == *'mv "$CERT_DIR/client.p12.new" "$CERT_DIR/client.p12"'* ]]
|
||||||
|
}
|
||||||
68
apps/fc-signage-pi-player/tests/systemd_kiosk_wrapper.bats
Normal file
68
apps/fc-signage-pi-player/tests/systemd_kiosk_wrapper.bats
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
#!/usr/bin/env bats
|
||||||
|
|
||||||
|
setup() {
|
||||||
|
APP_ROOT="$(cd "$BATS_TEST_DIRNAME/.." && pwd)"
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "player unit exists" {
|
||||||
|
[ -f "$APP_ROOT/systemd/flowercore-signage-player-pi.service" ]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "player unit uses simple chromium service with restart backoff" {
|
||||||
|
unit="$(cat "$APP_ROOT/systemd/flowercore-signage-player-pi.service")"
|
||||||
|
[[ "$unit" == *"Type=simple"* ]]
|
||||||
|
[[ "$unit" == *"Restart=always"* ]]
|
||||||
|
[[ "$unit" == *"RestartSec=10s"* ]]
|
||||||
|
[[ "$unit" == *"StartLimitBurst=5"* ]]
|
||||||
|
[[ "$unit" == *"StartLimitIntervalSec=300s"* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "player unit caps chromium memory at two gigabytes" {
|
||||||
|
grep -q '^MemoryMax=2G$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
|
||||||
|
grep -q '^MemoryHigh=1500M$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "player unit condition-gates startup on identity and p12 certificate" {
|
||||||
|
grep -q '^ConditionPathExists=/etc/flowercore/signage-node.json$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
|
||||||
|
grep -q '^ConditionPathExists=/etc/fc-signage-player/client.p12$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "player unit runs prelaunch checks before chromium" {
|
||||||
|
grep -q '^ExecStartPre=/usr/local/bin/flowercore-signage-prelaunch.sh$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
|
||||||
|
grep -q '^ExecStart=/usr/local/bin/flowercore-signage-launch.sh$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "hdmi udev rule routes through the two-second settle service" {
|
||||||
|
rule="$(cat "$APP_ROOT/systemd/99-flowercore-signage-hdmi.rules")"
|
||||||
|
[[ "$rule" == *'KERNEL=="card?-HDMI-A-?"'* ]]
|
||||||
|
[[ "$rule" == *"systemctl start flowercore-signage-player-pi-hdmi.service"* ]]
|
||||||
|
[[ "$rule" != *"systemctl restart flowercore-signage-player-pi.service"* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "hdmi responder settles, declares display, then restarts chromium" {
|
||||||
|
responder="$(cat "$APP_ROOT/scripts/flowercore-signage-hdmi-respond.sh")"
|
||||||
|
[[ "$responder" == *"sleep 2"* ]]
|
||||||
|
[[ "$responder" == *"systemctl start flowercore-signage-detect-display.service"* ]]
|
||||||
|
[[ "$responder" == *"systemctl restart flowercore-signage-player-pi.service"* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "chromium policy json is valid and disables credential prompts" {
|
||||||
|
command -v jq >/dev/null || skip "jq not installed"
|
||||||
|
jq -e '.AutofillAddressEnabled == false and .AutofillCreditCardEnabled == false and .PasswordManagerEnabled == false' \
|
||||||
|
"$APP_ROOT/chromium-policies/flowercore-signage.json" >/dev/null
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "launch script tries embed URL and logs bare-player fallback" {
|
||||||
|
launch="$(cat "$APP_ROOT/scripts/flowercore-signage-launch.sh")"
|
||||||
|
[[ "$launch" == *'/player/${NODE_ID}/embed?token=${CERT_THUMB}'* ]]
|
||||||
|
[[ "$launch" == *"url-divergence.log"* ]]
|
||||||
|
[[ "$launch" == *'/player/${NODE_ID}?token=${CERT_THUMB}'* ]]
|
||||||
|
}
|
||||||
|
|
||||||
|
@test "prelaunch script validates required node and cert files" {
|
||||||
|
prelaunch="$(cat "$APP_ROOT/scripts/flowercore-signage-prelaunch.sh")"
|
||||||
|
[[ "$prelaunch" == *"/etc/flowercore/signage-node.json"* ]]
|
||||||
|
[[ "$prelaunch" == *"/etc/fc-signage-player/client.p12"* ]]
|
||||||
|
[[ "$prelaunch" == *"/etc/fc-signage-player/client.p12.pass"* ]]
|
||||||
|
[[ "$prelaunch" == *"exit 1"* ]]
|
||||||
|
}
|
||||||
@@ -76,15 +76,13 @@ spec:
|
|||||||
memory: "512Mi"
|
memory: "512Mi"
|
||||||
cpu: "500m"
|
cpu: "500m"
|
||||||
livenessProbe:
|
livenessProbe:
|
||||||
httpGet:
|
tcpSocket:
|
||||||
path: /health
|
|
||||||
port: http
|
port: http
|
||||||
initialDelaySeconds: 30
|
initialDelaySeconds: 30
|
||||||
periodSeconds: 30
|
periodSeconds: 30
|
||||||
timeoutSeconds: 5
|
timeoutSeconds: 5
|
||||||
readinessProbe:
|
readinessProbe:
|
||||||
httpGet:
|
tcpSocket:
|
||||||
path: /health
|
|
||||||
port: http
|
port: http
|
||||||
initialDelaySeconds: 10
|
initialDelaySeconds: 10
|
||||||
periodSeconds: 10
|
periodSeconds: 10
|
||||||
|
|||||||
@@ -30,6 +30,7 @@ import logging
|
|||||||
import re
|
import re
|
||||||
import shlex
|
import shlex
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import unicodedata
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from fastapi import FastAPI, HTTPException
|
from fastapi import FastAPI, HTTPException
|
||||||
@@ -60,6 +61,189 @@ class TtsRequest(BaseModel):
|
|||||||
volume: int = 100 # 0-200
|
volume: int = 100 # 0-200
|
||||||
|
|
||||||
|
|
||||||
|
HEBREW_CHAR_RE = re.compile(r"[\u0590-\u05FF]")
|
||||||
|
HEBREW_WORD_RE = re.compile(r"[\u0590-\u05FF]+")
|
||||||
|
|
||||||
|
# eSpeak-NG's Hebrew voice can spell unpointed Hebrew as Unicode character
|
||||||
|
# names on some builds. For source-text study reads, prefer a stable
|
||||||
|
# scholarly transliteration so words sound like words even without niqqud.
|
||||||
|
HEBREW_WORD_TRANSLITERATIONS = {
|
||||||
|
"אב": "av",
|
||||||
|
"אבא": "abba",
|
||||||
|
"אברהם": "Avraham",
|
||||||
|
"אדמה": "adamah",
|
||||||
|
"אדני": "Adonai",
|
||||||
|
"אדם": "adam",
|
||||||
|
"אור": "or",
|
||||||
|
"אלהים": "Elohim",
|
||||||
|
"אלוהים": "Elohim",
|
||||||
|
"אמן": "amen",
|
||||||
|
"אם": "em",
|
||||||
|
"אמת": "emet",
|
||||||
|
"ארץ": "eretz",
|
||||||
|
"אש": "esh",
|
||||||
|
"את": "et",
|
||||||
|
"בית": "beit",
|
||||||
|
"בן": "ben",
|
||||||
|
"ברא": "bara",
|
||||||
|
"בראשית": "bereshit",
|
||||||
|
"ברית": "berit",
|
||||||
|
"ברוך": "barukh",
|
||||||
|
"בת": "bat",
|
||||||
|
"גוי": "goy",
|
||||||
|
"גוים": "goyim",
|
||||||
|
"גויים": "goyim",
|
||||||
|
"דבר": "davar",
|
||||||
|
"דברים": "devarim",
|
||||||
|
"דוד": "David",
|
||||||
|
"הלל": "hallel",
|
||||||
|
"הארץ": "ha-aretz",
|
||||||
|
"הברית": "ha-berit",
|
||||||
|
"החדשה": "ha-chadashah",
|
||||||
|
"השמים": "ha-shamayim",
|
||||||
|
"השמיים": "ha-shamayim",
|
||||||
|
"ויאמר": "vayomer",
|
||||||
|
"יהוה": "Adonai",
|
||||||
|
"יוסף": "Yosef",
|
||||||
|
"יוחנן": "Yochanan",
|
||||||
|
"ישראל": "Yisrael",
|
||||||
|
"ישוע": "Yeshua",
|
||||||
|
"יצחק": "Yitzchak",
|
||||||
|
"יעקב": "Yaakov",
|
||||||
|
"ירושלים": "Yerushalayim",
|
||||||
|
"כהן": "kohen",
|
||||||
|
"כהנים": "kohanim",
|
||||||
|
"מים": "mayim",
|
||||||
|
"מות": "mavet",
|
||||||
|
"מושיע": "moshia",
|
||||||
|
"מלך": "melekh",
|
||||||
|
"מלכות": "malkhut",
|
||||||
|
"מרים": "Miriam",
|
||||||
|
"משה": "Moshe",
|
||||||
|
"משיח": "Mashiach",
|
||||||
|
"נביא": "navi",
|
||||||
|
"נביאים": "neviim",
|
||||||
|
"עם": "am",
|
||||||
|
"עולם": "olam",
|
||||||
|
"צדק": "tzedek",
|
||||||
|
"קדוש": "qadosh",
|
||||||
|
"קדושים": "qedoshim",
|
||||||
|
"קול": "qol",
|
||||||
|
"רוח": "ruach",
|
||||||
|
"שאול": "Shaul",
|
||||||
|
"שמים": "shamayim",
|
||||||
|
"שמיים": "shamayim",
|
||||||
|
"שמעון": "Shimon",
|
||||||
|
"שלום": "Shalom",
|
||||||
|
"תורה": "torah",
|
||||||
|
"חכמה": "chokhmah",
|
||||||
|
"חסד": "chesed",
|
||||||
|
"חיים": "chayim",
|
||||||
|
"חושך": "choshekh",
|
||||||
|
}
|
||||||
|
|
||||||
|
# Per-letter Latin stand-ins used by the fallback transliteration when a word
# has no dictionary entry. Final forms are listed separately from their
# regular forms.
HEBREW_LETTERS = {
    "א": "a",  # alef
    "ב": "b",  # bet
    "ג": "g",  # gimel
    "ד": "d",  # dalet
    "ה": "h",  # he
    "ו": "v",  # vav
    "ז": "z",  # zayin
    "ח": "kh",  # chet
    "ט": "t",  # tet
    "י": "y",  # yod
    "כ": "kh",  # kaf
    "ך": "kh",  # final kaf
    "ל": "l",  # lamed
    "מ": "m",  # mem
    "ם": "m",  # final mem
    "נ": "n",  # nun
    "ן": "n",  # final nun
    "ס": "s",  # samekh
    "ע": "a",  # ayin
    "פ": "p",  # pe
    "ף": "f",  # final pe
    "צ": "ts",  # tsadi
    "ץ": "ts",  # final tsadi
    "ק": "q",  # qof
    "ר": "r",  # resh
    "ש": "sh",  # shin
    "ת": "t",  # tav
}

# Latin characters treated as vowels when deciding whether a filler vowel
# must be inserted between two transliterated letters.
HEBREW_VOWELISH = set("aeiou")
|
||||||
|
|
||||||
|
|
||||||
|
def _strip_hebrew_marks(value: str) -> str:
|
||||||
|
decomposed = unicodedata.normalize("NFD", value)
|
||||||
|
return "".join(
|
||||||
|
ch for ch in decomposed
|
||||||
|
if unicodedata.category(ch) != "Mn" and ch not in {"׳", "״", "־"}
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _fallback_hebrew_transliteration(word: str) -> str:
|
||||||
|
tokens: list[str] = []
|
||||||
|
chars = list(word)
|
||||||
|
for index, ch in enumerate(chars):
|
||||||
|
token = HEBREW_LETTERS.get(ch)
|
||||||
|
if token is None:
|
||||||
|
continue
|
||||||
|
if ch == "ה" and index == len(chars) - 1:
|
||||||
|
token = "ah"
|
||||||
|
elif ch == "י" and index > 0:
|
||||||
|
token = "i"
|
||||||
|
elif ch == "ו" and index > 0:
|
||||||
|
token = "o"
|
||||||
|
tokens.append(token)
|
||||||
|
|
||||||
|
if not tokens:
|
||||||
|
return word
|
||||||
|
|
||||||
|
spoken: list[str] = []
|
||||||
|
for index, token in enumerate(tokens):
|
||||||
|
spoken.append(token)
|
||||||
|
next_token = tokens[index + 1] if index + 1 < len(tokens) else ""
|
||||||
|
if (
|
||||||
|
token[-1:] not in HEBREW_VOWELISH
|
||||||
|
and next_token
|
||||||
|
and next_token[:1] not in HEBREW_VOWELISH
|
||||||
|
):
|
||||||
|
spoken.append("a")
|
||||||
|
return "".join(spoken)
|
||||||
|
|
||||||
|
|
||||||
|
def _transliterate_hebrew_word(match: re.Match[str]) -> str:
|
||||||
|
original = match.group(0)
|
||||||
|
normalized = _strip_hebrew_marks(original)
|
||||||
|
if not normalized:
|
||||||
|
return original
|
||||||
|
|
||||||
|
direct = HEBREW_WORD_TRANSLITERATIONS.get(normalized)
|
||||||
|
if direct:
|
||||||
|
return direct
|
||||||
|
|
||||||
|
if normalized.startswith("ו") and len(normalized) > 1:
|
||||||
|
rest = HEBREW_WORD_TRANSLITERATIONS.get(normalized[1:])
|
||||||
|
if rest:
|
||||||
|
return f"ve-{rest}"
|
||||||
|
|
||||||
|
if normalized.startswith("ה") and len(normalized) > 1:
|
||||||
|
rest = HEBREW_WORD_TRANSLITERATIONS.get(normalized[1:])
|
||||||
|
if rest:
|
||||||
|
return f"ha-{rest}"
|
||||||
|
|
||||||
|
return _fallback_hebrew_transliteration(normalized)
|
||||||
|
|
||||||
|
|
||||||
|
def _prepare_synthesis_input(text: str, language: str, voice: str) -> tuple[str, str]:
|
||||||
|
if language.lower().startswith("he") and HEBREW_CHAR_RE.search(text):
|
||||||
|
spoken = HEBREW_WORD_RE.sub(_transliterate_hebrew_word, text)
|
||||||
|
return spoken, "en-us"
|
||||||
|
return text, voice
|
||||||
|
|
||||||
|
|
||||||
def _resolve_voice(req: TtsRequest) -> str:
|
def _resolve_voice(req: TtsRequest) -> str:
|
||||||
if req.voice:
|
if req.voice:
|
||||||
return req.voice.strip()
|
return req.voice.strip()
|
||||||
@@ -115,14 +299,15 @@ def tts(req: TtsRequest) -> Response:
|
|||||||
raise HTTPException(status_code=400, detail="text is required")
|
raise HTTPException(status_code=400, detail="text is required")
|
||||||
|
|
||||||
voice = _resolve_voice(req)
|
voice = _resolve_voice(req)
|
||||||
|
spoken_text, synth_voice = _prepare_synthesis_input(req.text, req.language, voice)
|
||||||
args = [
|
args = [
|
||||||
"--stdout",
|
"--stdout",
|
||||||
"-v", voice,
|
"-v", synth_voice,
|
||||||
"-s", str(max(80, min(450, req.rate))),
|
"-s", str(max(80, min(450, req.rate))),
|
||||||
"-p", str(max(0, min(99, req.pitch))),
|
"-p", str(max(0, min(99, req.pitch))),
|
||||||
"-a", str(max(0, min(200, req.volume))),
|
"-a", str(max(0, min(200, req.volume))),
|
||||||
]
|
]
|
||||||
wav = _run_espeak(args, req.text.encode("utf-8"))
|
wav = _run_espeak(args, spoken_text.encode("utf-8"))
|
||||||
if not wav:
|
if not wav:
|
||||||
raise HTTPException(status_code=500, detail="espeak-ng returned empty stdout")
|
raise HTTPException(status_code=500, detail="espeak-ng returned empty stdout")
|
||||||
return Response(content=wav, media_type="audio/wav")
|
return Response(content=wav, media_type="audio/wav")
|
||||||
@@ -153,9 +338,9 @@ def tts(req: TtsRequest) -> Response:
|
|||||||
PHONEME_DURATION_RE = re.compile(r"^\s*\S+\s+(\d+)\s+", re.MULTILINE)
|
PHONEME_DURATION_RE = re.compile(r"^\s*\S+\s+(\d+)\s+", re.MULTILINE)
|
||||||
|
|
||||||
|
|
||||||
def _estimate_total_ms(req: TtsRequest, voice: str) -> int:
|
def _estimate_total_ms(req: TtsRequest, voice: str, spoken_text: str) -> int:
|
||||||
args = ["--pho", "--quiet", "-v", voice, "-s", str(req.rate)]
|
args = ["--pho", "--quiet", "-v", voice, "-s", str(req.rate)]
|
||||||
out = _run_espeak(args, req.text.encode("utf-8"))
|
out = _run_espeak(args, spoken_text.encode("utf-8"))
|
||||||
text = out.decode("utf-8", errors="replace")
|
text = out.decode("utf-8", errors="replace")
|
||||||
total = 0
|
total = 0
|
||||||
for match in PHONEME_DURATION_RE.finditer(text):
|
for match in PHONEME_DURATION_RE.finditer(text):
|
||||||
@@ -175,7 +360,8 @@ def timings(req: TtsRequest):
|
|||||||
if not req.text.strip():
|
if not req.text.strip():
|
||||||
raise HTTPException(status_code=400, detail="text is required")
|
raise HTTPException(status_code=400, detail="text is required")
|
||||||
voice = _resolve_voice(req)
|
voice = _resolve_voice(req)
|
||||||
total_ms = _estimate_total_ms(req, voice)
|
spoken_text, synth_voice = _prepare_synthesis_input(req.text, req.language, voice)
|
||||||
|
total_ms = _estimate_total_ms(req, synth_voice, spoken_text)
|
||||||
|
|
||||||
# Distribute total_ms across whitespace-split words proportional to
|
# Distribute total_ms across whitespace-split words proportional to
|
||||||
# character count. Punctuation-only tokens are folded into the previous
|
# character count. Punctuation-only tokens are folded into the previous
|
||||||
@@ -204,7 +390,7 @@ def timings(req: TtsRequest):
|
|||||||
{
|
{
|
||||||
"text": req.text,
|
"text": req.text,
|
||||||
"language": req.language,
|
"language": req.language,
|
||||||
"voice": voice,
|
"voice": synth_voice,
|
||||||
"words": out_words,
|
"words": out_words,
|
||||||
"durationMs": total_ms,
|
"durationMs": total_ms,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -37,6 +37,19 @@ spec:
|
|||||||
app.kubernetes.io/name: ttsreader-piper
|
app.kubernetes.io/name: ttsreader-piper
|
||||||
app.kubernetes.io/part-of: flowercore
|
app.kubernetes.io/part-of: flowercore
|
||||||
spec:
|
spec:
|
||||||
|
# Bypass CoreDNS's *.iamworkin.lan wildcard so the init container reaches
|
||||||
|
# huggingface.co directly when it seeds voice models.
|
||||||
|
dnsPolicy: None
|
||||||
|
dnsConfig:
|
||||||
|
nameservers:
|
||||||
|
- 10.43.0.10
|
||||||
|
searches:
|
||||||
|
- fc-ttsreader.svc.cluster.local
|
||||||
|
- svc.cluster.local
|
||||||
|
- cluster.local
|
||||||
|
options:
|
||||||
|
- name: ndots
|
||||||
|
value: "2"
|
||||||
initContainers:
|
initContainers:
|
||||||
- name: seed-voices
|
- name: seed-voices
|
||||||
image: rhasspy/wyoming-piper:latest
|
image: rhasspy/wyoming-piper:latest
|
||||||
@@ -296,14 +309,23 @@ spec:
|
|||||||
periodSeconds: 10
|
periodSeconds: 10
|
||||||
timeoutSeconds: 5
|
timeoutSeconds: 5
|
||||||
failureThreshold: 18
|
failureThreshold: 18
|
||||||
|
# Sprint E Phase 1a (kokoro stability) — 4 restarts in 2d6h with
|
||||||
|
# exit 143 traced to liveness probe `context deadline exceeded` while
|
||||||
|
# kokoro was busy synthesizing. /v1/audio/voices shares the FastAPI
|
||||||
|
# worker pool with /v1/audio/speech, so a long synth can starve the
|
||||||
|
# probe out within the prior 5s × 3 = 15s window. Bump timeoutSeconds
|
||||||
|
# 5 → 15 and failureThreshold 3 → 5, giving 15s × 5 = 75s grace before kubelet kills
|
||||||
|
# the pod. The TtsCircuitBreaker on the synthesizer side (Phase 1b)
|
||||||
|
# backs this up so the FC backend stops slamming kokoro during
|
||||||
|
# recovery.
|
||||||
livenessProbe:
|
livenessProbe:
|
||||||
httpGet:
|
httpGet:
|
||||||
path: /v1/audio/voices
|
path: /v1/audio/voices
|
||||||
port: 8880
|
port: 8880
|
||||||
initialDelaySeconds: 180
|
initialDelaySeconds: 180
|
||||||
periodSeconds: 30
|
periodSeconds: 30
|
||||||
timeoutSeconds: 5
|
timeoutSeconds: 15
|
||||||
failureThreshold: 3
|
failureThreshold: 5
|
||||||
---
|
---
|
||||||
# fc-biblical-tts — eSpeak-NG-backed Ancient Greek + Hebrew TTS with
|
# fc-biblical-tts — eSpeak-NG-backed Ancient Greek + Hebrew TTS with
|
||||||
# word-level timing for read-along playback. Companion to ttsreader-kokoro
|
# word-level timing for read-along playback. Companion to ttsreader-kokoro
|
||||||
@@ -337,7 +359,7 @@ spec:
|
|||||||
runAsUser: 1654
|
runAsUser: 1654
|
||||||
containers:
|
containers:
|
||||||
- name: biblical-tts
|
- name: biblical-tts
|
||||||
image: localhost/fc-biblical-tts:v1
|
image: localhost/fc-biblical-tts:v20260506-hebrew-translit
|
||||||
imagePullPolicy: Never
|
imagePullPolicy: Never
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 10402
|
- containerPort: 10402
|
||||||
@@ -510,7 +532,7 @@ spec:
|
|||||||
fsGroupChangePolicy: OnRootMismatch
|
fsGroupChangePolicy: OnRootMismatch
|
||||||
containers:
|
containers:
|
||||||
- name: web
|
- name: web
|
||||||
image: localhost/fc-ttsreader-web:v202604252002
|
image: localhost/fc-ttsreader-web:v20260603-s54cx14-pr29-schema
|
||||||
imagePullPolicy: Never
|
imagePullPolicy: Never
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 5217
|
- containerPort: 5217
|
||||||
@@ -528,12 +550,20 @@ spec:
|
|||||||
value: "/usr/bin/ffmpeg"
|
value: "/usr/bin/ffmpeg"
|
||||||
- name: TtsReader__Bible__CorpusRoot
|
- name: TtsReader__Bible__CorpusRoot
|
||||||
value: "/data/corpus-cache/world-english-bible/eng/usx"
|
value: "/data/corpus-cache/world-english-bible/eng/usx"
|
||||||
|
- name: TtsReader__ChapterContext__DatabasePath
|
||||||
|
value: "/data/chapter-context.db"
|
||||||
- name: TtsReader__Jobs__Root
|
- name: TtsReader__Jobs__Root
|
||||||
value: "/data/jobs"
|
value: "/data/jobs"
|
||||||
|
- name: TtsReader__Export__LocalCasRoot
|
||||||
|
value: "/data/bundles/cas"
|
||||||
- name: TtsReader__Piper__Host
|
- name: TtsReader__Piper__Host
|
||||||
value: "ttsreader-piper.fc-ttsreader.svc.cluster.local."
|
value: "10.0.57.17"
|
||||||
- name: TtsReader__Piper__Port
|
- name: TtsReader__Piper__Port
|
||||||
value: "10200"
|
value: "8500"
|
||||||
|
- name: TtsReader__Piper__Transport
|
||||||
|
value: "http"
|
||||||
|
- name: TtsReader__Piper__HttpPath
|
||||||
|
value: "/tts"
|
||||||
- name: TtsReader__Kokoro__Enabled
|
- name: TtsReader__Kokoro__Enabled
|
||||||
value: "true"
|
value: "true"
|
||||||
- name: TtsReader__Kokoro__BaseUrl
|
- name: TtsReader__Kokoro__BaseUrl
|
||||||
@@ -544,6 +574,14 @@ spec:
|
|||||||
value: "http://ttsreader-kokoro.fc-ttsreader.svc.cluster.local.:8880"
|
value: "http://ttsreader-kokoro.fc-ttsreader.svc.cluster.local.:8880"
|
||||||
- name: TtsReader__Kokoro__TimeoutSeconds
|
- name: TtsReader__Kokoro__TimeoutSeconds
|
||||||
value: "120"
|
value: "120"
|
||||||
|
- name: FlowerCore__Tts__BiblicalTts__Enabled
|
||||||
|
value: "true"
|
||||||
|
- name: FlowerCore__Tts__BiblicalTts__BaseUrl
|
||||||
|
value: "http://ttsreader-biblical.fc-ttsreader.svc.cluster.local.:10402"
|
||||||
|
- name: FlowerCore__Tts__BiblicalTts__TimeoutSeconds
|
||||||
|
value: "60"
|
||||||
|
- name: FlowerCore__Tts__BiblicalTts__DefaultLanguage
|
||||||
|
value: "grc"
|
||||||
- name: Speech__Alignment__Enabled
|
- name: Speech__Alignment__Enabled
|
||||||
# Cluster-native faster-whisper (Lane F, 2026-04-25). The
|
# Cluster-native faster-whisper (Lane F, 2026-04-25). The
|
||||||
# ttsreader-align deployment in this manifest wraps
|
# ttsreader-align deployment in this manifest wraps
|
||||||
@@ -573,6 +611,21 @@ spec:
|
|||||||
value: "/data/logs"
|
value: "/data/logs"
|
||||||
- name: TtsReader__Runtime__SmokeStatePath
|
- name: TtsReader__Runtime__SmokeStatePath
|
||||||
value: "/data/ops/smoke-status.json"
|
value: "/data/ops/smoke-status.json"
|
||||||
|
# Sprint E Day 8 voice-preview disk cache — writes WAVs under
|
||||||
|
# this directory. Default "data/voice-previews" resolves to
|
||||||
|
# the read-only $HOME path under runAsNonRoot=true. Pin to
|
||||||
|
# the writable PVC mount.
|
||||||
|
- name: TtsReader__Preview__CacheDirectory
|
||||||
|
value: "/data/voice-previews"
|
||||||
|
- name: TtsReader__VoiceLibrary__ReferenceClip__Directory
|
||||||
|
value: "/data/voice-reference-clips"
|
||||||
|
# Sprint E XXL Phase 4γ — content-addressed CDN bundle dir for
|
||||||
|
# POST /api/v1/render. Default "wwwroot/cdn" resolves under the
|
||||||
|
# read-only app filesystem, so pin to the writable PVC mount
|
||||||
|
# alongside other TtsReader runtime data. Manifests + cue audio
|
||||||
|
# land at /data/cdn/sha256/<hash>/manifest.json + cues/.
|
||||||
|
- name: TtsReader__Render__CdnDirectory
|
||||||
|
value: "/data/cdn"
|
||||||
- name: Auth__ApiKey
|
- name: Auth__ApiKey
|
||||||
valueFrom:
|
valueFrom:
|
||||||
secretKeyRef:
|
secretKeyRef:
|
||||||
@@ -587,7 +640,10 @@ spec:
|
|||||||
optional: true
|
optional: true
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 100m
|
# The cluster is currently saturated on requested CPU by
|
||||||
|
# remotedesktop workloads even when real usage is low.
|
||||||
|
# Keep the web frontend schedulable under that pressure.
|
||||||
|
cpu: 10m
|
||||||
memory: 256Mi
|
memory: 256Mi
|
||||||
limits:
|
limits:
|
||||||
cpu: 500m
|
cpu: 500m
|
||||||
|
|||||||
47
apps/fc-updater/README.md
Normal file
47
apps/fc-updater/README.md
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
# fc-updater — Update Center GitOps adoption
|
||||||
|
|
||||||
|
**Status:** adopted into `bluejay-infra` on 2026-05-06. The live ArgoCD
|
||||||
|
Application is `infra-fc-updater`, generated by the `bluejay-infra`
|
||||||
|
ApplicationSet with automated sync, `prune: true`, and `selfHeal: true`.
|
||||||
|
|
||||||
|
## Managed manifest set
|
||||||
|
|
||||||
|
`apps/fc-updater/fc-updater.yaml` manages:
|
||||||
|
|
||||||
|
- `Namespace/fc-updater`
|
||||||
|
- `PersistentVolumeClaim/updatecenter-data`
|
||||||
|
- `Deployment/updatecenter-web`
|
||||||
|
- `Service/updatecenter-web`
|
||||||
|
- `Certificate/updatecenter-web-tls`
|
||||||
|
- `Certificate/updatecenter-web-internal-tls`
|
||||||
|
- `IngressRoute/updatecenter-web`
|
||||||
|
- `IngressRoute/updatecenter-web-internal`
|
||||||
|
- `IngressRoute/updatecenter-web-public`
|
||||||
|
|
||||||
|
The Deployment intentionally sets `revisionHistoryLimit: 3` and
|
||||||
|
`strategy.type: Recreate`. The service is a singleton backed by SQLite/local bundle
|
||||||
|
storage on `PersistentVolumeClaim/updatecenter-data`, pinned to
|
||||||
|
`rke2-server`.
|
||||||
|
|
||||||
|
## Runtime dependencies intentionally not stored here
|
||||||
|
|
||||||
|
These live Secrets are pre-existing runtime material and are not committed to
|
||||||
|
Git:
|
||||||
|
|
||||||
|
- `updater-bootstrap-auth`
|
||||||
|
- `updater-signing`
|
||||||
|
- `updater-webhooks`
|
||||||
|
- `cf-origin-flowercore-io`
|
||||||
|
|
||||||
|
Rotate the Cloudflare Origin Certificate through
|
||||||
|
`FlowerCore.Notes/docs/standards/code-signing-rotation-runbook.md`; the
|
||||||
|
shared origin cert must exist in every namespace that serves a
|
||||||
|
`*.flowercore.io` public IngressRoute.
|
||||||
|
|
||||||
|
## Verification
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
kubectl.exe --kubeconfig C:\Users\AndrewStoltz\.kube\rke2.yaml -n argocd get application infra-fc-updater
|
||||||
|
kubectl.exe --kubeconfig C:\Users\AndrewStoltz\.kube\rke2.yaml -n fc-updater get deploy,svc,ingressroute,certificate,pvc
|
||||||
|
curl.exe -sk https://update.flowercore.io/api/v1/manifests/_schema
|
||||||
|
```
|
||||||
271
apps/fc-updater/fc-updater.yaml
Normal file
271
apps/fc-updater/fc-updater.yaml
Normal file
@@ -0,0 +1,271 @@
|
|||||||
|
# FlowerCore Update Center
|
||||||
|
# GitOps adoption of the live fc-updater namespace after PUB-1/PUB-3.
|
||||||
|
# Runtime credentials remain in existing K8s Secrets; do not store them here.
|
||||||
|
---
|
||||||
|
# Namespace that the Update Center resources in this manifest are created in.
apiVersion: v1
kind: Namespace
metadata:
  name: fc-updater
  labels:
    app.kubernetes.io/part-of: flowercore
|
||||||
|
---
|
||||||
|
# Longhorn-backed ReadWriteOnce volume claimed by the updatecenter-web pod.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: updatecenter-data
  namespace: fc-updater
  labels:
    app.kubernetes.io/name: updatecenter-web
    app.kubernetes.io/part-of: flowercore
spec:
  storageClassName: longhorn
  volumeMode: Filesystem
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      # Sizing rationale: fleet bundle storage is soft-capped at 25 GiB
      # (LocalFsBundleStore.MaxTotalBytes, per
      # project_uc_remaining_4_apps_signed_2026_05_06) and the Mike Bundle
      # alone is ~5.1 GiB. The live claim was already expanded by hand to
      # 20 GiB; because a PVC can never shrink, the size tracked in git has
      # to be at least the live size or the app loops in OutOfSync.
      storage: 25Gi
|
||||||
|
---
|
||||||
|
# Single-replica Update Center web app. State (SQLite DB + bundles) lives on
# the updatecenter-data PVC mounted at /data; credentials come from existing
# Secrets via secretKeyRef and are never inlined here.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: updatecenter-web
  namespace: fc-updater
  labels:
    app: updatecenter-web
    app.kubernetes.io/name: updatecenter-web
    app.kubernetes.io/part-of: flowercore
spec:
  replicas: 1
  revisionHistoryLimit: 3
  strategy:
    # The SQLite file and local bundle store share one RWO PVC, so the old
    # pod must be gone before the new one starts; Recreate guarantees two
    # pods never write to the same path during an image bump.
    type: Recreate
  selector:
    matchLabels:
      app: updatecenter-web
  template:
    metadata:
      labels:
        app: updatecenter-web
    spec:
      # Hard pin to one node (bypasses the scheduler).
      nodeName: rke2-server
      containers:
        - name: web
          image: localhost/fc-updater-web:v202605310029-7974fc4
          # Never pull: the image has to be present on the node already.
          imagePullPolicy: Never
          ports:
            - name: http
              containerPort: 8080
          env:
            # --- Hosting / storage -------------------------------------
            - name: ASPNETCORE_URLS
              value: "http://+:8080"
            - name: FlowerCore__Updater__Database__Provider
              value: "sqlite"
            - name: FlowerCore__Updater__Database__ConnectionString
              value: "Data Source=/data/updatecenter.db"
            - name: FlowerCore__Updater__BundleStorage__LocalFs__RootDirectory
              value: "/data/bundles"
            # --- Public share link #0 ----------------------------------
            - name: FlowerCore__Updater__PublicShares__RequirePublicVisibilityOnPublicHosts
              value: "true"
            - name: FlowerCore__Updater__PublicShares__Links__0__Code
              value: "8f3c2a9e7d41"
            - name: FlowerCore__Updater__PublicShares__Links__0__AppId
              value: "flowercore.faith-ai-mike"
            - name: FlowerCore__Updater__PublicShares__Links__0__Channel
              value: "stable"
            - name: FlowerCore__Updater__PublicShares__Links__0__RuntimeId
              value: "win-x64"
            - name: FlowerCore__Updater__PublicShares__Links__0__DisplayName
              value: "Faith AI Mike Edition"
            - name: FlowerCore__Updater__PublicShares__Links__0__Headline
              value: "Faith AI Mike Edition"
            - name: FlowerCore__Updater__PublicShares__Links__0__Description
              value: "Private release link for Mike's Faith AI bundle."
            # --- Audit --------------------------------------------------
            - name: FlowerCore__Audit__Sinks__Loki__Enabled
              value: "false"
            # --- Bootstrap auth (Secret: updater-bootstrap-auth) -------
            - name: FlowerCore__Updater__Auth__Bootstrap__Enabled
              value: "true"
            - name: FlowerCore__Updater__Auth__Bootstrap__Username
              valueFrom:
                secretKeyRef:
                  name: updater-bootstrap-auth
                  key: username
            - name: FlowerCore__Updater__Auth__Bootstrap__Password
              valueFrom:
                secretKeyRef:
                  name: updater-bootstrap-auth
                  key: password
            - name: FlowerCore__Updater__Auth__Bootstrap__SigningKey
              valueFrom:
                secretKeyRef:
                  name: updater-bootstrap-auth
                  key: signing-key
            # --- Bundle signing (Secret: updater-signing) --------------
            - name: FlowerCore__Updater__Signing__AutoSignOnPublish
              value: "true"
            - name: FlowerCore__Updater__Signing__RequireSignatureOnPublish
              value: "true"
            - name: FlowerCore__Updater__Signing__PfxBase64
              valueFrom:
                secretKeyRef:
                  name: updater-signing
                  key: pfx-base64
            - name: FlowerCore__Updater__Signing__PfxPassword
              valueFrom:
                secretKeyRef:
                  name: updater-signing
                  key: pfx-password
            - name: FlowerCore__Updater__Signing__OpItemReference
              value: "op://FlowerCore/step-ca-codesign"
            # Points at the file projected by the `signing` volume below.
            - name: FlowerCore__Updater__Signing__TrustAnchorPath
              value: "/etc/flowercore-updater/signing/root-ca.pem"
            # --- Forge integrations (Secret: updater-webhooks) ---------
            - name: FlowerCore__Updater__GitHub__Token
              valueFrom:
                secretKeyRef:
                  name: updater-webhooks
                  key: github-token
            - name: FlowerCore__Updater__GitHub__WebhookSecret
              valueFrom:
                secretKeyRef:
                  name: updater-webhooks
                  key: github-webhook-secret
            - name: FlowerCore__Updater__Gitea__Token
              valueFrom:
                secretKeyRef:
                  name: updater-webhooks
                  key: gitea-token
            - name: FlowerCore__Updater__Gitea__WebhookSecret
              valueFrom:
                secretKeyRef:
                  name: updater-webhooks
                  key: gitea-webhook-secret
          # Both probes are plain TCP connects to the named `http` port.
          readinessProbe:
            tcpSocket:
              port: http
            initialDelaySeconds: 10
            periodSeconds: 15
          livenessProbe:
            tcpSocket:
              port: http
            initialDelaySeconds: 30
            periodSeconds: 30
          volumeMounts:
            - name: data
              mountPath: /data
            - name: signing
              mountPath: /etc/flowercore-updater/signing
              readOnly: true
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: updatecenter-data
        # Only the root CA key of the signing Secret is projected as a file;
        # the PFX material is consumed through env vars above.
        - name: signing
          secret:
            secretName: updater-signing
            items:
              - key: root-ca.pem
                path: root-ca.pem
|
||||||
|
---
|
||||||
|
# ClusterIP front for the updatecenter-web pods; the IngressRoutes in this
# manifest target it on port 8080.
apiVersion: v1
kind: Service
metadata:
  name: updatecenter-web
  namespace: fc-updater
  labels:
    app: updatecenter-web
    app.kubernetes.io/name: updatecenter-web
    app.kubernetes.io/part-of: flowercore
spec:
  type: ClusterIP
  selector:
    app: updatecenter-web
  ports:
    - name: http
      port: 8080
      # Resolved against the container port named `http`.
      targetPort: http
|
||||||
|
---
|
||||||
|
# cert-manager certificate for the two LAN hostnames, issued by the
# step-ca-acme ClusterIssuer and stored in Secret updatecenter-web-tls.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: updatecenter-web-tls
  namespace: fc-updater
spec:
  secretName: updatecenter-web-tls
  issuerRef:
    kind: ClusterIssuer
    name: step-ca-acme
  dnsNames:
    - updatecenter.iamworkin.lan
    - updates.iamworkin.lan
|
||||||
|
---
|
||||||
|
# cert-manager certificate for the internal-only hostname, issued by the
# step-ca-acme ClusterIssuer and stored in Secret
# updatecenter-web-internal-tls.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: updatecenter-web-internal-tls
  namespace: fc-updater
spec:
  secretName: updatecenter-web-internal-tls
  issuerRef:
    kind: ClusterIssuer
    name: step-ca-acme
  dnsNames:
    - updatecenter-internal.iamworkin.lan
|
||||||
|
---
|
||||||
|
# LAN route for updatecenter/updates.iamworkin.lan. Only GET, HEAD, POST and
# OPTIONS requests match; TLS uses the updatecenter-web-tls Secret.
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
  name: updatecenter-web
  namespace: fc-updater
spec:
  entryPoints:
    - web
    - websecure
  routes:
    - kind: Rule
      # Folded scalar: the two lines join with a single space into one rule.
      match: >-
        (Host(`updatecenter.iamworkin.lan`) || Host(`updates.iamworkin.lan`)) &&
        (Method(`GET`) || Method(`HEAD`) || Method(`POST`) || Method(`OPTIONS`))
      services:
        - name: updatecenter-web
          port: 8080
  tls:
    secretName: updatecenter-web-tls
|
||||||
|
---
|
||||||
|
# Internal route for updatecenter-internal.iamworkin.lan. Unlike the other
# two routes it matches on host only, with no HTTP-method restriction.
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
  name: updatecenter-web-internal
  namespace: fc-updater
spec:
  entryPoints:
    - web
    - websecure
  routes:
    - kind: Rule
      match: 'Host(`updatecenter-internal.iamworkin.lan`)'
      services:
        - name: updatecenter-web
          port: 8080
  tls:
    secretName: updatecenter-web-internal-tls
|
||||||
|
---
|
||||||
|
# Public route for update/updates.flowercore.io, HTTPS entry point only.
# Only GET, HEAD, POST and OPTIONS requests match. TLS is served from the
# pre-existing cf-origin-flowercore-io Secret, which is not defined in this
# manifest.
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
  name: updatecenter-web-public
  namespace: fc-updater
spec:
  entryPoints:
    - websecure
  routes:
    - kind: Rule
      # Folded scalar: the two lines join with a single space into one rule.
      match: >-
        (Host(`update.flowercore.io`) || Host(`updates.flowercore.io`)) &&
        (Method(`GET`) || Method(`HEAD`) || Method(`POST`) || Method(`OPTIONS`))
      services:
        - name: updatecenter-web
          port: 8080
  tls:
    secretName: cf-origin-flowercore-io
|
||||||
7
apps/fc-updater/kustomization.yaml
Normal file
7
apps/fc-updater/kustomization.yaml
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
# The bluejay-infra ApplicationSet discovers this app through a directory
# generator, so ArgoCD does not need a kustomization.yaml here. The file is
# kept on purpose: it doubles as the manifest inventory and lets you preview
# the output locally with `kubectl kustomize apps/fc-updater`.
# NOTE(review): Argo CD normally auto-detects a kustomization.yaml and renders
# the directory with Kustomize unless the Application source pins another
# tool — confirm `resources` stays in sync with the manifests in this folder.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - fc-updater.yaml
|
||||||
@@ -1,5 +1,10 @@
|
|||||||
# FlowerCore Tenant — flowercore.io (main brand)
|
# FlowerCore Tenant — retired flowercore.io placeholder.
|
||||||
# Public-facing placeholder landing page served by nginx
|
#
|
||||||
|
# Public flowercore.io/www.flowercore.io routing is now owned by
|
||||||
|
# apps/fc-landing/fc-landing.yaml. This tenant placeholder remains available
|
||||||
|
# only as an in-cluster service; do not create a duplicate public
|
||||||
|
# IngressRoute here because it competes with fc-landing and requires a
|
||||||
|
# namespace-local cf-origin-flowercore-io Secret.
|
||||||
# ArgoCD managed - BlueJay Lab
|
# ArgoCD managed - BlueJay Lab
|
||||||
---
|
---
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
@@ -10,12 +15,6 @@ metadata:
|
|||||||
app.kubernetes.io/part-of: bluejay-infra
|
app.kubernetes.io/part-of: bluejay-infra
|
||||||
flowercore.io/tenant: flowercore
|
flowercore.io/tenant: flowercore
|
||||||
---
|
---
|
||||||
# NOTE: The existing cf-origin-flowercore-io secret (covering *.flowercore.io)
|
|
||||||
# must be copied into this namespace. It already exists in other namespaces.
|
|
||||||
# Copy with: kubectl get secret cf-origin-flowercore-io -n fc-system -o yaml \
|
|
||||||
# | sed 's/namespace: .*/namespace: tenant-flowercore/' \
|
|
||||||
# | kubectl apply -f -
|
|
||||||
---
|
|
||||||
# Landing page HTML
|
# Landing page HTML
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: ConfigMap
|
kind: ConfigMap
|
||||||
@@ -311,22 +310,3 @@ spec:
|
|||||||
- port: 80
|
- port: 80
|
||||||
targetPort: 80
|
targetPort: 80
|
||||||
name: http
|
name: http
|
||||||
---
|
|
||||||
# Traefik IngressRoute — public via Cloudflare
|
|
||||||
# Uses existing cf-origin-flowercore-io cert (must be copied to this namespace)
|
|
||||||
apiVersion: traefik.io/v1alpha1
|
|
||||||
kind: IngressRoute
|
|
||||||
metadata:
|
|
||||||
name: flowercore-web
|
|
||||||
namespace: tenant-flowercore
|
|
||||||
spec:
|
|
||||||
entryPoints:
|
|
||||||
- websecure
|
|
||||||
routes:
|
|
||||||
- match: Host(`flowercore.io`) || Host(`www.flowercore.io`)
|
|
||||||
kind: Rule
|
|
||||||
services:
|
|
||||||
- name: flowercore-web
|
|
||||||
port: 80
|
|
||||||
tls:
|
|
||||||
secretName: cf-origin-flowercore-io
|
|
||||||
|
|||||||
2
apps/github-runner/.gitattributes
vendored
Normal file
2
apps/github-runner/.gitattributes
vendored
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
*.sh text eol=lf
|
||||||
|
Dockerfile text eol=lf
|
||||||
54
apps/github-runner/Dockerfile
Normal file
54
apps/github-runner/Dockerfile
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
# Self-hosted GitHub Actions runner image: the upstream runner plus the
# IAmWorkin step-ca root CA and a prebuilt Ruby in a baked toolcache.
FROM myoung34/github-runner:latest

# Build-time knobs: Ruby patch/minor version, the ruby-build release used to
# compile it, and the uid/gid handed to the toolcache install script.
ARG RUBY_VERSION=3.3.11
ARG RUBY_MINOR=3.3
ARG RUBY_BUILD_VERSION=v20260326
ARG RUNNER_UID=1001
ARG RUNNER_GID=1001

# RUNNER_TOOL_CACHE is the runtime toolcache location; RUNNER_RUBY_TOOLCACHE
# is the copy baked into the image. Both Ruby bin dirs go on PATH, with the
# runtime location first.
ENV RUNNER_TOOL_CACHE=/home/runner/_tool
ENV RUNNER_RUBY_TOOLCACHE=/opt/runner-toolcache
ENV PATH="/home/runner/_tool/Ruby/${RUBY_MINOR}/x64/bin:/opt/runner-toolcache/Ruby/${RUBY_MINOR}/x64/bin:${PATH}"

USER root

# Trust the IAmWorkin step-ca root CA system-wide. The stock Ubuntu bundle in
# the runner image does not contain our internal root, so .NET HttpClient
# calls from CI tests to *.iamworkin.lan (for example
# https://selenium.iamworkin.lan/session) otherwise fail with `PartialChain`.
# update-ca-certificates (run in the next step) rebuilds
# /etc/ssl/certs/ca-certificates.crt, which OpenSSL and .NET on Linux pick up
# on their own — no SSL_CERT_FILE env var required.
COPY step-ca-root.crt /usr/local/share/ca-certificates/iamworkin-step-ca-root.crt

# One layer: install the Ruby build prerequisites, register the CA copied
# above, install the pinned ruby-build release, then clean up the download
# and apt lists.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        autoconf \
        bison \
        build-essential \
        ca-certificates \
        curl \
        libdb-dev \
        libffi-dev \
        libgdbm-dev \
        libgmp-dev \
        libncurses-dev \
        libreadline-dev \
        libssl-dev \
        libyaml-dev \
        patch \
        pkg-config \
        uuid-dev \
        zlib1g-dev && \
    update-ca-certificates && \
    curl -fsSL "https://github.com/rbenv/ruby-build/archive/refs/tags/${RUBY_BUILD_VERSION}.tar.gz" -o /tmp/ruby-build.tar.gz && \
    mkdir -p /tmp/ruby-build && \
    tar -xzf /tmp/ruby-build.tar.gz --strip-components=1 -C /tmp/ruby-build && \
    /tmp/ruby-build/install.sh && \
    rm -rf /tmp/ruby-build /tmp/ruby-build.tar.gz /var/lib/apt/lists/*

COPY install-ruby-toolcache.sh /usr/local/bin/install-ruby-toolcache.sh

# Build Ruby into the baked toolcache with the build args passed through as
# environment, then run `ruby -v` so the build fails if Ruby is not runnable
# from PATH.
RUN chmod +x /usr/local/bin/install-ruby-toolcache.sh && \
    RUBY_VERSION="${RUBY_VERSION}" \
    RUBY_MINOR="${RUBY_MINOR}" \
    TOOLCACHE_ROOT="${RUNNER_RUBY_TOOLCACHE}" \
    RUNNER_UID="${RUNNER_UID}" \
    RUNNER_GID="${RUNNER_GID}" \
    /usr/local/bin/install-ruby-toolcache.sh && \
    ruby -v
|
||||||
133
apps/github-runner/README.md
Normal file
133
apps/github-runner/README.md
Normal file
@@ -0,0 +1,133 @@
|
|||||||
|
# GitHub Runner Fleet
|
||||||
|
|
||||||
|
ArgoCD owns `apps/github-runner/github-runner.yaml`. Do not patch live runner
|
||||||
|
Deployments with `kubectl`; update this manifest and let ArgoCD reconcile.
|
||||||
|
|
||||||
|
## Runner Shape
|
||||||
|
|
||||||
|
All repo-scoped Linux runners use:
|
||||||
|
|
||||||
|
- `localhost/fc-github-runner:v20260525-ruby3.3.11-stepca`, derived from
|
||||||
|
`myoung34/github-runner:latest`
|
||||||
|
- `ACCESS_TOKEN` from the `github-runner-token` Secret
|
||||||
|
- `RUN_AS_ROOT=false`
|
||||||
|
- `EPHEMERAL=true`
|
||||||
|
- `LABELS=self-hosted,linux,fc-build-linux`
|
||||||
|
- writable non-root paths under `/home/runner` for .NET, NuGet, XDG cache, and
|
||||||
|
Actions tool cache
|
||||||
|
- Ruby 3.3.11 seeded into `/home/runner/_tool/Ruby/3.3/x64` from the baked
|
||||||
|
`/opt/runner-toolcache` copy so `ruby/setup-ruby@v1` can discover it on
|
||||||
|
self-hosted `ubuntu-20.04-x64` runners
|
||||||
|
|
||||||
|
`github-runner` for `FlowerCore.Common` is single-replica because it retains the
|
||||||
|
original Longhorn ReadWriteOnce NuGet PVC. Every other repo-scoped runner uses
|
||||||
|
two replicas with per-pod `emptyDir` caches. That is the safe backlog-drain
|
||||||
|
strategy: no two pods share one RWO PVC.
|
||||||
|
|
||||||
|
Sprint 32 final long-tail wave adds 16 two-replica Deployments:
|
||||||
|
`FlowerCore.Knowledge`, `FlowerCore.LlmBridge`, `FlowerCore.Media`,
|
||||||
|
`FlowerCore.Presentations`, `FlowerCore.RemoteDesktop`, `FlowerCore.DNS`,
|
||||||
|
`FlowerCore.Distribution`, `FlowerCore.Scoreboard`,
|
||||||
|
`FlowerCore.SegmentDisplay`, `FlowerCore.Signage.Contracts`,
|
||||||
|
`FlowerCore.SignalControl`, `FlowerCore.Intranet.Web`,
|
||||||
|
`FlowerCore.Provisioning`, `FlowerCore.Redis`, `FlowerCore.MessageBoard`, and
|
||||||
|
`FlowerCore.MenuBoard`.
|
||||||
|
|
||||||
|
## Image Build
|
||||||
|
|
||||||
|
Ruby is baked with a pinned `ruby-build` release and Ruby patch version. The pod
|
||||||
|
still mounts an `emptyDir` over `/home/runner`, so the `setup-runner-home` init
|
||||||
|
container copies the baked toolcache from `/opt/runner-toolcache/Ruby` into
|
||||||
|
`/home/runner/_tool/Ruby` before the runner container starts.
|
||||||
|
|
||||||
|
The IAmWorkin step-ca root CA is also baked into the system trust store
|
||||||
|
(`/usr/local/share/ca-certificates/iamworkin-step-ca-root.crt`, registered by
|
||||||
|
`update-ca-certificates`). Without it, .NET HttpClient calls from CI tests
|
||||||
|
against `*.iamworkin.lan` (e.g. `https://selenium.iamworkin.lan/session`)
|
||||||
|
fail with `PartialChain`. To refresh the bundled cert when the root rotates,
|
||||||
|
re-extract from the cluster and overwrite `step-ca-root.crt`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
kubectl get secret -n cert-manager step-ca-root \
|
||||||
|
-o jsonpath='{.data.ca\.crt}' | base64 -d > step-ca-root.crt
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd apps/github-runner
|
||||||
|
podman build -t localhost/fc-github-runner:v20260525-ruby3.3.11-stepca .
|
||||||
|
podman run --rm localhost/fc-github-runner:v20260525-ruby3.3.11-stepca ruby -v
|
||||||
|
podman run --rm localhost/fc-github-runner:v20260525-ruby3.3.11-stepca \
|
||||||
|
test -f /opt/runner-toolcache/Ruby/3.3/x64.complete
|
||||||
|
podman save localhost/fc-github-runner:v20260525-ruby3.3.11-stepca \
|
||||||
|
-o fc-github-runner-v20260525-ruby3.3.11-stepca.tar
|
||||||
|
```
|
||||||
|
|
||||||
|
Import the saved image on every schedulable RKE2 node before ArgoCD rolls the
|
||||||
|
Deployments:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
for node in rke2-server rke2-agent1 rke2-agent2; do
|
||||||
|
scp fc-github-runner-v20260525-ruby3.3.11-stepca.tar "$node:/tmp/"
|
||||||
|
ssh "$node" 'sudo ctr -a /run/k3s/containerd/containerd.sock -n k8s.io images rm localhost/fc-github-runner:v20260525-ruby3.3.11-stepca || true'
|
||||||
|
ssh "$node" 'sudo ctr -a /run/k3s/containerd/containerd.sock -n k8s.io images import /tmp/fc-github-runner-v20260525-ruby3.3.11-stepca.tar'
|
||||||
|
done
|
||||||
|
```
|
||||||
|
|
||||||
|
## Post-Merge Proof
|
||||||
|
|
||||||
|
After the PR is merged and ArgoCD syncs, verify the runner fleet:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
kubectl -n github-runner get deploy,pods,pvc
|
||||||
|
```
|
||||||
|
|
||||||
|
Verify the Ruby toolcache in a fresh pod:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
kubectl -n github-runner exec deploy/github-runner-puppet -c runner -- ruby -v
|
||||||
|
kubectl -n github-runner exec deploy/github-runner-puppet -c runner -- sh -c \
|
||||||
|
'echo "$RUNNER_TOOL_CACHE" && test -f "$RUNNER_TOOL_CACHE/Ruby/3.3/x64.complete"'
|
||||||
|
```
|
||||||
|
|
||||||
|
Verify GitHub registration for the repo-scoped runners:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
for repo in FlowerCore.Common FlowerCore.Shared.Pos FlowerCore.Puppet FlowerCore.Signage \
|
||||||
|
FlowerCore.DMS FlowerCore.Telephony FlowerCore.Print.Web FlowerCore.Chat \
|
||||||
|
FlowerCore.MySQL FlowerCore.Kiosk.Linux FlowerCore.Marquee FlowerCore.TtsReader \
|
||||||
|
FlowerCore.Knowledge FlowerCore.LlmBridge FlowerCore.Media \
|
||||||
|
FlowerCore.Presentations FlowerCore.RemoteDesktop FlowerCore.DNS \
|
||||||
|
FlowerCore.Distribution FlowerCore.Scoreboard FlowerCore.SegmentDisplay \
|
||||||
|
FlowerCore.Signage.Contracts FlowerCore.SignalControl FlowerCore.Intranet.Web \
|
||||||
|
FlowerCore.Provisioning FlowerCore.Redis FlowerCore.MessageBoard \
|
||||||
|
FlowerCore.MenuBoard; do
|
||||||
|
echo "=== $repo ==="
|
||||||
|
gh api "/repos/astoltz/$repo/actions/runners" \
|
||||||
|
--jq '.runners[] | select(.labels[].name == "fc-build-linux") | {name,status,busy,labels:[.labels[].name]}'
|
||||||
|
done
|
||||||
|
```
|
||||||
|
|
||||||
|
Shared.Pos publish proof after the runner pod is online:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
gh run list --repo astoltz/FlowerCore.Shared.Pos \
|
||||||
|
--workflow "Build, Test & Publish" --branch main --limit 5
|
||||||
|
```
|
||||||
|
|
||||||
|
If the latest run is still queued after runner registration, rerun the workflow
|
||||||
|
from GitHub Actions and verify it lands on an `rke2-linux-*` runner.
|
||||||
|
|
||||||
|
## Failure Notes
|
||||||
|
|
||||||
|
- `actions/setup-dotnet` permission error at `/usr/share/dotnet`: check that
|
||||||
|
`DOTNET_INSTALL_DIR=/home/runner/.dotnet` and related cache env vars are
|
||||||
|
present on the runner pod.
|
||||||
|
- `ruby/setup-ruby@v1` says self-hosted runners must install Ruby in
|
||||||
|
`$RUNNER_TOOL_CACHE`: check that the init container copied
|
||||||
|
`/opt/runner-toolcache/Ruby` into `/home/runner/_tool/Ruby` and that
|
||||||
|
`/home/runner/_tool/Ruby/3.3/x64.complete` exists.
|
||||||
|
- `404` during runner registration: the fine-grained PAT is valid but missing
|
||||||
|
repository access for that repo. Add the repo to the PAT access list; the PAT
|
||||||
|
value does not change.
|
||||||
|
- `Multi-Attach` volume error: only the Common runner uses a RWO PVC and it must
|
||||||
|
stay single-replica. New multi-replica runners use `emptyDir`.
|
||||||
4592
apps/github-runner/github-runner.yaml
Normal file
4592
apps/github-runner/github-runner.yaml
Normal file
File diff suppressed because it is too large
Load Diff
19
apps/github-runner/install-ruby-toolcache.sh
Normal file
19
apps/github-runner/install-ruby-toolcache.sh
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
RUBY_VERSION="${RUBY_VERSION:-3.3.11}"
|
||||||
|
RUBY_MINOR="${RUBY_MINOR:-3.3}"
|
||||||
|
TOOLCACHE_ROOT="${TOOLCACHE_ROOT:-/opt/runner-toolcache}"
|
||||||
|
RUNNER_UID="${RUNNER_UID:-1001}"
|
||||||
|
RUNNER_GID="${RUNNER_GID:-1001}"
|
||||||
|
RUBY_PREFIX="${TOOLCACHE_ROOT}/Ruby/${RUBY_VERSION}/x64"
|
||||||
|
|
||||||
|
mkdir -p "${TOOLCACHE_ROOT}/Ruby"
|
||||||
|
RUBY_CONFIGURE_OPTS="${RUBY_CONFIGURE_OPTS:---disable-install-doc --disable-yjit}" ruby-build "${RUBY_VERSION}" "${RUBY_PREFIX}"
|
||||||
|
|
||||||
|
touch "${TOOLCACHE_ROOT}/Ruby/${RUBY_VERSION}/x64.complete"
|
||||||
|
ln -sfn "${RUBY_VERSION}" "${TOOLCACHE_ROOT}/Ruby/${RUBY_MINOR}"
|
||||||
|
|
||||||
|
"${RUBY_PREFIX}/bin/ruby" -v
|
||||||
|
chown -R "${RUNNER_UID}:${RUNNER_GID}" "${TOOLCACHE_ROOT}"
|
||||||
|
chmod -R a+rX "${TOOLCACHE_ROOT}"
|
||||||
12
apps/github-runner/step-ca-root.crt
Normal file
12
apps/github-runner/step-ca-root.crt
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
-----BEGIN CERTIFICATE-----
|
||||||
|
MIIBxDCCAWqgAwIBAgIRAPY357G6ow6zMAL5+4bS2kkwCgYIKoZIzj0EAwIwQDEa
|
||||||
|
MBgGA1UEChMRSUFtV29ya2luIEFDTUUgQ0ExIjAgBgNVBAMTGUlBbVdvcmtpbiBB
|
||||||
|
Q01FIENBIFJvb3QgQ0EwHhcNMjYwMzA4MTgwNzExWhcNMzYwMzA1MTgwNzExWjBA
|
||||||
|
MRowGAYDVQQKExFJQW1Xb3JraW4gQUNNRSBDQTEiMCAGA1UEAxMZSUFtV29ya2lu
|
||||||
|
IEFDTUUgQ0EgUm9vdCBDQTBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABJ2n04X1
|
||||||
|
JZo5Zdq/i1Idv8+fqwZyAzBh7whbqj0SWsJL8UWRabCMqYCs7+dXO0xRSzqkwFDL
|
||||||
|
x+vooOai8RgRNhajRTBDMA4GA1UdDwEB/wQEAwIBBjASBgNVHRMBAf8ECDAGAQH/
|
||||||
|
AgEBMB0GA1UdDgQWBBRnuPPQR6iM/H6vOluiU3Sygayz8jAKBggqhkjOPQQDAgNI
|
||||||
|
ADBFAiEArQK9dYPGmAZsdYnjziuFVVE5NKZUcceYvGfGC+tLXUsCIAudF2zJrCRq
|
||||||
|
3mK50ZZET/fwTkJwiEF4824mjP8p1CKM
|
||||||
|
-----END CERTIFICATE-----
|
||||||
@@ -465,6 +465,23 @@ metadata:
|
|||||||
spec:
|
spec:
|
||||||
itemPath: vaults/IAmWorkin/items/Guacamole JSON Auth
|
itemPath: vaults/IAmWorkin/items/Guacamole JSON Auth
|
||||||
---
|
---
|
||||||
|
---
|
||||||
|
# 1Password-backed credentials for Mac mini VNC access (Phase 1 — 2026-04-28)
|
||||||
|
# The operator mints Secret 'macmini-vnc-creds' with keys: username, password, VNC Password
|
||||||
|
# Note: '1Password' field label 'VNC Password' -> K8s Secret key 'VNC Password' (space retained)
|
||||||
|
# Guacamole VNC connection password is sourced from the 'VNC Password' field.
|
||||||
|
# Actual IP is 10.0.56.115 (INFRA VLAN) — the 1P item 'IP' field is kept as a backup reference.
|
||||||
|
apiVersion: onepassword.com/v1
|
||||||
|
kind: OnePasswordItem
|
||||||
|
metadata:
|
||||||
|
name: macmini-vnc-creds
|
||||||
|
namespace: guacamole
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/component: credentials
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
spec:
|
||||||
|
itemPath: vaults/IAmWorkin/items/Mac Mini
|
||||||
|
---
|
||||||
# Blue Jay Branding Extension (CSS + translations)
|
# Blue Jay Branding Extension (CSS + translations)
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: ConfigMap
|
kind: ConfigMap
|
||||||
|
|||||||
@@ -16,6 +16,15 @@ spec:
|
|||||||
requests:
|
requests:
|
||||||
storage: 1Gi
|
storage: 1Gi
|
||||||
---
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: intranet-config
|
||||||
|
namespace: intranet
|
||||||
|
data:
|
||||||
|
KnowledgeApiKey: ""
|
||||||
|
TrustedHeaderSharedSecret: ""
|
||||||
|
---
|
||||||
apiVersion: apps/v1
|
apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
metadata:
|
metadata:
|
||||||
@@ -37,7 +46,7 @@ spec:
|
|||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: intranet-web
|
- name: intranet-web
|
||||||
image: localhost/fc-intranet-web:v202604242354overridefix
|
image: localhost/fc-intranet-web:v20260531-ttsreader-bridge
|
||||||
imagePullPolicy: Never
|
imagePullPolicy: Never
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 5300
|
- containerPort: 5300
|
||||||
@@ -52,6 +61,27 @@ spec:
|
|||||||
# in minutes. Memory: feedback_pi5_nomic_embed_slow.
|
# in minutes. Memory: feedback_pi5_nomic_embed_slow.
|
||||||
- name: IntranetSearch__OllamaBaseUrl
|
- name: IntranetSearch__OllamaBaseUrl
|
||||||
value: "http://10.0.56.20:11434"
|
value: "http://10.0.56.20:11434"
|
||||||
|
# Sprint E Phase 2α — JSON-file-backed PageReadingOverride persistence
|
||||||
|
# on the writable PVC at /data. Without this env var the
|
||||||
|
# intranet falls back to the in-memory store (loses state on
|
||||||
|
# pod restart). Master's PageReadingOverrideOptions binds
|
||||||
|
# PageReadingOverrides:FilePath.
|
||||||
|
- name: PageReadingOverrides__FilePath
|
||||||
|
value: "/data/page-reading-overrides.json"
|
||||||
|
- name: KnowledgeFleetSearch__BaseUrl
|
||||||
|
value: "https://knowledge.iamworkin.lan"
|
||||||
|
- name: KnowledgeFleetSearch__ApiKey
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: intranet-config
|
||||||
|
key: KnowledgeApiKey
|
||||||
|
optional: true
|
||||||
|
- name: TrustedHeaderAuthentication__SharedSecret
|
||||||
|
valueFrom:
|
||||||
|
configMapKeyRef:
|
||||||
|
name: intranet-config
|
||||||
|
key: TrustedHeaderSharedSecret
|
||||||
|
optional: true
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
memory: "256Mi"
|
memory: "256Mi"
|
||||||
|
|||||||
@@ -1,7 +1,13 @@
|
|||||||
# knowledge — FlowerCore.Knowledge.Web (Phase 2.4 K8s deploy)
|
# knowledge — FlowerCore.Knowledge.Web (Phase 2.4 K8s deploy)
|
||||||
|
|
||||||
**Status:** manifests staged, **NOT YET APPLIED**. Image must be built +
|
**Status:** **LIVE 2026-04-27** at `https://knowledge.iamworkin.lan` —
|
||||||
imported AND DNS record provisioned before `git push`.
|
Phase 2.4 closed. Pod running, certificate issued (step-ca-acme), PVC
|
||||||
|
bound (Longhorn 20Gi RWO), ArgoCD `infra-knowledge` synced. `/healthz`
|
||||||
|
returns 200, `/api/v1/editions` returns `[]` (initial-deploy state — no
|
||||||
|
*.db files in the PVC yet; Phase 2.5+ admin UI handles bulk
|
||||||
|
population). Phase 1 of the Agent Zero MCP rollout keeps `/healthz`
|
||||||
|
anonymous and gates `/mcp` behind `Authorization: Bearer <token>` built
|
||||||
|
from the 1Password item `FlowerCore Knowledge MCP Tokens`.
|
||||||
|
|
||||||
- Plan: [`../../../FlowerCore.Notes/docs/ai-agents/flowercore-knowledge-service-plan.md`](../../../FlowerCore.Notes/docs/ai-agents/flowercore-knowledge-service-plan.md)
|
- Plan: [`../../../FlowerCore.Notes/docs/ai-agents/flowercore-knowledge-service-plan.md`](../../../FlowerCore.Notes/docs/ai-agents/flowercore-knowledge-service-plan.md)
|
||||||
- Sprint: [`../../../FlowerCore.Notes/docs/ai-station/sprint-e-xxl-plan.md`](../../../FlowerCore.Notes/docs/ai-station/sprint-e-xxl-plan.md) (Track B)
|
- Sprint: [`../../../FlowerCore.Notes/docs/ai-station/sprint-e-xxl-plan.md`](../../../FlowerCore.Notes/docs/ai-station/sprint-e-xxl-plan.md) (Track B)
|
||||||
@@ -15,6 +21,12 @@ search to the rest of the FC ecosystem (Agent Zero, Chat.Web persona
|
|||||||
memory, AiStation embeddings explorer, TtsReader chapter context, BMO
|
memory, AiStation embeddings explorer, TtsReader chapter context, BMO
|
||||||
bot, Pi nodes via `fc-index sync`).
|
bot, Pi nodes via `fc-index sync`).
|
||||||
|
|
||||||
|
Phase 1 MCP routing is explicit:
|
||||||
|
|
||||||
|
- in-cluster Agent Zero → `http://knowledge-web.knowledge.svc/mcp`
|
||||||
|
- workstation Agent Zero → `https://knowledge.iamworkin.lan/mcp`
|
||||||
|
- probe URL for both lanes → `/healthz`
|
||||||
|
|
||||||
## Deployment order (do NOT skip / reorder)
|
## Deployment order (do NOT skip / reorder)
|
||||||
|
|
||||||
### 1. FlowerCore.DNS public A record — knowledge.iamworkin.lan -> 10.0.56.200
|
### 1. FlowerCore.DNS public A record — knowledge.iamworkin.lan -> 10.0.56.200
|
||||||
|
|||||||
@@ -40,6 +40,25 @@ metadata:
|
|||||||
labels:
|
labels:
|
||||||
app.kubernetes.io/part-of: bluejay-infra
|
app.kubernetes.io/part-of: bluejay-infra
|
||||||
---
|
---
|
||||||
|
# MCP bearer token for the read-only Agent Zero Phase 1 lane. The 1Password
|
||||||
|
# item currently stores the raw token in its concealed PASSWORD field, which
|
||||||
|
# the operator syncs into the namespaced Secret key `password`.
|
||||||
|
apiVersion: onepassword.com/v1
|
||||||
|
kind: OnePasswordItem
|
||||||
|
metadata:
|
||||||
|
name: knowledge-mcp-tokens
|
||||||
|
namespace: knowledge
|
||||||
|
spec:
|
||||||
|
itemPath: "vaults/IAmWorkin/items/FlowerCore Knowledge MCP Tokens"
|
||||||
|
---
|
||||||
|
apiVersion: onepassword.com/v1
|
||||||
|
kind: OnePasswordItem
|
||||||
|
metadata:
|
||||||
|
name: knowledge-oidc-client
|
||||||
|
namespace: knowledge
|
||||||
|
spec:
|
||||||
|
itemPath: "vaults/IAmWorkin/items/knowledge-oidc-client"
|
||||||
|
---
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: PersistentVolumeClaim
|
kind: PersistentVolumeClaim
|
||||||
metadata:
|
metadata:
|
||||||
@@ -91,8 +110,17 @@ spec:
|
|||||||
- name: web
|
- name: web
|
||||||
# Placeholder tag — bump to the image you built + imported to ALL
|
# Placeholder tag — bump to the image you built + imported to ALL
|
||||||
# RKE2 nodes via scripts/deploy-knowledge.sh before applying.
|
# RKE2 nodes via scripts/deploy-knowledge.sh before applying.
|
||||||
image: localhost/fc-knowledge-web:v202604272200
|
image: localhost/fc-knowledge-web:v20260603-oidc-authentik-auditfix
|
||||||
imagePullPolicy: Never
|
imagePullPolicy: Never
|
||||||
|
command:
|
||||||
|
- /bin/sh
|
||||||
|
- -c
|
||||||
|
args:
|
||||||
|
- |
|
||||||
|
if [ -n "${KNOWLEDGE_MCP_BEARER_TOKEN:-}" ]; then
|
||||||
|
export FlowerCore__Mcp__ApiKey__Key="Bearer ${KNOWLEDGE_MCP_BEARER_TOKEN}"
|
||||||
|
fi
|
||||||
|
exec dotnet FlowerCore.Knowledge.Web.dll
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 8080
|
- containerPort: 8080
|
||||||
name: http
|
name: http
|
||||||
@@ -103,8 +131,40 @@ spec:
|
|||||||
value: "Production"
|
value: "Production"
|
||||||
- name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
|
- name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
|
||||||
value: "false"
|
value: "false"
|
||||||
|
# Authentik/OIDC is wired but not enforced until the
|
||||||
|
# knowledge-oidc-client Secret is provisioned. Service-to-service
|
||||||
|
# RAG keeps the existing MCP token as FlowerCore:Auth:ApiKey.
|
||||||
|
- name: FlowerCore__Auth__Enabled
|
||||||
|
value: "true"
|
||||||
|
- name: FlowerCore__Auth__Oidc__Enabled
|
||||||
|
value: "true"
|
||||||
|
- name: FlowerCore__Auth__Oidc__Authority
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: knowledge-oidc-client
|
||||||
|
key: issuer_url
|
||||||
|
optional: true
|
||||||
|
- name: FlowerCore__Auth__Oidc__Audience
|
||||||
|
value: "knowledge"
|
||||||
|
- name: FlowerCore__Auth__Oidc__ClientId
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: knowledge-oidc-client
|
||||||
|
key: client_id
|
||||||
|
optional: true
|
||||||
|
- name: FlowerCore__Auth__Oidc__ClientSecret
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: knowledge-oidc-client
|
||||||
|
key: client_secret
|
||||||
|
optional: true
|
||||||
|
- name: FlowerCore__Auth__ApiKey
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: knowledge-mcp-tokens
|
||||||
|
key: password
|
||||||
# Vector-store directory + embedding model + edition profile dir.
|
# Vector-store directory + embedding model + edition profile dir.
|
||||||
# Profile JSON is baked into the image at /app/editions via the
|
# Profile JSON is baked into the image at /home/app/editions via the
|
||||||
# csproj Content-link from FlowerCore.Common/editions/.
|
# csproj Content-link from FlowerCore.Common/editions/.
|
||||||
- name: Knowledge__VectorStoresDirectory
|
- name: Knowledge__VectorStoresDirectory
|
||||||
value: "/data/vector-stores"
|
value: "/data/vector-stores"
|
||||||
@@ -114,13 +174,30 @@ spec:
|
|||||||
value: "5"
|
value: "5"
|
||||||
- name: Knowledge__MaxLimit
|
- name: Knowledge__MaxLimit
|
||||||
value: "50"
|
value: "50"
|
||||||
|
- name: Knowledge__Federation__DatabasePath
|
||||||
|
value: "/data/vector-stores/knowledge-federation.db"
|
||||||
- name: FlowerCore__Editions__ProfileDirectory
|
- name: FlowerCore__Editions__ProfileDirectory
|
||||||
value: "/app/editions"
|
value: "/home/app/editions"
|
||||||
# Embed via BLUEJAY-WS GPU (R9700, 32GB VRAM). Pi5 Ollama is
|
# Embed via edge1 Pi 5 + AI HAT+ (10.0.57.17:11434). Cluster
|
||||||
# ~4-5x slower; use the workstation while we have it.
|
# services do not depend on BLUEJAY-WS (private dev hardware) per
|
||||||
# Memory: feedback_pi5_nomic_embed_slow.
|
# bluejay-infra@0f9d56e. Query-time embedding is fast enough on
|
||||||
|
# edge1 (~ms per query); bulk index rebuilds (Phase 2.5+) will
|
||||||
|
# need a separate ingestion lane that can opt into the
|
||||||
|
# workstation GPU when present.
|
||||||
- name: FlowerCore__Ollama__BaseUrl
|
- name: FlowerCore__Ollama__BaseUrl
|
||||||
value: "http://10.0.56.20:11434"
|
value: "http://10.0.57.17:11434"
|
||||||
|
- name: FlowerCore__Mcp__ApiKey__Key
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: knowledge-mcp-tokens
|
||||||
|
key: password
|
||||||
|
- name: FlowerCore__Mcp__ApiKey__HeaderName
|
||||||
|
value: "Authorization"
|
||||||
|
- name: KNOWLEDGE_MCP_BEARER_TOKEN
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: knowledge-mcp-tokens
|
||||||
|
key: password
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 100m
|
cpu: 100m
|
||||||
@@ -166,7 +243,7 @@ spec:
|
|||||||
- name: tmp
|
- name: tmp
|
||||||
mountPath: /tmp
|
mountPath: /tmp
|
||||||
- name: logs
|
- name: logs
|
||||||
mountPath: /app/logs
|
mountPath: /home/app/logs
|
||||||
volumes:
|
volumes:
|
||||||
- name: vector-store
|
- name: vector-store
|
||||||
persistentVolumeClaim:
|
persistentVolumeClaim:
|
||||||
@@ -206,8 +283,12 @@ spec:
|
|||||||
kind: ClusterIssuer
|
kind: ClusterIssuer
|
||||||
dnsNames:
|
dnsNames:
|
||||||
- knowledge.iamworkin.lan
|
- knowledge.iamworkin.lan
|
||||||
duration: 2160h # 90d
|
# step-ca ACME caps lifetime at 30d; the requested 90d, silently capped,
|
||||||
renewBefore: 720h # 30d
|
# made renewBefore=cert-lifetime → perpetual renewal loop (10888+ CRs
|
||||||
|
# in 18h on 2026-05-07). Match working 720h/240h pattern from other
|
||||||
|
# FC services.
|
||||||
|
duration: 720h # 30d (step-ca cap)
|
||||||
|
renewBefore: 240h # 10d
|
||||||
---
|
---
|
||||||
apiVersion: traefik.io/v1alpha1
|
apiVersion: traefik.io/v1alpha1
|
||||||
kind: IngressRoute
|
kind: IngressRoute
|
||||||
|
|||||||
93
apps/kubevirt-vms/ci1.yaml
Normal file
93
apps/kubevirt-vms/ci1.yaml
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
# =============================================================================
|
||||||
|
# ci1 - Windows Server 2025 KubeVirt VM (GitHub Actions Self-Hosted Runner)
|
||||||
|
# =============================================================================
|
||||||
|
# Boots from the sysprepped containerDisk template built by the Windows VM
|
||||||
|
# sysprep pipeline. See docs/infrastructure/windows-vm-sysprep-pipeline.md.
|
||||||
|
# Path A/B/C install history is preserved in git log only.
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Namespace
|
||||||
|
metadata:
|
||||||
|
name: kubevirt-vms
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/part-of: kubevirt-stack
|
||||||
|
pod-security.kubernetes.io/enforce: privileged
|
||||||
|
|
||||||
|
---
|
||||||
|
apiVersion: kubevirt.io/v1
|
||||||
|
kind: VirtualMachine
|
||||||
|
metadata:
|
||||||
|
name: ci1
|
||||||
|
namespace: kubevirt-vms
|
||||||
|
labels:
|
||||||
|
app: ci-runner
|
||||||
|
role: github-actions-runner
|
||||||
|
flowercore.io/managed-by: bluejay-infra
|
||||||
|
spec:
|
||||||
|
runStrategy: Halted
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: ci-runner
|
||||||
|
role: github-actions-runner
|
||||||
|
kubevirt.io/vm: ci1
|
||||||
|
spec:
|
||||||
|
domain:
|
||||||
|
cpu:
|
||||||
|
cores: 8
|
||||||
|
sockets: 1
|
||||||
|
threads: 1
|
||||||
|
memory:
|
||||||
|
guest: 16Gi
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
memory: 16Gi
|
||||||
|
limits:
|
||||||
|
memory: 16Gi
|
||||||
|
clock:
|
||||||
|
utc: {}
|
||||||
|
timer:
|
||||||
|
hpet:
|
||||||
|
present: false
|
||||||
|
pit:
|
||||||
|
tickPolicy: delay
|
||||||
|
rtc:
|
||||||
|
tickPolicy: catchup
|
||||||
|
hyperv: {}
|
||||||
|
features:
|
||||||
|
acpi: {}
|
||||||
|
apic: {}
|
||||||
|
hyperv:
|
||||||
|
relaxed: {}
|
||||||
|
vapic: {}
|
||||||
|
spinlocks:
|
||||||
|
spinlocks: 8191
|
||||||
|
smm: {}
|
||||||
|
firmware:
|
||||||
|
bootloader:
|
||||||
|
efi:
|
||||||
|
secureBoot: false
|
||||||
|
devices:
|
||||||
|
tpm: {}
|
||||||
|
disks:
|
||||||
|
- name: rootdisk
|
||||||
|
disk:
|
||||||
|
bus: virtio
|
||||||
|
interfaces:
|
||||||
|
# Pod-network fallback for CI runner outbound traffic. Switch to
|
||||||
|
# prod-vlan57 once the bridge/NAD lane is ready for L2 access.
|
||||||
|
- name: default
|
||||||
|
masquerade: {}
|
||||||
|
model: virtio
|
||||||
|
machine:
|
||||||
|
type: q35
|
||||||
|
networks:
|
||||||
|
- name: default
|
||||||
|
pod: {}
|
||||||
|
volumes:
|
||||||
|
- name: rootdisk
|
||||||
|
containerDisk:
|
||||||
|
image: localhost/fc-win-server-2025:v1
|
||||||
|
imagePullPolicy: Never
|
||||||
|
terminationGracePeriodSeconds: 3600
|
||||||
3
apps/kubevirt-vms/kustomization.yaml
Normal file
3
apps/kubevirt-vms/kustomization.yaml
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
resources:
|
||||||
|
- ci1.yaml
|
||||||
|
- prod-vlan57-nad.yaml
|
||||||
69
apps/kubevirt-vms/prod-vlan57-nad.yaml
Normal file
69
apps/kubevirt-vms/prod-vlan57-nad.yaml
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
# =============================================================================
|
||||||
|
# NetworkAttachmentDefinition — PROD VLAN 57 bridge
|
||||||
|
# =============================================================================
|
||||||
|
# Purpose: makes KubeVirt VMs reachable on the PROD VLAN (10.0.57.0/24)
|
||||||
|
# alongside the existing pod network. Required for ci1 to bridge onto PROD
|
||||||
|
# (e.g. to provision/scrape edge1, edge2, kiosks, Pis on the same L2 segment).
|
||||||
|
#
|
||||||
|
# **DEPLOY GATE — Phase 1.5 host work required first**:
|
||||||
|
# On every RKE2 node (rke2-server, rke2-agent1, rke2-agent2):
|
||||||
|
# 1. Switch port (UniFi USL16LP) trunks VLAN 57 to the node — usually
|
||||||
|
# already true since BLUEJAY-WS reaches 10.0.57.x services. Verify
|
||||||
|
# with `ip link show enp86s0.57` after configuring sub-interface, OR
|
||||||
|
# `tcpdump -ni enp86s0 vlan 57` and ping a known PROD host.
|
||||||
|
# 2. Linux bridge `br-prod` enslaving `enp86s0.57` (VLAN sub-interface).
|
||||||
|
# NetworkManager profile examples in the runbook below.
|
||||||
|
# 3. Verify Multus DaemonSet `kube-multus-ds` is Ready on all nodes.
|
||||||
|
#
|
||||||
|
# Without those, applying this NAD only creates the NetworkAttachmentDefinition object; no VM can attach to it.
|
||||||
|
# A VM that requests this NAD with no bridge present will fail with:
|
||||||
|
# `error adding pod kubevirt-vms_ci1 to CNI network "prod-vlan57": failed to
|
||||||
|
# plumb VLAN: open /sys/class/net/br-prod/master: no such file or directory`
|
||||||
|
#
|
||||||
|
# Configuration notes:
|
||||||
|
# - cniVersion 0.3.1 to match Multus daemon-config.json
|
||||||
|
# - mtu 1500 (matches enp86s0 default; bump if jumbo frames configured)
|
||||||
|
# - bridge name `br-prod` is convention; if Puppet picks a different name
|
||||||
|
# (e.g. `br57`, `br-vlan57`), edit BOTH this NAD and the ci1.yaml
|
||||||
|
# interface block. Keep them in sync.
|
||||||
|
# - vlan: 0 because the host bridge already strips VLAN tag (br-prod sits
|
||||||
|
# on top of `enp86s0.57`). If we instead used a VLAN-aware bridge with
|
||||||
|
# trunk port, set vlan: 57 here. Current convention is VLAN-stripped at
|
||||||
|
# the sub-interface, so the bridge passes untagged frames.
|
||||||
|
#
|
||||||
|
# Apply:
|
||||||
|
# kubectl --kubeconfig $env:USERPROFILE\.kube\rke2.yaml apply -f apps/kubevirt-vms/prod-vlan57-nad.yaml
|
||||||
|
#
|
||||||
|
# Then update ci1.yaml networks: stanza to:
|
||||||
|
# - name: prod-net
|
||||||
|
# multus:
|
||||||
|
# networkName: kubevirt-vms/prod-vlan57
|
||||||
|
# and the interface block from `masquerade` to `bridge`.
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
---
|
||||||
|
# Namespace must exist already (created by ci1.yaml's first document).
|
||||||
|
# This file imports a NAD into that same namespace.
|
||||||
|
apiVersion: k8s.cni.cncf.io/v1
|
||||||
|
kind: NetworkAttachmentDefinition
|
||||||
|
metadata:
|
||||||
|
name: prod-vlan57
|
||||||
|
namespace: kubevirt-vms
|
||||||
|
annotations:
|
||||||
|
bluejay.iamworkin.lan/host-bridge: "br-prod (enslaves enp86s0.57)"
|
||||||
|
bluejay.iamworkin.lan/cidr: "10.0.57.0/24"
|
||||||
|
bluejay.iamworkin.lan/gateway: "10.0.57.1"
|
||||||
|
bluejay.iamworkin.lan/dns: "10.0.56.1 (pfSense Unbound)"
|
||||||
|
spec:
|
||||||
|
config: |
|
||||||
|
{
|
||||||
|
"cniVersion": "0.3.1",
|
||||||
|
"name": "prod-vlan57",
|
||||||
|
"type": "bridge",
|
||||||
|
"bridge": "br-prod",
|
||||||
|
"ipam": {},
|
||||||
|
"mtu": 1500,
|
||||||
|
"vlan": 0,
|
||||||
|
"promiscMode": true,
|
||||||
|
"preserveDefaultVlan": false
|
||||||
|
}
|
||||||
99
apps/kubevirt-vms/win2025-iso-nfs-pv.yaml
Normal file
99
apps/kubevirt-vms/win2025-iso-nfs-pv.yaml
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
# =============================================================================
|
||||||
|
# Windows Server 2025 ISO — Static NFS PV (Path B for SATA-CDROM timeout)
|
||||||
|
# =============================================================================
|
||||||
|
# Purpose: Mount the ISO from Synology NAS via NFS instead of from a Longhorn-
|
||||||
|
# backed Filesystem PVC.
|
||||||
|
#
|
||||||
|
# Why: SATA-CDROM emulation reading from a Longhorn-backed Filesystem PVC is
|
||||||
|
# too slow for OVMF's boot read window — the DVD-ROM enumeration times out
|
||||||
|
# before the bootloader can be read. Symptom on the serial console:
|
||||||
|
# BdsDxe: failed to start Boot0001 "UEFI QEMU DVD-ROM QM00001 " from ...
|
||||||
|
# BdsDxe: failed to start Boot0001 ... Time out
|
||||||
|
# BdsDxe: No bootable option or device was found
|
||||||
|
# Diagnosis confirmed the ISO content is a perfectly valid bootable ISO9660
|
||||||
|
# image — the bug is in the timing path between OVMF and Longhorn-backed
|
||||||
|
# storage, not in the ISO itself.
|
||||||
|
#
|
||||||
|
# Block-mode PVC was tried (`volumeMode: Block` via DataVolume) and would
|
||||||
|
# likely fix the timing, but CDI v1.65.0's upload-target pod cannot open the
|
||||||
|
# block device due to runAsUser:107 + capabilities.drop:[ALL] and we got:
|
||||||
|
# blockdev: cannot open /dev/cdi-block-volume: Permission denied
|
||||||
|
#
|
||||||
|
# NFS-mounted ISO bypasses both issues: no Longhorn slowness, no CDI upload
|
||||||
|
# pod permission concerns. The ISO is read directly from the NAS over a
|
||||||
|
# native NFSv4.1 mount that QEMU's SATA emulator can read at full LAN speed.
|
||||||
|
#
|
||||||
|
# Layout on Synology:
|
||||||
|
# /volume1/ISOs/ (existing export, RKE2 ACL)
|
||||||
|
# en-us_windows_server_2025_updated_march_2026_x64_dvd_8e06425a.iso
|
||||||
|
# win2025-iso-disk/ (new subdir, 2026-05-08)
|
||||||
|
# disk.img -> hardlink to ../en-us_windows_server_2025_..._8e06425a.iso
|
||||||
|
#
|
||||||
|
# KubeVirt's launcher pod expects a PVC mounted at
|
||||||
|
# /var/run/kubevirt-private/vmi-disks/<diskName>/disk.img — by mounting the
|
||||||
|
# `win2025-iso-disk/` subdir as the NFS PV root, `disk.img` lives at the PV's
|
||||||
|
# root and KubeVirt's CDROM emulator finds it without any path manipulation.
|
||||||
|
#
|
||||||
|
# A symlink would NOT work for sub-path NFS mounts (the relative target
|
||||||
|
# `../...iso` falls outside the sub-mount root). A hardlink works because it
|
||||||
|
# references the same inode regardless of mount point.
|
||||||
|
#
|
||||||
|
# Memory references:
|
||||||
|
# - feedback_synology_nfs_volume1_kubernetes_export_scoped (Synology export
|
||||||
|
# scoping pattern — but /volume1/ISOs export, unlike /volume1/kubernetes,
|
||||||
|
# does support sub-path mounts because Synology NFS is configured with
|
||||||
|
# pseudo-fs in NFSv4.1)
|
||||||
|
# - feedback_kubevirt_iso_first_install_bootorder_and_runstrategy (boot
|
||||||
|
# order / runStrategy gotchas, separate from the storage timing issue)
|
||||||
|
#
|
||||||
|
# Validation (2026-05-08, from rke2-server / rke2-agent1 / rke2-agent2):
|
||||||
|
# mount -t nfs -o nfsvers=4.1,ro 10.0.58.3:/volume1/ISOs/win2025-iso-disk /tmp/m
|
||||||
|
# file /tmp/m/disk.img
|
||||||
|
# -> ISO 9660 CD-ROM filesystem data 'SSS_X64FRE_EN-US_DV9' (bootable)
|
||||||
|
# All 3 RKE2 nodes can mount and read.
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolume
|
||||||
|
metadata:
|
||||||
|
name: windows-server-2025-iso-nfs
|
||||||
|
labels:
|
||||||
|
flowercore.io/iso: windows-server-2025
|
||||||
|
flowercore.io/managed-by: bluejay-infra
|
||||||
|
spec:
|
||||||
|
capacity:
|
||||||
|
storage: 8Gi
|
||||||
|
accessModes:
|
||||||
|
- ReadOnlyMany
|
||||||
|
volumeMode: Filesystem
|
||||||
|
persistentVolumeReclaimPolicy: Retain
|
||||||
|
storageClassName: "" # static, no provisioner
|
||||||
|
mountOptions:
|
||||||
|
- nfsvers=4.1
|
||||||
|
- ro
|
||||||
|
- hard
|
||||||
|
- timeo=600
|
||||||
|
- retrans=3
|
||||||
|
nfs:
|
||||||
|
server: 10.0.58.3 # BlueJayNAS Synology DS1621+ on HOME VLAN 58
|
||||||
|
path: /volume1/ISOs/win2025-iso-disk
|
||||||
|
readOnly: true
|
||||||
|
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: windows-server-2025-iso-nfs
|
||||||
|
namespace: kubevirt-vms
|
||||||
|
labels:
|
||||||
|
app: ci-runner
|
||||||
|
flowercore.io/managed-by: bluejay-infra
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadOnlyMany
|
||||||
|
volumeMode: Filesystem
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 8Gi
|
||||||
|
storageClassName: ""
|
||||||
|
volumeName: windows-server-2025-iso-nfs
|
||||||
@@ -207,20 +207,13 @@ spec:
|
|||||||
- port: 993
|
- port: 993
|
||||||
targetPort: 993
|
targetPort: 993
|
||||||
name: imaps
|
name: imaps
|
||||||
---
|
# --- mail-tls Certificate REMOVED 2026-06-01 ---
|
||||||
# TLS Certificate via cert-manager
|
# mail-tls is now managed OUTSIDE cert-manager: issued from step-ca's JWK 'admin'
|
||||||
apiVersion: cert-manager.io/v1
|
# provisioner and auto-renewed by a systemd timer on noc1 (step ca renew), which
|
||||||
kind: Certificate
|
# writes the mail-tls secret directly. step-ca-acme only has an HTTP-01 (Traefik)
|
||||||
metadata:
|
# solver, but mail.iamworkin.lan must resolve to the dedicated MetalLB IP 10.0.56.202
|
||||||
name: mail-tls
|
# (SMTP/IMAP), so HTTP-01 cannot validate. Do NOT re-add a cert-manager Certificate
|
||||||
namespace: mail
|
# here unless a DNS-01 solver is deployed for step-ca-acme.
|
||||||
spec:
|
|
||||||
secretName: mail-tls
|
|
||||||
issuerRef:
|
|
||||||
name: step-ca-acme
|
|
||||||
kind: ClusterIssuer
|
|
||||||
dnsNames:
|
|
||||||
- mail.iamworkin.lan
|
|
||||||
---
|
---
|
||||||
# Traefik IngressRoute - Webmail placeholder
|
# Traefik IngressRoute - Webmail placeholder
|
||||||
apiVersion: traefik.io/v1alpha1
|
apiVersion: traefik.io/v1alpha1
|
||||||
|
|||||||
762
apps/monitoring/fc-updatecenter-dashboard.grafana.txt
Normal file
762
apps/monitoring/fc-updatecenter-dashboard.grafana.txt
Normal file
@@ -0,0 +1,762 @@
|
|||||||
|
{
|
||||||
|
"annotations": {
|
||||||
|
"list": []
|
||||||
|
},
|
||||||
|
"editable": true,
|
||||||
|
"fiscalYearStartMonth": 0,
|
||||||
|
"graphTooltip": 1,
|
||||||
|
"id": null,
|
||||||
|
"links": [
|
||||||
|
{
|
||||||
|
"icon": "external link",
|
||||||
|
"includeVars": false,
|
||||||
|
"keepTime": false,
|
||||||
|
"targetBlank": true,
|
||||||
|
"title": "Open Service",
|
||||||
|
"type": "link",
|
||||||
|
"url": "https://updatecenter.iamworkin.lan/"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "thresholds"
|
||||||
|
},
|
||||||
|
"mappings": [
|
||||||
|
{
|
||||||
|
"options": {
|
||||||
|
"0": {
|
||||||
|
"color": "#f87171",
|
||||||
|
"index": 1,
|
||||||
|
"text": "DOWN"
|
||||||
|
},
|
||||||
|
"1": {
|
||||||
|
"color": "#4ade80",
|
||||||
|
"index": 0,
|
||||||
|
"text": "UP"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"type": "value"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{
|
||||||
|
"color": "#f87171",
|
||||||
|
"value": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"color": "#4ade80",
|
||||||
|
"value": 1
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 4,
|
||||||
|
"w": 8,
|
||||||
|
"x": 0,
|
||||||
|
"y": 0
|
||||||
|
},
|
||||||
|
"id": 1,
|
||||||
|
"options": {
|
||||||
|
"colorMode": "background",
|
||||||
|
"graphMode": "none",
|
||||||
|
"justifyMode": "center",
|
||||||
|
"orientation": "auto",
|
||||||
|
"reduceOptions": {
|
||||||
|
"calcs": [
|
||||||
|
"lastNotNull"
|
||||||
|
],
|
||||||
|
"fields": "",
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"textMode": "value_and_name"
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"expr": "probe_success{job=\"probe-traefik-services\",instance=\"updatecenter.iamworkin.lan\"}",
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "Availability"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Service Availability",
|
||||||
|
"transparent": true,
|
||||||
|
"type": "stat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "thresholds"
|
||||||
|
},
|
||||||
|
"decimals": 2,
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{
|
||||||
|
"color": "#f87171",
|
||||||
|
"value": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"color": "#fbbf24",
|
||||||
|
"value": 95
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"color": "#FFB300",
|
||||||
|
"value": 99
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"color": "#4ade80",
|
||||||
|
"value": 99.9
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"unit": "percent"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 4,
|
||||||
|
"w": 8,
|
||||||
|
"x": 8,
|
||||||
|
"y": 0
|
||||||
|
},
|
||||||
|
"id": 2,
|
||||||
|
"options": {
|
||||||
|
"colorMode": "background_solid",
|
||||||
|
"graphMode": "area",
|
||||||
|
"justifyMode": "center",
|
||||||
|
"orientation": "auto",
|
||||||
|
"reduceOptions": {
|
||||||
|
"calcs": [
|
||||||
|
"lastNotNull"
|
||||||
|
],
|
||||||
|
"fields": "",
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"textMode": "value_and_name"
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"expr": "avg_over_time(probe_success{job=\"probe-traefik-services\",instance=\"updatecenter.iamworkin.lan\"}[24h]) * 100",
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "24h Uptime"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "24-Hour Uptime",
|
||||||
|
"transparent": true,
|
||||||
|
"type": "stat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "thresholds"
|
||||||
|
},
|
||||||
|
"max": 30,
|
||||||
|
"min": 0,
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{
|
||||||
|
"color": "#f87171",
|
||||||
|
"value": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"color": "#fbbf24",
|
||||||
|
"value": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"color": "#4ade80",
|
||||||
|
"value": 7
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"unit": "d"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 4,
|
||||||
|
"w": 8,
|
||||||
|
"x": 16,
|
||||||
|
"y": 0
|
||||||
|
},
|
||||||
|
"id": 3,
|
||||||
|
"options": {
|
||||||
|
"minVizHeight": 75,
|
||||||
|
"minVizWidth": 75,
|
||||||
|
"orientation": "auto",
|
||||||
|
"reduceOptions": {
|
||||||
|
"calcs": [
|
||||||
|
"lastNotNull"
|
||||||
|
],
|
||||||
|
"fields": "",
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"showThresholdLabels": false,
|
||||||
|
"showThresholdMarkers": true
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"expr": "(probe_ssl_earliest_cert_expiry{job=\"probe-traefik-services\",instance=\"updatecenter.iamworkin.lan\"} - time()) / 86400",
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "Days Remaining"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Cert Expiry (Days)",
|
||||||
|
"transparent": true,
|
||||||
|
"type": "gauge"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"axisBorderShow": false,
|
||||||
|
"axisCenteredZero": false,
|
||||||
|
"axisColorMode": "text",
|
||||||
|
"axisLabel": "Response Time (seconds)",
|
||||||
|
"drawStyle": "line",
|
||||||
|
"fillOpacity": 12,
|
||||||
|
"gradientMode": "scheme",
|
||||||
|
"lineInterpolation": "smooth",
|
||||||
|
"lineWidth": 2,
|
||||||
|
"pointSize": 4,
|
||||||
|
"showPoints": "never",
|
||||||
|
"spanNulls": true,
|
||||||
|
"thresholdsStyle": {
|
||||||
|
"mode": "dashed"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{
|
||||||
|
"color": "#4ade80",
|
||||||
|
"value": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"color": "#fbbf24",
|
||||||
|
"value": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"color": "#f87171",
|
||||||
|
"value": 5
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"unit": "s"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 14,
|
||||||
|
"x": 0,
|
||||||
|
"y": 4
|
||||||
|
},
|
||||||
|
"id": 4,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"calcs": [
|
||||||
|
"lastNotNull",
|
||||||
|
"mean",
|
||||||
|
"max"
|
||||||
|
],
|
||||||
|
"displayMode": "table",
|
||||||
|
"placement": "right"
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "single",
|
||||||
|
"sort": "none"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"expr": "probe_duration_seconds{job=\"probe-traefik-services\",instance=\"updatecenter.iamworkin.lan\"}",
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "Probe Duration"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"timeFrom": "1h",
|
||||||
|
"title": "Response Time (1h Trend)",
|
||||||
|
"transparent": true,
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 10,
|
||||||
|
"x": 14,
|
||||||
|
"y": 4
|
||||||
|
},
|
||||||
|
"id": 5,
|
||||||
|
"options": {
|
||||||
|
"alertInstanceLabelFilter": "{instance=\"updatecenter.iamworkin.lan\"}",
|
||||||
|
"alertName": "",
|
||||||
|
"dashboardAlerts": false,
|
||||||
|
"groupBy": [],
|
||||||
|
"groupMode": "default",
|
||||||
|
"maxItems": 10,
|
||||||
|
"sortOrder": 1,
|
||||||
|
"stateFilter": {
|
||||||
|
"error": true,
|
||||||
|
"firing": true,
|
||||||
|
"noData": true,
|
||||||
|
"normal": false,
|
||||||
|
"pending": true
|
||||||
|
},
|
||||||
|
"viewMode": "list"
|
||||||
|
},
|
||||||
|
"title": "Active Alerts",
|
||||||
|
"type": "alertlist"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"collapsed": false,
|
||||||
|
"gridPos": {
|
||||||
|
"h": 1,
|
||||||
|
"w": 24,
|
||||||
|
"x": 0,
|
||||||
|
"y": 12
|
||||||
|
},
|
||||||
|
"id": 20,
|
||||||
|
"title": "OTEL Counters — Track 1D",
|
||||||
|
"type": "row"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"lineWidth": 1,
|
||||||
|
"fillOpacity": 10
|
||||||
|
},
|
||||||
|
"unit": "reqps"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 0,
|
||||||
|
"y": 13
|
||||||
|
},
|
||||||
|
"id": 21,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"displayMode": "table",
|
||||||
|
"placement": "right",
|
||||||
|
"calcs": ["mean", "lastNotNull"]
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "multi",
|
||||||
|
"sort": "desc"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"expr": "sum by (status) (rate(updatecenter_manifest_requests_total[5m]))",
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "status={{status}}"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Manifest Requests rate by status (5m)",
|
||||||
|
"transparent": true,
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"lineWidth": 1,
|
||||||
|
"fillOpacity": 10
|
||||||
|
},
|
||||||
|
"unit": "Bps"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 12,
|
||||||
|
"y": 13
|
||||||
|
},
|
||||||
|
"id": 22,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"displayMode": "table",
|
||||||
|
"placement": "right",
|
||||||
|
"calcs": ["mean", "lastNotNull"]
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "multi",
|
||||||
|
"sort": "desc"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"expr": "sum by (slug) (rate(updatecenter_bundle_download_bytes_total[5m]))",
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "{{slug}}"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Bundle Download Throughput by slug (5m)",
|
||||||
|
"transparent": true,
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"lineWidth": 1,
|
||||||
|
"fillOpacity": 10
|
||||||
|
},
|
||||||
|
"unit": "reqps"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 0,
|
||||||
|
"y": 21
|
||||||
|
},
|
||||||
|
"id": 23,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"displayMode": "table",
|
||||||
|
"placement": "right",
|
||||||
|
"calcs": ["mean", "lastNotNull"]
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "multi",
|
||||||
|
"sort": "desc"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"expr": "sum by (status) (rate(updatecenter_checkins_total[5m]))",
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "status={{status}}"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Agent Check-in Rate by status (5m)",
|
||||||
|
"transparent": true,
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "thresholds"
|
||||||
|
},
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{ "color": "#4ade80", "value": null },
|
||||||
|
{ "color": "#f87171", "value": 1 }
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"unit": "none",
|
||||||
|
"decimals": 2
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 6,
|
||||||
|
"x": 12,
|
||||||
|
"y": 21
|
||||||
|
},
|
||||||
|
"id": 24,
|
||||||
|
"options": {
|
||||||
|
"colorMode": "background",
|
||||||
|
"graphMode": "area",
|
||||||
|
"justifyMode": "center",
|
||||||
|
"orientation": "auto",
|
||||||
|
"reduceOptions": {
|
||||||
|
"calcs": ["sum"],
|
||||||
|
"fields": "",
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"textMode": "value_and_name"
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"expr": "increase(updatecenter_signature_verify_failures_total[1h])",
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "Sig Verify Failures (1h)"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Signature Verify Failures (1h)",
|
||||||
|
"transparent": true,
|
||||||
|
"type": "stat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"lineWidth": 1,
|
||||||
|
"fillOpacity": 10
|
||||||
|
},
|
||||||
|
"unit": "reqps"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 6,
|
||||||
|
"x": 18,
|
||||||
|
"y": 21
|
||||||
|
},
|
||||||
|
"id": 25,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"displayMode": "table",
|
||||||
|
"placement": "right",
|
||||||
|
"calcs": ["mean", "lastNotNull"]
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "multi",
|
||||||
|
"sort": "desc"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"expr": "sum by (slug, channel) (rate(updatecenter_release_publishes_total[5m]))",
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "{{slug}}/{{channel}}"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Release Publishes rate by slug/channel (5m)",
|
||||||
|
"transparent": true,
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"lineWidth": 1,
|
||||||
|
"fillOpacity": 10
|
||||||
|
},
|
||||||
|
"unit": "reqps"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 0,
|
||||||
|
"y": 29
|
||||||
|
},
|
||||||
|
"id": 26,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"displayMode": "table",
|
||||||
|
"placement": "right",
|
||||||
|
"calcs": ["mean", "lastNotNull"]
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "multi",
|
||||||
|
"sort": "desc"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"expr": "sum by (kind, status) (rate(updatecenter_bundle_downloads_total[5m]))",
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "{{kind}} / {{status}}"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Bundle Download Requests by kind/status (5m)",
|
||||||
|
"transparent": true,
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"custom": {
|
||||||
|
"lineWidth": 2,
|
||||||
|
"fillOpacity": 20
|
||||||
|
},
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{ "color": "#4ade80", "value": null },
|
||||||
|
{ "color": "#f87171", "value": 0.01 }
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"unit": "reqps"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 12,
|
||||||
|
"y": 29
|
||||||
|
},
|
||||||
|
"id": 27,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"displayMode": "table",
|
||||||
|
"placement": "right",
|
||||||
|
"calcs": ["mean", "lastNotNull"]
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "multi",
|
||||||
|
"sort": "desc"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "fffjikve8llhce"
|
||||||
|
},
|
||||||
|
"expr": "rate(updatecenter_signature_verify_failures_total[5m])",
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "Sig verify failures/s"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Signature Verify Failure Rate (5m) — Critical if >0",
|
||||||
|
"transparent": true,
|
||||||
|
"type": "timeseries"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"refresh": "30s",
|
||||||
|
"schemaVersion": 39,
|
||||||
|
"style": "dark",
|
||||||
|
"tags": [
|
||||||
|
"blue-jay",
|
||||||
|
"flowercore",
|
||||||
|
"synthetic",
|
||||||
|
"updatecenter",
|
||||||
|
"otel"
|
||||||
|
],
|
||||||
|
"templating": {
|
||||||
|
"list": []
|
||||||
|
},
|
||||||
|
"time": {
|
||||||
|
"from": "now-24h",
|
||||||
|
"to": "now"
|
||||||
|
},
|
||||||
|
"timezone": "browser",
|
||||||
|
"title": "FlowerCore.UpdateCenter Dashboard",
|
||||||
|
"uid": "fc-updatecenter",
|
||||||
|
"version": 2
|
||||||
|
}
|
||||||
@@ -0,0 +1,226 @@
|
|||||||
|
{
|
||||||
|
"annotations": {
|
||||||
|
"list": []
|
||||||
|
},
|
||||||
|
"editable": true,
|
||||||
|
"fiscalYearStartMonth": 0,
|
||||||
|
"graphTooltip": 0,
|
||||||
|
"id": null,
|
||||||
|
"links": [],
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "${DS_PROMETHEUS}"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"unit": "short"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 0,
|
||||||
|
"y": 0
|
||||||
|
},
|
||||||
|
"id": 1,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"displayMode": "table",
|
||||||
|
"placement": "bottom"
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "single"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"editorMode": "code",
|
||||||
|
"expr": "sum by (event) (increase(fc_desktop_session_events_total[$__rate_interval]))",
|
||||||
|
"legendFormat": "{{event}}",
|
||||||
|
"range": true,
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "RemoteDesktop Session Events",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "${DS_PROMETHEUS}"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"unit": "short"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 12,
|
||||||
|
"y": 0
|
||||||
|
},
|
||||||
|
"id": 2,
|
||||||
|
"options": {
|
||||||
|
"orientation": "auto",
|
||||||
|
"reduceOptions": {
|
||||||
|
"calcs": [
|
||||||
|
"lastNotNull"
|
||||||
|
],
|
||||||
|
"fields": "",
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"showUnfilled": true
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"editorMode": "code",
|
||||||
|
"expr": "sum by (template, event) (increase(fc_desktop_session_events_total[24h]))",
|
||||||
|
"legendFormat": "{{template}} {{event}}",
|
||||||
|
"range": true,
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "24h Session Events By Template",
|
||||||
|
"type": "bargauge"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "${DS_PROMETHEUS}"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"unit": "short"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 0,
|
||||||
|
"y": 8
|
||||||
|
},
|
||||||
|
"id": 3,
|
||||||
|
"options": {
|
||||||
|
"legend": {
|
||||||
|
"displayMode": "table",
|
||||||
|
"placement": "bottom"
|
||||||
|
},
|
||||||
|
"tooltip": {
|
||||||
|
"mode": "single"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"editorMode": "code",
|
||||||
|
"expr": "fc_desktop_pool_ready",
|
||||||
|
"legendFormat": "{{template}} ready",
|
||||||
|
"range": true,
|
||||||
|
"refId": "A"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"editorMode": "code",
|
||||||
|
"expr": "fc_desktop_pool_desired",
|
||||||
|
"legendFormat": "{{template}} desired",
|
||||||
|
"range": true,
|
||||||
|
"refId": "B"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "Warm Pool Ready vs Desired",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "${DS_PROMETHEUS}"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {
|
||||||
|
"mode": "palette-classic"
|
||||||
|
},
|
||||||
|
"mappings": [],
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{
|
||||||
|
"color": "green",
|
||||||
|
"value": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"color": "orange",
|
||||||
|
"value": 1
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"unit": "short"
|
||||||
|
},
|
||||||
|
"overrides": []
|
||||||
|
},
|
||||||
|
"gridPos": {
|
||||||
|
"h": 8,
|
||||||
|
"w": 12,
|
||||||
|
"x": 12,
|
||||||
|
"y": 8
|
||||||
|
},
|
||||||
|
"id": 4,
|
||||||
|
"options": {
|
||||||
|
"colorMode": "value",
|
||||||
|
"graphMode": "none",
|
||||||
|
"justifyMode": "auto",
|
||||||
|
"orientation": "auto",
|
||||||
|
"reduceOptions": {
|
||||||
|
"calcs": [
|
||||||
|
"lastNotNull"
|
||||||
|
],
|
||||||
|
"fields": "",
|
||||||
|
"values": false
|
||||||
|
},
|
||||||
|
"textMode": "auto"
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"editorMode": "code",
|
||||||
|
"expr": "sum(increase(fc_desktop_session_events_total{event=\"connect\",browser_datasource=\"json\"}[24h])) - sum(increase(fc_desktop_session_events_total{event=\"disconnect\"}[24h]))",
|
||||||
|
"range": true,
|
||||||
|
"refId": "A"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"title": "24h Connect Minus Disconnect",
|
||||||
|
"type": "stat"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"refresh": "30s",
|
||||||
|
"schemaVersion": 39,
|
||||||
|
"style": "dark",
|
||||||
|
"tags": [
|
||||||
|
"flowercore",
|
||||||
|
"remotedesktop",
|
||||||
|
"guacamole"
|
||||||
|
],
|
||||||
|
"templating": {
|
||||||
|
"list": []
|
||||||
|
},
|
||||||
|
"time": {
|
||||||
|
"from": "now-24h",
|
||||||
|
"to": "now"
|
||||||
|
},
|
||||||
|
"timezone": "browser",
|
||||||
|
"title": "FlowerCore RemoteDesktop",
|
||||||
|
"uid": "flowercore-remotedesktop",
|
||||||
|
"version": 1
|
||||||
|
}
|
||||||
@@ -75,6 +75,20 @@ data:
|
|||||||
cluster: "rke2"
|
cluster: "rke2"
|
||||||
role: "agent"
|
role: "agent"
|
||||||
|
|
||||||
|
# Mac mini macOS runner node (INFRA VLAN)
|
||||||
|
- job_name: "macmini-node"
|
||||||
|
scrape_timeout: 15s
|
||||||
|
static_configs:
|
||||||
|
- targets: ["10.0.56.115:9100"]
|
||||||
|
labels:
|
||||||
|
instance: "macmini"
|
||||||
|
host: "macmini.iamworkin.lan"
|
||||||
|
vlan: "infra"
|
||||||
|
arch: "arm64"
|
||||||
|
role: "macos-runner"
|
||||||
|
puppet_managed: "true"
|
||||||
|
puppet_server: "puppet.iamworkin.lan"
|
||||||
|
|
||||||
# In-cluster node-exporter DaemonSet
|
# In-cluster node-exporter DaemonSet
|
||||||
- job_name: "k8s-node-exporter"
|
- job_name: "k8s-node-exporter"
|
||||||
kubernetes_sd_configs:
|
kubernetes_sd_configs:
|
||||||
@@ -209,7 +223,7 @@ data:
|
|||||||
service: "pimanager"
|
service: "pimanager"
|
||||||
vlan: "home"
|
vlan: "home"
|
||||||
device: "pi4-ezconnect"
|
device: "pi4-ezconnect"
|
||||||
- targets: ["10.0.58.113:5100"]
|
- targets: ["10.0.58.113:5200"]
|
||||||
labels:
|
labels:
|
||||||
instance: "pirelay"
|
instance: "pirelay"
|
||||||
service: "pimanager"
|
service: "pimanager"
|
||||||
@@ -266,13 +280,14 @@ data:
|
|||||||
printer_model: "NuPrint 210"
|
printer_model: "NuPrint 210"
|
||||||
|
|
||||||
# Print.Web health (Blazor app on edge2:5200)
|
# Print.Web health (Blazor app on edge2:5200)
|
||||||
|
# Target `/health` (anonymous) — root path requires API key auth and returns 401.
|
||||||
- job_name: "probe-printweb"
|
- job_name: "probe-printweb"
|
||||||
metrics_path: /probe
|
metrics_path: /probe
|
||||||
params:
|
params:
|
||||||
module: [http_2xx]
|
module: [http_2xx]
|
||||||
scrape_interval: 30s
|
scrape_interval: 30s
|
||||||
static_configs:
|
static_configs:
|
||||||
- targets: ["http://10.0.57.16:5200/"]
|
- targets: ["http://10.0.57.16:5200/health"]
|
||||||
labels:
|
labels:
|
||||||
instance: "print-web"
|
instance: "print-web"
|
||||||
service: "print-web"
|
service: "print-web"
|
||||||
@@ -464,11 +479,11 @@ data:
|
|||||||
- "https://gitea.iamworkin.lan/"
|
- "https://gitea.iamworkin.lan/"
|
||||||
- "https://argocd.iamworkin.lan/"
|
- "https://argocd.iamworkin.lan/"
|
||||||
- "https://intranet.iamworkin.lan/"
|
- "https://intranet.iamworkin.lan/"
|
||||||
- "https://signage.iamworkin.lan/"
|
- "https://signage.iamworkin.lan/healthz" # root 401 auth-gated 2026-06-01; /healthz anon 200
|
||||||
- "https://kiosk.iamworkin.lan/"
|
- "https://kiosk.iamworkin.lan/"
|
||||||
- "https://media.iamworkin.lan/"
|
- "https://media.iamworkin.lan/"
|
||||||
- "https://mysql.iamworkin.lan/"
|
- "https://mysql.iamworkin.lan/healthz" # root 401 auth-gated 2026-06-01; /healthz anon 200
|
||||||
- "https://php.iamworkin.lan/"
|
- "https://php.iamworkin.lan/healthz" # root 401 auth-gated 2026-06-01; /healthz anon 200
|
||||||
- "https://zabbix.iamworkin.lan/"
|
- "https://zabbix.iamworkin.lan/"
|
||||||
- "https://desktop.iamworkin.lan/"
|
- "https://desktop.iamworkin.lan/"
|
||||||
- "https://print.iamworkin.lan/"
|
- "https://print.iamworkin.lan/"
|
||||||
@@ -697,6 +712,36 @@ data:
|
|||||||
summary: "Print.Web Ollama runner held for >10m ({{ $labels.model }})"
|
summary: "Print.Web Ollama runner held for >10m ({{ $labels.model }})"
|
||||||
description: "Print.Web reports model {{ $labels.model }} with {{ $value | printf \"%.0f\" }}s of keep-alive remaining. Check concurrent requests before the Pi 5 Ollama lane thrashes."
|
description: "Print.Web reports model {{ $labels.model }} with {{ $value | printf \"%.0f\" }}s of keep-alive remaining. Check concurrent requests before the Pi 5 Ollama lane thrashes."
|
||||||
|
|
||||||
|
- name: macmini-runners
|
||||||
|
rules:
|
||||||
|
- alert: MacMiniRunnerOffline
|
||||||
|
expr: (flowercore_github_runner_online{runner=~"macmini-.*"} == 0) or absent(flowercore_github_runner_online{runner=~"macmini-.*"})
|
||||||
|
for: 10m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
service: github-runner
|
||||||
|
annotations:
|
||||||
|
summary: "Mac mini GitHub runner offline ({{ $labels.runner }})"
|
||||||
|
description: "A macmini-* GitHub Actions runner has not reported online for more than 10 minutes. Puppet manages its LaunchDaemon under /Library/LaunchDaemons/io.flowercore.github-runner-<slug>.plist; runners survive reboot and do not require a GUI session."
|
||||||
|
|
||||||
|
- name: linux-runners
|
||||||
|
rules:
|
||||||
|
- alert: LinuxRunnerOffline
|
||||||
|
expr: |
|
||||||
|
kube_deployment_status_replicas_ready{
|
||||||
|
namespace="github-runner",
|
||||||
|
deployment=~"github-runner(|-(sharedpos|puppet|signage|dms|telephony|print-web|chat|mysql|kiosk-linux))"
|
||||||
|
} == 0
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
alert_channel: irc
|
||||||
|
service: github-runner
|
||||||
|
team: ci
|
||||||
|
annotations:
|
||||||
|
summary: "Linux CI runner offline: {{ $labels.deployment }}"
|
||||||
|
description: "Deployment {{ $labels.deployment }} in namespace github-runner has 0 ready replicas for more than 5 minutes. CI jobs targeting this repo will queue until the runner pod restarts and re-registers with GitHub. Check pods with: kubectl -n github-runner get pods -l app.kubernetes.io/name={{ $labels.deployment }}. Check logs with: kubectl -n github-runner logs -l app.kubernetes.io/name={{ $labels.deployment }} --tail=50. Common causes: PAT missing repo access, runner CrashLoopBackOff, or node/resource pressure."
|
||||||
|
|
||||||
- name: remote-desktop
|
- name: remote-desktop
|
||||||
rules:
|
rules:
|
||||||
- alert: RemoteDesktopWebDown
|
- alert: RemoteDesktopWebDown
|
||||||
@@ -922,6 +967,52 @@ data:
|
|||||||
annotations:
|
annotations:
|
||||||
summary: "Disk usage high on {{ $labels.instance }} ({{ $value | printf \"%.1f\" }}%)"
|
summary: "Disk usage high on {{ $labels.instance }} ({{ $value | printf \"%.1f\" }}%)"
|
||||||
|
|
||||||
|
# Puppet agent + service alerts.
|
||||||
|
# Mirror of FlowerCore.Notes/scripts/monitoring/alerts.yml `puppet` group
|
||||||
|
# so a future migration to in-cluster Prometheus inherits the ruleset.
|
||||||
|
# Source-of-truth for the live Podman Prometheus on noc1 is the Notes file.
|
||||||
|
# See feedback_monitoring_k8s_target_vs_live_podman.
|
||||||
|
- name: puppet
|
||||||
|
rules:
|
||||||
|
- alert: PuppetAgentReportStale
|
||||||
|
expr: puppet_last_run_age_seconds > 7200
|
||||||
|
for: 30m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
alert_channel: irc
|
||||||
|
annotations:
|
||||||
|
summary: "Puppet agent {{ $labels.instance }} hasn't reported in over 2h"
|
||||||
|
description: "Last run age: {{ $value | humanizeDuration }}. The puppet agent on {{ $labels.instance }} may be stopped, the node may be powered off, or noc1 may be unreachable from this node."
|
||||||
|
runbook: "1. SSH to node (via noc1 jumpbox if needed) 2. sudo systemctl status puppet 3. sudo puppet agent -t --noop to force a run 4. Check r10k: ssh fcadmin@10.0.56.10 'sudo podman logs openvoxserver --tail 50' 5. Verify noc1 reachability: ping puppet.iamworkin.lan"
|
||||||
|
|
||||||
|
- alert: PuppetAgentReportCritical
|
||||||
|
expr: puppet_last_run_age_seconds > 86400
|
||||||
|
for: 1h
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
alert_channel: irc
|
||||||
|
annotations:
|
||||||
|
summary: "Puppet agent {{ $labels.instance }} silent for over 24h — node is unmanaged"
|
||||||
|
description: "Last run age: {{ $value | humanizeDuration }}. Node {{ $labels.instance }} has not submitted a Puppet report in over 24 hours. Config drift is accumulating — investigate immediately. If intentional (maintenance), add to the exclusion filter or silence in Grafana."
|
||||||
|
runbook: "URGENT: 1. Check node power state 2. SSH via noc1 jumpbox: ssh fcadmin@10.0.56.10 then ssh <node> 3. sudo systemctl status puppet 4. sudo systemctl start puppet + sudo puppet agent -t 5. Check for network partitions (VLAN connectivity to 10.0.56.10) 6. If node was recently reimaged: sudo puppet agent -t to re-register with new SSL cert"
|
||||||
|
|
||||||
|
# Sprint 33 Cx-7 Phase B (2026-05-25 postmortem follow-up):
|
||||||
|
# Detects puppet.service in failed state — distinct from PuppetAgentReportStale
|
||||||
|
# which catches "agent hasn't run." This catches "systemd gave up restarting it"
|
||||||
|
# (CA-verify loop or other fatal exit). Requires node-exporter systemd collector
|
||||||
|
# enabled with --collector.systemd. If `node_systemd_unit_state` has no series
|
||||||
|
# for a node, the collector is disabled there — flag in postmortem follow-up.
|
||||||
|
- alert: PuppetServiceFailed
|
||||||
|
expr: node_systemd_unit_state{name="puppet.service",state="failed"} == 1
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
alert_channel: irc
|
||||||
|
annotations:
|
||||||
|
summary: "Puppet service failed on {{ $labels.instance }}"
|
||||||
|
description: "puppet.service on {{ $labels.instance }} has been in failed state for 5+ minutes. systemd has stopped auto-restarting (CA-verify-loop or other exit). Manual `systemctl status puppet` confirms. Run `sudo systemctl start puppet` to recover; investigate journal for root cause."
|
||||||
|
runbook_url: "https://github.com/astoltz/FlowerCore.Notes/blob/master/memory/feedback_puppet_service_dead_after_ca_loop_alert_misreads.md"
|
||||||
|
|
||||||
# K8s pod-state alerts. Require kube-state-metrics scrape (added
|
# K8s pod-state alerts. Require kube-state-metrics scrape (added
|
||||||
# 2026-04-26 — see scrape_configs above). Would have surfaced the
|
# 2026-04-26 — see scrape_configs above). Would have surfaced the
|
||||||
# agent-zero ollama-proxy 172x crash-loop instead of letting it
|
# agent-zero ollama-proxy 172x crash-loop instead of letting it
|
||||||
@@ -974,6 +1065,39 @@ data:
|
|||||||
summary: "Deployment {{ $labels.namespace }}/{{ $labels.deployment }} replica mismatch"
|
summary: "Deployment {{ $labels.namespace }}/{{ $labels.deployment }} replica mismatch"
|
||||||
description: "Spec wants {{ $labels.spec_replicas }} but only {{ $value }} available. Likely a rollout stuck on probe failure, scheduling, or PVC."
|
description: "Spec wants {{ $labels.spec_replicas }} but only {{ $value }} available. Likely a rollout stuck on probe failure, scheduling, or PVC."
|
||||||
|
|
||||||
|
# Q-MR-3 (2026-05-11): multus memory pressure — catches the next OOM
|
||||||
|
# cascade BEFORE multus is OOM-killed cluster-wide. The 2026-05-10
|
||||||
|
# outage (21h) hit because no alert fired on the rising multus working
|
||||||
|
# set — only downstream blackbox / Traefik / service alerts. With
|
||||||
|
# 1Gi limit (bluejay-infra@eb8693e), 80% = ~800MiB; steady-state
|
||||||
|
# runs ~150-250MiB so this only fires when an avalanche starts.
|
||||||
|
- alert: MultusMemoryPressure
|
||||||
|
expr: |
|
||||||
|
container_memory_working_set_bytes{container="kube-multus"}
|
||||||
|
/ container_spec_memory_limit_bytes{container="kube-multus"} > 0.8
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: critical
|
||||||
|
alert_channel: thermal_print
|
||||||
|
annotations:
|
||||||
|
summary: "kube-multus memory >80% of limit on {{ $labels.node }} for 5m"
|
||||||
|
description: "kube-multus working set is {{ $value | humanizePercentage }} of its memory limit on node {{ $labels.node }}. If this keeps climbing, multus will OOM and all new pod networking will halt cluster-wide (precedent: 2026-05-10 outage)."
|
||||||
|
|
||||||
|
# Q-MR-3 (2026-05-11): namespace pending-pod backlog — catches the
|
||||||
|
# operator-leak avalanche pattern BEFORE it cascades into a multus
|
||||||
|
# CNI OOM. Any FC operator (RemoteDesktop / Distribution / WorldBuilder)
|
||||||
|
# emitting pods without ownerReferences will accumulate them when
|
||||||
|
# the operator crashes. >25 pending pods in any namespace for 30m
|
||||||
|
# is the signal to investigate the reconciler.
|
||||||
|
- alert: NamespacePendingPodBacklog
|
||||||
|
expr: sum by (namespace) (kube_pod_status_phase{phase="Pending"}) > 25
|
||||||
|
for: 30m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: "Namespace {{ $labels.namespace }} has {{ $value }} Pending pods for 30m"
|
||||||
|
description: "Pending pod count in {{ $labels.namespace }} exceeds 25 sustained for 30m. Likely operator-leak avalanche pattern — children emitted without ownerReferences. Risk of multus CNI OOM cascade."
|
||||||
|
|
||||||
# Longhorn storage health alerts. Required: longhorn scrape job
|
# Longhorn storage health alerts. Required: longhorn scrape job
|
||||||
# (added 2026-04-26 — see scrape_configs above). The K8s events
|
# (added 2026-04-26 — see scrape_configs above). The K8s events
|
||||||
# for "snapshot becomes not ready to use" are transient lifecycle
|
# for "snapshot becomes not ready to use" are transient lifecycle
|
||||||
@@ -1024,6 +1148,72 @@ data:
|
|||||||
summary: "Longhorn node {{ $labels.node }} not Ready"
|
summary: "Longhorn node {{ $labels.node }} not Ready"
|
||||||
description: "Node {{ $labels.node }} reports ready=false (reason: {{ $labels.condition_reason }}). Volumes scheduled to this node will be unavailable until it recovers."
|
description: "Node {{ $labels.node }} reports ready=false (reason: {{ $labels.condition_reason }}). Volumes scheduled to this node will be unavailable until it recovers."
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# FC Signage Marquee Performance — Track 3 + 8 (2026-05-06)
|
||||||
|
# Live-mirrored from FlowerCore.Notes/scripts/monitoring/alerts.yml.
|
||||||
|
# Source-of-truth for the live Podman Prometheus on noc1 is the
|
||||||
|
# Notes file; this K8s ConfigMap exists so a future migration to
|
||||||
|
# in-cluster Prometheus inherits the ruleset automatically.
|
||||||
|
# See feedback_monitoring_k8s_target_vs_live_podman.
|
||||||
|
# ============================================================
|
||||||
|
- name: fc-signage-marquee
|
||||||
|
rules:
|
||||||
|
- alert: MarqueeDroppedFramesHigh
|
||||||
|
expr: |
|
||||||
|
(
|
||||||
|
sum by (renderer, phase, node_id) (rate(marquee_dropped_frames_total[5m]))
|
||||||
|
/
|
||||||
|
sum by (renderer, phase, node_id) (rate(marquee_render_latency_ms_count[5m]))
|
||||||
|
) > 0.05
|
||||||
|
unless on()
|
||||||
|
absent_over_time(marquee_dropped_frames_total[7d])
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
service: signage
|
||||||
|
alert_channel: irc
|
||||||
|
annotations:
|
||||||
|
summary: "Marquee dropped-frame rate >5% on {{ $labels.renderer }}/{{ $labels.node_id }} ({{ $labels.phase }})"
|
||||||
|
description: "Renderer {{ $labels.renderer }} on {{ $labels.node_id }} drops >5% of frames during {{ $labels.phase }}. Animation visibly stuttery."
|
||||||
|
|
||||||
|
- alert: MarqueeRenderLatencyP99High
|
||||||
|
expr: |
|
||||||
|
histogram_quantile(
|
||||||
|
0.99,
|
||||||
|
sum by (renderer, phase, node_id, le) (rate(marquee_render_latency_ms_bucket[5m]))
|
||||||
|
) > 16
|
||||||
|
unless on()
|
||||||
|
absent_over_time(marquee_render_latency_ms_bucket[7d])
|
||||||
|
for: 10m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
service: signage
|
||||||
|
alert_channel: irc
|
||||||
|
annotations:
|
||||||
|
summary: "Marquee render latency p99 > 16ms on {{ $labels.renderer }}/{{ $labels.node_id }} ({{ $labels.phase }})"
|
||||||
|
description: "Per-frame render latency p99 has exceeded the Pi-class 16ms budget for 10 minutes."
|
||||||
|
|
||||||
|
- alert: MarqueeAnimationDurationDrift
|
||||||
|
expr: |
|
||||||
|
abs(
|
||||||
|
histogram_quantile(0.5, sum by (renderer, phase, le) (rate(marquee_animation_duration_ms_bucket[15m])))
|
||||||
|
-
|
||||||
|
on (phase) group_left() avg by (phase) (marquee_animation_duration_target_ms)
|
||||||
|
)
|
||||||
|
/
|
||||||
|
on (phase) group_left() avg by (phase) (marquee_animation_duration_target_ms)
|
||||||
|
> 0.10
|
||||||
|
unless on()
|
||||||
|
absent_over_time(marquee_animation_duration_ms_bucket[7d])
|
||||||
|
for: 15m
|
||||||
|
labels:
|
||||||
|
severity: info
|
||||||
|
service: signage
|
||||||
|
alert_channel: irc
|
||||||
|
annotations:
|
||||||
|
summary: "Marquee animation duration drifting > 10% on {{ $labels.renderer }} ({{ $labels.phase }})"
|
||||||
|
description: "Median observed cycle duration deviates from target DurationMs by >10%. Could indicate browser tab throttling, GPU pressure, or phase-advancement bug."
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# ConfigMap: Blackbox Exporter Configuration
|
# ConfigMap: Blackbox Exporter Configuration
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
@@ -1084,24 +1274,55 @@ metadata:
|
|||||||
data:
|
data:
|
||||||
notify.py: |
|
notify.py: |
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""HTTP->IRC alert relay with thermal printer forwarding for Grafana webhooks.
|
"""HTTP->IRC alert relay with thermal-printer DIGEST forwarding.
|
||||||
Listens on :9119, posts to #alerts on UnrealIRCd via raw IRC protocol.
|
|
||||||
Alerts tagged alert_channel=thermal_print also POST to Print.Web /api/print/alert.
|
Listens on :9119, posts to #alerts on UnrealIRCd, forwards to Print.Web
|
||||||
|
/api/print/alert. Thermal printing is BATCHED into hourly digests by
|
||||||
|
default so the printer no longer spam-fires per Grafana webhook.
|
||||||
|
|
||||||
|
Routing (per Grafana webhook alert):
|
||||||
|
- IRC: always per-event (operator likes the stream)
|
||||||
|
- Thermal printer:
|
||||||
|
* label alert_channel=thermal_print_immediate -> first arrival of a new
|
||||||
|
fingerprint flushes the current digest NOW (repeats dedupe)
|
||||||
|
* severity in {critical,disaster,page} OR label alert_channel=thermal_print -> enqueue into hourly digest
|
||||||
|
* everything else -> IRC only
|
||||||
|
- RESOLVED webhooks remove the alert from the digest buffer
|
||||||
|
|
||||||
|
Env vars (defaults preserve old behavior on first deploy):
|
||||||
|
THERMAL_PRINT_ENABLED default "true" - master kill switch
|
||||||
|
BATCH_INTERVAL_MIN default "60" - minutes between digest prints
|
||||||
|
BATCH_MAX_PENDING default "50" - force-flush threshold
|
||||||
|
|
||||||
|
HTTP surface:
|
||||||
|
POST / - Grafana webhook entry
|
||||||
|
POST /flush - manual digest flush (idempotent)
|
||||||
|
GET / - status + config + buffer depth + stats
|
||||||
"""
|
"""
|
||||||
import json, socket, sys, time
|
import json, os, socket, sys, threading, time
|
||||||
|
from collections import defaultdict
|
||||||
|
from datetime import datetime, timezone
|
||||||
from http.server import HTTPServer, BaseHTTPRequestHandler
|
from http.server import HTTPServer, BaseHTTPRequestHandler
|
||||||
from urllib.request import Request, urlopen
|
from urllib.request import Request, urlopen
|
||||||
from urllib.error import URLError
|
|
||||||
|
|
||||||
IRC_HOST = "unrealircd.irc.svc" # short name: CoreDNS ndots:5 + iamworkin.lan template hijacks full .cluster.local (see memory)
|
THERMAL_PRINT_ENABLED = os.environ.get("THERMAL_PRINT_ENABLED", "true").lower() == "true"
|
||||||
IRC_PORT = 6667
|
BATCH_INTERVAL_MIN = int(os.environ.get("BATCH_INTERVAL_MIN", "60"))
|
||||||
IRC_NICK = "grafana-bot"
|
BATCH_MAX_PENDING = int(os.environ.get("BATCH_MAX_PENDING", "50"))
|
||||||
IRC_CHANNEL = "#alerts"
|
|
||||||
PRINT_WEB_URL = "http://10.0.57.16:5200/api/print/alert"
|
IRC_HOST = os.environ.get("IRC_HOST", "unrealircd.irc.svc")
|
||||||
PRINT_ENABLED = True
|
IRC_PORT = int(os.environ.get("IRC_PORT", "6667"))
|
||||||
|
IRC_NICK = os.environ.get("IRC_NICK", "grafana-bot")
|
||||||
|
IRC_CHANNEL = os.environ.get("IRC_CHANNEL", "#alerts")
|
||||||
|
PRINT_WEB_URL = os.environ.get("PRINT_WEB_URL", "http://10.0.57.16:5200/api/print/alert")
|
||||||
|
|
||||||
|
_buffer_lock = threading.Lock()
|
||||||
|
_buffer = {} # fingerprint -> {"alert": dict, "first_seen": float, "last_seen": float}
|
||||||
|
_last_flush_time = time.time()
|
||||||
|
_stats = {"webhooks_received": 0, "irc_sent": 0, "print_immediate": 0,
|
||||||
|
"digest_flushed": 0, "buffer_dedup": 0, "buffer_added": 0,
|
||||||
|
"buffer_resolved": 0, "started_at": time.time()}
|
||||||
|
|
||||||
def send_irc(message):
|
def send_irc(message):
|
||||||
"""Connect, handle PING, join, send, quit."""
|
|
||||||
try:
|
try:
|
||||||
sock = socket.create_connection((IRC_HOST, IRC_PORT), timeout=15)
|
sock = socket.create_connection((IRC_HOST, IRC_PORT), timeout=15)
|
||||||
sock.sendall(f"NICK {IRC_NICK}\r\n".encode())
|
sock.sendall(f"NICK {IRC_NICK}\r\n".encode())
|
||||||
@@ -1134,52 +1355,137 @@ data:
|
|||||||
time.sleep(0.5)
|
time.sleep(0.5)
|
||||||
sock.sendall(b"QUIT :alert delivered\r\n")
|
sock.sendall(b"QUIT :alert delivered\r\n")
|
||||||
sock.close()
|
sock.close()
|
||||||
|
_stats["irc_sent"] += 1
|
||||||
return True
|
return True
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[irc-notify] IRC send failed: {e}", file=sys.stderr)
|
print(f"[irc-notify] IRC send failed: {e}", file=sys.stderr)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def send_thermal_print(alert):
|
def post_thermal(payload, kind):
|
||||||
if not PRINT_ENABLED: return
|
if not THERMAL_PRINT_ENABLED:
|
||||||
labels = alert.get("labels", {})
|
print(f"[irc-notify] thermal disabled; skip {kind} ({payload.get('title','?')[:40]})", file=sys.stderr)
|
||||||
annotations = alert.get("annotations", {})
|
return False
|
||||||
status = alert.get("status", "firing").upper()
|
|
||||||
summary = annotations.get("summary", "")
|
|
||||||
description = annotations.get("description", "")
|
|
||||||
runbook = annotations.get("runbook", "")
|
|
||||||
# Build a useful message: summary + description + runbook steps
|
|
||||||
parts = []
|
|
||||||
if summary: parts.append(summary)
|
|
||||||
if description and description != summary: parts.append(description)
|
|
||||||
if runbook: parts.append("STEPS: " + runbook)
|
|
||||||
message = " | ".join(parts) if parts else labels.get("alertname", "Unknown alert")
|
|
||||||
payload = {
|
|
||||||
"title": labels.get("alertname", "Unknown"),
|
|
||||||
"severity": labels.get("severity", "warning").capitalize(),
|
|
||||||
"host": labels.get("instance", labels.get("host", "unknown")),
|
|
||||||
"message": message,
|
|
||||||
"eventId": alert.get("fingerprint", ""),
|
|
||||||
"source": "Grafana",
|
|
||||||
"status": "RESOLVED" if status == "RESOLVED" else "PROBLEM",
|
|
||||||
"acknowledged": False
|
|
||||||
}
|
|
||||||
try:
|
try:
|
||||||
req = Request(PRINT_WEB_URL, data=json.dumps(payload).encode("utf-8"),
|
req = Request(PRINT_WEB_URL, data=json.dumps(payload).encode("utf-8"),
|
||||||
headers={"Content-Type": "application/json"}, method="POST")
|
headers={"Content-Type": "application/json"}, method="POST")
|
||||||
resp = urlopen(req, timeout=10)
|
resp = urlopen(req, timeout=10)
|
||||||
print(f"[irc-notify] Thermal print sent: {resp.read().decode()}", file=sys.stderr)
|
if kind == "immediate": _stats["print_immediate"] += 1
|
||||||
|
print(f"[irc-notify] thermal {kind} sent: {payload.get('title','?')[:50]}", file=sys.stderr)
|
||||||
|
return True
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[irc-notify] Thermal print failed: {e}", file=sys.stderr)
|
print(f"[irc-notify] thermal {kind} failed: {e}", file=sys.stderr)
|
||||||
|
|
||||||
def should_print(alert):
|
|
||||||
labels = alert.get("labels", {})
|
|
||||||
if labels.get("alert_channel") == "thermal_print": return True
|
|
||||||
if labels.get("severity", "").lower() in ("critical", "disaster"): return True
|
|
||||||
if alert.get("status", "").upper() == "RESOLVED": return False
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def fingerprint_of(alert):
|
||||||
|
fp = alert.get("fingerprint", "")
|
||||||
|
if fp: return fp
|
||||||
|
labels = alert.get("labels", {})
|
||||||
|
target = labels.get("pod") or labels.get("instance") or labels.get("deployment") or labels.get("statefulset") or labels.get("namespace") or ""
|
||||||
|
return f"{labels.get('alertname','?')}/{labels.get('namespace','')}/{target}"
|
||||||
|
|
||||||
|
def is_critical(alert):
|
||||||
|
return alert.get("labels", {}).get("severity", "").lower() in ("critical", "disaster", "page")
|
||||||
|
|
||||||
|
def is_immediate_label(alert):
|
||||||
|
return alert.get("labels", {}).get("alert_channel") == "thermal_print_immediate"
|
||||||
|
|
||||||
|
def is_batched_label(alert):
|
||||||
|
return alert.get("labels", {}).get("alert_channel") == "thermal_print"
|
||||||
|
|
||||||
|
def add_to_digest(alert):
|
||||||
|
"""Add an alert to the digest buffer. Returns True if the buffer GREW
|
||||||
|
(new fingerprint), False if it was a dedup, resolution, or no-op.
|
||||||
|
"""
|
||||||
|
if not THERMAL_PRINT_ENABLED: return False
|
||||||
|
fp = fingerprint_of(alert)
|
||||||
|
status = alert.get("status", "firing").lower()
|
||||||
|
with _buffer_lock:
|
||||||
|
if status == "resolved":
|
||||||
|
if fp in _buffer:
|
||||||
|
del _buffer[fp]
|
||||||
|
_stats["buffer_resolved"] += 1
|
||||||
|
return False
|
||||||
|
if fp in _buffer:
|
||||||
|
_buffer[fp]["last_seen"] = time.time()
|
||||||
|
_buffer[fp]["alert"] = alert
|
||||||
|
_stats["buffer_dedup"] += 1
|
||||||
|
return False
|
||||||
|
_buffer[fp] = {"alert": alert, "first_seen": time.time(), "last_seen": time.time()}
|
||||||
|
_stats["buffer_added"] += 1
|
||||||
|
return True
|
||||||
|
|
||||||
|
def build_digest_payload():
|
||||||
|
with _buffer_lock:
|
||||||
|
items = list(_buffer.values())
|
||||||
|
if not items: return None
|
||||||
|
by_name = defaultdict(list)
|
||||||
|
for item in items:
|
||||||
|
labels = item["alert"].get("labels", {})
|
||||||
|
by_name[labels.get("alertname", "Unknown")].append(item)
|
||||||
|
lines = []
|
||||||
|
for name, group in sorted(by_name.items()):
|
||||||
|
targets = []
|
||||||
|
for it in group[:5]:
|
||||||
|
labels = it["alert"].get("labels", {})
|
||||||
|
t = (labels.get("pod") or labels.get("instance") or labels.get("deployment")
|
||||||
|
or labels.get("statefulset") or labels.get("namespace") or "?")
|
||||||
|
targets.append(t)
|
||||||
|
more = f" (+{len(group)-5})" if len(group) > 5 else ""
|
||||||
|
sevs = sorted({it["alert"].get("labels", {}).get("severity", "warning") for it in group})
|
||||||
|
lines.append(f"[{'/'.join(sevs)}] {name} x{len(group)}: {', '.join(targets)}{more}")
|
||||||
|
now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
|
||||||
|
title = f"Alert digest: {len(items)} firing"
|
||||||
|
body = "\n".join([
|
||||||
|
f"=== {title} ===",
|
||||||
|
f"as of {now}",
|
||||||
|
"",
|
||||||
|
*lines,
|
||||||
|
"",
|
||||||
|
"Stream: #alerts (IRC) | Triage: grafana-noc1.iamworkin.lan",
|
||||||
|
"Force-flush: POST irc-notify.monitoring.svc:9119/flush",
|
||||||
|
])
|
||||||
|
return {"title": title, "severity": "Warning", "host": "monitoring",
|
||||||
|
"message": body, "eventId": f"digest-{int(time.time())}",
|
||||||
|
"source": "Grafana digest", "status": "PROBLEM", "acknowledged": False}
|
||||||
|
|
||||||
|
def flush_digest():
    """Send the buffered alerts to the thermal printer as one digest job.

    Returns the result of post_thermal() for the digest (True when the POST
    succeeded), or False when the buffer was empty and nothing was sent.
    """
    # Record which entries exist BEFORE the payload is built. The POST below
    # can block for several seconds; alerts that arrive in that window must
    # stay buffered for the next digest instead of being wiped unprinted.
    with _buffer_lock:
        pending = dict(_buffer)
    payload = build_digest_payload()
    if payload is None:
        print("[irc-notify] flush: buffer empty, no digest sent", file=sys.stderr)
        return False
    sent = post_thermal(payload, "digest")
    with _buffer_lock:
        for fp, entry in pending.items():
            # Identity check: a fingerprint that was resolved and re-added
            # while the POST was in flight is a new entry and must be kept.
            if _buffer.get(fp) is entry:
                del _buffer[fp]
    # NOTE(review): entries are dropped even when the POST failed, as before;
    # consider retaining them on failure so the next tick retries.
    if sent:
        _stats["digest_flushed"] += 1
    return sent
|
||||||
|
|
||||||
|
def digest_loop():
|
||||||
|
global _last_flush_time
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
now = time.time()
|
||||||
|
elapsed = now - _last_flush_time
|
||||||
|
if elapsed >= BATCH_INTERVAL_MIN * 60:
|
||||||
|
print(f"[irc-notify] digest tick: interval reached ({BATCH_INTERVAL_MIN}m); buffer={len(_buffer)}", file=sys.stderr)
|
||||||
|
flush_digest()
|
||||||
|
_last_flush_time = now
|
||||||
|
elif len(_buffer) >= BATCH_MAX_PENDING:
|
||||||
|
print(f"[irc-notify] digest tick: buffer full ({len(_buffer)}); force flush", file=sys.stderr)
|
||||||
|
flush_digest()
|
||||||
|
_last_flush_time = now
|
||||||
|
time.sleep(15)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"[irc-notify] digest loop error: {e}", file=sys.stderr)
|
||||||
|
time.sleep(60)
|
||||||
|
|
||||||
class Handler(BaseHTTPRequestHandler):
|
class Handler(BaseHTTPRequestHandler):
|
||||||
def do_POST(self):
|
def do_POST(self):
|
||||||
|
if self.path == "/flush":
|
||||||
|
ok = flush_digest()
|
||||||
|
self.send_response(200); self.send_header("Content-Type", "application/json"); self.end_headers()
|
||||||
|
self.wfile.write(json.dumps({"flushed": ok, "buffer_after": len(_buffer)}).encode())
|
||||||
|
return
|
||||||
|
_stats["webhooks_received"] += 1
|
||||||
length = int(self.headers.get("Content-Length", 0))
|
length = int(self.headers.get("Content-Length", 0))
|
||||||
body = json.loads(self.rfile.read(length)) if length else {}
|
body = json.loads(self.rfile.read(length)) if length else {}
|
||||||
for alert in body.get("alerts", []):
|
for alert in body.get("alerts", []):
|
||||||
@@ -1194,22 +1500,56 @@ data:
|
|||||||
msg = f"{icon}{sev_tag} {name}: {summary}"
|
msg = f"{icon}{sev_tag} {name}: {summary}"
|
||||||
if desc: msg += f"\n {desc}"
|
if desc: msg += f"\n {desc}"
|
||||||
send_irc(msg)
|
send_irc(msg)
|
||||||
if should_print(alert): send_thermal_print(alert)
|
# Thermal routing — EVERYTHING (including criticals) goes into
|
||||||
self.send_response(200)
|
# the hourly digest. Only the explicit `alert_channel=thermal_print_immediate`
|
||||||
self.send_header("Content-Type", "application/json")
|
# label bypasses, and even that flushes-the-current-digest rather
|
||||||
self.end_headers()
|
# than printing a standalone job, so the same fingerprint can't
|
||||||
|
# spam the printer per webhook cycle.
|
||||||
|
if status == "RESOLVED":
|
||||||
|
add_to_digest(alert) # removes from buffer
|
||||||
|
continue
|
||||||
|
if is_immediate_label(alert):
|
||||||
|
# Explicit opt-in for "paper this NOW" — first arrival of a
|
||||||
|
# new fingerprint triggers an immediate digest flush; repeat
|
||||||
|
# webhooks for the same fingerprint dedupe in the buffer
|
||||||
|
# until the next interval or until the alert resolves.
|
||||||
|
new_in_buffer = add_to_digest(alert)
|
||||||
|
if new_in_buffer:
|
||||||
|
global _last_flush_time
|
||||||
|
flush_digest()
|
||||||
|
_last_flush_time = time.time()
|
||||||
|
elif is_critical(alert) or is_batched_label(alert):
|
||||||
|
add_to_digest(alert)
|
||||||
|
# else: IRC-only (warnings without thermal_print label)
|
||||||
|
self.send_response(200); self.send_header("Content-Type", "application/json"); self.end_headers()
|
||||||
self.wfile.write(b'{"status":"ok"}')
|
self.wfile.write(b'{"status":"ok"}')
|
||||||
|
|
||||||
def do_GET(self):
|
def do_GET(self):
|
||||||
self.send_response(200)
|
self.send_response(200); self.send_header("Content-Type", "application/json"); self.end_headers()
|
||||||
self.send_header("Content-Type", "application/json")
|
with _buffer_lock:
|
||||||
self.end_headers()
|
alertnames = sorted({it["alert"].get("labels", {}).get("alertname", "?") for it in _buffer.values()})
|
||||||
self.wfile.write(json.dumps({"service":"irc-notify","thermal_print":PRINT_ENABLED}).encode())
|
depth = len(_buffer)
|
||||||
|
info = {
|
||||||
|
"service": "irc-notify",
|
||||||
|
"config": {"thermal_print_enabled": THERMAL_PRINT_ENABLED,
|
||||||
|
"batch_interval_min": BATCH_INTERVAL_MIN,
|
||||||
|
"batch_max_pending": BATCH_MAX_PENDING,
|
||||||
|
"irc_target": f"{IRC_HOST}:{IRC_PORT} {IRC_CHANNEL}",
|
||||||
|
"print_web_url": PRINT_WEB_URL},
|
||||||
|
"buffer": {"depth": depth, "alertnames": alertnames,
|
||||||
|
"seconds_since_last_flush": int(time.time() - _last_flush_time),
|
||||||
|
"seconds_until_next_flush": max(0, int(BATCH_INTERVAL_MIN*60 - (time.time() - _last_flush_time)))},
|
||||||
|
"stats": _stats,
|
||||||
|
}
|
||||||
|
self.wfile.write(json.dumps(info, indent=2).encode())
|
||||||
|
|
||||||
def log_message(self, format, *args):
|
def log_message(self, format, *args):
|
||||||
print(f"[irc-notify] {args[0]}", file=sys.stderr)
|
print(f"[irc-notify] {args[0]}", file=sys.stderr)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
threading.Thread(target=digest_loop, daemon=True).start()
|
||||||
server = HTTPServer(("0.0.0.0", 9119), Handler)
|
server = HTTPServer(("0.0.0.0", 9119), Handler)
|
||||||
print(f"IRC alert relay :9119 -> {IRC_HOST}:{IRC_PORT} {IRC_CHANNEL} (thermal: {PRINT_ENABLED})")
|
print(f"[irc-notify] :9119 -> IRC {IRC_HOST}:{IRC_PORT} {IRC_CHANNEL} | thermal={'ON' if THERMAL_PRINT_ENABLED else 'OFF'} | digest={BATCH_INTERVAL_MIN}m max={BATCH_MAX_PENDING}", file=sys.stderr)
|
||||||
server.serve_forever()
|
server.serve_forever()
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
@@ -3296,6 +3636,39 @@ data:
|
|||||||
relativeTimeRange: {from: 120, to: 0}
|
relativeTimeRange: {from: 120, to: 0}
|
||||||
datasourceUid: __expr__
|
datasourceUid: __expr__
|
||||||
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [600], type: gt}}], refId: C}
|
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [600], type: gt}}], refId: C}
|
||||||
|
- orgId: 1
|
||||||
|
name: CI Runners
|
||||||
|
folder: CI Alerts
|
||||||
|
interval: 1m
|
||||||
|
rules:
|
||||||
|
- uid: linux-runner-offline
|
||||||
|
title: LinuxRunnerOffline
|
||||||
|
condition: C
|
||||||
|
for: 5m
|
||||||
|
noDataState: OK
|
||||||
|
execErrState: Error
|
||||||
|
annotations:
|
||||||
|
summary: "Linux CI runner offline: {{ $labels.deployment }}"
|
||||||
|
description: "A github-runner namespace Deployment has 0 ready replicas for more than 5 minutes. CI jobs targeting that repo will queue until the runner pod restarts and re-registers."
|
||||||
|
runbook: "1. kubectl -n github-runner get pods -l app.kubernetes.io/name={{ $labels.deployment }} 2. kubectl -n github-runner logs -l app.kubernetes.io/name={{ $labels.deployment }} --tail=50 3. Verify PAT repo access if registration returns 404 4. Verify no RWO PVC is shared by scaled runners"
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
service: github-runner
|
||||||
|
alert_channel: irc
|
||||||
|
team: ci
|
||||||
|
data:
|
||||||
|
- refId: A
|
||||||
|
relativeTimeRange: {from: 300, to: 0}
|
||||||
|
datasourceUid: prometheus
|
||||||
|
model: {expr: 'kube_deployment_status_replicas_ready{namespace="github-runner",deployment=~"github-runner(|-(sharedpos|puppet|signage|dms|telephony|print-web|chat|mysql|kiosk-linux))"} == 0', instant: true, refId: A}
|
||||||
|
- refId: B
|
||||||
|
relativeTimeRange: {from: 300, to: 0}
|
||||||
|
datasourceUid: __expr__
|
||||||
|
model: {type: reduce, expression: A, reducer: last, refId: B}
|
||||||
|
- refId: C
|
||||||
|
relativeTimeRange: {from: 300, to: 0}
|
||||||
|
datasourceUid: __expr__
|
||||||
|
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [1], type: lt}}], refId: C}  # query A keeps only series whose value == 0, so fire when the reduced value is < 1 ("gt 0" could never match)
|
||||||
- orgId: 1
|
- orgId: 1
|
||||||
name: Infrastructure
|
name: Infrastructure
|
||||||
folder: AI Stack Alerts
|
folder: AI Stack Alerts
|
||||||
@@ -3328,6 +3701,32 @@ data:
|
|||||||
relativeTimeRange: {from: 120, to: 0}
|
relativeTimeRange: {from: 120, to: 0}
|
||||||
datasourceUid: __expr__
|
datasourceUid: __expr__
|
||||||
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [1], type: lt}}], refId: C}
|
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [1], type: lt}}], refId: C}
|
||||||
|
- uid: macmini-runner-offline
|
||||||
|
title: MacMiniRunnerOffline
|
||||||
|
condition: C
|
||||||
|
for: 10m
|
||||||
|
noDataState: Alerting
|
||||||
|
execErrState: OK
|
||||||
|
annotations:
|
||||||
|
summary: Mac mini GitHub runner offline
|
||||||
|
description: "One or more macmini-* GitHub Actions runners have not reported online for more than 10 minutes. LaunchDaemons survive reboot and do not require the bluejay GUI session."
|
||||||
|
runbook: "1. ssh fcadmin@macmini.iamworkin.lan 2. launchctl print system/io.flowercore.github-runner-<slug> 3. Check /Users/fcadmin/Library/Logs/github-runners/<slug>/stderr.log 4. Re-register the repo runner if .runner is missing"
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
service: github-runner
|
||||||
|
data:
|
||||||
|
- refId: A
|
||||||
|
relativeTimeRange: {from: 600, to: 0}
|
||||||
|
datasourceUid: prometheus
|
||||||
|
model: {expr: 'min(flowercore_github_runner_online{runner=~"macmini-.*"} or vector(0))', instant: true, refId: A}
|
||||||
|
- refId: B
|
||||||
|
relativeTimeRange: {from: 600, to: 0}
|
||||||
|
datasourceUid: __expr__
|
||||||
|
model: {type: reduce, expression: A, reducer: last, refId: B}
|
||||||
|
- refId: C
|
||||||
|
relativeTimeRange: {from: 600, to: 0}
|
||||||
|
datasourceUid: __expr__
|
||||||
|
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [1], type: lt}}], refId: C}
|
||||||
- uid: high-cpu
|
- uid: high-cpu
|
||||||
title: High CPU (>85%)
|
title: High CPU (>85%)
|
||||||
condition: C
|
condition: C
|
||||||
|
|||||||
297
apps/multus/multus.yaml
Normal file
297
apps/multus/multus.yaml
Normal file
@@ -0,0 +1,297 @@
|
|||||||
|
# =============================================================================
|
||||||
|
# Multus CNI — Meta-CNI for multi-network attachment to pods/VMs
|
||||||
|
# =============================================================================
|
||||||
|
# Purpose: enable KubeVirt VMs (and any future workload) to attach additional
|
||||||
|
# network interfaces beyond the default Calico-managed pod network. Required
|
||||||
|
# for ci1 (Windows Server 2025 KubeVirt VM) to bridge onto PROD VLAN 57.
|
||||||
|
#
|
||||||
|
# Source: upstream k8snetworkplumbingwg/multus-cni v4.2.2
|
||||||
|
# https://github.com/k8snetworkplumbingwg/multus-cni/blob/v4.2.2/deployments/multus-daemonset-thick.yml
|
||||||
|
#
|
||||||
|
# Inlined from upstream (with project header, version pin annotation, and a raised kube-multus memory request/limit) for
|
||||||
|
# reproducibility and air-gap safety. Bumping versions = edit this file +
|
||||||
|
# git push. ArgoCD picks up via the bluejay-infra ApplicationSet
|
||||||
|
# (apps/* directory generator on main).
|
||||||
|
#
|
||||||
|
# Why thick plugin (not thin):
|
||||||
|
# - Thick = daemon + thin shim binary; daemon handles NAD watch + CRD reads
|
||||||
|
# centrally so each pod's CNI ADD doesn't hit the K8s API server. Better
|
||||||
|
# for clusters with many NAD-using pods.
|
||||||
|
# - Thin = each CNI ADD process directly contacts K8s API. Simpler but
|
||||||
|
# scales worse and has more failure modes.
|
||||||
|
# - KubeVirt + multi-VM workload pattern fits thick perfectly.
|
||||||
|
#
|
||||||
|
# Cluster context (verified 2026-05-08):
|
||||||
|
# - RKE2 v1.34.5 on 3 nodes (rke2-server, rke2-agent1, rke2-agent2)
|
||||||
|
# - Calico CNI (Tigera-managed) at /etc/cni/net.d + /opt/cni/bin (default)
|
||||||
|
# - openSUSE Leap 16, kernel 6.12, containerd 2.1.5
|
||||||
|
# - host bridge for PROD VLAN 57 = `br-prod` (PUPPET HOST WORK — see Phase 1.5
|
||||||
|
# in docs/infrastructure/windows-server-build-runner-plan.md)
|
||||||
|
#
|
||||||
|
# Version pin: the images use the floating `snapshot-thick` tag; pinning to the v4.2.2 release tag at deploy time
|
||||||
|
# would require a private mirror of the image. Upstream `snapshot-thick` tag
|
||||||
|
# is updated on every release, so for now we trust upstream + Calico's
|
||||||
|
# established pattern. Pin to a specific SHA256 once we mirror to Gitea OCI.
|
||||||
|
#
|
||||||
|
# Apply (once committed to bluejay-infra main, ApplicationSet auto-syncs):
|
||||||
|
# git add apps/multus/multus.yaml && git commit && git push origin main
|
||||||
|
# # ArgoCD `infra-multus` Application appears within 3 min via ApplicationSet
|
||||||
|
#
|
||||||
|
# Verify:
|
||||||
|
# kubectl -n kube-system get ds kube-multus-ds
|
||||||
|
# kubectl -n kube-system rollout status ds kube-multus-ds
|
||||||
|
# kubectl get crd network-attachment-definitions.k8s.cni.cncf.io
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
---
|
||||||
|
apiVersion: apiextensions.k8s.io/v1
|
||||||
|
kind: CustomResourceDefinition
|
||||||
|
metadata:
|
||||||
|
name: network-attachment-definitions.k8s.cni.cncf.io
|
||||||
|
annotations:
|
||||||
|
bluejay.iamworkin.lan/source: "k8snetworkplumbingwg/multus-cni v4.2.2"
|
||||||
|
spec:
|
||||||
|
group: k8s.cni.cncf.io
|
||||||
|
scope: Namespaced
|
||||||
|
names:
|
||||||
|
plural: network-attachment-definitions
|
||||||
|
singular: network-attachment-definition
|
||||||
|
kind: NetworkAttachmentDefinition
|
||||||
|
shortNames:
|
||||||
|
- net-attach-def
|
||||||
|
versions:
|
||||||
|
- name: v1
|
||||||
|
served: true
|
||||||
|
storage: true
|
||||||
|
schema:
|
||||||
|
openAPIV3Schema:
|
||||||
|
description: 'NetworkAttachmentDefinition is a CRD schema specified by the Network Plumbing
|
||||||
|
Working Group to express the intent for attaching pods to one or more logical or physical
|
||||||
|
networks. More information available at: https://github.com/k8snetworkplumbingwg/multi-net-spec'
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
apiVersion:
|
||||||
|
type: string
|
||||||
|
kind:
|
||||||
|
type: string
|
||||||
|
metadata:
|
||||||
|
type: object
|
||||||
|
spec:
|
||||||
|
description: 'NetworkAttachmentDefinition spec defines the desired state of a network attachment'
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
config:
|
||||||
|
description: 'NetworkAttachmentDefinition config is a JSON-formatted CNI configuration'
|
||||||
|
type: string
|
||||||
|
---
|
||||||
|
kind: ClusterRole
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
metadata:
|
||||||
|
name: multus
|
||||||
|
rules:
|
||||||
|
- apiGroups: ["k8s.cni.cncf.io"]
|
||||||
|
resources:
|
||||||
|
- '*'
|
||||||
|
verbs:
|
||||||
|
- '*'
|
||||||
|
- apiGroups:
|
||||||
|
- ""
|
||||||
|
resources:
|
||||||
|
- pods
|
||||||
|
- pods/status
|
||||||
|
verbs:
|
||||||
|
- get
|
||||||
|
- list
|
||||||
|
- update
|
||||||
|
- watch
|
||||||
|
- apiGroups:
|
||||||
|
- ""
|
||||||
|
- events.k8s.io
|
||||||
|
resources:
|
||||||
|
- events
|
||||||
|
verbs:
|
||||||
|
- create
|
||||||
|
- patch
|
||||||
|
- update
|
||||||
|
---
|
||||||
|
kind: ClusterRoleBinding
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
metadata:
|
||||||
|
name: multus
|
||||||
|
roleRef:
|
||||||
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
kind: ClusterRole
|
||||||
|
name: multus
|
||||||
|
subjects:
|
||||||
|
- kind: ServiceAccount
|
||||||
|
name: multus
|
||||||
|
namespace: kube-system
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ServiceAccount
|
||||||
|
metadata:
|
||||||
|
name: multus
|
||||||
|
namespace: kube-system
|
||||||
|
---
|
||||||
|
kind: ConfigMap
|
||||||
|
apiVersion: v1
|
||||||
|
metadata:
|
||||||
|
name: multus-daemon-config
|
||||||
|
namespace: kube-system
|
||||||
|
labels:
|
||||||
|
tier: node
|
||||||
|
app: multus
|
||||||
|
data:
|
||||||
|
daemon-config.json: |
|
||||||
|
{
|
||||||
|
"chrootDir": "/hostroot",
|
||||||
|
"cniVersion": "0.3.1",
|
||||||
|
"logLevel": "verbose",
|
||||||
|
"logToStderr": true,
|
||||||
|
"cniConfigDir": "/host/etc/cni/net.d",
|
||||||
|
"multusAutoconfigDir": "/host/etc/cni/net.d",
|
||||||
|
"multusConfigFile": "auto",
|
||||||
|
"socketDir": "/host/run/multus/"
|
||||||
|
}
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: DaemonSet
|
||||||
|
metadata:
|
||||||
|
name: kube-multus-ds
|
||||||
|
namespace: kube-system
|
||||||
|
labels:
|
||||||
|
tier: node
|
||||||
|
app: multus
|
||||||
|
name: multus
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
name: multus
|
||||||
|
updateStrategy:
|
||||||
|
type: RollingUpdate
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
tier: node
|
||||||
|
app: multus
|
||||||
|
name: multus
|
||||||
|
spec:
|
||||||
|
hostNetwork: true
|
||||||
|
hostPID: true
|
||||||
|
tolerations:
|
||||||
|
- operator: Exists
|
||||||
|
effect: NoSchedule
|
||||||
|
- operator: Exists
|
||||||
|
effect: NoExecute
|
||||||
|
serviceAccountName: multus
|
||||||
|
containers:
|
||||||
|
- name: kube-multus
|
||||||
|
image: ghcr.io/k8snetworkplumbingwg/multus-cni:snapshot-thick
|
||||||
|
command: [ "/usr/src/multus-cni/bin/multus-daemon" ]
|
||||||
|
# 2026-05-11: upstream default of 50Mi memory limit OOM-cascades when
|
||||||
|
# an operator-owned namespace accumulates >100 pending pods retrying
|
||||||
|
# CNI ADD. RemoteDesktop emitted 219 orphan rd-browser-only pods
|
||||||
|
# (missing OwnerReferences), kubelet's CNI ADD avalanche pushed multus
|
||||||
|
# over 50Mi, OOMKilled, restarted with even bigger backlog → loop.
|
||||||
|
# 21h cluster outage. See FlowerCore.Notes:
|
||||||
|
# feedback_multus_50mi_limit_oom_orphan_pod_avalanche.md
|
||||||
|
# 1Gi limit / 512Mi request comfortably handles a 200+ pod CNI
|
||||||
|
# catchup burst on 64GB nodes (nodes are <25% used in steady-state).
|
||||||
|
# Drop back toward 256Mi only after MultusMemoryPressure alert
|
||||||
|
# proves steady-state working set sits well below 200Mi.
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: "100m"
|
||||||
|
memory: "512Mi"
|
||||||
|
limits:
|
||||||
|
cpu: "100m"
|
||||||
|
memory: "1Gi"
|
||||||
|
securityContext:
|
||||||
|
privileged: true
|
||||||
|
terminationMessagePolicy: FallbackToLogsOnError
|
||||||
|
volumeMounts:
|
||||||
|
- name: cni
|
||||||
|
mountPath: /host/etc/cni/net.d
|
||||||
|
# multus-daemon expects that cnibin path must be identical between pod and container host.
|
||||||
|
# e.g. if the cni bin is in '/opt/cni/bin' on the container host side, then it should be mount to '/opt/cni/bin' in multus-daemon,
|
||||||
|
# not to any other directory, like '/opt/bin' or '/usr/bin'.
|
||||||
|
- name: cnibin
|
||||||
|
mountPath: /opt/cni/bin
|
||||||
|
- name: host-run
|
||||||
|
mountPath: /host/run
|
||||||
|
- name: host-var-lib-cni-multus
|
||||||
|
mountPath: /var/lib/cni/multus
|
||||||
|
- name: host-var-lib-kubelet
|
||||||
|
mountPath: /var/lib/kubelet
|
||||||
|
mountPropagation: HostToContainer
|
||||||
|
- name: host-run-k8s-cni-cncf-io
|
||||||
|
mountPath: /run/k8s.cni.cncf.io
|
||||||
|
- name: host-run-netns
|
||||||
|
mountPath: /run/netns
|
||||||
|
mountPropagation: HostToContainer
|
||||||
|
- name: multus-daemon-config
|
||||||
|
mountPath: /etc/cni/net.d/multus.d
|
||||||
|
readOnly: true
|
||||||
|
- name: hostroot
|
||||||
|
mountPath: /hostroot
|
||||||
|
mountPropagation: HostToContainer
|
||||||
|
- mountPath: /etc/cni/multus/net.d
|
||||||
|
name: multus-conf-dir
|
||||||
|
env:
|
||||||
|
- name: MULTUS_NODE_NAME
|
||||||
|
valueFrom:
|
||||||
|
fieldRef:
|
||||||
|
fieldPath: spec.nodeName
|
||||||
|
initContainers:
|
||||||
|
- name: install-multus-binary
|
||||||
|
image: ghcr.io/k8snetworkplumbingwg/multus-cni:snapshot-thick
|
||||||
|
command:
|
||||||
|
- "sh"
|
||||||
|
- "-c"
|
||||||
|
- "cp /usr/src/multus-cni/bin/multus-shim /host/opt/cni/bin/multus-shim && cp /usr/src/multus-cni/bin/passthru /host/opt/cni/bin/passthru"
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: "10m"
|
||||||
|
memory: "15Mi"
|
||||||
|
securityContext:
|
||||||
|
privileged: true
|
||||||
|
terminationMessagePolicy: FallbackToLogsOnError
|
||||||
|
volumeMounts:
|
||||||
|
- name: cnibin
|
||||||
|
mountPath: /host/opt/cni/bin
|
||||||
|
mountPropagation: Bidirectional
|
||||||
|
terminationGracePeriodSeconds: 10
|
||||||
|
volumes:
|
||||||
|
- name: cni
|
||||||
|
hostPath:
|
||||||
|
path: /etc/cni/net.d
|
||||||
|
- name: cnibin
|
||||||
|
hostPath:
|
||||||
|
path: /opt/cni/bin
|
||||||
|
- name: hostroot
|
||||||
|
hostPath:
|
||||||
|
path: /
|
||||||
|
- name: multus-daemon-config
|
||||||
|
configMap:
|
||||||
|
name: multus-daemon-config
|
||||||
|
items:
|
||||||
|
- key: daemon-config.json
|
||||||
|
path: daemon-config.json
|
||||||
|
- name: host-run
|
||||||
|
hostPath:
|
||||||
|
path: /run
|
||||||
|
- name: host-var-lib-cni-multus
|
||||||
|
hostPath:
|
||||||
|
path: /var/lib/cni/multus
|
||||||
|
- name: host-var-lib-kubelet
|
||||||
|
hostPath:
|
||||||
|
path: /var/lib/kubelet
|
||||||
|
- name: host-run-k8s-cni-cncf-io
|
||||||
|
hostPath:
|
||||||
|
path: /run/k8s.cni.cncf.io
|
||||||
|
- name: host-run-netns
|
||||||
|
hostPath:
|
||||||
|
path: /run/netns/
|
||||||
|
- name: multus-conf-dir
|
||||||
|
hostPath:
|
||||||
|
path: /etc/cni/multus/net.d
|
||||||
@@ -219,6 +219,65 @@ spec:
|
|||||||
tls:
|
tls:
|
||||||
secretName: cockpit-tls
|
secretName: cockpit-tls
|
||||||
---
|
---
|
||||||
|
# ============================================================
|
||||||
|
# PuppetDB Dashboard - noc1:8080 (HTTP, web UI only)
|
||||||
|
# Agent-to-PuppetDB mTLS still uses port 8081 directly via Puppet CA
|
||||||
|
# (NOT via this proxy). See docs/infrastructure/cert-recovery-2026-04-28.md
|
||||||
|
# ============================================================
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: puppetdb-external
|
||||||
|
namespace: noc-proxy
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- port: 8080
|
||||||
|
targetPort: 8080
|
||||||
|
name: http
|
||||||
|
clusterIP: None
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Endpoints
|
||||||
|
metadata:
|
||||||
|
name: puppetdb-external
|
||||||
|
namespace: noc-proxy
|
||||||
|
subsets:
|
||||||
|
- addresses:
|
||||||
|
- ip: 10.0.56.10
|
||||||
|
ports:
|
||||||
|
- port: 8080
|
||||||
|
name: http
|
||||||
|
---
|
||||||
|
apiVersion: cert-manager.io/v1
|
||||||
|
kind: Certificate
|
||||||
|
metadata:
|
||||||
|
name: puppetdb-tls
|
||||||
|
namespace: noc-proxy
|
||||||
|
spec:
|
||||||
|
secretName: puppetdb-tls
|
||||||
|
issuerRef:
|
||||||
|
name: step-ca-acme
|
||||||
|
kind: ClusterIssuer
|
||||||
|
dnsNames:
|
||||||
|
- puppetdb.iamworkin.lan
|
||||||
|
---
|
||||||
|
apiVersion: traefik.io/v1alpha1
|
||||||
|
kind: IngressRoute
|
||||||
|
metadata:
|
||||||
|
name: puppetdb
|
||||||
|
namespace: noc-proxy
|
||||||
|
spec:
|
||||||
|
entryPoints:
|
||||||
|
- websecure
|
||||||
|
routes:
|
||||||
|
- kind: Rule
|
||||||
|
match: Host(`puppetdb.iamworkin.lan`)
|
||||||
|
services:
|
||||||
|
- name: puppetdb-external
|
||||||
|
port: 8080
|
||||||
|
tls:
|
||||||
|
secretName: puppetdb-tls
|
||||||
|
---
|
||||||
# NetworkPolicy: allow Traefik ingress, allow egress to noc1
|
# NetworkPolicy: allow Traefik ingress, allow egress to noc1
|
||||||
apiVersion: networking.k8s.io/v1
|
apiVersion: networking.k8s.io/v1
|
||||||
kind: NetworkPolicy
|
kind: NetworkPolicy
|
||||||
@@ -242,6 +301,8 @@ spec:
|
|||||||
ports:
|
ports:
|
||||||
- port: 3000
|
- port: 3000
|
||||||
protocol: TCP
|
protocol: TCP
|
||||||
|
- port: 8080
|
||||||
|
protocol: TCP
|
||||||
- port: 9090
|
- port: 9090
|
||||||
protocol: TCP
|
protocol: TCP
|
||||||
- port: 9091
|
- port: 9091
|
||||||
|
|||||||
226
apps/selenium/network-policy.yaml
Normal file
226
apps/selenium/network-policy.yaml
Normal file
@@ -0,0 +1,226 @@
|
|||||||
|
# Selenium Grid NetworkPolicy.
|
||||||
|
#
|
||||||
|
# Captured into bluejay-infra 2026-05-07 during the regroup audit. This
|
||||||
|
# NetworkPolicy was previously applied via `kubectl apply` directly to
|
||||||
|
# the cluster with no source-of-truth anywhere — a fresh cluster rebuild
|
||||||
|
# would have lost all of it (including the Selenium Grid → Traefik VIP
|
||||||
|
# allow rule for AAT runs against `*.iamworkin.lan` services).
|
||||||
|
#
|
||||||
|
# The Selenium Grid Deployment + Services themselves are still managed
|
||||||
|
# outside ArgoCD (deployed via raw kubectl from the original Selenium
|
||||||
|
# Grid bring-up). Migrating those into bluejay-infra is a separate lane —
|
||||||
|
# this commit only restores GitOps repeatability for the NetworkPolicy.
|
||||||
|
#
|
||||||
|
# Rules captured from the live cluster's `kubectl get netpol -n selenium
|
||||||
|
# selenium-netpol -o yaml` on 2026-05-07. Originally applied 2026-03-15
|
||||||
|
# (from `metadata.creationTimestamp` before the field was stripped).
|
||||||
|
#
|
||||||
|
# Allows:
|
||||||
|
# - Egress: CoreDNS, intra-namespace pod-to-pod (4442/4443/4444/5555),
|
||||||
|
# Traefik VIP for `*.iamworkin.lan` AAT runs, every namespace (empty namespaceSelector) on 80/443 and the
|
||||||
|
# standard FC service ports (5100/5200/5300/5400/8080), pod CIDR
|
||||||
|
# (10.42.0.0/16) + service CIDR (10.43.0.0/16) for the same ports,
|
||||||
|
# LAN gateway range (10.0.56.0/24) on 80/443/8443, edge2 CUPS print
|
||||||
|
# (10.0.57.16:5200), public internet 80/443 (excluding only 172.16.0.0/12 and 192.168.0.0/16; 10.0.0.0/8 is not excluded), and
|
||||||
|
# fc-signage:5190 for the signage AAT lane.
|
||||||
|
# - Ingress: Traefik (4444 + 8089 ACME-solver-style), intra-namespace pod-to-pod (4442/4443/4444/5555),
|
||||||
|
# telephony / gitea / fc-system / fc-signage / github-runner namespaces
|
||||||
|
# on 4444.
|
||||||
|
#
|
||||||
|
# 2026-05-25: added github-runner ingress on 4444 so CI jobs running in
|
||||||
|
# self-hosted runner pods (e.g. FlowerCore.Print.Web `help-screenshots`)
|
||||||
|
# can reach the grid. Without this allow, the session POST to
|
||||||
|
# `selenium-hub.selenium.svc.cluster.local:4444` was DNAT'd to the hub
|
||||||
|
# pod IP and then dropped at the Calico ingress hook — Selenium UI showed
|
||||||
|
# 0/4 sessions while the .NET HTTP client timed out at 60s. Same family
|
||||||
|
# as `feedback_netpol_dnat_backend_port`, wrong-source-namespace flavor.
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: NetworkPolicy
|
||||||
|
metadata:
|
||||||
|
name: selenium-netpol
|
||||||
|
namespace: selenium
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/part-of: selenium
|
||||||
|
app.kubernetes.io/component: isolation
|
||||||
|
spec:
|
||||||
|
egress:
|
||||||
|
- ports:
|
||||||
|
- port: 53
|
||||||
|
protocol: UDP
|
||||||
|
- port: 53
|
||||||
|
protocol: TCP
|
||||||
|
to:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: kube-system
|
||||||
|
- ports:
|
||||||
|
- port: 4442
|
||||||
|
protocol: TCP
|
||||||
|
- port: 4443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 4444
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5555
|
||||||
|
protocol: TCP
|
||||||
|
to:
|
||||||
|
- podSelector: {}
|
||||||
|
- ports:
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 80
|
||||||
|
protocol: TCP
|
||||||
|
to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.56.200/32
|
||||||
|
- ports:
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 80
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5200
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5300
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5400
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5100
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8080
|
||||||
|
protocol: TCP
|
||||||
|
to:
|
||||||
|
- namespaceSelector: {}
|
||||||
|
- ports:
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 80
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8080
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5200
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5300
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5400
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5100
|
||||||
|
protocol: TCP
|
||||||
|
to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.43.0.0/16
|
||||||
|
- ports:
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 80
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8080
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5200
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5300
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5400
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5100
|
||||||
|
protocol: TCP
|
||||||
|
to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.42.0.0/16
|
||||||
|
- ports:
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 80
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8443
|
||||||
|
protocol: TCP
|
||||||
|
to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.56.0/24
|
||||||
|
- ports:
|
||||||
|
- port: 5200
|
||||||
|
protocol: TCP
|
||||||
|
to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 10.0.57.16/32
|
||||||
|
- ports:
|
||||||
|
- port: 80
|
||||||
|
protocol: TCP
|
||||||
|
- port: 443
|
||||||
|
protocol: TCP
|
||||||
|
to:
|
||||||
|
- ipBlock:
|
||||||
|
cidr: 0.0.0.0/0
|
||||||
|
except:
|
||||||
|
- 172.16.0.0/12
|
||||||
|
- 192.168.0.0/16
|
||||||
|
- ports:
|
||||||
|
- port: 5190
|
||||||
|
protocol: TCP
|
||||||
|
to:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: fc-signage
|
||||||
|
ingress:
|
||||||
|
- from:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: traefik-system
|
||||||
|
ports:
|
||||||
|
- port: 4444
|
||||||
|
protocol: TCP
|
||||||
|
- port: 8089
|
||||||
|
protocol: TCP
|
||||||
|
- from:
|
||||||
|
- podSelector: {}
|
||||||
|
ports:
|
||||||
|
- port: 4442
|
||||||
|
protocol: TCP
|
||||||
|
- port: 4443
|
||||||
|
protocol: TCP
|
||||||
|
- port: 4444
|
||||||
|
protocol: TCP
|
||||||
|
- port: 5555
|
||||||
|
protocol: TCP
|
||||||
|
- from:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: telephony
|
||||||
|
ports:
|
||||||
|
- port: 4444
|
||||||
|
protocol: TCP
|
||||||
|
- from:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: gitea
|
||||||
|
ports:
|
||||||
|
- port: 4444
|
||||||
|
protocol: TCP
|
||||||
|
- from:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: fc-system
|
||||||
|
ports:
|
||||||
|
- port: 4444
|
||||||
|
protocol: TCP
|
||||||
|
- from:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: fc-signage
|
||||||
|
ports:
|
||||||
|
- port: 4444
|
||||||
|
protocol: TCP
|
||||||
|
- from:
|
||||||
|
- namespaceSelector:
|
||||||
|
matchLabels:
|
||||||
|
kubernetes.io/metadata.name: github-runner
|
||||||
|
ports:
|
||||||
|
- port: 4444
|
||||||
|
protocol: TCP
|
||||||
|
podSelector: {}
|
||||||
|
policyTypes:
|
||||||
|
- Ingress
|
||||||
|
- Egress
|
||||||
|
|
||||||
427
apps/selenium/selenium-grid.yaml
Normal file
427
apps/selenium/selenium-grid.yaml
Normal file
@@ -0,0 +1,427 @@
|
|||||||
|
# Selenium Grid 4 — RKE2 deployment
|
||||||
|
#
|
||||||
|
# Hub + chrome + firefox + edge browser nodes serving fleet-wide AAT runs from
|
||||||
|
# the GitHub Actions self-hosted runners. ArgoCD owns this namespace from
|
||||||
|
# 2026-05-25 (`infra-selenium` Application; previously these resources were
|
||||||
|
# orphan kubectl-applied since 2026-03-15).
|
||||||
|
#
|
||||||
|
# Endpoints:
|
||||||
|
# - Internal cluster: http://selenium-hub.selenium.svc.cluster.local:4444
|
||||||
|
# - LAN LoadBalancer (MetalLB): http://10.0.56.208:4444
|
||||||
|
# - Traefik public: https://selenium.iamworkin.lan
|
||||||
|
#
|
||||||
|
# Browser maxSessions:
|
||||||
|
# - chrome 2 (bumped from 1 on 2026-05-25 morning-routine — AAT-heavy
|
||||||
|
# Print.Web help-screenshots was the global bottleneck;
|
||||||
|
# see commit history for ops/runner-replica-rightsize)
|
||||||
|
# - firefox 1
|
||||||
|
# - edge 1
|
||||||
|
#
|
||||||
|
# Screenshots + video recording write to NFS via the chrome video sidecar.
|
||||||
|
# See: CLAUDE.md "Selenium Grid & Visual AAT Testing" + bluejay-infra ADR notes.
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: selenium-hub
|
||||||
|
app.kubernetes.io/name: selenium-hub
|
||||||
|
app.kubernetes.io/part-of: selenium-grid
|
||||||
|
name: selenium-hub
|
||||||
|
namespace: selenium
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- name: web
|
||||||
|
port: 4444
|
||||||
|
targetPort: 4444
|
||||||
|
- name: publish
|
||||||
|
port: 4442
|
||||||
|
targetPort: 4442
|
||||||
|
- name: subscribe
|
||||||
|
port: 4443
|
||||||
|
targetPort: 4443
|
||||||
|
selector:
|
||||||
|
app: selenium-hub
|
||||||
|
type: ClusterIP
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
annotations:
|
||||||
|
metallb.io/ip-allocated-from-pool: bluejay-pool
|
||||||
|
metallb.universe.tf/loadBalancerIPs: 10.0.56.208
|
||||||
|
labels:
|
||||||
|
app: selenium-hub
|
||||||
|
component: external-access
|
||||||
|
name: selenium-hub-external
|
||||||
|
namespace: selenium
|
||||||
|
spec:
|
||||||
|
clusterIP: 10.43.90.147
|
||||||
|
clusterIPs:
|
||||||
|
- 10.43.90.147
|
||||||
|
externalTrafficPolicy: Local
|
||||||
|
healthCheckNodePort: 32213
|
||||||
|
ports:
|
||||||
|
- name: web
|
||||||
|
nodePort: 32411
|
||||||
|
port: 4444
|
||||||
|
targetPort: 4444
|
||||||
|
- name: publish
|
||||||
|
nodePort: 32068
|
||||||
|
port: 4442
|
||||||
|
targetPort: 4442
|
||||||
|
- name: subscribe
|
||||||
|
nodePort: 31000
|
||||||
|
port: 4443
|
||||||
|
targetPort: 4443
|
||||||
|
selector:
|
||||||
|
app: selenium-hub
|
||||||
|
type: LoadBalancer
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: selenium-hub
|
||||||
|
app.kubernetes.io/name: selenium-hub
|
||||||
|
app.kubernetes.io/part-of: selenium-grid
|
||||||
|
name: selenium-hub
|
||||||
|
namespace: selenium
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: selenium-hub
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: selenium-hub
|
||||||
|
app.kubernetes.io/name: selenium-hub
|
||||||
|
app.kubernetes.io/part-of: selenium-grid
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- env:
|
||||||
|
- name: SE_NODE_SESSION_TIMEOUT
|
||||||
|
value: '300'
|
||||||
|
- name: SE_SESSION_REQUEST_TIMEOUT
|
||||||
|
value: '300'
|
||||||
|
- name: SE_SESSION_RETRY_INTERVAL
|
||||||
|
value: '5'
|
||||||
|
- name: JAVA_OPTS
|
||||||
|
value: -Xmx512m
|
||||||
|
image: selenium/hub:4.27.0
|
||||||
|
livenessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /wd/hub/status
|
||||||
|
port: 4444
|
||||||
|
initialDelaySeconds: 30
|
||||||
|
periodSeconds: 15
|
||||||
|
timeoutSeconds: 5
|
||||||
|
name: selenium-hub
|
||||||
|
ports:
|
||||||
|
- containerPort: 4444
|
||||||
|
name: web
|
||||||
|
- containerPort: 4442
|
||||||
|
name: publish
|
||||||
|
- containerPort: 4443
|
||||||
|
name: subscribe
|
||||||
|
readinessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /wd/hub/status
|
||||||
|
port: 4444
|
||||||
|
initialDelaySeconds: 10
|
||||||
|
periodSeconds: 5
|
||||||
|
timeoutSeconds: 5
|
||||||
|
# Hub baseline working set ~766Mi on 2026-05-25 (75% of prior 1Gi
|
||||||
|
# limit). Bump to 1.5Gi / 1Gi to keep ~50% headroom; matches the
|
||||||
|
# stampede-buffer pattern documented for multus
|
||||||
|
# (feedback_k8s_cni_multus_sizing). CPU left alone — observed 54m
|
||||||
|
# against a 500m limit, no contention.
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 500m
|
||||||
|
memory: 1536Mi
|
||||||
|
requests:
|
||||||
|
cpu: 250m
|
||||||
|
memory: 1Gi
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: selenium-node-chrome
|
||||||
|
app.kubernetes.io/name: selenium-node-chrome
|
||||||
|
app.kubernetes.io/part-of: selenium-grid
|
||||||
|
name: selenium-node-chrome
|
||||||
|
namespace: selenium
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: selenium-node-chrome
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: selenium-node-chrome
|
||||||
|
app.kubernetes.io/name: selenium-node-chrome
|
||||||
|
app.kubernetes.io/part-of: selenium-grid
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- env:
|
||||||
|
- name: SE_EVENT_BUS_HOST
|
||||||
|
value: selenium-hub
|
||||||
|
- name: SE_EVENT_BUS_PUBLISH_PORT
|
||||||
|
value: '4442'
|
||||||
|
- name: SE_EVENT_BUS_SUBSCRIBE_PORT
|
||||||
|
value: '4443'
|
||||||
|
- name: SE_NODE_MAX_SESSIONS
|
||||||
|
value: '2'
|
||||||
|
- name: SE_NODE_OVERRIDE_MAX_SESSIONS
|
||||||
|
value: 'false'
|
||||||
|
- name: SE_VNC_NO_PASSWORD
|
||||||
|
value: '1'
|
||||||
|
- name: SE_SCREEN_WIDTH
|
||||||
|
value: '1920'
|
||||||
|
- name: SE_SCREEN_HEIGHT
|
||||||
|
value: '1080'
|
||||||
|
- name: SE_NODE_SESSION_TIMEOUT
|
||||||
|
value: '300'
|
||||||
|
image: selenium/node-chrome:4.27.0
|
||||||
|
livenessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /status
|
||||||
|
port: 5555
|
||||||
|
initialDelaySeconds: 30
|
||||||
|
periodSeconds: 15
|
||||||
|
name: selenium-chrome
|
||||||
|
ports:
|
||||||
|
- containerPort: 5555
|
||||||
|
name: node
|
||||||
|
readinessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /status
|
||||||
|
port: 5555
|
||||||
|
initialDelaySeconds: 15
|
||||||
|
periodSeconds: 5
|
||||||
|
# Chromium-based browser node. Bumped from 1Gi -> 2Gi (req 512Mi
|
||||||
|
# -> 1Gi) on 2026-05-25 — Edge had 51 OOMKills in 5d on the
|
||||||
|
# original 1Gi cap (~1 OOM every 2.4h), and Chrome at maxSessions=2
|
||||||
|
# was running 684Mi idle on the same cap. Matches the Firefox node's
|
||||||
|
# tested-stable 2Gi limit. CPU unchanged.
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: '1'
|
||||||
|
memory: 2Gi
|
||||||
|
requests:
|
||||||
|
cpu: 500m
|
||||||
|
memory: 1Gi
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
- env:
|
||||||
|
- name: DISPLAY_CONTAINER_NAME
|
||||||
|
value: localhost
|
||||||
|
- name: SE_SCREEN_WIDTH
|
||||||
|
value: '1920'
|
||||||
|
- name: SE_SCREEN_HEIGHT
|
||||||
|
value: '1080'
|
||||||
|
- name: SE_VIDEO_FILE_NAME
|
||||||
|
value: auto
|
||||||
|
- name: SE_VIDEO_UPLOAD_ENABLED
|
||||||
|
value: 'false'
|
||||||
|
image: selenium/video:ffmpeg-7.1-20250101
|
||||||
|
name: video
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: 500m
|
||||||
|
memory: 768Mi
|
||||||
|
requests:
|
||||||
|
cpu: 250m
|
||||||
|
memory: 384Mi
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /videos
|
||||||
|
name: selenium-videos
|
||||||
|
volumes:
|
||||||
|
- emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 2Gi
|
||||||
|
name: dshm
|
||||||
|
- emptyDir:
|
||||||
|
sizeLimit: 5Gi
|
||||||
|
name: selenium-videos
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: selenium-node-firefox
|
||||||
|
app.kubernetes.io/name: selenium-node-firefox
|
||||||
|
app.kubernetes.io/part-of: selenium-grid
|
||||||
|
name: selenium-node-firefox
|
||||||
|
namespace: selenium
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: selenium-node-firefox
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: selenium-node-firefox
|
||||||
|
app.kubernetes.io/name: selenium-node-firefox
|
||||||
|
app.kubernetes.io/part-of: selenium-grid
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- env:
|
||||||
|
- name: SE_EVENT_BUS_HOST
|
||||||
|
value: selenium-hub
|
||||||
|
- name: SE_EVENT_BUS_PUBLISH_PORT
|
||||||
|
value: '4442'
|
||||||
|
- name: SE_EVENT_BUS_SUBSCRIBE_PORT
|
||||||
|
value: '4443'
|
||||||
|
- name: SE_NODE_MAX_SESSIONS
|
||||||
|
value: '1'
|
||||||
|
- name: SE_NODE_OVERRIDE_MAX_SESSIONS
|
||||||
|
value: 'true'
|
||||||
|
- name: SE_VNC_NO_PASSWORD
|
||||||
|
value: '1'
|
||||||
|
- name: SE_START_VNC
|
||||||
|
value: 'false'
|
||||||
|
- name: SE_SCREEN_WIDTH
|
||||||
|
value: '1920'
|
||||||
|
- name: SE_SCREEN_HEIGHT
|
||||||
|
value: '1080'
|
||||||
|
- name: SE_NODE_SESSION_TIMEOUT
|
||||||
|
value: '300'
|
||||||
|
image: selenium/node-firefox:4.27.0
|
||||||
|
livenessProbe:
|
||||||
|
failureThreshold: 5
|
||||||
|
httpGet:
|
||||||
|
path: /status
|
||||||
|
port: 5555
|
||||||
|
initialDelaySeconds: 30
|
||||||
|
periodSeconds: 15
|
||||||
|
timeoutSeconds: 5
|
||||||
|
name: selenium-firefox
|
||||||
|
ports:
|
||||||
|
- containerPort: 5555
|
||||||
|
name: node
|
||||||
|
readinessProbe:
|
||||||
|
failureThreshold: 5
|
||||||
|
httpGet:
|
||||||
|
path: /status
|
||||||
|
port: 5555
|
||||||
|
initialDelaySeconds: 15
|
||||||
|
periodSeconds: 5
|
||||||
|
timeoutSeconds: 5
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: '1'
|
||||||
|
memory: 2Gi
|
||||||
|
requests:
|
||||||
|
cpu: 500m
|
||||||
|
memory: 1Gi
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
volumes:
|
||||||
|
- emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 2Gi
|
||||||
|
name: dshm
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: selenium-node-edge
|
||||||
|
app.kubernetes.io/name: selenium-node-edge
|
||||||
|
app.kubernetes.io/part-of: selenium-grid
|
||||||
|
name: selenium-node-edge
|
||||||
|
namespace: selenium
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: selenium-node-edge
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: selenium-node-edge
|
||||||
|
app.kubernetes.io/name: selenium-node-edge
|
||||||
|
app.kubernetes.io/part-of: selenium-grid
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- env:
|
||||||
|
- name: SE_EVENT_BUS_HOST
|
||||||
|
value: selenium-hub
|
||||||
|
- name: SE_EVENT_BUS_PUBLISH_PORT
|
||||||
|
value: '4442'
|
||||||
|
- name: SE_EVENT_BUS_SUBSCRIBE_PORT
|
||||||
|
value: '4443'
|
||||||
|
- name: SE_NODE_MAX_SESSIONS
|
||||||
|
value: '1'
|
||||||
|
- name: SE_NODE_OVERRIDE_MAX_SESSIONS
|
||||||
|
value: 'true'
|
||||||
|
- name: SE_VNC_NO_PASSWORD
|
||||||
|
value: '1'
|
||||||
|
- name: SE_SCREEN_WIDTH
|
||||||
|
value: '1920'
|
||||||
|
- name: SE_SCREEN_HEIGHT
|
||||||
|
value: '1080'
|
||||||
|
- name: SE_NODE_SESSION_TIMEOUT
|
||||||
|
value: '300'
|
||||||
|
image: selenium/node-edge:4.27.0
|
||||||
|
livenessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /status
|
||||||
|
port: 5555
|
||||||
|
initialDelaySeconds: 30
|
||||||
|
periodSeconds: 15
|
||||||
|
name: selenium-edge
|
||||||
|
ports:
|
||||||
|
- containerPort: 5555
|
||||||
|
name: node
|
||||||
|
readinessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /status
|
||||||
|
port: 5555
|
||||||
|
initialDelaySeconds: 15
|
||||||
|
periodSeconds: 5
|
||||||
|
# Chromium-based browser node. Bumped from 1Gi -> 2Gi (req 512Mi
|
||||||
|
# -> 1Gi) on 2026-05-25 — Edge had 51 OOMKills in 5d on the
|
||||||
|
# original 1Gi cap (~1 OOM every 2.4h), and Chrome at maxSessions=2
|
||||||
|
# was running 684Mi idle on the same cap. Matches the Firefox node's
|
||||||
|
# tested-stable 2Gi limit. CPU unchanged.
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
cpu: '1'
|
||||||
|
memory: 2Gi
|
||||||
|
requests:
|
||||||
|
cpu: 500m
|
||||||
|
memory: 1Gi
|
||||||
|
volumeMounts:
|
||||||
|
- mountPath: /dev/shm
|
||||||
|
name: dshm
|
||||||
|
volumes:
|
||||||
|
- emptyDir:
|
||||||
|
medium: Memory
|
||||||
|
sizeLimit: 2Gi
|
||||||
|
name: dshm
|
||||||
|
---
|
||||||
|
apiVersion: traefik.io/v1alpha1
|
||||||
|
kind: IngressRoute
|
||||||
|
metadata:
|
||||||
|
name: selenium-hub
|
||||||
|
namespace: selenium
|
||||||
|
spec:
|
||||||
|
entryPoints:
|
||||||
|
- websecure
|
||||||
|
routes:
|
||||||
|
- kind: Rule
|
||||||
|
match: Host(`selenium.iamworkin.lan`)
|
||||||
|
services:
|
||||||
|
- name: selenium-hub
|
||||||
|
port: 4444
|
||||||
|
tls:
|
||||||
|
secretName: selenium-tls
|
||||||
@@ -127,10 +127,13 @@ spec:
|
|||||||
initContainers:
|
initContainers:
|
||||||
- name: fix-data-perms
|
- name: fix-data-perms
|
||||||
image: busybox:latest
|
image: busybox:latest
|
||||||
# Also chown /shared-tts (hostPath /tmp/tts-audio) so the non-root
|
# Must run as root to chown the hostPath /tmp/tts-audio that may be
|
||||||
# app user (uid 1654) can write Piper .sln16 files that Asterisk
|
# root-owned after node reboot. Pod-level runAsNonRoot:true would
|
||||||
# reads at /var/lib/asterisk/sounds/tts. World-readable (755) is
|
# otherwise be inherited and chown would fail with EPERM (see Notes memory
|
||||||
# fine — Asterisk runs as a different uid in the other pod.
|
# feedback_hostpath_initcontainer_chown_perms).
|
||||||
|
securityContext:
|
||||||
|
runAsUser: 0
|
||||||
|
runAsNonRoot: false
|
||||||
command: ["sh", "-c", "chown -R 1654:1654 /data && chown 1654:1654 /shared-tts && chmod 0755 /shared-tts"]
|
command: ["sh", "-c", "chown -R 1654:1654 /data && chown 1654:1654 /shared-tts && chmod 0755 /shared-tts"]
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
- name: telephony-data
|
- name: telephony-data
|
||||||
|
|||||||
68
apps/worldbuilder/README.md
Normal file
68
apps/worldbuilder/README.md
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
# FlowerCore.WorldBuilder
|
||||||
|
|
||||||
|
ArgoCD-managed manifest for FlowerCore.WorldBuilder.Web — comic / storyboard
|
||||||
|
authoring service that drives ComfyUI for panel image generation and
|
||||||
|
QuestPDF for letter / A4 export.
|
||||||
|
|
||||||
|
Source: `D:\git\FlowerCore\FlowerCore.WorldBuilder` (master)
|
||||||
|
|
||||||
|
## Deployment order
|
||||||
|
|
||||||
|
1. **DNS preflight** — `worldbuilder.iamworkin.lan -> 10.0.56.200` MUST exist
|
||||||
|
in pfSense Unbound before this manifest is applied, or cert-manager
|
||||||
|
HTTP-01 silently backs off exponentially (~2h).
|
||||||
|
Memory: `feedback_pfsense_dns_required_for_acme`.
|
||||||
|
2. **Image import to ALL RKE2 nodes** — pod can schedule to any of
|
||||||
|
`rke2-server` (10.0.56.11), `rke2-agent1` (10.0.56.12),
|
||||||
|
`rke2-agent2` (10.0.56.13). Build with:
|
||||||
|
```bash
|
||||||
|
bash deploy/build.sh # in FlowerCore.WorldBuilder repo
|
||||||
|
podman save localhost/fc-worldbuilder:v<TAG> -o /tmp/fc-worldbuilder-v<TAG>.tar
|
||||||
|
for h in 10.0.56.11 10.0.56.12 10.0.56.13; do
|
||||||
|
scp /tmp/fc-worldbuilder-v<TAG>.tar fcadmin@$h:/tmp/
|
||||||
|
ssh fcadmin@$h \
|
||||||
|
"sudo /var/lib/rancher/rke2/bin/ctr -a /run/k3s/containerd/containerd.sock \
|
||||||
|
-n k8s.io images import /tmp/fc-worldbuilder-v<TAG>.tar"
|
||||||
|
done
|
||||||
|
```
|
||||||
|
Memory: `feedback_rke2_image_import_per_node_scp`.
|
||||||
|
3. **Bump image tag** in `worldbuilder.yaml` and git push.
|
||||||
|
ArgoCD ApplicationSet picks up the change within ~3 minutes.
|
||||||
|
4. **First production render** — open
|
||||||
|
`https://worldbuilder.iamworkin.lan/studio/c32e0000-0000-4000-8000-000000000004`
|
||||||
|
and confirm the Cyberpunk Blue Jay demo prompt loads with five seeded fake
|
||||||
|
generated images. This Sprint 32 visitor-safe profile uses
|
||||||
|
`ClientMode=fake`; switch the image-generation env vars back to ComfyUI only
|
||||||
|
for an operator-owned GPU render lane.
|
||||||
|
|
||||||
|
## Health probes
|
||||||
|
|
||||||
|
- `startupProbe` + `readinessProbe`: `httpGet /healthz` (registered explicitly
|
||||||
|
in Program.cs — anonymous, no DB or OpenAPI dependency).
|
||||||
|
- `livenessProbe`: `tcpSocket` as a cheap fallback.
|
||||||
|
Memory: `feedback_k8s_probes_must_not_hit_openapi`,
|
||||||
|
`feedback_k8s_probes_behind_auth_middleware`.
|
||||||
|
|
||||||
|
## Storage
|
||||||
|
|
||||||
|
- Longhorn RWO PVC `worldbuilder-data` (5Gi) mounted at `/data`. SQLite DB
|
||||||
|
lives at `/data/worldbuilder.db`, generated images under `/data/gallery/`,
|
||||||
|
PDF/PNG exports under `/data/exports/`.
|
||||||
|
- DataProtection keys persist to the same SQLite via
|
||||||
|
`AddFlowerCoreDataProtection<WorldBuilderDbContext>` — explicit migration
|
||||||
|
`20260429133417_Initial` already creates `fc_dp_keys`.
|
||||||
|
Memory: `feedback_dataprotection_keys_persist_to_app_dbcontext`,
|
||||||
|
`feedback_intranet_dataprotection_table_must_have_explicit_migration`.
|
||||||
|
|
||||||
|
## Image generation backend
|
||||||
|
|
||||||
|
Sprint 32 pins the Kubernetes profile to
|
||||||
|
`FlowerCore:WorldBuilder:ImageGeneration:ClientMode=fake` with
|
||||||
|
`BaseUrl=http://127.0.0.1:1`. That keeps the public/internal visitor demo
|
||||||
|
deterministic, avoids GPU exposure, and still exercises the studio/gallery
|
||||||
|
surface with persisted generated-image metadata.
|
||||||
|
|
||||||
|
The previous ComfyUI backend target was `http://10.0.56.20:8188` on
|
||||||
|
BLUEJAY-WS (R9700 / gfx1201 / ROCm 7.2.1). Re-enable it only in an
|
||||||
|
operator-owned follow-up that also verifies workstation reachability and image
|
||||||
|
import freshness.
|
||||||
256
apps/worldbuilder/worldbuilder.yaml
Normal file
256
apps/worldbuilder/worldbuilder.yaml
Normal file
@@ -0,0 +1,256 @@
|
|||||||
|
# FlowerCore.WorldBuilder — comic / storyboard authoring service.
|
||||||
|
#
|
||||||
|
# Deployment + Service + PVC + Certificate + IngressRoute. ArgoCD-managed
|
||||||
|
# end-to-end. See apps/worldbuilder/README.md for the per-deploy runbook.
|
||||||
|
#
|
||||||
|
# Image build (BLUEJAY-WS):
|
||||||
|
# bash deploy/build.sh # in FlowerCore.WorldBuilder repo
|
||||||
|
# podman save localhost/fc-worldbuilder:v<TAG> -o /tmp/fc-worldbuilder-v<TAG>.tar
|
||||||
|
# for h in 10.0.56.11 10.0.56.12 10.0.56.13; do
|
||||||
|
# scp /tmp/fc-worldbuilder-v<TAG>.tar fcadmin@$h:/tmp/
|
||||||
|
# ssh fcadmin@$h "sudo /var/lib/rancher/rke2/bin/ctr -a /run/k3s/containerd/containerd.sock -n k8s.io images import /tmp/fc-worldbuilder-v<TAG>.tar"
|
||||||
|
# done
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Namespace
|
||||||
|
metadata:
|
||||||
|
name: fc-worldbuilder
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: fc-worldbuilder
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
---
|
||||||
|
# SQLite DB + generated image gallery + PDF/PNG exports.
|
||||||
|
# Longhorn RWO — single replica with `Recreate` rollout strategy keeps it safe.
|
||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: worldbuilder-data
|
||||||
|
namespace: fc-worldbuilder
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: worldbuilder-data
|
||||||
|
app.kubernetes.io/component: storage
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteOnce
|
||||||
|
storageClassName: longhorn
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 5Gi
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: worldbuilder-web
|
||||||
|
namespace: fc-worldbuilder
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: worldbuilder-web
|
||||||
|
app.kubernetes.io/component: web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
annotations:
|
||||||
|
flowercore.io/traceability-standard: k8s-pod-ownership-and-traceability-standard
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
revisionHistoryLimit: 3
|
||||||
|
strategy:
|
||||||
|
# RWO PVC + single replica. Recreate avoids multi-attach overlap.
|
||||||
|
type: Recreate
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: worldbuilder-web
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: worldbuilder-web
|
||||||
|
app.kubernetes.io/component: web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
annotations:
|
||||||
|
prometheus.io/scrape: "true"
|
||||||
|
prometheus.io/port: "8080"
|
||||||
|
prometheus.io/path: "/metrics/prometheus"
|
||||||
|
flowercore.io/audit-trace-id: "worldbuilder-runtime-demo"
|
||||||
|
spec:
|
||||||
|
securityContext:
|
||||||
|
fsGroup: 1654
|
||||||
|
fsGroupChangePolicy: OnRootMismatch
|
||||||
|
containers:
|
||||||
|
- name: web
|
||||||
|
# Bump tag for each rebuild. Initial deploy: v202605062048
|
||||||
|
image: localhost/fc-worldbuilder:v202605062048
|
||||||
|
imagePullPolicy: Never
|
||||||
|
ports:
|
||||||
|
- containerPort: 8080
|
||||||
|
name: http
|
||||||
|
env:
|
||||||
|
- name: ASPNETCORE_URLS
|
||||||
|
value: "http://+:8080"
|
||||||
|
- name: ASPNETCORE_ENVIRONMENT
|
||||||
|
value: "Production"
|
||||||
|
- name: DOTNET_RUNNING_IN_CONTAINER
|
||||||
|
value: "true"
|
||||||
|
- name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
|
||||||
|
value: "false"
|
||||||
|
# SQLite path overrides (default appsettings uses relative paths).
|
||||||
|
- name: ConnectionStrings__DefaultConnection
|
||||||
|
value: "Data Source=/data/worldbuilder.db"
|
||||||
|
- name: FlowerCore__Database__Provider
|
||||||
|
value: "Sqlite"
|
||||||
|
- name: FlowerCore__Database__ConnectionStrings__Sqlite
|
||||||
|
value: "Data Source=/data/worldbuilder.db"
|
||||||
|
# Generated image gallery + exports persist on /data.
|
||||||
|
- name: FlowerCore__WorldBuilder__ImageStore__RootPath
|
||||||
|
value: "/data/gallery"
|
||||||
|
- name: FlowerCore__WorldBuilder__Export__RootPath
|
||||||
|
value: "/data/exports"
|
||||||
|
# Visitor-safe Sprint 32 profile: fake backend keeps public demo
|
||||||
|
# rendering deterministic and avoids exposing BLUEJAY-WS GPU.
|
||||||
|
- name: FlowerCore__WorldBuilder__ImageGeneration__BaseUrl
|
||||||
|
value: "http://127.0.0.1:1"
|
||||||
|
- name: FlowerCore__WorldBuilder__ImageGeneration__ClientMode
|
||||||
|
value: "fake"
|
||||||
|
- name: FlowerCore__WorldBuilder__ImageGeneration__BackendId
|
||||||
|
value: "fake"
|
||||||
|
resources:
|
||||||
|
# Cluster CPU-request budget runs hot (99% on all 3 nodes at deploy
|
||||||
|
# time) while actual CPU usage is well below capacity. Idle Blazor
|
||||||
|
# Server + SignalR + a single ComfyUI poller uses ~5m, so 25m is
|
||||||
|
# generous. Re-evaluate if active rendering/export workers ever
|
||||||
|
# push past the limit.
|
||||||
|
requests:
|
||||||
|
cpu: 25m
|
||||||
|
memory: 256Mi
|
||||||
|
limits:
|
||||||
|
cpu: 1000m
|
||||||
|
memory: 768Mi
|
||||||
|
# /healthz is registered explicitly in Program.cs (anonymous, no DB
|
||||||
|
# or OpenAPI dependency). Liveness uses tcpSocket as a cheap fallback
|
||||||
|
# in case future middleware changes accidentally gate /healthz.
|
||||||
|
# Memory: feedback_k8s_probes_must_not_hit_openapi,
|
||||||
|
# feedback_k8s_probes_behind_auth_middleware.
|
||||||
|
startupProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /healthz
|
||||||
|
port: 8080
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 5
|
||||||
|
failureThreshold: 30
|
||||||
|
readinessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /healthz
|
||||||
|
port: 8080
|
||||||
|
periodSeconds: 10
|
||||||
|
failureThreshold: 3
|
||||||
|
livenessProbe:
|
||||||
|
tcpSocket:
|
||||||
|
port: 8080
|
||||||
|
initialDelaySeconds: 30
|
||||||
|
periodSeconds: 30
|
||||||
|
failureThreshold: 3
|
||||||
|
securityContext:
|
||||||
|
runAsNonRoot: true
|
||||||
|
runAsUser: 1654
|
||||||
|
runAsGroup: 1654
|
||||||
|
allowPrivilegeEscalation: false
|
||||||
|
readOnlyRootFilesystem: true
|
||||||
|
capabilities:
|
||||||
|
drop:
|
||||||
|
- ALL
|
||||||
|
volumeMounts:
|
||||||
|
- name: data
|
||||||
|
mountPath: /data
|
||||||
|
- name: tmp
|
||||||
|
mountPath: /tmp
|
||||||
|
- name: logs
|
||||||
|
mountPath: /app/logs
|
||||||
|
volumes:
|
||||||
|
- name: data
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: worldbuilder-data
|
||||||
|
- name: tmp
|
||||||
|
emptyDir: {}
|
||||||
|
- name: logs
|
||||||
|
emptyDir: {}
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: worldbuilder-web
|
||||||
|
namespace: fc-worldbuilder
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: worldbuilder-web
|
||||||
|
app.kubernetes.io/component: web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app.kubernetes.io/name: worldbuilder-web
|
||||||
|
ports:
|
||||||
|
- name: http
|
||||||
|
port: 80
|
||||||
|
targetPort: 8080
|
||||||
|
---
|
||||||
|
apiVersion: cert-manager.io/v1
|
||||||
|
kind: Certificate
|
||||||
|
metadata:
|
||||||
|
name: worldbuilder-web-tls
|
||||||
|
namespace: fc-worldbuilder
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: worldbuilder-web-tls
|
||||||
|
app.kubernetes.io/component: ingress
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
spec:
|
||||||
|
secretName: worldbuilder-web-tls
|
||||||
|
issuerRef:
|
||||||
|
name: step-ca-acme
|
||||||
|
kind: ClusterIssuer
|
||||||
|
dnsNames:
|
||||||
|
- worldbuilder.iamworkin.lan
|
||||||
|
# step-ca ACME provisioner caps lifetime at 30d. Requesting 90d
|
||||||
|
# was silently capped to 30d, making renewBefore 720h (30d) equal to the
|
||||||
|
# actual cert lifetime — which triggered a perpetual renewal loop that
|
||||||
|
# generated 2365+ CertificateRequest objects in 18h. Match the working
|
||||||
|
# 720h/240h pattern used by every other FC service cert.
|
||||||
|
duration: 720h # 30d (step-ca cap)
|
||||||
|
renewBefore: 240h # 10d
|
||||||
|
---
|
||||||
|
apiVersion: traefik.io/v1alpha1
|
||||||
|
kind: IngressRoute
|
||||||
|
metadata:
|
||||||
|
name: worldbuilder-web
|
||||||
|
namespace: fc-worldbuilder
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: worldbuilder-web
|
||||||
|
app.kubernetes.io/component: ingress
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/tenant-id: system
|
||||||
|
flowercore.io/created-by: bluejay-infra
|
||||||
|
spec:
|
||||||
|
entryPoints:
|
||||||
|
- websecure
|
||||||
|
routes:
|
||||||
|
- match: Host(`worldbuilder.iamworkin.lan`)
|
||||||
|
kind: Rule
|
||||||
|
services:
|
||||||
|
- name: worldbuilder-web
|
||||||
|
port: 80
|
||||||
|
tls:
|
||||||
|
secretName: worldbuilder-web-tls
|
||||||
@@ -305,15 +305,17 @@ spec:
|
|||||||
path: /
|
path: /
|
||||||
port: 8080
|
port: 8080
|
||||||
initialDelaySeconds: 60
|
initialDelaySeconds: 60
|
||||||
timeoutSeconds: 5
|
timeoutSeconds: 15
|
||||||
periodSeconds: 10
|
periodSeconds: 10
|
||||||
|
failureThreshold: 3
|
||||||
readinessProbe:
|
readinessProbe:
|
||||||
httpGet:
|
httpGet:
|
||||||
path: /
|
path: /
|
||||||
port: 8080
|
port: 8080
|
||||||
initialDelaySeconds: 30
|
initialDelaySeconds: 30
|
||||||
periodSeconds: 5
|
periodSeconds: 5
|
||||||
timeoutSeconds: 5
|
timeoutSeconds: 15
|
||||||
|
failureThreshold: 3
|
||||||
---
|
---
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: Service
|
kind: Service
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user