Compare commits

..

1 Commits

Author SHA1 Message Date
Codex
266b9cb8be feat(github-runner): add top Linux repo runners 2026-05-17 13:55:55 -05:00
100 changed files with 16636 additions and 28196 deletions

4
.gitattributes vendored
View File

@@ -1,4 +0,0 @@
/.gitattributes text eol=lf
*.yaml text eol=lf
*.yml text eol=lf
*.sh text eol=lf

View File

@@ -116,19 +116,8 @@ dotnet test tests/bluejay-infra-lint/BluejayInfraLint.Tests.csproj -c Release
That test project sweeps `bluejay-infra/apps/**` plus the canonical sibling `FlowerCore.*/k8s` manifests that share the same workspace. Matching `conftest.dev` policy files live under `tests/bluejay-infra-lint/conftest.dev/` for environments that also have `conftest` or `opa`.
## Non-K8s Pi Artifacts
Some `apps/*` directories are deployment artifact bundles consumed by Puppet
instead of Kubernetes workloads. `apps/fc-signage-pi-player/` carries the
Chromium signage Pi player, `apps/fc-divoom-dm-pi-device/` carries the additive
edge2 Divoom-as-DeviceManagement-device profile/Hiera contract, and
`apps/fc-divoom-tv-pi/` carries the Divoom TV Pi HDMI systemd/Puppet shape.
These bundles intentionally avoid Deployment, IngressRoute, Certificate, and
OnePasswordItem resources.
## References
- OpenVox noc1 durability runbook: `docs/runbooks/openvoxserver-quadlet-durability.md`
- Cert-manager recovery playbook: `FlowerCore.Notes/memory/project_cert_manager_recovery_2026_04_22.md`
- Why pfSense DNS is required: `FlowerCore.Notes/memory/feedback_pfsense_dns_required_for_acme.md`
- Public DNS operator host: `https://dns.iamworkin.lan`

View File

@@ -139,20 +139,6 @@ metadata:
spec:
itemPath: "vaults/IAmWorkin/items/FlowerCore Knowledge MCP Tokens"
---
# FlowerCore DMS Manager MCP key (product-manager fan-out). Synced from the
# 1Password "FlowerCore DMS MCP Keys" item (field `credential`) into Secret
# `dms-mcp-keys`; the deployment reads it as DMS_MCP_API_KEY for the fc_dms
# MCP server. presentations/messageboard/segmentdisplay/telephony 1P MCP-key
# items also exist and follow this same pattern when added.
apiVersion: onepassword.com/v1
kind: OnePasswordItem
metadata:
name: dms-mcp-keys
namespace: agent-zero
spec:
itemPath: "vaults/IAmWorkin/items/FlowerCore DMS MCP Keys"
---
apiVersion: apps/v1
kind: Deployment
@@ -262,7 +248,7 @@ spec:
# use the bridge's Ollama-compatible root via OLLAMA_HOST.
mkdir -p /a0/usr/plugins/_model_config
cat > /a0/usr/plugins/_model_config/config.json << 'MODELCFG'
{"allow_chat_override":true,"chat_model":{"provider":"openai","name":"fc:balanced","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":32768,"ctx_history":0.7,"vision":false,"kwargs":{"temperature":0,"num_ctx":32768}},"utility_model":{"provider":"openai","name":"fc:cheap","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_input":0.7,"kwargs":{"num_ctx":8192}},"embedding_model":{"provider":"openai","name":"openai/fc:embedding","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","kwargs":{}}}
{"allow_chat_override":true,"chat_model":{"provider":"openai","name":"fc:balanced","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_history":0.7,"vision":false,"kwargs":{"temperature":0,"num_ctx":8192}},"utility_model":{"provider":"openai","name":"fc:cheap","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_input":0.7,"kwargs":{"num_ctx":8192}},"embedding_model":{"provider":"openai","name":"openai/fc:embedding","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","kwargs":{}}}
MODELCFG
# Strip heredoc indentation
sed -i 's/^ //' /a0/usr/plugins/_model_config/config.json
@@ -290,7 +276,7 @@ spec:
fi
export A0_SET_mcp_servers="$(
python3 -c 'import json, os; servers = {}; chat_key = os.getenv("CHAT_MCP_API_KEY"); knowledge_enabled = os.getenv("KNOWLEDGE_MCP_ENABLED", "false").lower() == "true"; token = os.getenv("KNOWLEDGE_MCP_BEARER_TOKEN", "") if knowledge_enabled else ""; chat_key and servers.setdefault("fc_chat", {"type": "streamable-http", "url": "http://chat-web.fc-chat.svc/mcp", "headers": {"X-Api-Key": chat_key}}); token and servers.setdefault("fc_knowledge", {"type": "streamable-http", "url": os.getenv("KNOWLEDGE_MCP_URL", "http://knowledge-web.knowledge.svc/mcp"), "headers": {"Authorization": f"Bearer {token}"}}); dms_key = os.getenv("DMS_MCP_API_KEY"); dms_key and servers.setdefault("fc_dms", {"type": "streamable-http", "url": os.getenv("DMS_MCP_URL", "http://dms-web.fc-dms.svc/mcp"), "headers": {"X-Api-Key": dms_key}}); print(json.dumps({"mcpServers": servers}, separators=(",", ":")))'
python3 -c 'import json, os; servers = {}; chat_key = os.getenv("CHAT_MCP_API_KEY"); knowledge_enabled = os.getenv("KNOWLEDGE_MCP_ENABLED", "false").lower() == "true"; token = os.getenv("KNOWLEDGE_MCP_BEARER_TOKEN", "") if knowledge_enabled else ""; chat_key and servers.setdefault("fc_chat", {"type": "streamable-http", "url": "http://chat-web.fc-chat.svc/mcp", "headers": {"X-Api-Key": chat_key}}); token and servers.setdefault("fc_knowledge", {"type": "streamable-http", "url": os.getenv("KNOWLEDGE_MCP_URL", "http://knowledge-web.knowledge.svc/mcp"), "headers": {"Authorization": f"Bearer {token}"}}); print(json.dumps({"mcpServers": servers}, separators=(",", ":")))'
)"
# Run the original entrypoint
exec /exe/initialize.sh $BRANCH
@@ -299,7 +285,7 @@ spec:
env:
# Agent identity
- name: AGENT_NAME
value: "Blue Jay"
value: "Blue Jay (NUC)"
# Chat model — routed through FlowerCore LLM Bridge (ADR-088)
# so spend is tracked and tier aliases (fc:cheap/fc:balanced/fc:deep)
# dispatch to Ollama or Anthropic via a single OpenAI-compat endpoint.
@@ -358,7 +344,7 @@ spec:
- name: A0_SET_browser_model_provider
value: "ollama"
- name: A0_SET_browser_model_name
value: "qwen2.5:7b"
value: "gemma3:4b"
- name: A0_SET_browser_model_api_base
value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080"
- name: A0_SET_browser_model_api_key
@@ -367,7 +353,7 @@ spec:
name: fc-llm-bridge-api-keys
key: agent-zero-k8s
- name: A0_SET_browser_model_vision
value: "false"
value: "true"
- name: OLLAMA_HOST
value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080"
- name: FLOWERCORE_AGENTZERO_OLLAMA_URL
@@ -407,20 +393,6 @@ spec:
secretKeyRef:
name: knowledge-mcp-tokens
key: password
# FlowerCore DMS Manager MCP (dynamic message signs) — first of the
# product-manager MCP fan-out. dms-web /mcp requires X-Api-Key; the key
# is synced from 1Password "FlowerCore DMS MCP Keys" (field credential)
# by the dms-mcp-keys OnePasswordItem CRD above. Same builder+env+netpol
# pattern extends to presentations/messageboard/segmentdisplay/telephony
# (all have 1P MCP-key items). MySQL + Signage still need 1P MCP items
# provisioned before they can join (mysql-web /mcp 401s with no key today).
- name: DMS_MCP_URL
value: "http://dms-web.fc-dms.svc/mcp"
- name: DMS_MCP_API_KEY
valueFrom:
secretKeyRef:
name: dms-mcp-keys
key: credential
# Print.Web — Thermal printer service on edge2.
# PRINT_WEB_URL: internal HTTP (bypasses Traefik TLS — print_web.py
# runs in-cluster and can reach edge2 directly on the PROD VLAN).
@@ -665,19 +637,6 @@ spec:
ports:
- port: 5300
protocol: TCP
# FlowerCore DMS Manager MCP (product-manager fan-out) — in-cluster
# dms-web. NetworkPolicy matches the destination POD port: dms-web svc:80
# targets containerPort 8080, so the egress MUST allow 8080 (not the svc
# port 80) — same as the fc-chat rule. Allow both for parity.
- to:
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: fc-dms
ports:
- port: 80
protocol: TCP
- port: 8080
protocol: TCP
# Allow internet (for kubectl image pull, etc)
- to:
- ipBlock:

View File

@@ -13736,15 +13736,20 @@ data:
### Active Services
The fleet spans dozens of services -- Signage (Web + WPF Player), Common Libraries, MySQL Manager, PHP Manager, Telephony, Chat, AiStation, PiManager, Print.Web, Divoom, TtsReader, WorldBuilder, Library, Retail, and more. Each carries hundreds-to-thousands of xUnit tests; the fleet total runs to many thousands of passing tests.
**Never quote a hard test count from memory** -- counts drift between sprints and stale numbers look more authoritative than they are. Use range language ("dozens of controllers", "hundreds of tests", "thousands fleet-wide") and, when a number actually matters, run the test command and read the live result. The canonical state of counts lives in `MEMORY.md` and `docs/standards/feature-backlog.md`, not in this prompt.
| Service | Tests | Key Facts |
|---------|-------|-----------|
| Signage Web | 3,127 | 17 controllers, 33 services, 26 entities, 32 pages, 154 MCP tools |
| Signage WPF Player | 1,700 | 12 screen types, 12 zone controls, LibVLC video, HtmlBundleRenderer |
| Common Libraries | 1,189 | UI.Components (427), Operator.Sdk (61), Security (110) |
| MySQL Manager | 508 | 135 Operator + 373 Web |
| PHP Manager | 423 | 32 Operator + 391 Web |
| **Total** | **6,947** | 0 skipped, 0 failures |
### Technology Stack
- **.NET 10 LTS** -- target `net10.0`, SDK 10.0.100
- **Blazor Server** -- Web UI with Blue Jay theme
- **WPF** -- Desktop apps (must build with `dotnet.exe` on Windows -- the Linux SDK cannot compile WPF/WinForms)
- **WPF** -- Desktop apps (must build with `dotnet.exe` from WSL)
- **Entity Framework Core** -- Multi-provider (SQLite, MySQL Pomelo, PostgreSQL, SQL Server)
- **gRPC** -- HTTP/2 bidirectional streaming (port 5191)
- **KubeOps 9.x** -- C# Kubernetes operators
@@ -13764,9 +13769,9 @@ data:
|------|---------|
| 5190 | HTTP/REST |
| 5191 | gRPC/HTTP2 |
| 11434 | Ollama API (fleet AI hub VIP `10.0.57.201:11434`) |
You reach the fleet via Traefik IngressRoutes on `*.iamworkin.lan` (TLS via step-ca). Your own UI is `https://agent-zero.iamworkin.lan`. Don't surface raw NodePort numbers -- they drift.
| 30050 | Agent Zero UI |
| 11434 | Ollama API |
| 30052 | Piper TTS |
## Technical Standards (Non-Negotiable)
@@ -13798,32 +13803,6 @@ data:
- **`new X509Certificate2(byte[])` in .NET 10** -- Use `X509CertificateLoader.LoadPkcs12()`
- **ToString("P0") non-breaking space** -- U+00A0 before percent sign breaks assertions
## Session Continuity: HANDOFF.md
When another agent (Claude Code or Codex) runs out of credits or hands off work mid-task, they write a checkpoint to `HANDOFF.md` in the FlowerCore.Notes repo.
**Location:** `/a0/work/repos/FlowerCore/FlowerCore.Notes/HANDOFF.md`
**When to read it:**
- At the start of any session where you're asked to continue or pick up work
- When a user says "Claude ran out of credits" or "pick up where we left off"
- When `HANDOFF.md` status field shows `credits-exhausted` or `handed-off`
**Key sections to check:**
- **Reasoning Chain** — what the previous agent figured out (root cause, failed attempts, working hypothesis)
- **Suggested Next Steps** — ordered list of what to do, prioritized
- **Uncommitted Changes** — work that may exist on disk but not in git
- **Blockers** — anything preventing progress
**What you can do with it:**
- Handle quick tasks listed in "Suggested Next Steps" (YAML gen, doc formatting, SSH checks)
- Escalate to Claude Code or Codex if the task requires multi-file code changes (beyond your 32K context)
- Report findings back by updating the handoff file or telling the user
**What you should NOT do:**
- Don't attempt multi-file refactors from a handoff — escalate those
- Don't ignore the "Failed Attempts" section — repeating them wastes time
## Repository Access
All of Andrew's git repositories are mounted at `/a0/work/repos/` (read-only):
@@ -13848,51 +13827,47 @@ data:
| PHP Manager | `/a0/work/repos/FlowerCore/FlowerCore.PHP/` |
| Notes / Docs | `/a0/work/repos/FlowerCore/FlowerCore.Notes/` |
## The AI Hub -- GX10 (fleet Ollama)
## Available Ollama Models
The fleet AI runs on the **GX10** -- an ASUS Ascent GX10 = NVIDIA DGX Spark (GB10 Grace-Blackwell, ARM64, CUDA 13, **121 GiB unified memory**) at `10.0.56.14`. Ollama serves on the fleet VIP **`http://10.0.57.201:11434`** with models warm-pinned (`OLLAMA_KEEP_ALIVE=-1`) on local NVMe.
Access via `http://host.docker.internal:11434`:
This GX10 hub **supersedes the retired BLUEJAY-WS R9700 and BLUEJAY-AI (.132) AI roles.** There is no `host.docker.internal`, no port-30050 lane, no edge1-as-Ollama-host story, and no WSL/K3s deployment. The single live deployment is the RKE2 cluster lane (`https://agent-zero.iamworkin.lan`), which reaches Ollama through the FlowerCore LLM Bridge tier router.
| Model | Size | Role | Speed | Status |
|-------|------|------|-------|--------|
| qwen2.5:3b | 1.9 GB | Quick utility tasks | ~190 tok/s | 100% GPU |
| mistral:7b | 4.4 GB | Fast summarization | ~110 tok/s | 100% GPU |
| granite3.1-dense:8b | 5 GB | Structured JSON/YAML, tool calling | ~92 tok/s | 100% GPU |
| deepseek-r1:8b | 5.2 GB | Reasoning (compact) | ~73 tok/s | 100% GPU |
| qwen3-vl:8b | 6.1 GB | Fast lightweight vision | ~76 tok/s | 100% GPU |
| deepseek-ocr | 6.7 GB | Document OCR | ~167 tok/s | 100% GPU |
| translategemma:12b | 8.1 GB | Translation (55 languages) | ~54 tok/s | 100% GPU |
| phi4:14b | 9.1 GB | .NET-focused reasoning, architecture | ~60 tok/s | 100% GPU |
| devstral:24b | 14 GB | Agentic coding specialist (Mistral) | needs ReBAR | blocked |
| gemma3:27b | 17 GB | Vision + text, browser model | needs ReBAR | blocked |
| qwen3-coder:30b | 19 GB | Advanced code generation | needs ReBAR | blocked |
| deepseek-r1:32b | 20 GB | Deep reasoning (direct API) | needs ReBAR | blocked |
| qwen3:32b | 20 GB | Chat brain (JSON tool-call mode) | needs ReBAR | blocked |
| nomic-embed-text | 274 MB | Embeddings (768 dims, RAG/memory) | N/A | 100% GPU |
| Model | Role | Tool-calling? |
|-------|------|---------------|
| `qwen2.5:14b` | **Chat brain** (`fc:balanced`) -- agentic loop, code, architecture | YES (proven live) |
| `qwen2.5:7b` | **Utility + browser** (`fc:cheap`) -- fast tool-capable tier | YES |
| `gemma3:12b` | Vision / image description ONLY (non-agentic path) | NO -- 400 on tools |
| `gemma3:4b` | Lightweight vision fallback | NO -- 400 on tools |
| `nomic-embed-text` | Embeddings (768 dims) for memory / RAG | N/A (embeddings only) |
| `llama3.2:1b` | Tiny utility -- garbles tool output, avoid for the loop | NO (too small) |
With 121 GiB unified memory, VRAM is never the bottleneck -- `nvidia-smi` reports VRAM "Not Supported"; use `free -h`. Multiple models stay resident at once; Ollama does not need to swap.
**VRAM budget**: AMD Radeon AI PRO R9700 32GB -- 3-4 models fit simultaneously. Ollama swaps models automatically.
### Model Selection by Task
| Task | Primary | Notes |
|------|---------|-------|
| C#/.NET code gen | `qwen2.5:14b` | Tool-capable, free/local |
| Agentic coding / tool loop | `qwen2.5:14b` | Must be tool-capable -- see rule below |
| Code review | `qwen2.5:14b` | Falls back to `qwen2.5:7b` for speed |
| Architecture decisions | `qwen2.5:14b` | -- |
| K8s manifests / YAML | `qwen2.5:7b` | Fast structured output |
| Fast utility | `qwen2.5:7b` | -- |
| Screenshot / image description | `gemma3:12b` | Vision-only, NO tool calls in this path |
| Embeddings | `nomic-embed-text` | -- |
## RULE: Models & Tool-Calling (non-negotiable)
**The whole point of Agent Zero is the agentic tool-calling loop, and it MUST run on a tool-capable model.** The fleet learned this the hard way:
- **Use the `qwen2.5` family for any turn that may call a tool** -- chat goes through `fc:balanced` -> `qwen2.5:14b`, utility/browser through `fc:cheap` -> `qwen2.5:7b`. Both return proper `tool_calls`. `qwen2.5:14b` tool-calling is **proven live**.
- **`gemma3:*` CANNOT call tools.** Ollama returns `400: does not support tools` (even `"tools": null`/`[]`) for the whole gemma3 family. Use it ONLY behind a non-agentic vision/image-description path -- never as the agent brain.
- **Models <=3B garble tool output.** `llama3.2:1b` and any sub-3B model will mangle JSON tool calls. Don't route the loop through them.
- **`nomic-embed-text` is embeddings-only.** It powers memory/RAG vectors; it cannot chat or call tools.
- **qwen2.5 instruct does NOT need `think`.** Do not add a `think` kwarg (that's a qwen3/reasoning gate). Chat kwargs are `{"temperature":0,"num_ctx":32768}`.
If a turn unexpectedly hits `400: does not support tools` or the model emits literal `<tool_call>` text instead of structured calls, the wiring drifted to a non-tool model -- mob it: report the slot, don't silently degrade.
| Task | Primary | Quick Alternative |
|------|---------|-------------------|
| C#/.NET code gen | qwen3-coder:30b | devstral:24b |
| Agentic coding | devstral:24b | qwen3-coder:30b |
| Code review | phi4:14b | qwen3-coder:30b |
| Architecture decisions | phi4:14b | deepseek-r1:32b |
| K8s manifests / YAML | granite3.1-dense:8b | qwen3-coder:30b |
| Screenshot analysis | gemma3:27b | qwen3-vl:8b |
| Translation | translategemma:12b | -- |
| Fast summarization | mistral:7b | qwen2.5:3b |
| Deep reasoning | deepseek-r1:32b | phi4:14b |
| Embeddings | nomic-embed-text | -- |
## The Blue Jay Agent Team
The "Blu" roles below are a **persona vocabulary** for focused sub-agent spawns -- labels for scoped tasks, not a standing fixed-size team. When you are the orchestrator, you spawn focused agents for parallel development using these personas:
You work as part of a 14-agent squad. When you are the orchestrator, you spawn focused agents for parallel development:
### Tier 1 -- Core Development
@@ -13974,106 +13949,6 @@ data:
FlowerCore.{Service}.Operator.Tests/
```
## Available Tools
You have custom tools that give you real capabilities. When a user asks you to do something, USE the appropriate tool -- do not say you cannot do it. You are not a generic chatbot; you have hardware access and infrastructure control.
### print_web -- Thermal Printer (NuPrint 210, 58mm)
Connected to a real thermal receipt printer. You CAN print barcodes, QR codes, labels, receipts, images, and more.
| Action | What It Does | Key Args |
|--------|-------------|----------|
| `barcode` | Print a barcode label | `data`, `symbology` (Code128/UpcA/Ean13/Ean8/Code39/Codabar), `title`, `copies` |
| `qr` | Print a QR code | `data`, `label`, `module_size` |
| `label` | Print a text label | `title`, `subtitle`, `copies` |
| `receipt` | Print a formatted receipt | `header`, `lines` [{left, right, bold?, separator?}], `footer` |
| `image` | Print an image | `image_base64` or `image_path`, `label` |
| `test` | Print a test page | (no args) |
| `url` | Print URL as receipt + QR | `url`, `title` |
| `recipe` | Scrape and print a recipe | `url` |
| `recipe_print` | Enhanced recipe (Selenium fallback) | `url` |
| `ai_summary` | AI-summarize text, optionally print | `text`, `url`, `print_result` |
| `product` | Look up product by barcode | `barcode` |
| `product_search` | Search product by name | `query` |
| `status` | Printer connection status | (no args) |
| `paper` | Paper roll level | (no args) |
| `queue` | Print queue depth | (no args) |
| `hardware` | Hardware diagnostics | (no args) |
| `waste` | Paper waste report | `days` |
| `drawer` | Open cash drawer | (no args) |
| `clear_queue` | Clear print queue | `source` |
**Barcode auto-detection:** 13 digits = EAN-13, 12 digits = UPC-A, starts with 978/979 = ISBN, otherwise Code128.
**Example:** User says "print a barcode for 20612000248789" → the value is 14 digits (neither 13-digit EAN-13 nor 12-digit UPC-A), so use `print_web` with `action="barcode"`, `data="20612000248789"`, `symbology="Code128"`.
### ssh_remote -- SSH to Infrastructure Nodes
Execute commands on remote servers via SSH.
### kubectl_manager -- Kubernetes Cluster
Manage RKE2 cluster resources, pods, deployments.
### ollama_model_switch -- Ollama Model Management
Switch models, check loaded models, manage VRAM.
### flowercore_build / flowercore_test -- Build and Test
Build .NET projects and run test suites.
### qrcode_generator -- Generate QR Code Images
Generate QR code image files locally.
### kiwix_search -- Offline Knowledge Base
Search offline Wikipedia, documentation archives.
### corpus_search -- Fleet Vector Corpus (Bible / Lexicons / Morphology)
Semantic search over the fleet knowledge DB at `/a0/usr/vectors/<slug>.db`
(Strong's, macula-greek/hebrew, aquifer-bible-dictionary/translation-words/acai,
WEB + Berean Bibles). Uses Ollama `nomic-embed-text` to embed the query,
computes cosine in Python, returns ranked chunks with source + passage + score.
Use this for "what does Genesis 1:1 say", "show me every use of agape",
"find dictionary entries for covenant", etc. Faster and more offline-friendly
than `intranet_search` for scripture/lexicon queries.
| Arg | Description |
|-----|-------------|
| `query` | Search text. Required. |
| `limit` | Top-K results (default 8). |
| `index` | Optional: `bible-texts`, `lexicons`, `dictionaries`, `morphology`. |
| `repo` | Optional repo substring filter (e.g. `world-english-bible`). |
| `db` | Optional DB override (absolute path or filename inside `/a0/usr/vectors`). Default picks the largest fleet tier present (workstation-full → pi-edge → bmo-bot). |
| `action` | Optional. `stats` returns a markdown inventory of every fleet DB (name/size/index/chunk counts/last-built) without doing a query. Useful for "what's in the corpus?" before picking a specific query. |
## RULE: Knowledge & RAG (which source to reach for)
When a question needs grounding in FlowerCore knowledge, reach for sources in this order:
1. **`fc_knowledge` MCP -- the PRIMARY RAG.** This is the fleet's canonical retrieval layer: vector indexes over the Notes and docs corpora (`notes-md`, `notes-html`, and friends), embedded with `nomic-embed-text` on the GX10 hub. Use it first for "where is X documented", "what does the standard say about Y", ADRs, runbooks, gotchas, and any project/infra knowledge. Embeddings run on the GX10 (`10.0.57.201`) so they are fast now -- no more slow Pi5 embed waits.
2. **`corpus_search` (fallback / scripture & lexicons).** Offline vector search over the Bible/lexicon/morphology corpus DBs. Prefer this for scripture, Strong's, Greek/Hebrew word studies, and dictionary lookups. Faster and more offline-friendly than the intranet for those queries.
3. **`intranet_search` (fallback).** HTTP search against the Blue Jay Lab Intranet (`https://intranet.iamworkin.lan/api/v1/search`) when `fc_knowledge` is unavailable or the answer lives in intranet-only content.
4. **`kiwix_search` (general reference).** Offline Wikipedia/Wiktionary when the question is general-knowledge, not FlowerCore-specific.
### Offline datasets in the fleet corpus cache
The shared cache (`corpus-cache/`, manifest: its own `README.md`; see `docs/standards/shared-datasets.md`) holds open-licensed offline data you can query via `corpus_search` / Knowledge indexes:
- **Bibles:** Berean Standard Bible, World English Bible (public domain), Reina-Valera (Spanish).
- **Greek / Hebrew morphology:** MACULA Greek (NT) and MACULA Hebrew (OT) -- morphology + syntax trees, Strong's numbers embedded.
- **Strong's & lexicons:** Strong's Exhaustive Concordance (Greek + Hebrew), Tyndale Brief lexicon (TBESG), STEPBible tables.
- **Notes / dictionaries / cross-refs:** unfoldingWord Translation Notes/Words, Aquifer Bible Dictionary, Aquifer Study Notes, ACAI entity graph, OpenBible cross-refs, Treasury of Scripture Knowledge.
- **General reference:** Wikipedia and Wiktionary ZIMs (via `kiwix_search`).
The indexing tiers are `bible-texts`, `translation-notes`, `dictionaries`, `morphology`, `strongs`, and `wikipedia`. **Gotcha:** a corpus is queryable only when its on-disk directory name matches the index config exactly -- a mismatch makes the indexer silently skip it.
**Rule: Never say "I cannot" for something a tool can do.** Check your tools first.
## Remember
You are Blue Jay. You guard the nest. You cache knowledge. You mob bugs fearlessly. You sing when the build is green. And you always, always keep one eye on the squirrels.

View File

@@ -1,453 +0,0 @@
# Authentik OIDC backend
# ArgoCD-managed. BlueJay Lab.
#
# Stack:
# - PostgreSQL 16 StatefulSet (single replica, Longhorn RWO 5Gi)
# - Redis 7 Deployment (no persistence — session/cache only)
# - Authentik server + worker Deployments (image ghcr.io/goauthentik/server:2024.12.3)
# - Media PVC shared between server + worker (Longhorn RWO 2Gi)
# - Certificate via step-ca-acme ClusterIssuer
# - Traefik IngressRoute at id.iamworkin.lan
#
# Secrets come from 1Password item "authentik-credentials" (IAmWorkin vault, id y6i74ch22q5wvm7znquq4nhhcu)
# via the OnePasswordItem CRD, materialized into k8s Secret authentik/authentik-credentials.
#
# Why the discovery URL is /application/o/pimanager/ : Authentik issues per-application OIDC providers.
# The pimanager OIDC application/provider is created after the cluster pods are healthy (manual or
# via API once the bootstrap token is available — see Notes substrate).
---
apiVersion: v1
kind: Namespace
metadata:
name: authentik
labels:
app.kubernetes.io/part-of: bluejay-infra
---
# 1Password operator pulls the authentik-credentials item into a k8s Secret of the same name.
# Field labels in 1P become Secret keys: AUTHENTIK_SECRET_KEY, POSTGRES_PASSWORD, REDIS_PASSWORD,
# BOOTSTRAP_ADMIN_PASSWORD, BOOTSTRAP_ADMIN_TOKEN, BOOTSTRAP_ADMIN_EMAIL.
apiVersion: onepassword.com/v1
kind: OnePasswordItem
metadata:
name: authentik-credentials
namespace: authentik
spec:
itemPath: "vaults/IAmWorkin/items/authentik-credentials"
---
# Shared media volume for server + worker pods.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: authentik-media
namespace: authentik
spec:
storageClassName: longhorn
accessModes: [ReadWriteOnce]
resources:
requests:
storage: 2Gi
---
# PostgreSQL 16 StatefulSet — Authentik's primary store.
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: authentik-postgres
namespace: authentik
labels:
app: authentik-postgres
argocd.argoproj.io/instance: infra-authentik
spec:
persistentVolumeClaimRetentionPolicy:
whenDeleted: Retain
whenScaled: Retain
podManagementPolicy: OrderedReady
serviceName: authentik-postgres
replicas: 1
revisionHistoryLimit: 10
selector:
matchLabels:
app: authentik-postgres
template:
metadata:
labels:
app: authentik-postgres
spec:
containers:
- name: postgres
image: postgres:16-alpine
ports:
- containerPort: 5432
name: postgres
env:
- name: POSTGRES_USER
value: authentik
- name: POSTGRES_PASSWORD
valueFrom:
secretKeyRef:
name: authentik-credentials
key: POSTGRES_PASSWORD
- name: POSTGRES_DB
value: authentik
- name: POSTGRES_INITDB_ARGS
value: "--encoding=UTF-8 --lc-collate=C --lc-ctype=C"
- name: PGDATA
value: /var/lib/postgresql/data/pgdata
readinessProbe:
exec:
command: ["pg_isready", "-U", "authentik"]
initialDelaySeconds: 5
periodSeconds: 5
livenessProbe:
exec:
command: ["pg_isready", "-U", "authentik"]
initialDelaySeconds: 30
periodSeconds: 30
resources:
requests: { cpu: 100m, memory: 256Mi }
limits: { cpu: 1000m, memory: 1Gi }
volumeMounts:
- name: pgdata
mountPath: /var/lib/postgresql/data
volumeClaimTemplates:
# apiVersion/kind included deliberately: this STS was created via ArgoCD ServerSideApply,
# so the live object carries PVC TypeMeta inside volumeClaimTemplates; omitting it here
# leaves the app eternally OutOfSync even though kubectl SSA dry-run shows no change.
- apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: pgdata
spec:
storageClassName: longhorn
accessModes: [ReadWriteOnce]
volumeMode: Filesystem
resources:
requests:
storage: 5Gi
---
apiVersion: v1
kind: Service
metadata:
name: authentik-postgres
namespace: authentik
spec:
clusterIP: None
selector:
app: authentik-postgres
ports:
- name: postgres
port: 5432
targetPort: 5432
---
# Redis 7 — session storage + Celery broker. No persistence needed (cache).
apiVersion: apps/v1
kind: Deployment
metadata:
name: authentik-redis
namespace: authentik
labels:
app: authentik-redis
argocd.argoproj.io/instance: infra-authentik
spec:
replicas: 1
strategy:
type: Recreate
selector:
matchLabels:
app: authentik-redis
template:
metadata:
labels:
app: authentik-redis
spec:
containers:
- name: redis
image: redis:7-alpine
args:
- "--save"
- ""
- "--appendonly"
- "no"
- "--requirepass"
- "$(REDIS_PASSWORD)"
env:
- name: REDIS_PASSWORD
valueFrom:
secretKeyRef:
name: authentik-credentials
key: REDIS_PASSWORD
ports:
- containerPort: 6379
name: redis
readinessProbe:
tcpSocket: { port: 6379 }
initialDelaySeconds: 5
periodSeconds: 5
livenessProbe:
tcpSocket: { port: 6379 }
initialDelaySeconds: 30
periodSeconds: 30
resources:
requests: { cpu: 50m, memory: 64Mi }
limits: { cpu: 500m, memory: 256Mi }
---
apiVersion: v1
kind: Service
metadata:
name: authentik-redis
namespace: authentik
spec:
selector:
app: authentik-redis
ports:
- name: redis
port: 6379
targetPort: 6379
---
# Authentik server Deployment — HTTP frontend on :9000.
apiVersion: apps/v1
kind: Deployment
metadata:
name: authentik-server
namespace: authentik
labels:
app: authentik-server
argocd.argoproj.io/instance: infra-authentik
spec:
replicas: 1
strategy:
type: Recreate # shares /media RWO PVC with worker
selector:
matchLabels:
app: authentik-server
template:
metadata:
labels:
app: authentik-server
spec:
securityContext:
# The Authentik image runs as uid 1000 "authentik", but the Longhorn PVC mounts
# root:root by default. Setting fsGroup applies a recursive chgrp + chmod g+rwx
# to the volume so the non-root container can mkdir /media/public during the
# tenant_files migration.
fsGroup: 1000
containers:
- name: server
image: ghcr.io/goauthentik/server:2024.12.3
args: ["server"]
ports:
- containerPort: 9000
name: http
- containerPort: 9443
name: https
env:
- name: AUTHENTIK_SECRET_KEY
valueFrom:
secretKeyRef:
name: authentik-credentials
key: AUTHENTIK_SECRET_KEY
- name: AUTHENTIK_REDIS__HOST
value: authentik-redis
- name: AUTHENTIK_REDIS__PASSWORD
valueFrom:
secretKeyRef:
name: authentik-credentials
key: REDIS_PASSWORD
- name: AUTHENTIK_POSTGRESQL__HOST
value: authentik-postgres
- name: AUTHENTIK_POSTGRESQL__NAME
value: authentik
- name: AUTHENTIK_POSTGRESQL__USER
value: authentik
- name: AUTHENTIK_POSTGRESQL__PASSWORD
valueFrom:
secretKeyRef:
name: authentik-credentials
key: POSTGRES_PASSWORD
- name: AUTHENTIK_BOOTSTRAP_PASSWORD
valueFrom:
secretKeyRef:
name: authentik-credentials
key: BOOTSTRAP_ADMIN_PASSWORD
- name: AUTHENTIK_BOOTSTRAP_TOKEN
valueFrom:
secretKeyRef:
name: authentik-credentials
key: BOOTSTRAP_ADMIN_TOKEN
- name: AUTHENTIK_BOOTSTRAP_EMAIL
valueFrom:
secretKeyRef:
name: authentik-credentials
key: BOOTSTRAP_ADMIN_EMAIL
- name: AUTHENTIK_DISABLE_UPDATE_CHECK
value: "true"
- name: AUTHENTIK_ERROR_REPORTING__ENABLED
value: "false"
- name: AUTHENTIK_LOG_LEVEL
value: info
# First-boot Authentik can take 3+ min on the migration phase
# (waiting on DB lock while worker also runs migrations). Initial
# delays are generous so kubelet doesn't kill the pod mid-migration;
# periodSeconds keeps post-startup probing responsive.
readinessProbe:
httpGet:
path: /-/health/ready/
port: 9000
initialDelaySeconds: 60
periodSeconds: 10
timeoutSeconds: 5
failureThreshold: 12
livenessProbe:
httpGet:
path: /-/health/live/
port: 9000
initialDelaySeconds: 300
periodSeconds: 30
timeoutSeconds: 10
failureThreshold: 3
startupProbe:
httpGet:
path: /-/health/live/
port: 9000
initialDelaySeconds: 30
periodSeconds: 15
timeoutSeconds: 10
failureThreshold: 40 # 30s + 40*15s = 10.5 min budget
resources:
requests: { cpu: 150m, memory: 512Mi }
limits: { cpu: 1500m, memory: 1Gi }
volumeMounts:
- name: media
mountPath: /media
volumes:
- name: media
persistentVolumeClaim:
claimName: authentik-media
---
# Authentik worker Deployment — runs Celery background tasks.
# Runs the ghcr.io/goauthentik/server image with the "worker" argument and
# mounts the authentik-media claim at /media.
# NOTE(review): this pod spec sets no nodeSelector/affinity. The strategy
# comment below describes the claim as RWO, which can only be attached on one
# node at a time — confirm the worker always lands on the server pod's node.
# NOTE(review): no readiness/liveness probes are defined for this container —
# confirm that is intentional.
apiVersion: apps/v1
kind: Deployment
metadata:
name: authentik-worker
namespace: authentik
labels:
app: authentik-worker
argocd.argoproj.io/instance: infra-authentik
spec:
replicas: 1
strategy:
type: Recreate # shares /media RWO PVC with server
selector:
matchLabels:
app: authentik-worker
template:
metadata:
labels:
app: authentik-worker
spec:
securityContext:
# Same as server pod — non-root uid 1000 needs PVC group write.
fsGroup: 1000
containers:
- name: worker
image: ghcr.io/goauthentik/server:2024.12.3
args: ["worker"]
# All credentials below are read from the authentik-credentials Secret;
# hostnames and database/user names are plain values.
env:
- name: AUTHENTIK_SECRET_KEY
valueFrom:
secretKeyRef:
name: authentik-credentials
key: AUTHENTIK_SECRET_KEY
- name: AUTHENTIK_REDIS__HOST
value: authentik-redis
- name: AUTHENTIK_REDIS__PASSWORD
valueFrom:
secretKeyRef:
name: authentik-credentials
key: REDIS_PASSWORD
- name: AUTHENTIK_POSTGRESQL__HOST
value: authentik-postgres
- name: AUTHENTIK_POSTGRESQL__NAME
value: authentik
- name: AUTHENTIK_POSTGRESQL__USER
value: authentik
- name: AUTHENTIK_POSTGRESQL__PASSWORD
valueFrom:
secretKeyRef:
name: authentik-credentials
key: POSTGRES_PASSWORD
- name: AUTHENTIK_DISABLE_UPDATE_CHECK
value: "true"
- name: AUTHENTIK_ERROR_REPORTING__ENABLED
value: "false"
- name: AUTHENTIK_LOG_LEVEL
value: info
resources:
requests: { cpu: 100m, memory: 256Mi }
limits: { cpu: 1000m, memory: 768Mi }
volumeMounts:
- name: media
mountPath: /media
volumes:
- name: media
persistentVolumeClaim:
claimName: authentik-media
---
# Service in front of pods labelled app=authentik-server. No type is set, so
# Kubernetes defaults it to ClusterIP. Exposes 9000 (http) and 9443 (https);
# the authentik IngressRoute in this file targets port 9000.
apiVersion: v1
kind: Service
metadata:
name: authentik-server
namespace: authentik
spec:
selector:
app: authentik-server
ports:
- name: http
port: 9000
targetPort: 9000
- name: https
port: 9443
targetPort: 9443
---
# step-ca leaf certificate for id.iamworkin.lan.
# The step-ca container resolver uses pfSense Unbound, so the A record for
# id.iamworkin.lan MUST exist there before this Certificate is applied;
# otherwise cert-manager's HTTP-01 challenge silently enters a 2h backoff.
# The record was added 2026-05-25 via scripts/pfsense-add-id-host.py.
# The issued key pair is written to Secret authentik-tls, which the authentik
# IngressRoute references.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: authentik-tls
namespace: authentik
spec:
secretName: authentik-tls
dnsNames:
- id.iamworkin.lan
issuerRef:
name: step-ca-acme
kind: ClusterIssuer
---
# Traefik route for id.iamworkin.lan on the websecure entry point. Every path
# on that host is forwarded to authentik-server:9000; TLS is served from the
# authentik-tls Secret.
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
name: authentik
namespace: authentik
spec:
entryPoints: [websecure]
routes:
- match: Host(`id.iamworkin.lan`)
kind: Rule
services:
- name: authentik-server
port: 9000
tls:
secretName: authentik-tls

View File

@@ -1,195 +0,0 @@
# FlowerCore.AiStation.Web GitOps adoption manifest.
#
# Authored from the already-live fc-aistation resources on 2026-06-04.
# Keep the live image tag, Service ClusterIP, and PVC volumeName unchanged so
# ArgoCD adopts in place instead of replacing the workload or data volume.
---
# 1Gi Longhorn RWO claim mounted by the aistation-web Deployment at /data.
# volumeName pins the claim to the already-bound PersistentVolume; per the file
# header it must stay unchanged so ArgoCD adopts the existing data volume.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: aistation-web-data
namespace: fc-aistation
labels:
app.kubernetes.io/name: aistation-web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
argocd.argoproj.io/instance: infra-fc-aistation
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 1Gi
storageClassName: longhorn
volumeMode: Filesystem
volumeName: pvc-27448d6f-6e66-42a7-a293-73dd8bbd6b3e
---
# aistation-web workload: one replica, Recreate strategy, listening on 5000,
# with the aistation-web-data claim mounted at /data. The file header states
# this manifest was authored from the live resource, so it carries the live
# field values verbatim.
# NOTE(review): resources is empty (no CPU/memory requests or limits) and both
# securityContext blocks are empty — confirm before any public exposure.
apiVersion: apps/v1
kind: Deployment
metadata:
name: aistation-web
namespace: fc-aistation
labels:
app.kubernetes.io/name: aistation-web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
argocd.argoproj.io/instance: infra-fc-aistation
spec:
progressDeadlineSeconds: 600
replicas: 1
revisionHistoryLimit: 3
selector:
matchLabels:
app.kubernetes.io/name: aistation-web
strategy:
type: Recreate
template:
metadata:
annotations:
fc.flowercore.io/healthz-anon: "true"
fc.flowercore.io/probe-path: "/healthz"
prometheus.io/path: /metrics/prometheus
prometheus.io/port: "5000"
prometheus.io/scrape: "true"
labels:
app.kubernetes.io/name: aistation-web
app.kubernetes.io/part-of: flowercore
spec:
containers:
# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
- envFrom:
- configMapRef:
name: aistation-web-config
# localhost/ image with imagePullPolicy: Never — the kubelet never pulls, so
# the image must already be present on the node that runs the pod.
image: localhost/fc-aistation-web:v20260602-aistation-owned-deploy-fix2
imagePullPolicy: Never
# Liveness and readiness both probe GET /healthz on port 5000.
livenessProbe:
failureThreshold: 3
httpGet:
path: /healthz
port: 5000
scheme: HTTP
initialDelaySeconds: 30
periodSeconds: 30
successThreshold: 1
timeoutSeconds: 5
name: aistation-web
ports:
- containerPort: 5000
name: http
protocol: TCP
readinessProbe:
failureThreshold: 6
httpGet:
path: /healthz
port: 5000
scheme: HTTP
initialDelaySeconds: 10
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
resources: {}
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
volumeMounts:
- mountPath: /data
name: data
dnsPolicy: ClusterFirst
restartPolicy: Always
schedulerName: default-scheduler
securityContext: {}
terminationGracePeriodSeconds: 30
volumes:
- name: data
persistentVolumeClaim:
claimName: aistation-web-data
---
# ClusterIP Service mapping port 80 to container port 5000 on pods labelled
# app.kubernetes.io/name=aistation-web. clusterIP/clusterIPs are pinned to the
# live address; per the file header they must stay unchanged so ArgoCD adopts
# the existing Service in place.
apiVersion: v1
kind: Service
metadata:
name: aistation-web
namespace: fc-aistation
labels:
app.kubernetes.io/name: aistation-web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
argocd.argoproj.io/instance: infra-fc-aistation
spec:
clusterIP: 10.43.211.127
clusterIPs:
- 10.43.211.127
internalTrafficPolicy: Cluster
ipFamilies:
- IPv4
ipFamilyPolicy: SingleStack
ports:
- name: http
port: 80
protocol: TCP
targetPort: 5000
selector:
app.kubernetes.io/name: aistation-web
sessionAffinity: None
type: ClusterIP
---
# Leaf certificate for aistation.iamworkin.lan from the step-ca-acme
# ClusterIssuer; stored in Secret aistation-web-tls, which the aistation-web
# IngressRoute references.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: aistation-web-tls
namespace: fc-aistation
labels:
app.kubernetes.io/name: aistation-web-tls
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
argocd.argoproj.io/instance: infra-fc-aistation
spec:
dnsNames:
- aistation.iamworkin.lan
issuerRef:
kind: ClusterIssuer
name: step-ca-acme
secretName: aistation-web-tls
---
# Internal Traefik route: every path on aistation.iamworkin.lan (websecure) is
# forwarded to Service aistation-web:80, with TLS from Secret aistation-web-tls.
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
name: aistation-web
namespace: fc-aistation
labels:
app.kubernetes.io/name: aistation-web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
argocd.argoproj.io/instance: infra-fc-aistation
spec:
entryPoints:
- websecure
routes:
- kind: Rule
match: Host(`aistation.iamworkin.lan`)
services:
- name: aistation-web
port: 80
tls:
secretName: aistation-web-tls
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
# When the operator decides to expose aistation-web publicly, uncomment + update the host,
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
#
# --- IngressRoute ---
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: aistation-web-public
# namespace: fc-aistation
# spec:
# entryPoints: [websecure]
# routes:
# - match: Host(`aistation.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
# kind: Rule
# middlewares:
# - name: aistation-web-public-profile-header # injects entitlement profile
# services:
# - name: aistation-web
# port: 80
# tls: {}
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).

View File

@@ -1,207 +1,5 @@
# FlowerCore Chat
#
# ArgoCD-managed workload plus TLS/Ingress. The chat-web-secret remains an
# out-of-band Secret until the values are moved into a 1Password-backed item;
# the Deployment references it as optional so GitOps can own the workload
# without storing secret material in this repo.
---
# Namespace that holds every FlowerCore Chat resource in this file.
apiVersion: v1
kind: Namespace
metadata:
name: fc-chat
labels:
app.kubernetes.io/part-of: flowercore
---
# Non-secret runtime settings for chat-web. The chat-web Deployment loads every
# key as an environment variable through envFrom.
apiVersion: v1
kind: ConfigMap
metadata:
name: chat-web-config
namespace: fc-chat
labels:
app.kubernetes.io/name: chat-web
app.kubernetes.io/part-of: flowercore
data:
ASPNETCORE_ENVIRONMENT: Production
ASPNETCORE_URLS: "http://+:8080"
ASPNETCORE_FORWARDEDHEADERS_ENABLED: "true"
# NOTE(review): Auth__Enabled is "false" while Auth__Oidc__Enabled is "true" —
# confirm how the app resolves this combination.
# The Oidc Authority and ClientId values here are defaults; the Deployment's
# env entries replace them when the chat-oidc-client Secret exists.
FlowerCore__Auth__Enabled: "false"
FlowerCore__Auth__Oidc__Enabled: "true"
FlowerCore__Auth__Oidc__Authority: "https://id.iamworkin.lan/application/o/chat/"
FlowerCore__Auth__Oidc__Audience: "chat"
FlowerCore__Auth__Oidc__ClientId: "chat"
FlowerCore__Database__ConnectionStrings__Sqlite: "Data Source=/data/chat.db"
# Ollama target. BLUEJAY-WS remains faster from the workstation, but testing
# showed Chat pods time out reaching 10.0.56.20:11434. Keep generation and
# behavior-rule checks on the cluster-routable edge1 endpoint until that route
# is fixed; choose models that edge1 actually hosts.
FlowerCore__AI__OllamaBaseUrl: "http://10.0.57.201:11434"
FlowerCore__AI__DefaultModelName: "gemma3:12b"
ChatOptions__BehaviorRuleEngine__OllamaBaseUrl: "http://10.0.57.201:11434"
ChatOptions__BehaviorRuleEngine__FallbackOllamaBaseUrl: "http://10.0.57.201:11434"
ChatOptions__BehaviorRuleEngine__ModelName: "gemma3:4b"
FlowerCore__AI__Memory__UseSharedIndexingAdapter: "true"
FlowerCore__AI__Memory__UseOllamaEmbeddings: "true"
FlowerCore__AI__Memory__EmbeddingModel: "nomic-embed-text"
FlowerCore__AI__Memory__EnableSharedIndexingBackfill: "true"
FlowerCore__AI__Memory__SharedIndexingDatabasePath: "/data/chat-memory-index.db"
FlowerCore__AI__Skills__Library__LibraryApiUrl: "http://library-web.fc-library.svc.cluster.local"
FlowerCore__AI__Skills__Retail__RetailApiUrl: "http://retail-web.fc-retail.svc.cluster.local"
FlowerCore__AI__Skills__Intranet__IntranetBaseUrl: "http://intranet-web.intranet.svc.cluster.local"
FlowerCore__AI__Skills__Print__PrintMcpBaseUrl: "http://10.0.57.16:5200"
FlowerCore__AI__Helpdesk__SentimentEscalation__Enabled: "true"
FlowerCore__AI__IrcBridge__Enabled: "true"
FlowerCore__AI__IrcBridge__DefaultProfileSlug: "it-helpdesk"
FlowerCore__AI__IrcBridge__MentionProfileSlug: "it-helpdesk"
FlowerCore__AI__IrcBridge__MentionReactiveMode: "mentions-only"
FlowerCore__AI__IrcBridge__AllowActionExecution: "false"
FlowerCore__AI__Voice__Piper__Host: "10.0.57.17"
FlowerCore__AI__Voice__Piper__Port: "10400"
FlowerCore__AI__Voice__OutputRoot: "/data/audio"
FlowerCore__AI__Voice__RetentionDays: "30"
# LLM provider abstraction (ADR-088). Anthropic stays disabled here -- when
# an operator wants to enable Claude, they flip Enabled=true and mount
# FlowerCore__Anthropic__ApiKey from the onepassword-synced Secret (see
# docs/ai-agents/anthropic-integration.md).
FlowerCore__Anthropic__Enabled: "false"
FlowerCore__Anthropic__BaseUrl: "https://api.anthropic.com"
FlowerCore__Anthropic__DefaultModel: "claude-sonnet-4-6"
FlowerCore__Anthropic__CheapModel: "claude-haiku-4-5-20251001"
FlowerCore__Anthropic__DeepModel: "claude-opus-4-7"
FlowerCore__Budget__ResponseCacheEnabled: "true"
OTEL_SERVICE_NAME: FlowerCore.Chat
OTEL_EXPORTER_OTLP_ENDPOINT: "http://otel-collector.monitoring.svc.cluster.local:4317"
OTEL_EXPORTER_OTLP_PROTOCOL: grpc
---
# 1Gi Longhorn RWO claim mounted by chat-web at /data. The ConfigMap points the
# SQLite database, the memory index and the voice output root at paths under
# /data.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: chat-web-data
namespace: fc-chat
labels:
app.kubernetes.io/name: chat-web
app.kubernetes.io/part-of: flowercore
spec:
accessModes:
- ReadWriteOnce
storageClassName: longhorn
volumeMode: Filesystem
resources:
requests:
storage: 1Gi
---
# chat-web workload: one replica, Recreate strategy, listening on 8080, with
# the chat-web-data claim mounted at /data.
apiVersion: apps/v1
kind: Deployment
metadata:
name: chat-web
namespace: fc-chat
labels:
app.kubernetes.io/name: chat-web
app.kubernetes.io/part-of: flowercore
spec:
replicas: 1
strategy:
type: Recreate
selector:
matchLabels:
app.kubernetes.io/name: chat-web
template:
metadata:
labels:
app.kubernetes.io/name: chat-web
app.kubernetes.io/part-of: flowercore
annotations:
fc.flowercore.io/healthz-anon: "true"
fc.flowercore.io/probe-path: "/healthz"
prometheus.io/scrape: "true"
prometheus.io/port: "8080"
prometheus.io/path: "/metrics/prometheus"
spec:
# Pinned to the rke2-server node. With imagePullPolicy: Never below, the
# image must already be present on that node.
nodeSelector:
kubernetes.io/hostname: rke2-server
securityContext:
fsGroup: 1654
fsGroupChangePolicy: OnRootMismatch
containers:
- name: chat-web
image: localhost/fc-chat-web:v20260614-regroup-ch6-37285d8
imagePullPolicy: Never
ports:
- name: http
containerPort: 8080
# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
# chat-web-secret is optional so the pod starts even when that out-of-band
# Secret has not been created.
envFrom:
- configMapRef:
name: chat-web-config
- secretRef:
name: chat-web-secret
optional: true
# Explicit env entries take precedence over envFrom, so these replace the
# ConfigMap's OIDC defaults when the chat-oidc-client Secret exists. Each ref
# is optional, so a missing Secret leaves the ConfigMap value in effect.
env:
- name: FlowerCore__Auth__Oidc__Authority
valueFrom:
secretKeyRef:
name: chat-oidc-client
key: issuer_url
optional: true
- name: FlowerCore__Auth__Oidc__ClientId
valueFrom:
secretKeyRef:
name: chat-oidc-client
key: client_id
optional: true
- name: FlowerCore__Auth__Oidc__ClientSecret
valueFrom:
secretKeyRef:
name: chat-oidc-client
key: client_secret
optional: true
volumeMounts:
- name: data
mountPath: /data
resources:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "512Mi"
cpu: "500m"
# Readiness and liveness both probe GET /healthz on port 8080.
readinessProbe:
httpGet:
path: /healthz
port: 8080
initialDelaySeconds: 10
periodSeconds: 10
timeoutSeconds: 5
failureThreshold: 6
livenessProbe:
httpGet:
path: /healthz
port: 8080
initialDelaySeconds: 30
periodSeconds: 30
timeoutSeconds: 5
failureThreshold: 3
volumes:
- name: data
persistentVolumeClaim:
claimName: chat-web-data
---
# ClusterIP Service mapping port 80 to container port 8080 on pods labelled
# app.kubernetes.io/name=chat-web.
apiVersion: v1
kind: Service
metadata:
name: chat-web
namespace: fc-chat
labels:
app.kubernetes.io/name: chat-web
app.kubernetes.io/part-of: flowercore
spec:
type: ClusterIP
selector:
app.kubernetes.io/name: chat-web
ports:
- name: http
port: 80
targetPort: 8080
protocol: TCP
# FlowerCore Chat — TLS + Ingress
# Deployment and Service managed by deploy script (not ArgoCD)
---
apiVersion: cert-manager.io/v1
kind: Certificate
@@ -232,41 +30,3 @@ spec:
port: 80
tls:
secretName: chat-web-tls
---
# Public host profile marker. The app treats this header as authoritative for
# the public twin, while the internal chat.iamworkin.lan route does not attach
# it and keeps the operator-oriented UI.
# Sets request header X-FC-Chat-Host-Profile to "public" on every request
# routed through it; only the chat-web-public IngressRoute references it.
apiVersion: traefik.io/v1alpha1
kind: Middleware
metadata:
name: chat-public-profile-header
namespace: fc-chat
spec:
headers:
customRequestHeaders:
X-FC-Chat-Host-Profile: "public"
---
# Public Cloudflare-fronted twin for the anonymous chat surface. Operator
# paths are intentionally absent from the allowlist below, so /admin,
# /operator, /console, /ops, /api/operator, and /operatorhub miss this route
# and return Traefik 404 before reaching the pod. Operator action still needed:
# create/verify Cloudflare DNS chat.flowercore.io -> public Traefik endpoint
# and mirror the cf-origin-flowercore-io TLS secret into namespace fc-chat.
#
# The single rule requires all three conditions: host chat.flowercore.io, a
# path on the allowlist, and method GET, HEAD, POST or OPTIONS. Matching
# requests pass through chat-public-profile-header before reaching chat-web:80.
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
name: chat-web-public
namespace: fc-chat
spec:
entryPoints:
- websecure
routes:
- match: Host(`chat.flowercore.io`) && (Path(`/`) || Path(`/chat`) || PathPrefix(`/_blazor`) || PathPrefix(`/_framework`) || PathPrefix(`/_content`) || PathPrefix(`/avatars`) || PathPrefix(`/css`) || PathPrefix(`/js`) || PathPrefix(`/favicon`) || PathPrefix(`/chathub`)) && (Method(`GET`) || Method(`HEAD`) || Method(`POST`) || Method(`OPTIONS`))
kind: Rule
middlewares:
- name: chat-public-profile-header
services:
- name: chat-web
port: 80
tls:
secretName: cf-origin-flowercore-io

View File

@@ -14,20 +14,6 @@
# cluster-rebuild repeatability. See
# feedback_networkpolicies_belong_in_bluejay_infra.md.
---
# OIDC client secret for the RemoteDesktop end-user sign-in (fleet regroup L9,
# 2026-06-12). The Authentik provider `remotedesktop` already exists; the 1P item
# `remotedesktop-oidc-client` (vault IAmWorkin) carries issuer_url / client_id /
# client_secret, and the 1Password operator mints the same-named K8s Secret that
# k8s/web-deployment.yaml (FlowerCore.RemoteDesktop repo) consumes with
# optional:true. Gate stays OFF (Q-RD-16) — this is flip-READINESS only.
# The resulting Secret is remotedesktop-oidc-client in namespace fc-desktop.
apiVersion: onepassword.com/v1
kind: OnePasswordItem
metadata:
name: remotedesktop-oidc-client
namespace: fc-desktop
spec:
itemPath: "vaults/IAmWorkin/items/remotedesktop-oidc-client"
---
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
@@ -65,26 +51,3 @@ spec:
port: 8080
tls:
secretName: remotedesktop-web-tls
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
# When the operator decides to expose remotedesktop-web publicly, uncomment + update the host,
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
#
# --- IngressRoute ---
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: remotedesktop-web-public
# namespace: fc-desktop
# spec:
# entryPoints: [websecure]
# routes:
# - match: Host(`desktop.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
# kind: Rule
# middlewares:
# - name: remotedesktop-web-public-profile-header # injects entitlement profile
# services:
# - name: remotedesktop-web
# port: 8080
# tls: {}
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).

View File

@@ -1,26 +0,0 @@
# Runtime secrets for FlowerCore.DeviceManagement.
#
# OnePasswordItem operator syncs this item into a Kubernetes Secret with the
# same name (fc-devicemgmt-runtime, namespace fc-devicemgmt). Expected fields:
# DB-Password
# mtls-ca.pem
# mtls-client.crt
# mtls-client.key
# mtls-chain.pem
#
# Do not add literal secret values to this repo. Runtime pods consume the
# synced Secret through env vars and read-only mounts.
apiVersion: onepassword.com/v1
kind: OnePasswordItem
metadata:
name: fc-devicemgmt-runtime
namespace: fc-devicemgmt
labels:
app.kubernetes.io/name: fc-devicemgmt
app.kubernetes.io/component: secrets
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
spec:
# Source item in the IAmWorkin vault.
itemPath: "vaults/IAmWorkin/items/FlowerCore DeviceManagement Runtime"

View File

@@ -1,70 +0,0 @@
# Admin / Helpdesk Console — Infra Finding (Cl-5, ADR-204)
**Outcome: ZERO new cluster infra required.** The Admin/helpdesk console rides the
existing `FlowerCore.DeviceManagement.Web` deploy as routes inside DM.Web (ADR-204).
The ingress already in this directory covers every path the admin console serves.
## What already exists for DM.Web (this directory)
| Manifest | Resource | Notes |
|----------|----------|-------|
| `certificate-web.yaml` | cert-manager `Certificate` `fc-devicemgmt-web-tls` | `issuerRef` → `step-ca-acme` `ClusterIssuer`; `dnsNames: [devices.iamworkin.lan]`; `secretName: fc-devicemgmt-web-tls`. DNS preflight gate documented (pfSense A record `devices.iamworkin.lan → 10.0.56.200` required before ACME sync). |
| `ingressroute-web.yaml` | Traefik `IngressRoute` `fc-devicemgmt-web` | `entryPoints: [websecure]`, ``match: Host(`devices.iamworkin.lan`)``, service `fc-devicemgmt-web:80`, `tls.secretName: fc-devicemgmt-web-tls`. |
| `service-web.yaml` | `Service` `fc-devicemgmt-web` (ClusterIP, 80→8080) | Owned by the DM.Web deploy. |
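| `deployment-web.yaml` | `Deployment` `fc-devicemgmt-web` | `replicas: 1` since the 2026-06-11 product-host enablement (SQLite on a Longhorn PVC). It was previously held at `replicas: 0`, gated on fc-mysql operator + `flowercore_devicemgmt` DB + 1Password runtime item; MySQL is not required for the first rollout — see header comment. Not a Cl-5 concern. |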
| also present | operator RBAC, namespace, network-policy, 1password-item | Full app dir, ArgoCD-managed. |
## Why the admin console needs nothing new
The existing IngressRoute matches **``Host(`devices.iamworkin.lan`)`` with no `PathPrefix`
constraint**. Traefik therefore forwards *all* paths on that host to the
`fc-devicemgmt-web` service — including any admin/helpdesk routes the DM.Web app exposes
under its `FlowerCore:PathBase` (e.g. `/admin`, `/helpdesk`). The same TLS secret
(`fc-devicemgmt-web-tls`) and the same step-ca ACME `Certificate` already protect them.
This matches the established TLS-only-app pattern (e.g. `apps/fc-library/fc-library.yaml`,
`apps/fc-retail/fc-retail.yaml`): `Certificate` (issuerRef `step-ca-acme` ClusterIssuer) +
host-matched `IngressRoute` sharing the `secretName`. Per ADR-204 the admin console's
Deployment/Service stay with the DM.Web deploy — no separate workload is created.
ArgoCD repo URL convention (for reference, not changed here):
`http://gitea-clusterip.gitea.svc.cluster.local:3000/bluejay/bluejay-infra.git`
(internal HTTP — step-ca cert isn't trusted by ArgoCD). Apps in `apps/*` are picked up by
the `bluejay-infra` ApplicationSet directory generator; this dir has no `kustomization.yaml`,
consistent with that pattern.
## Recommendation
**Ride DM.Web at a PathBase path → no new Certificate, no new IngressRoute, no new
Deployment/Service.** Close the lane. The admin console reaches users at
`https://devices.iamworkin.lan/<PathBase>` through the manifests already in this directory.
## Open question (operator decision — NOT actioned)
**Q-MP-ADMIN-HOST — Distinct admin hostname vs PathBase path under DM.Web?**
If the operator ever wants the admin/helpdesk console on its *own* hostname
(e.g. `admin.iamworkin.lan`) rather than a path under `devices.iamworkin.lan`, that is a
deliberate routing/auth-surface choice, not a mechanical infra add. It would require:
1. a pfSense / FlowerCore.DNS A record `admin.iamworkin.lan → 10.0.56.200` (ACME preflight
gate — step-ca HTTP-01 can't see the CoreDNS wildcard);
2. a second cert-manager `Certificate` (`step-ca-acme` ClusterIssuer, `dnsNames:
[admin.iamworkin.lan]`, own `secretName`);
3. a second host-matched `IngressRoute` → the same `fc-devicemgmt-web:80` service
(still no new Deployment/Service — same app behind a second host).
**Default taken (do not block): PathBase path under DM.Web = zero new infra.** A separate
admin hostname is left UNBUILT pending an explicit operator answer to Q-MP-ADMIN-HOST,
because it changes the public/auth surface and conflicts with the ADR-204 "routes inside
DM.Web" intent. If the answer is "separate host," author only the `Certificate` +
`IngressRoute` above (no Deployment/Service), mirroring `apps/fc-library/fc-library.yaml`.
## Verification
- `kubectl apply --dry-run=client` (kubectl v1.34.2, no live cluster): `ingressroute-web.yaml`,
`service-web.yaml`, `deployment-web.yaml` validated clean. `certificate-web.yaml` returned
"no matches for kind Certificate in cert-manager.io/v1" — expected with no cluster
connection (CRD discovery unavailable client-side); the YAML shape is identical to the
proven `fc-library` Certificate. Server-side dry-run + live host resolution =
**fix-forward** (cluster may be unreachable from this lane).
- No manifest authored or changed by this lane — finding note only.

View File

@@ -1,30 +0,0 @@
# Certificate for devices.iamworkin.lan.
#
# Preflight gate: FlowerCore.DNS / pfSense must contain an explicit A record:
# devices.iamworkin.lan -> 10.0.56.200
# before this Certificate is synced. step-ca ACME cannot see the CoreDNS
# wildcard, so missing pfSense DNS produces cert-manager HTTP-01 backoff
# (feedback_pfsense_dns_required_for_acme).
#
# Issued by the step-ca-acme ClusterIssuer into Secret fc-devicemgmt-web-tls.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: fc-devicemgmt-web-tls
namespace: fc-devicemgmt
labels:
app.kubernetes.io/name: fc-devicemgmt-web
app.kubernetes.io/component: web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
annotations:
flowercore.io/dns-preflight: "devices.iamworkin.lan must resolve to 10.0.56.200 before ACME sync"
spec:
secretName: fc-devicemgmt-web-tls
issuerRef:
name: step-ca-acme
kind: ClusterIssuer
dnsNames:
- devices.iamworkin.lan
# 30-day lifetime (720h); renewal starts 10 days (240h) before expiry.
duration: 720h
renewBefore: 240h

View File

@@ -1,83 +0,0 @@
# Cluster-wide permissions for the FlowerCore.DeviceManagement operator. The
# ClusterRoleBinding of the same name grants this role to the
# fc-devicemgmt-operator ServiceAccount in namespace fc-devicemgmt.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: fc-devicemgmt-operator
  labels:
    app.kubernetes.io/name: fc-devicemgmt-operator
    app.kubernetes.io/component: operator
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
rules:
  # Full lifecycle on the five DeviceManagement custom resources. They are
  # listed by name instead of '*' so the operator cannot touch unrelated
  # flowercore.io resources, and so the status/finalizers rule below is the
  # only grant on those subresources (a '*' resource also matches subresources).
  - apiGroups:
      - flowercore.io
    resources:
      - devices
      - devicegroups
      - devicepolicies
      - remotecommands
      - desiredstatedocuments
    verbs:
      - get
      - list
      - watch
      - create
      - update
      - patch
      - delete
  # Status and finalizer subresources of the same five kinds: read and update
  # only.
  - apiGroups:
      - flowercore.io
    resources:
      - devices/status
      - devices/finalizers
      - devicegroups/status
      - devicegroups/finalizers
      - devicepolicies/status
      - devicepolicies/finalizers
      - remotecommands/status
      - remotecommands/finalizers
      - desiredstatedocuments/status
      - desiredstatedocuments/finalizers
    verbs:
      - get
      - update
      - patch
  # Read-only lookup of Deployments; the operator manifest says this lets the
  # process resolve its own Deployment UID.
  - apiGroups:
      - apps
    resources:
      - deployments
    verbs:
      - get
  # Core resources the operator manages.
  # NOTE(review): this grants cluster-wide write access to secrets — confirm
  # the operator needs it outside fc-devicemgmt, otherwise move it to a
  # namespaced Role.
  - apiGroups:
      - ""
    resources:
      - pods
      - services
      - configmaps
      - secrets
      - events
    verbs:
      - get
      - list
      - watch
      - create
      - update
      - patch
      - delete
  # Full lifecycle on batch Jobs.
  - apiGroups:
      - batch
    resources:
      - jobs
    verbs:
      - get
      - list
      - watch
      - create
      - update
      - patch
      - delete
  # NetworkPolicies are read-only.
  - apiGroups:
      - networking.k8s.io
    resources:
      - networkpolicies
    verbs:
      - get
      - list
      - watch

View File

@@ -1,19 +0,0 @@
# Grants the fc-devicemgmt-operator ClusterRole to the ServiceAccount of the
# same name in namespace fc-devicemgmt. Because this is a ClusterRoleBinding,
# the role's rules apply in every namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: fc-devicemgmt-operator
labels:
app.kubernetes.io/name: fc-devicemgmt-operator
app.kubernetes.io/component: operator
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: fc-devicemgmt-operator
subjects:
- kind: ServiceAccount
name: fc-devicemgmt-operator
namespace: fc-devicemgmt

View File

@@ -1,186 +0,0 @@
# FlowerCore.DeviceManagement CRDs.
#
# These CRDs match the current operator annotations:
# [KubernetesEntity(Group = "flowercore.io", ApiVersion = "v1alpha1", ...)]
# Keep the schemas intentionally permissive until the DeviceManagement operator
# grows enforced CRD validation.
---
# CRD for kind Device (flowercore.io/v1alpha1), namespaced. The status
# subresource is enabled; spec and status accept arbitrary fields because
# x-kubernetes-preserve-unknown-fields is set on both.
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
name: devices.flowercore.io
labels:
app.kubernetes.io/name: fc-devicemgmt-operator
app.kubernetes.io/component: operator
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
spec:
group: flowercore.io
scope: Namespaced
names:
plural: devices
singular: device
kind: Device
listKind: DeviceList
versions:
- name: v1alpha1
served: true
storage: true
subresources:
status: {}
schema:
openAPIV3Schema:
type: object
properties:
spec:
type: object
x-kubernetes-preserve-unknown-fields: true
status:
type: object
x-kubernetes-preserve-unknown-fields: true
---
# CRD for kind DeviceGroup (flowercore.io/v1alpha1), namespaced. The status
# subresource is enabled; spec and status accept arbitrary fields because
# x-kubernetes-preserve-unknown-fields is set on both.
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
name: devicegroups.flowercore.io
labels:
app.kubernetes.io/name: fc-devicemgmt-operator
app.kubernetes.io/component: operator
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
spec:
group: flowercore.io
scope: Namespaced
names:
plural: devicegroups
singular: devicegroup
kind: DeviceGroup
listKind: DeviceGroupList
versions:
- name: v1alpha1
served: true
storage: true
subresources:
status: {}
schema:
openAPIV3Schema:
type: object
properties:
spec:
type: object
x-kubernetes-preserve-unknown-fields: true
status:
type: object
x-kubernetes-preserve-unknown-fields: true
---
# CRD for kind DevicePolicy (flowercore.io/v1alpha1), namespaced. The status
# subresource is enabled; spec and status accept arbitrary fields because
# x-kubernetes-preserve-unknown-fields is set on both.
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
name: devicepolicies.flowercore.io
labels:
app.kubernetes.io/name: fc-devicemgmt-operator
app.kubernetes.io/component: operator
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
spec:
group: flowercore.io
scope: Namespaced
names:
plural: devicepolicies
singular: devicepolicy
kind: DevicePolicy
listKind: DevicePolicyList
versions:
- name: v1alpha1
served: true
storage: true
subresources:
status: {}
schema:
openAPIV3Schema:
type: object
properties:
spec:
type: object
x-kubernetes-preserve-unknown-fields: true
status:
type: object
x-kubernetes-preserve-unknown-fields: true
---
# CRD for kind RemoteCommand (flowercore.io/v1alpha1), namespaced. The status
# subresource is enabled; spec and status accept arbitrary fields because
# x-kubernetes-preserve-unknown-fields is set on both.
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
name: remotecommands.flowercore.io
labels:
app.kubernetes.io/name: fc-devicemgmt-operator
app.kubernetes.io/component: operator
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
spec:
group: flowercore.io
scope: Namespaced
names:
plural: remotecommands
singular: remotecommand
kind: RemoteCommand
listKind: RemoteCommandList
versions:
- name: v1alpha1
served: true
storage: true
subresources:
status: {}
schema:
openAPIV3Schema:
type: object
properties:
spec:
type: object
x-kubernetes-preserve-unknown-fields: true
status:
type: object
x-kubernetes-preserve-unknown-fields: true
---
# CRD for kind DesiredStateDocument (flowercore.io/v1alpha1), namespaced. The
# status subresource is enabled; spec and status accept arbitrary fields
# because x-kubernetes-preserve-unknown-fields is set on both.
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
name: desiredstatedocuments.flowercore.io
labels:
app.kubernetes.io/name: fc-devicemgmt-operator
app.kubernetes.io/component: operator
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
spec:
group: flowercore.io
scope: Namespaced
names:
plural: desiredstatedocuments
singular: desiredstatedocument
kind: DesiredStateDocument
listKind: DesiredStateDocumentList
versions:
- name: v1alpha1
served: true
storage: true
subresources:
status: {}
schema:
openAPIV3Schema:
type: object
properties:
spec:
type: object
x-kubernetes-preserve-unknown-fields: true
status:
type: object
x-kubernetes-preserve-unknown-fields: true

View File

@@ -1,109 +0,0 @@
# FlowerCore.DeviceManagement Operator.
#
# KubeOps controller for devices.flowercore.io resources. Operator-created
# children must set OwnerReferences + traceability labels/annotations per
# k8s-pod-ownership-and-traceability-standard.md. RBAC below grants
# apps/deployments/get so the process can resolve its own Deployment UID.
#
# Single replica running as the fc-devicemgmt-operator ServiceAccount, with a
# locked-down container: non-root uid/gid 1654, read-only root filesystem, no
# privilege escalation, all capabilities dropped.
apiVersion: apps/v1
kind: Deployment
metadata:
name: fc-devicemgmt-operator
namespace: fc-devicemgmt
labels:
app: fc-devicemgmt-operator
app.kubernetes.io/name: fc-devicemgmt-operator
app.kubernetes.io/component: operator
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
annotations:
flowercore.io/traceability-standard: k8s-pod-ownership-and-traceability-standard
spec:
replicas: 1
revisionHistoryLimit: 3
selector:
matchLabels:
app: fc-devicemgmt-operator
template:
metadata:
labels:
app: fc-devicemgmt-operator
app.kubernetes.io/name: fc-devicemgmt-operator
app.kubernetes.io/component: operator
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "8080"
prometheus.io/path: "/metrics"
flowercore.io/audit-trace-id: "runtime-activity-trace"
spec:
serviceAccountName: fc-devicemgmt-operator
securityContext:
fsGroup: 1654
fsGroupChangePolicy: OnRootMismatch
containers:
- name: operator
# localhost/ image with imagePullPolicy: Never — the kubelet never pulls, so
# the image must already be present on the node that runs the pod.
image: localhost/fc-devicemgmt-operator:v20260519-sp34cl3-fix
imagePullPolicy: Never
ports:
- name: metrics
containerPort: 8080
env:
- name: ASPNETCORE_ENVIRONMENT
value: "Production"
- name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
value: "false"
# Downward API: the pod's own name and namespace.
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
# Must match this Deployment's metadata.name.
- name: FLOWERCORE_KUBERNETES_OWNER_DEPLOYMENT
value: "fc-devicemgmt-operator"
- name: FlowerCore__Service__Name
value: "FlowerCore.DeviceManagement.Operator"
- name: FlowerCore__DeviceManagement__DefaultTenantId
value: "system"
resources:
requests:
cpu: 50m
memory: 128Mi
limits:
cpu: 500m
memory: 512Mi
# Both probes only check that TCP port 8080 (the metrics port) accepts
# connections; no HTTP health endpoint is probed.
readinessProbe:
tcpSocket:
port: 8080
initialDelaySeconds: 5
periodSeconds: 10
livenessProbe:
tcpSocket:
port: 8080
initialDelaySeconds: 20
periodSeconds: 30
securityContext:
runAsNonRoot: true
runAsUser: 1654
runAsGroup: 1654
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
capabilities:
drop:
- ALL
# The root filesystem is read-only, so /tmp and /app/logs are the only
# writable paths; both are emptyDir volumes and are lost when the pod goes.
volumeMounts:
- name: tmp
mountPath: /tmp
- name: logs
mountPath: /app/logs
volumes:
- name: tmp
emptyDir: {}
- name: logs
emptyDir: {}

View File

@@ -1,163 +0,0 @@
# FlowerCore.DeviceManagement Web.
#
# Source repo is expected to ship FlowerCore.DeviceManagement.Web in a later
# Sprint 9+ lane. This manifest is static-valid without requiring the image to
# exist yet; import localhost/fc-devicemgmt-web:<tag> to all schedulable RKE2
# nodes before letting ArgoCD sync a live rollout.
#
# LIVE — 2026-06-11 DeviceManagement product-host enablement.
# The current DeviceManagement Web source is SQLite-backed in Program.cs, so
# Phase 1 production uses a Longhorn RWO PVC at /data/devicemgmt.db. The
# 1Password runtime item stays mounted through env for future MySQL/API-key
# cutover, but MySQL is not required for this first product-host rollout.
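# Image v20260613-g2-66a43c1 was built from FlowerCore.DeviceManagement master
# 66a43c1, carrying edge enrollment network completion and SQLite-safe
# trust-bundle smoke coverage. The Deployment below currently pins the newer
# tag v20260614-regroup-c5b8f82; keep this note in step with image bumps.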
---
# Longhorn-backed claim that the fc-devicemgmt-web Deployment mounts at /data
# (the SQLite database file lives there).
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  namespace: fc-devicemgmt
  name: fc-devicemgmt-web-data
  labels:
    app: fc-devicemgmt-web
    app.kubernetes.io/name: fc-devicemgmt-web
    app.kubernetes.io/component: web
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
spec:
  storageClassName: longhorn
  accessModes: [ReadWriteOnce]
  resources:
    requests:
      storage: 1Gi
---
# Single-replica FlowerCore.DeviceManagement Web host.
# Rollouts remove the old pod before creating the new one (maxSurge 0,
# maxUnavailable 1) - presumably because the data claim is ReadWriteOnce.
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: fc-devicemgmt
  name: fc-devicemgmt-web
  annotations:
    flowercore.io/traceability-standard: k8s-pod-ownership-and-traceability-standard
  labels:
    app: fc-devicemgmt-web
    app.kubernetes.io/name: fc-devicemgmt-web
    app.kubernetes.io/component: web
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
spec:
  replicas: 1
  revisionHistoryLimit: 3
  selector:
    matchLabels:
      app: fc-devicemgmt-web
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxUnavailable: 1
      maxSurge: 0
  template:
    metadata:
      annotations:
        fc.flowercore.io/healthz-anon: "true"
        fc.flowercore.io/probe-path: "/healthz"
        prometheus.io/scrape: "true"
        prometheus.io/port: "8080"
        prometheus.io/path: "/metrics"
        flowercore.io/audit-trace-id: "runtime-activity-trace"
      labels:
        app: fc-devicemgmt-web
        app.kubernetes.io/name: fc-devicemgmt-web
        app.kubernetes.io/component: web
        app.kubernetes.io/part-of: flowercore
        app.kubernetes.io/managed-by: argocd
        flowercore.io/tenant-id: system
        flowercore.io/created-by: bluejay-infra
    spec:
      # Pod-level: volumes are group-owned by the same GID the container runs as.
      securityContext:
        fsGroup: 1654
        fsGroupChangePolicy: OnRootMismatch
      volumes:
        # Persistent SQLite storage.
        - name: data
          persistentVolumeClaim:
            claimName: fc-devicemgmt-web-data
        # Writable scratch space; the container root filesystem is read-only.
        - name: tmp
          emptyDir: {}
        - name: logs
          emptyDir: {}
      containers:
        - name: web
          # Locally imported image; never pulled from a registry.
          image: localhost/fc-devicemgmt-web:v20260614-regroup-c5b8f82
          imagePullPolicy: Never
          ports:
            - name: http
              containerPort: 8080
          # fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
          env:
            - name: ASPNETCORE_URLS
              value: "http://+:8080"
            - name: ASPNETCORE_ENVIRONMENT
              value: "Production"
            - name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
              value: "false"
            - name: HOME
              value: "/data"
            - name: FlowerCore__Service__Name
              value: "FlowerCore.DeviceManagement.Web"
            - name: FlowerCore__DeviceManagement__DefaultTenantId
              value: "system"
            - name: FlowerCore__Database__Provider
              value: "Sqlite"
            - name: FlowerCore__Database__ConnectionStrings__Sqlite
              value: "Data Source=/data/devicemgmt.db"
            # Required secret key: the pod cannot start while
            # fc-devicemgmt-runtime/DB-Password is missing.
            - name: FlowerCore__Database__Password
              valueFrom:
                secretKeyRef:
                  name: fc-devicemgmt-runtime
                  key: DB-Password
            - name: FlowerCore__EventBus__Redis__Configuration
              value: "redis.fc-redis.svc:6379"
          volumeMounts:
            - name: data
              mountPath: /data
            - name: tmp
              mountPath: /tmp
            - name: logs
              mountPath: /app/logs
          resources:
            requests:
              cpu: 100m
              memory: 256Mi
            limits:
              cpu: 1000m
              memory: 768Mi
          securityContext:
            runAsNonRoot: true
            runAsUser: 1654
            runAsGroup: 1654
            readOnlyRootFilesystem: true
            allowPrivilegeEscalation: false
            capabilities:
              drop: [ALL]
          # NOTE(review): all three probes only check that TCP 8080 accepts
          # connections, although the pod annotations advertise /healthz.
          # Up to 5s + 30 x 5s is allowed for startup.
          startupProbe:
            tcpSocket:
              port: 8080
            initialDelaySeconds: 5
            periodSeconds: 5
            failureThreshold: 30
          readinessProbe:
            tcpSocket:
              port: 8080
            periodSeconds: 10
            failureThreshold: 3
          livenessProbe:
            tcpSocket:
              port: 8080
            initialDelaySeconds: 30
            periodSeconds: 30
            failureThreshold: 3

View File

@@ -1,55 +0,0 @@
# LAN ingress for FlowerCore.DeviceManagement Web.
#
# RKE2 Traefik has no built-in ACME resolver configured. Keep TLS certificate
# ownership in cert-manager Certificate/fc-devicemgmt-web-tls.
# Routes https://devices.iamworkin.lan (websecure entry point) to the
# fc-devicemgmt-web Service on port 80, terminating TLS with the
# fc-devicemgmt-web-tls secret.
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
  namespace: fc-devicemgmt
  name: fc-devicemgmt-web
  labels:
    app.kubernetes.io/name: fc-devicemgmt-web
    app.kubernetes.io/component: web
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
spec:
  entryPoints: [websecure]
  tls:
    secretName: fc-devicemgmt-web-tls
  routes:
    - kind: Rule
      match: Host(`devices.iamworkin.lan`)
      services:
        - name: fc-devicemgmt-web
          port: 80
# Future public agent/update host gate (OFF by default):
#
# Do not enable `update.flowercore.io` here until Authentik OIDC Q-OIDC-1
# resolves the public-device-management auth model and route ownership with
# UpdateCenter. When enabled, use a separate public IngressRoute with an
# explicit Method allowlist, public-host auth middleware, and public TLS
# certificate strategy. Leaving this as comments keeps ArgoCD from stealing
# live UpdateCenter traffic.
#
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: fc-devicemgmt-web-public
# namespace: fc-devicemgmt
# annotations:
# flowercore.io/public-host-gate: "disabled-until-Q-OIDC-1"
# spec:
# entryPoints:
# - websecure
# routes:
# - match: Host(`update.flowercore.io`) && (Method(`GET`) || Method(`HEAD`) || Method(`POST`) || Method(`OPTIONS`))
# kind: Rule
# services:
# - name: fc-devicemgmt-web
# port: 80
# tls:
# secretName: fc-devicemgmt-public-tls

View File

@@ -1,13 +0,0 @@
# FlowerCore.DeviceManagement namespace.
#
# ArgoCD discovers this directory as Application `infra-fc-devicemgmt`.
# Namespace that holds every FlowerCore.DeviceManagement workload.
kind: Namespace
apiVersion: v1
metadata:
  name: fc-devicemgmt
  labels:
    app.kubernetes.io/name: fc-devicemgmt
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra

View File

@@ -1,224 +0,0 @@
# FlowerCore.DeviceManagement NetworkPolicies.
#
# NetworkPolicies belong in bluejay-infra so ArgoCD owns rebuild state.
# Rules include Traefik post-DNAT backend ports per
# feedback_netpol_dnat_backend_port and Synology NFS egress for the requested
# cold-tier / future artifact path.
---
# Default-deny isolation for the DeviceManagement Web pod: only the listed
# ingress sources and egress destinations are allowed.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: fc-devicemgmt-web-isolation
  namespace: fc-devicemgmt
  labels:
    app.kubernetes.io/name: fc-devicemgmt-web
    app.kubernetes.io/component: web
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
spec:
  podSelector:
    matchLabels:
      app: fc-devicemgmt-web
  policyTypes:
    - Ingress
    - Egress
  ingress:
    # LAN edge: only cluster Traefik should reach the Web pod for
    # devices.iamworkin.lan.
    - from:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: traefik-system
          podSelector:
            matchLabels:
              app.kubernetes.io/name: traefik
      ports:
        - port: 8080
          protocol: TCP
    # Prometheus scrape. The Web pod is annotated prometheus.io/scrape on
    # port 8080, so the monitoring namespace must be admitted here exactly as
    # it is in fc-devicemgmt-operator-isolation; without this rule the scrape
    # is dropped by the policy.
    - from:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: monitoring
      ports:
        - port: 8080
          protocol: TCP
    # Direct LAN diagnostics are allowed only from FlowerCore LAN/VPN ranges.
    - from:
        - ipBlock:
            cidr: 10.0.56.0/24
        - ipBlock:
            cidr: 10.0.57.0/24
        - ipBlock:
            cidr: 10.0.58.0/24
        - ipBlock:
            cidr: 10.0.68.0/27
      ports:
        - port: 8080
          protocol: TCP
  egress:
    # CoreDNS.
    - to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: kube-system
          podSelector:
            matchLabels:
              k8s-app: kube-dns
      ports:
        - port: 53
          protocol: UDP
        - port: 53
          protocol: TCP
    # Database namespace.
    - to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: fc-mysql
      ports:
        - port: 3306
          protocol: TCP
    # Redis backplane for multi-replica SignalR / live-status fan-out.
    - to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: fc-redis
      ports:
        - port: 6379
          protocol: TCP
    # Traefik VIP / in-cluster Traefik for self-callbacks and public URL
    # generation tests. Include post-DNAT backend ports 8443 + 8080.
    - to:
        - ipBlock:
            cidr: 10.0.56.200/32
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: traefik-system
          podSelector:
            matchLabels:
              app.kubernetes.io/name: traefik
      ports:
        - port: 80
          protocol: TCP
        - port: 443
          protocol: TCP
        - port: 8080
          protocol: TCP
        - port: 8443
          protocol: TCP
    # Agent egress: LAN/VPN devices may run DM Agent in Generic, Kiosk, Pi,
    # ThinClient, or Server mode. Keep this private-range only.
    - to:
        - ipBlock:
            cidr: 10.0.56.0/24
        - ipBlock:
            cidr: 10.0.57.0/24
        - ipBlock:
            cidr: 10.0.58.0/24
        - ipBlock:
            cidr: 10.0.68.0/27
      ports:
        - port: 80
          protocol: TCP
        - port: 443
          protocol: TCP
        - port: 8080
          protocol: TCP
        - port: 8443
          protocol: TCP
        - port: 5000
          protocol: TCP
        - port: 5001
          protocol: TCP
    # Synology NFS cold-tier / artifact mount allowance.
    - to:
        - ipBlock:
            cidr: 10.0.58.3/32
      ports:
        - port: 2049
          protocol: TCP
        - port: 2049
          protocol: UDP
        - port: 111
          protocol: TCP
        - port: 111
          protocol: UDP
---
# Isolation for the DeviceManagement operator pod. Ingress is limited to the
# monitoring namespace on 8080; egress is limited to the destinations below.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  namespace: fc-devicemgmt
  name: fc-devicemgmt-operator-isolation
  labels:
    app.kubernetes.io/name: fc-devicemgmt-operator
    app.kubernetes.io/component: operator
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
spec:
  policyTypes: [Ingress, Egress]
  podSelector:
    matchLabels:
      app: fc-devicemgmt-operator
  ingress:
    # Any pod in the monitoring namespace may reach port 8080.
    - ports:
        - { port: 8080, protocol: TCP }
      from:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: monitoring
  egress:
    # CoreDNS.
    - ports:
        - { port: 53, protocol: UDP }
        - { port: 53, protocol: TCP }
      to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: kube-system
          podSelector:
            matchLabels:
              k8s-app: kube-dns
    # Kubernetes API for KubeOps reconciliation and Deployment UID lookup.
    # The empty peer list places no restriction on the destination.
    - ports:
        - { port: 443, protocol: TCP }
        - { port: 6443, protocol: TCP }
      to: []
    # Agent egress for operator-initiated probes / fallback command dispatch.
    - ports:
        - { port: 80, protocol: TCP }
        - { port: 443, protocol: TCP }
        - { port: 8080, protocol: TCP }
        - { port: 8443, protocol: TCP }
        - { port: 5000, protocol: TCP }
        - { port: 5001, protocol: TCP }
      to:
        - ipBlock: { cidr: 10.0.56.0/24 }
        - ipBlock: { cidr: 10.0.57.0/24 }
        - ipBlock: { cidr: 10.0.58.0/24 }
        - ipBlock: { cidr: 10.0.68.0/27 }
    # Synology NFS allowance for future cold-tier/audit archival jobs.
    - ports:
        - { port: 2049, protocol: TCP }
        - { port: 2049, protocol: UDP }
        - { port: 111, protocol: TCP }
        - { port: 111, protocol: UDP }
      to:
        - ipBlock: { cidr: 10.0.58.3/32 }

View File

@@ -1,22 +0,0 @@
# ClusterIP front for the Web pods: Service port 80 forwards to container
# port 8080 on pods labelled app=fc-devicemgmt-web.
apiVersion: v1
kind: Service
metadata:
  namespace: fc-devicemgmt
  name: fc-devicemgmt-web
  labels:
    app: fc-devicemgmt-web
    app.kubernetes.io/name: fc-devicemgmt-web
    app.kubernetes.io/component: web
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra
spec:
  type: ClusterIP
  ports:
    - name: http
      protocol: TCP
      port: 80
      targetPort: 8080
  selector:
    app: fc-devicemgmt-web

View File

@@ -1,12 +0,0 @@
# Identity used by the fc-devicemgmt-operator pod (serviceAccountName).
kind: ServiceAccount
apiVersion: v1
metadata:
  namespace: fc-devicemgmt
  name: fc-devicemgmt-operator
  labels:
    app.kubernetes.io/name: fc-devicemgmt-operator
    app.kubernetes.io/component: operator
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/tenant-id: system
    flowercore.io/created-by: bluejay-infra

View File

@@ -74,14 +74,6 @@ metadata:
spec:
itemPath: "vaults/IAmWorkin/items/FlowerCore Edition Signing Key - edition:aistation-field"
---
apiVersion: onepassword.com/v1
kind: OnePasswordItem
metadata:
name: distribution-oidc-client
namespace: fc-distribution
spec:
itemPath: "vaults/IAmWorkin/items/distribution-oidc-client"
---
apiVersion: apps/v1
kind: Deployment
metadata:
@@ -109,7 +101,6 @@ spec:
prometheus.io/scrape: "true"
prometheus.io/port: "8080"
prometheus.io/path: "/metrics"
flowercore.io/healthz-auth-policy: "allow-anonymous"
spec:
# Synology NFS export `/volume1/kubernetes` ACL only allows rke2-server
# (10.0.56.11) right now. Until the ACL is widened in DSM (admin only),
@@ -127,7 +118,7 @@ spec:
# dotnet.exe publish -c Release -o deploy/app \
# src/FlowerCore.Distribution.Web/FlowerCore.Distribution.Web.csproj
# podman build -t localhost/fc-distribution:v<tag> -f deploy/Dockerfile.deploy deploy
image: localhost/fc-distribution:v20260604-oidc-root-anon
image: localhost/fc-distribution:v202605061948
imagePullPolicy: Never
ports:
- containerPort: 8080
@@ -139,25 +130,6 @@ spec:
value: "Production"
- name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
value: "false"
# Authentik/OIDC enforcement. Public read/entitlement + the
# dist.flowercore.io Method() allowlist stay open; OIDC gates the
# operator/admin surface while /healthz remains anonymous.
- name: FlowerCore__Auth__Enabled
value: "true"
- name: FlowerCore__Auth__Oidc__Enabled
value: "true"
- name: FlowerCore__Auth__Oidc__Authority
value: "https://id.iamworkin.lan/application/o/distribution/"
- name: FlowerCore__Auth__Oidc__Audience
value: "distribution"
- name: FlowerCore__Auth__Oidc__ClientId
value: "distribution"
- name: FlowerCore__Auth__Oidc__ClientSecret
valueFrom:
secretKeyRef:
name: distribution-oidc-client
key: client_secret
optional: true
# SQLite connection (catalog + data-protection keys via FlowerCoreDbContext).
# Read by Data/DatabaseProviderExtensions.cs in precedence order; Sqlite key wins.
- name: FlowerCore__Database__Provider

View File

@@ -1,45 +0,0 @@
# FlowerCore Divoom DM Pi Device
Source-controlled Puppet/Hiera deployment contract for registering the edge2
Divoom MiniToo panel as a FlowerCore DeviceManagement-managed Pi device.
This is not a Kubernetes application. The live panel remains the existing
edge2 `flowercore-divoom.service` managed by `FlowerCore.Puppet`
`profile::pi::service::divoom`, with the .NET payload deployed out of band
and `/opt/flowercore/divoom/data` plus the Bluetooth shell wrappers preserved.
Because edge2 is already Hiera-driven through `profile::pi::service::apps`,
the deploy home is additive `profile::pi::service` data/profile source, not
`profile::edge::service::apps` and not an ArgoCD/K8s app.
## Scope
- Stage DeviceManagement registration metadata for the edge2 Divoom MiniToo.
- Stage a separate, disabled-by-default DM Agent executor unit for privileged
Bluetooth operations once the DM-RPC lane lands.
- Keep `flowercore-divoom.service` and `flowercore-divoom-bt.service`
untouched: no service replacement, no restart subscription, no K8s surface.
- Preserve the current wrapper contract:
`/opt/flowercore/divoom/bt-link.sh`,
`/opt/flowercore/divoom/bt-reset.sh`, and
`/opt/flowercore/divoom/audio-link.sh`.
- Keep FM radio disabled and require visible render proof; device-info echo is
not render proof.
## Artifact Map
| Path | Use |
| --- | --- |
| `hiera/edge2-divoom-dm-device.overlay.yaml` | Additive Hiera overlay for edge2. Merge into the existing node YAML without removing `fc-pimanager` or `fc-divoom`. |
| `puppet/profile/pi/service/divoom_dm_device.pp` | Puppet profile shape to vendor into `FlowerCore.Puppet` after the DM-RPC executor binary exists. |
| `puppet/templates/divoom-device-registration.json.epp` | DM device registration metadata rendered on edge2. |
| `puppet/templates/flowercore-divoom-dm-agent.service.epp` | Separate DM Agent systemd unit. Defaults are stopped and disabled until a later cutover. |
## Rollout Notes
1. Land these artifacts in bluejay-infra as the deploy contract.
2. Vendor the Puppet profile and EPP templates into `FlowerCore.Puppet`.
3. Merge the Hiera overlay into `data/nodes/edge2.iamworkin.lan.yaml`.
4. Run Puppet in noop first, preferably with a node-local validation directory
under `~/.fcv` rather than `/tmp`.
5. Only enable the DM Agent service after the DeviceManagement BT executor has
landed and passed operator-eyeball render proof.

View File

@@ -1,32 +0,0 @@
---
# Merge into FlowerCore.Puppet data/nodes/edge2.iamworkin.lan.yaml.
# Additive overlay only: keep the existing fc-pimanager version/tarball entry,
# keep fc-divoom enabled, and do not move Divoom into Kubernetes.
profile::pi::service::apps:
  fc-pimanager:
    binary: 'FlowerCore.PiManager.Web'
    install_dir: '/opt/fc-pimanager'
    port: 5000
    environment: 'edge2'
    version: '2026.05.28.1646'
    tarball_source: 'puppet:///modules/profile/pi/builds/fc-pimanager.tar.gz'
  fc-divoom:
    enabled: true

# DM device registration. The agent unit stays stopped and disabled until the
# DeviceManagement BT executor has landed.
profile::pi::service::divoom_dm_device::ensure: 'present'
profile::pi::service::divoom_dm_device::service_enabled: false
profile::pi::service::divoom_dm_device::service_ensure: 'stopped'
profile::pi::service::divoom_dm_device::device_id: 'edge2-divoom-minitoo'
profile::pi::service::divoom_dm_device::display_name: 'edge2 Divoom MiniToo'
profile::pi::service::divoom_dm_device::host_fqdn: 'edge2.iamworkin.lan'
# Must match the Host() rule of the fc-devicemgmt-web IngressRoute, which
# serves devices.iamworkin.lan. The previous value pointed at
# devicemgmt.iamworkin.lan, a host no IngressRoute in this repo section routes.
# NOTE(review): revert if a devicemgmt.iamworkin.lan alias exists elsewhere.
profile::pi::service::divoom_dm_device::dm_web_url: 'https://devices.iamworkin.lan'
profile::pi::service::divoom_dm_device::divoom_install_dir: '/opt/flowercore/divoom'
profile::pi::service::divoom_dm_device::agent_install_dir: '/opt/flowercore/devicemanagement-agent'
profile::pi::service::divoom_dm_device::bt_candidate_channels:
  - '1'
  - '10'
profile::pi::service::divoom_dm_device::default_bt_channel: '1'
profile::pi::service::divoom_dm_device::a2dp_default_state: 'off'
profile::pi::service::divoom_dm_device::fm_radio_enabled: false
profile::pi::service::divoom_dm_device::visible_render_proof_required: true

View File

@@ -1,140 +0,0 @@
# Drop into FlowerCore.Puppet site-modules/profile/manifests/pi/service/divoom_dm_device.pp.
# This profile is additive to profile::pi::service::divoom. It must not manage,
# restart, replace, or subscribe the existing flowercore-divoom.service.
#
# @summary Renders the DeviceManagement registration JSON and a separate DM
#   Agent systemd unit for a Divoom panel attached to a Pi.
#
# @param ensure 'present' installs the registration file and unit; 'absent'
#   stops the agent and removes both.
# @param service_enabled Whether the agent unit is enabled at boot.
# @param service_ensure Desired run state of the agent unit.
# @param service_name systemd unit name (without the .service suffix).
# @param device_id Device identifier; also names the registration JSON file.
# @param display_name Human-readable device name written to the registration.
# @param host_fqdn FQDN of the Pi hosting the panel.
# @param dm_web_url Base URL passed to the agent as --dm-web-url. Defaults to
#   the host served by the fc-devicemgmt-web IngressRoute.
# @param divoom_install_dir Install directory of the existing Divoom service
#   and its bt-link/bt-reset/audio-link wrappers.
# @param agent_install_dir Install directory of the DM Agent payload.
# @param agent_binary File name of the agent executable inside agent_install_dir.
# @param bt_candidate_channels Bluetooth serial channels written to the registration.
# @param default_bt_channel Channel recorded as the default.
# @param a2dp_default_state Default A2DP state recorded in the registration.
# @param fm_radio_enabled Recorded as capabilities.fmRadioEnabled.
# @param visible_render_proof_required Recorded as transport.visibleRenderProofRequired.
class profile::pi::service::divoom_dm_device (
  Enum['present', 'absent']  $ensure                        = 'present',
  Boolean                    $service_enabled               = false,
  Enum['running', 'stopped'] $service_ensure                = 'stopped',
  String                     $service_name                  = 'flowercore-divoom-dm-agent',
  String                     $device_id                     = 'edge2-divoom-minitoo',
  String                     $display_name                  = 'edge2 Divoom MiniToo',
  String                     $host_fqdn                     = 'edge2.iamworkin.lan',
  String                     $dm_web_url                    = 'https://devices.iamworkin.lan',
  String                     $divoom_install_dir            = '/opt/flowercore/divoom',
  String                     $agent_install_dir             = '/opt/flowercore/devicemanagement-agent',
  String                     $agent_binary                  = 'FlowerCore.DeviceManagement.Agent',
  Array[String]              $bt_candidate_channels         = ['1', '10'],
  String                     $default_bt_channel            = '1',
  Enum['on', 'off']          $a2dp_default_state            = 'off',
  Boolean                    $fm_radio_enabled              = false,
  Boolean                    $visible_render_proof_required = true,
) {
  include profile::workstation::safe_account_exclusion

  $safe_account      = $profile::workstation::safe_account_exclusion::safe_account
  $config_dir        = '/etc/flowercore/device-management/devices'
  $state_dir         = '/var/lib/flowercore/divoom-dm-agent'
  $log_dir           = '/var/log/flowercore/divoom-dm-agent'
  $registration_path = "${config_dir}/${device_id}.json"
  $agent_binary_path = "${agent_install_dir}/${agent_binary}"
  $unit_path         = "/etc/systemd/system/${service_name}.service"

  # JSON array literal of the candidate channels, each rendered as a quoted string.
  $bt_channels_json = inline_template('[<%= @bt_candidate_channels.map { |c| "\"#{c}\"" }.join(", ") %>]')

  if $safe_account {
    # Operator workstation: refuse to apply and leave an audit marker instead.
    notify { 'fc-divoom-dm-device safe-account exclusion':
      message => 'SAFE-ACCOUNT-EXCLUSION: Divoom DM Pi device profile refused to apply on operator workstation',
    }

    if $facts['os']['family'] != 'windows' {
      ensure_resource('file', '/var/log/flowercore-audit', {
        'ensure' => 'directory',
        'owner'  => 'root',
        'group'  => 'root',
        'mode'   => '0755',
      })

      file { '/var/log/flowercore-audit/safe-account-noop-fc-divoom-dm-device.log':
        ensure  => file,
        owner   => 'root',
        group   => 'root',
        mode    => '0644',
        content => "noop: divoom dm pi device profile refused to apply on safe-account host\n",
        require => File['/var/log/flowercore-audit'],
      }
    }
  } elsif $ensure == 'absent' {
    # Stop and disable the agent while its unit file still exists, then remove
    # the unit and make systemd forget it.
    service { $service_name:
      ensure => stopped,
      enable => false,
      before => File[$unit_path],
    }

    # Removing the unit must refresh the daemon-reload exec; without the
    # notify the refreshonly exec never runs and systemd keeps the stale unit
    # loaded.
    file { $unit_path:
      ensure => absent,
      notify => Exec['fc-divoom-dm-agent-systemd-reload'],
    }

    file { $registration_path:
      ensure => absent,
    }

    exec { 'fc-divoom-dm-agent-systemd-reload':
      command     => '/usr/bin/systemctl daemon-reload',
      refreshonly => true,
      path        => ['/usr/bin', '/bin'],
    }
  } else {
    case $facts['os']['family'] {
      'Debian': {}
      default: { fail("profile::pi::service::divoom_dm_device only supports Debian-family OS, got ${facts['os']['family']}") }
    }

    # NOTE(review): the parent directories (/etc/flowercore/device-management,
    # /var/lib/flowercore, /var/log/flowercore) are not managed here and Puppet
    # does not create parents - presumably another profile owns them; confirm.
    # NOTE(review): state/log dirs are root-owned 0755 while the unit template
    # runs the agent as a non-root user; confirm the agent can write to them.
    file { [$config_dir, $state_dir, $log_dir]:
      ensure => directory,
      owner  => 'root',
      group  => 'root',
      mode   => '0755',
    }

    file { $registration_path:
      ensure  => file,
      owner   => 'root',
      group   => 'root',
      mode    => '0644',
      content => epp('profile/pi/fc_divoom_dm/divoom-device-registration.json.epp', {
        'device_id'                     => $device_id,
        'display_name'                  => $display_name,
        'host_fqdn'                     => $host_fqdn,
        'divoom_install_dir'            => $divoom_install_dir,
        'bt_channels_json'              => $bt_channels_json,
        'default_bt_channel'            => $default_bt_channel,
        'a2dp_default_state'            => $a2dp_default_state,
        'fm_radio_enabled'              => $fm_radio_enabled,
        'visible_render_proof_required' => $visible_render_proof_required,
      }),
      require => File[$config_dir],
    }

    file { $unit_path:
      ensure  => file,
      owner   => 'root',
      group   => 'root',
      mode    => '0644',
      content => epp('profile/pi/fc_divoom_dm/flowercore-divoom-dm-agent.service.epp', {
        'service_name'       => $service_name,
        'device_id'          => $device_id,
        'dm_web_url'         => $dm_web_url,
        'registration_path'  => $registration_path,
        'divoom_install_dir' => $divoom_install_dir,
        'agent_install_dir'  => $agent_install_dir,
        'agent_binary_path'  => $agent_binary_path,
        'state_dir'          => $state_dir,
        'log_dir'            => $log_dir,
      }),
      notify  => Exec['fc-divoom-dm-agent-systemd-reload'],
      require => File[$registration_path],
    }

    exec { 'fc-divoom-dm-agent-systemd-reload':
      command     => '/usr/bin/systemctl daemon-reload',
      refreshonly => true,
      path        => ['/usr/bin', '/bin'],
    }

    # The service only requires (never subscribes to) its inputs, so config
    # changes do not restart the agent.
    service { $service_name:
      ensure  => $service_ensure,
      enable  => $service_enabled,
      require => [
        File[$unit_path],
        File[$registration_path],
        Exec['fc-divoom-dm-agent-systemd-reload'],
      ],
    }
  }
}

View File

@@ -1,34 +0,0 @@
<%# DeviceManagement registration document for a Divoom MiniToo panel.
    Rendered through epp() by profile::pi::service::divoom_dm_device, which
    supplies every $variable used below in its parameter hash. Values are
    interpolated verbatim: nothing here JSON-escapes quotes or backslashes.
    $bt_channels_json is inserted unquoted, so it must already be a JSON array. -%>
{
"deviceId": "<%= $device_id %>",
"displayName": "<%= $display_name %>",
"hostFqdn": "<%= $host_fqdn %>",
"kind": "DivoomMiniToo",
"managedBy": "FlowerCore.DeviceManagement",
"executionMode": "Pi",
"transport": {
"kind": "BluetoothSerial",
"candidateChannels": <%= $bt_channels_json %>,
"defaultChannel": "<%= $default_bt_channel %>",
"deviceInfoIsRenderProof": false,
"visibleRenderProofRequired": <%= $visible_render_proof_required %>
},
"paths": {
"divoomInstallDir": "<%= $divoom_install_dir %>",
"btLink": "<%= $divoom_install_dir %>/bt-link.sh",
"btReset": "<%= $divoom_install_dir %>/bt-reset.sh",
"audioLink": "<%= $divoom_install_dir %>/audio-link.sh"
},
"capabilities": {
"supportsBluetoothSerial": true,
"supportsBtChannelRedetect": true,
"supportsBtHardReset": true,
"supportsBtAudioProfileSwitch": true,
"a2dpDefaultState": "<%= $a2dp_default_state %>",
"fmRadioEnabled": <%= $fm_radio_enabled %>
},
"safety": {
"preserveExistingService": "flowercore-divoom.service",
"preserveDataDirectory": "<%= $divoom_install_dir %>/data",
"doNotEnableFmRadio": true
}
}

View File

@@ -1,36 +0,0 @@
# FlowerCore Divoom DM Agent unit (EPP template).
# The unit is skipped (not failed) unless the agent binary, the registration
# file and all three Bluetooth wrapper scripts exist.
[Unit]
Description=FlowerCore Divoom DM Agent Bluetooth executor
Documentation=https://github.com/astoltz/FlowerCore.Notes/blob/master/docs/standards/divoom-tv-hdmi-multitarget-render-substrate.md
Wants=network-online.target
After=network-online.target bluetooth.service
Requires=bluetooth.service
ConditionPathExists=<%= $agent_binary_path %>
ConditionPathExists=<%= $registration_path %>
ConditionPathExists=<%= $divoom_install_dir %>/bt-link.sh
ConditionPathExists=<%= $divoom_install_dir %>/bt-reset.sh
ConditionPathExists=<%= $divoom_install_dir %>/audio-link.sh
# Start rate limiting is a [Unit] setting. It previously sat in [Service],
# where systemd does not recognise StartLimitIntervalSec=, so the intended
# "3 starts per 300s" limit was not applied as written.
StartLimitIntervalSec=300s
StartLimitBurst=3

[Service]
Type=simple
User=stoltz
Group=stoltz
WorkingDirectory=<%= $agent_install_dir %>
Environment=DOTNET_CLI_TELEMETRY_OPTOUT=1
Environment=FLOWERCORE_DM_DEVICE_REGISTRATION=<%= $registration_path %>
Environment=Divoom__Bluetooth__DeviceInfoIsRenderProof=false
Environment=Divoom__Bluetooth__VisibleRenderProofRequired=true
Environment=Divoom__Bluetooth__A2dpDefaultState=off
ExecStart=<%= $agent_binary_path %> --mode=Pi --device-id=<%= $device_id %> --dm-web-url=<%= $dm_web_url %> --registration=<%= $registration_path %>
Restart=on-failure
RestartSec=10s
SupplementaryGroups=bluetooth audio dialout
# Sandboxing: the filesystem is read-only apart from the state and log dirs.
NoNewPrivileges=true
PrivateTmp=true
ProtectSystem=strict
ProtectHome=true
ReadWritePaths=<%= $state_dir %> <%= $log_dir %>

[Install]
WantedBy=multi-user.target

View File

@@ -1,44 +0,0 @@
# FlowerCore Divoom TV Pi HDMI
Source-controlled deploy shape for the native `FlowerCore.Divoom.Tv`
Avalonia HDMI renderer on a Raspberry Pi connected to a TV.
This is a Puppet/systemd appliance bundle, not a Kubernetes application. It
mirrors the existing `fc-signage-pi-player` pattern: bluejay-infra carries the
systemd units, scripts, Hiera shape, and Puppet profile source that
`FlowerCore.Puppet` vendors and installs.
## Scope
- Launch the future `FlowerCore.Divoom.Tv` linux-arm64 self-contained payload
from `/opt/flowercore/divoom-tv/FlowerCore.Divoom.Tv`.
- Prefer `cage` as the Wayland fullscreen compositor, with direct app launch as
a fallback for development images.
- Restart the app after HDMI hotplug with a 2 second DRM settle delay.
- Keep all runtime state local: `/var/lib/fc-divoom-tv` and
`/var/log/fc-divoom-tv`.
- Avoid CDN/runtime fetches; the app renders the in-house Divoom scene catalog
locally.
## Artifact Map
| Path | Use |
| --- | --- |
| `systemd/flowercore-divoom-tv.service` | Fullscreen Avalonia HDMI app service. |
| `systemd/flowercore-divoom-tv-hdmi.service` | HDMI hotplug responder service. |
| `systemd/99-flowercore-divoom-tv-hdmi.rules` | DRM udev hotplug rule. |
| `scripts/flowercore-divoom-tv-prelaunch.sh` | Preflight checks and local directory creation. |
| `scripts/flowercore-divoom-tv-launch.sh` | Cage-first fullscreen launcher. |
| `scripts/flowercore-divoom-tv-hdmi-respond.sh` | Hotplug settle and restart script. |
| `puppet/profile/pi/service/divoom_tv.pp` | Puppet profile shape to vendor into `FlowerCore.Puppet`. |
| `hiera/example-divoom-tv-pi.iamworkin.lan.yaml` | Example node Hiera for a Divoom TV Pi. |
## Rollout Notes
1. Build `FlowerCore.Divoom.Tv` with `dotnet.exe publish -c Release -r linux-arm64 --self-contained`.
2. Stage the payload to `/opt/flowercore/divoom-tv/` through the standard noc1
jump path and avoid `/tmp` for unprivileged Pi scratch.
3. Vendor the profile and static files into `FlowerCore.Puppet`.
4. Run Puppet noop, then apply on the target Pi.
5. Prove deployment with `systemctl is-active flowercore-divoom-tv.service`,
journal lines showing frames presented, and a visible HDMI display check.

View File

@@ -1,19 +0,0 @@
---
# Example node data for a dedicated Pi -> HDMI -> TV Divoom renderer.
# Copy into FlowerCore.Puppet data/nodes/<hostname>.iamworkin.lan.yaml only
# after the Pi has a static DHCP/DNS entry and the linux-arm64 payload exists.
# NOTE(review): `role` is presumably nested under `facts:`; the indentation is
# not visible in this view - confirm against the source file.
facts:
role: pi_prototype
profile::motd::role: 'Divoom TV HDMI Renderer'
# Unlike the class defaults (service stopped and disabled), this example
# enables and starts the renderer service.
profile::pi::service::divoom_tv::ensure: 'present'
profile::pi::service::divoom_tv::service_enabled: true
profile::pi::service::divoom_tv::service_ensure: 'running'
profile::pi::service::divoom_tv::install_dir: '/opt/flowercore/divoom-tv'
profile::pi::service::divoom_tv::state_dir: '/var/lib/fc-divoom-tv'
profile::pi::service::divoom_tv::log_dir: '/var/log/fc-divoom-tv'
# These three values are written to /etc/flowercore/divoom-tv.env by the profile.
profile::pi::service::divoom_tv::presentation_mode: 'PillarboxSquare'
profile::pi::service::divoom_tv::startup_scene: 'bluejay-clock'
profile::pi::service::divoom_tv::reduced_motion: false

View File

@@ -1,149 +0,0 @@
# Drop into FlowerCore.Puppet site-modules/profile/manifests/pi/service/divoom_tv.pp.
# Static files come from profile/pi/fc_divoom_tv/ after this bluejay-infra
# bundle is vendored into the Puppet control repo.
#
# @summary Installs the Divoom TV HDMI renderer service, its HDMI hotplug
#   responder, helper scripts, udev rule and environment file on a Pi.
#
# @param ensure 'present' installs everything; 'absent' stops the service and
#   removes the installed files.
# @param service_enabled Whether the renderer unit is enabled at boot.
# @param service_ensure Desired run state of the renderer unit.
# @param service_name systemd unit name (without the .service suffix).
# @param user System account that owns the install, state and log directories.
# @param group Primary group of that account.
# @param install_dir Directory holding the renderer payload.
# @param state_dir Runtime state directory (also the account's home).
# @param log_dir Log directory.
# @param presentation_mode Written as FC_DIVOOM_TV_PRESENTATION_MODE.
# @param startup_scene Written as FC_DIVOOM_TV_START_SCENE.
# @param reduced_motion Written as FC_DIVOOM_TV_REDUCED_MOTION.
class profile::pi::service::divoom_tv (
  Enum['present', 'absent']  $ensure            = 'present',
  Boolean                    $service_enabled   = false,
  Enum['running', 'stopped'] $service_ensure    = 'stopped',
  String                     $service_name      = 'flowercore-divoom-tv',
  String                     $user              = 'fc-divoom-tv',
  String                     $group             = 'fc-divoom-tv',
  String                     $install_dir       = '/opt/flowercore/divoom-tv',
  String                     $state_dir         = '/var/lib/fc-divoom-tv',
  String                     $log_dir           = '/var/log/fc-divoom-tv',
  String                     $presentation_mode = 'PillarboxSquare',
  String                     $startup_scene     = 'bluejay-clock',
  Boolean                    $reduced_motion    = false,
) {
  include profile::workstation::safe_account_exclusion

  $safe_account = $profile::workstation::safe_account_exclusion::safe_account
  $unit_files   = [
    "/etc/systemd/system/${service_name}.service",
    "/etc/systemd/system/${service_name}-hdmi.service",
  ]
  $udev_rule    = '/etc/udev/rules.d/99-flowercore-divoom-tv-hdmi.rules'

  if $safe_account {
    # Operator workstation: refuse to apply.
    notify { 'fc-divoom-tv safe-account exclusion':
      message => 'SAFE-ACCOUNT-EXCLUSION: Divoom TV Pi profile refused to apply on operator workstation',
    }
  } elsif $ensure == 'absent' {
    # Stop and disable the renderer while its unit file still exists.
    service { $service_name:
      ensure => stopped,
      enable => false,
      before => File[$unit_files],
    }

    # Removing units / udev rules must refresh the matching reload exec;
    # otherwise systemd and udev keep the removed definitions loaded.
    file { $unit_files:
      ensure => absent,
      notify => Exec['fc-divoom-tv-systemd-reload'],
    }

    file { $udev_rule:
      ensure => absent,
      notify => Exec['fc-divoom-tv-udev-reload'],
    }

    file { [
        '/usr/local/bin/flowercore-divoom-tv-prelaunch.sh',
        '/usr/local/bin/flowercore-divoom-tv-launch.sh',
        '/usr/local/bin/flowercore-divoom-tv-hdmi-respond.sh',
        '/etc/flowercore/divoom-tv.env',
      ]:
        ensure => absent,
    }

    exec { 'fc-divoom-tv-systemd-reload':
      command     => '/usr/bin/systemctl daemon-reload',
      refreshonly => true,
      path        => ['/usr/bin', '/bin'],
    }

    exec { 'fc-divoom-tv-udev-reload':
      command     => '/usr/bin/udevadm control --reload-rules',
      refreshonly => true,
      path        => ['/usr/bin', '/bin'],
    }
  } else {
    case $facts['os']['family'] {
      'Debian': {}
      default: { fail("profile::pi::service::divoom_tv only supports Debian-family OS, got ${facts['os']['family']}") }
    }

    package { ['cage', 'libgbm1', 'libdrm2', 'libxkbcommon0', 'fonts-dejavu-core']:
      ensure => installed,
    }

    group { $group:
      ensure => present,
      system => true,
    }

    user { $user:
      ensure     => present,
      system     => true,
      gid        => $group,
      home       => $state_dir,
      managehome => false,
      shell      => '/usr/sbin/nologin',
      require    => Group[$group],
    }

    # Payload, state and log directories belong to the service account.
    file { [$install_dir, $state_dir, $log_dir]:
      ensure => directory,
      owner  => $user,
      group  => $group,
      mode   => '0755',
    }

    # /etc/flowercore is a shared configuration directory holding root-owned
    # files. It stays root-owned so the unprivileged service account cannot
    # rename or replace entries inside it.
    file { '/etc/flowercore':
      ensure => directory,
      owner  => 'root',
      group  => 'root',
      mode   => '0755',
    }

    file { '/etc/flowercore/divoom-tv.env':
      ensure  => file,
      owner   => 'root',
      group   => 'root',
      mode    => '0644',
      content => "FC_DIVOOM_TV_PRESENTATION_MODE=${presentation_mode}\nFC_DIVOOM_TV_START_SCENE=${startup_scene}\nFC_DIVOOM_TV_REDUCED_MOTION=${reduced_motion}\n",
      require => File['/etc/flowercore'],
    }

    # Helper scripts: destination path => module source path.
    $script_map = {
      '/usr/local/bin/flowercore-divoom-tv-prelaunch.sh'    => 'profile/pi/fc_divoom_tv/flowercore-divoom-tv-prelaunch.sh',
      '/usr/local/bin/flowercore-divoom-tv-launch.sh'       => 'profile/pi/fc_divoom_tv/flowercore-divoom-tv-launch.sh',
      '/usr/local/bin/flowercore-divoom-tv-hdmi-respond.sh' => 'profile/pi/fc_divoom_tv/flowercore-divoom-tv-hdmi-respond.sh',
    }

    $script_map.each |$dest, $src| {
      file { $dest:
        ensure => file,
        owner  => 'root',
        group  => 'root',
        mode   => '0755',
        source => "puppet:///modules/${src}",
      }
    }

    # systemd units: destination path => module source path.
    $unit_map = {
      "/etc/systemd/system/${service_name}.service"      => 'profile/pi/fc_divoom_tv/flowercore-divoom-tv.service',
      "/etc/systemd/system/${service_name}-hdmi.service" => 'profile/pi/fc_divoom_tv/flowercore-divoom-tv-hdmi.service',
    }

    $unit_map.each |$dest, $src| {
      file { $dest:
        ensure => file,
        owner  => 'root',
        group  => 'root',
        mode   => '0644',
        source => "puppet:///modules/${src}",
        notify => Exec['fc-divoom-tv-systemd-reload'],
      }
    }

    file { $udev_rule:
      ensure => file,
      owner  => 'root',
      group  => 'root',
      mode   => '0644',
      source => 'puppet:///modules/profile/pi/fc_divoom_tv/99-flowercore-divoom-tv-hdmi.rules',
      notify => Exec['fc-divoom-tv-udev-reload'],
    }

    exec { 'fc-divoom-tv-systemd-reload':
      command     => '/usr/bin/systemctl daemon-reload',
      refreshonly => true,
      path        => ['/usr/bin', '/bin'],
    }

    exec { 'fc-divoom-tv-udev-reload':
      command     => '/usr/bin/udevadm control --reload-rules',
      refreshonly => true,
      path        => ['/usr/bin', '/bin'],
    }

    service { $service_name:
      ensure  => $service_ensure,
      enable  => $service_enabled,
      require => [
        File["/etc/systemd/system/${service_name}.service"],
        File['/etc/flowercore/divoom-tv.env'],
        File['/usr/local/bin/flowercore-divoom-tv-prelaunch.sh'],
        File['/usr/local/bin/flowercore-divoom-tv-launch.sh'],
        Exec['fc-divoom-tv-systemd-reload'],
      ],
    }
  }
}

View File

@@ -1,5 +0,0 @@
#!/usr/bin/env bash
# HDMI hotplug responder: waits briefly, then restarts the Divoom TV renderer.
# Exits non-zero if any command fails (set -e).
set -euo pipefail
# Two-second settle delay before touching the renderer.
sleep 2
# NOTE(review): `restart` also starts the unit when it is currently stopped;
# confirm that is intended on hosts where the renderer is kept stopped.
systemctl restart flowercore-divoom-tv.service

View File

@@ -1,25 +0,0 @@
#!/usr/bin/env bash
# Launcher for the FlowerCore Divoom TV HDMI renderer.
# Each setting is read from an FC_DIVOOM_TV_* environment variable with a
# built-in default. Arguments passed to this script are appended after the
# generated options. The renderer runs inside cage when cage is on PATH and
# directly otherwise.
set -euo pipefail

APP_BIN="${FC_DIVOOM_TV_BIN:-/opt/flowercore/divoom-tv/FlowerCore.Divoom.Tv}"
STATE_DIR="${FC_DIVOOM_TV_STATE_DIR:-/var/lib/fc-divoom-tv}"
LOG_DIR="${FC_DIVOOM_TV_LOG_DIR:-/var/log/fc-divoom-tv}"
PRESENTATION_MODE="${FC_DIVOOM_TV_PRESENTATION_MODE:-PillarboxSquare}"
START_SCENE="${FC_DIVOOM_TV_START_SCENE:-bluejay-clock}"
REDUCED_MOTION="${FC_DIVOOM_TV_REDUCED_MOTION:-false}"

# Complete renderer command line: binary, generated options, caller arguments.
RENDER_CMD=(
  "${APP_BIN}"
  "--target=hdmi"
  "--presentation-mode=${PRESENTATION_MODE}"
  "--startup-scene=${START_SCENE}"
  "--reduced-motion=${REDUCED_MOTION}"
  "--state-dir=${STATE_DIR}"
  "--log-dir=${LOG_DIR}"
  "$@"
)

# No cage on PATH: say so on stderr and replace this shell with the renderer.
if ! command -v cage >/dev/null 2>&1; then
  echo "[$(date -Is)] cage not found; launching FlowerCore.Divoom.Tv directly" >&2
  exec "${RENDER_CMD[@]}"
fi

# cage is available: replace this shell with cage wrapping the renderer.
exec cage -- "${RENDER_CMD[@]}"

View File

@@ -1,23 +0,0 @@
#!/usr/bin/env bash
# Pre-start checks for the FlowerCore Divoom TV renderer:
#   - create the state and log directories if needed,
#   - exit 1 when the renderer binary is missing or not executable,
#   - report (without failing) when no HDMI connector is visible,
#   - report whether cage is installed.
set -euo pipefail

APP_BIN="${FC_DIVOOM_TV_BIN:-/opt/flowercore/divoom-tv/FlowerCore.Divoom.Tv}"
STATE_DIR="${FC_DIVOOM_TV_STATE_DIR:-/var/lib/fc-divoom-tv}"
LOG_DIR="${FC_DIVOOM_TV_LOG_DIR:-/var/log/fc-divoom-tv}"

# Print one timestamped line; the caller picks stdout or stderr via redirection.
say() {
  echo "[$(date -Is)] $*"
}

mkdir -p "${STATE_DIR}" "${LOG_DIR}"

# Hard requirement: the renderer binary must be executable.
if ! [[ -x "${APP_BIN}" ]]; then
  say "missing executable ${APP_BIN}" >&2
  exit 1
fi

# Soft check: warn when /sys/class/drm exists but lists no HDMI-A connector.
if [[ -d /sys/class/drm ]]; then
  if ! find /sys/class/drm -maxdepth 1 -name 'card*-HDMI-A-*' -print -quit | grep -q .; then
    say "no HDMI connector visible yet; continuing so the app can wait for display" >&2
  fi
fi

# Informational: note which launch path will be available.
if ! command -v cage >/dev/null 2>&1; then
  say "cage not installed; direct launch fallback will be used" >&2
else
  say "cage available for fullscreen Wayland launch"
fi

View File

@@ -1,2 +0,0 @@
# HDMI hotplug hook: on a DRM "change" event, queue the one-shot responder unit,
# which settles DRM for 2s before restarting the fullscreen Avalonia renderer.
# --no-block: udev RUN programs must be very short-lived, so only enqueue the
# start job here instead of waiting for the responder to finish.
# NOTE(review): the kernel commonly reports hotplug as a change event on the card
# device (e.g. card0 with HOTPLUG=1) rather than on the connector node; confirm
# with `udevadm monitor --property` that KERNEL=="card?-HDMI-A-?" really matches.
SUBSYSTEM=="drm", KERNEL=="card?-HDMI-A-?", ACTION=="change", RUN+="/usr/bin/systemctl --no-block start flowercore-divoom-tv-hdmi.service"

View File

@@ -1,7 +0,0 @@
# One-shot unit that runs the HDMI hotplug responder script.
# There is no [Install] section, so it is never enabled at boot and only runs
# when something starts it explicitly.
[Unit]
Description=FlowerCore Divoom TV HDMI hotplug responder
# Opt out of the implicit dependencies systemd normally adds to service units.
DefaultDependencies=no
[Service]
# oneshot: the unit counts as started only after the script has exited.
Type=oneshot
ExecStart=/usr/local/bin/flowercore-divoom-tv-hdmi-respond.sh

View File

@@ -1,40 +0,0 @@
# Fullscreen HDMI renderer service for the Divoom TV Pi.
[Unit]
Description=FlowerCore Divoom TV HDMI Renderer (Avalonia fullscreen)
Documentation=https://github.com/astoltz/FlowerCore.Notes/blob/master/docs/standards/divoom-tv-hdmi-multitarget-render-substrate.md
Wants=network-online.target
After=network-online.target systemd-user-sessions.service
# Skip activation (without marking the unit failed) until the renderer binary is deployed.
ConditionPathExists=/opt/flowercore/divoom-tv/FlowerCore.Divoom.Tv
# Start rate limit: at most 5 starts per 300 s. These keys belong in [Unit];
# systemd ignores StartLimitIntervalSec= when it is placed in [Service], which
# left the default interval in effect instead of the intended 300 s window.
StartLimitBurst=5
StartLimitIntervalSec=300s
[Service]
Type=simple
User=fc-divoom-tv
Group=fc-divoom-tv
WorkingDirectory=/opt/flowercore/divoom-tv
# Leading "-": a missing environment file is not an error.
EnvironmentFile=-/etc/flowercore/divoom-tv.env
Environment=DOTNET_CLI_TELEMETRY_OPTOUT=1
# XDG_RUNTIME_DIR points at the RuntimeDirectory declared below (/run/fc-divoom-tv, mode 0700).
Environment=XDG_RUNTIME_DIR=/run/fc-divoom-tv
RuntimeDirectory=fc-divoom-tv
RuntimeDirectoryMode=0700
# Pre-flight checks run first; a non-zero exit aborts this start attempt.
ExecStartPre=/usr/local/bin/flowercore-divoom-tv-prelaunch.sh
ExecStart=/usr/local/bin/flowercore-divoom-tv-launch.sh
# Restart 10 s after every exit, until the [Unit] start limit above is reached.
Restart=always
RestartSec=10s
# Memory: throttle above 1500M, hard cap at 2G.
MemoryMax=2G
MemoryHigh=1500M
# Sandboxing: private /tmp, no privilege gain, read-only file system hierarchy
# except the paths listed in ReadWritePaths, and no access to home directories.
PrivateTmp=true
NoNewPrivileges=true
ProtectSystem=strict
ProtectHome=true
ReadWritePaths=/var/lib/fc-divoom-tv /var/log/fc-divoom-tv /run/fc-divoom-tv
# Take tty1 as standard input; stdout/stderr go to the journal. The TTY is
# reset, hung up and deallocated around service execution.
TTYPath=/dev/tty1
StandardInput=tty
StandardOutput=journal
StandardError=journal
TTYReset=yes
TTYVHangup=yes
TTYVTDisallocate=yes
[Install]
WantedBy=graphical.target

View File

@@ -30,26 +30,3 @@ spec:
port: 80
tls:
secretName: dms-web-tls
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
# When the operator decides to expose dms-web publicly, uncomment + update the host,
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
#
# --- IngressRoute ---
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: dms-web-public
# namespace: fc-dms
# spec:
# entryPoints: [websecure]
# routes:
# - match: Host(`dms.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
# kind: Rule
# middlewares:
# - name: dms-web-public-profile-header # injects entitlement profile
# services:
# - name: dms-web
# port: 80
# tls: {}
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).

View File

@@ -1,481 +0,0 @@
---
apiVersion: v1
kind: Namespace
metadata:
name: fc-dns
labels:
app.kubernetes.io/part-of: flowercore
---
# 1Password-backed Secret for the pfSense admin password.
# The operator watches this CRD, resolves the vault item, and produces a
# K8s Secret of the same name with each 1P field as a key. The `password`
# field of the "pfSense Admin" item becomes Secret key `password`.
apiVersion: onepassword.com/v1
kind: OnePasswordItem
metadata:
name: pfsense-admin
namespace: fc-dns
spec:
itemPath: "vaults/IAmWorkin/items/pfSense Admin"
---
apiVersion: onepassword.com/v1
kind: OnePasswordItem
metadata:
name: dns-oidc-client
namespace: fc-dns
spec:
itemPath: "vaults/IAmWorkin/items/dns-oidc-client"
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: dns-web-data
namespace: fc-dns
spec:
accessModes: [ReadWriteOnce]
storageClassName: longhorn
resources:
requests:
storage: 1Gi
---
apiVersion: v1
kind: ConfigMap
metadata:
name: dns-web-config
namespace: fc-dns
data:
appsettings.Production.json: |
{
"FlowerCore": {
"Auth": {
"Enabled": false,
"Oidc": {
"Enabled": true,
"Audience": "dns",
"RequireHttpsMetadata": true
}
},
"Database": {
"Provider": "Sqlite",
"ConnectionStrings": {
"Sqlite": "Data Source=/data/dns.db"
}
},
"Tenant": {
"DefaultTenantId": "default",
"JwtClaimsEnabled": false,
"DefaultTenantHosts": [
"dns.iamworkin.lan"
]
},
"Audit": {
"HashChain": {
"BridgeSensitivity": {
"Distribution": "Warn"
}
}
}
}
}
---
# dns-web: single-replica web workload for the fc-dns namespace, listening on 5320.
apiVersion: apps/v1
kind: Deployment
metadata:
name: dns-web
namespace: fc-dns
labels:
app.kubernetes.io/name: dns-web
app.kubernetes.io/managed-by: flowercore
spec:
replicas: 1
# Recreate: the old pod is stopped before the new one starts, so the
# ReadWriteOnce data volume is never claimed by two pods at once.
strategy:
type: Recreate
selector:
matchLabels:
app.kubernetes.io/name: dns-web
template:
metadata:
labels:
app.kubernetes.io/name: dns-web
annotations:
# Prometheus scrape hints: port 5320, path /metrics/prometheus.
prometheus.io/scrape: "true"
prometheus.io/port: "5320"
prometheus.io/path: "/metrics/prometheus"
flowercore.io/healthz-auth-policy: "allow-anonymous"
spec:
serviceAccountName: dns-web
# Pod runs as non-root uid/gid 1654; fsGroup gives that group access to mounted volumes.
securityContext:
runAsNonRoot: true
runAsUser: 1654
runAsGroup: 1654
fsGroup: 1654
containers:
- name: dns-web
# Image is expected to be present on the node already (imagePullPolicy: Never).
image: localhost/fc-dns-web:v20260614-wave5-isolation-6124856
imagePullPolicy: Never
# Read-only root file system, no privilege escalation, all capabilities dropped;
# writable paths are provided by the volume mounts below.
securityContext:
readOnlyRootFilesystem: true
allowPrivilegeEscalation: false
capabilities:
drop: [ALL]
ports:
- containerPort: 5320
env:
# pfSense admin password resolved by the 1Password operator.
# `FallbackPassword` is the Slice A seam exposed by
# OptionsFallbackPasswordResolver; Slice B will replace it with
# a pull-at-runtime 1P Connect resolver once Shared.Vault ships.
- name: FlowerCore__Dns__Providers__PfSenseUnbound__FallbackPassword
valueFrom:
secretKeyRef:
name: pfsense-admin
key: password
# OIDC settings come from the dns-oidc-client Secret. Each reference is
# optional, so the pod still starts when the Secret or a key is missing.
- name: FlowerCore__Auth__Oidc__Authority
valueFrom:
secretKeyRef:
name: dns-oidc-client
key: issuer_url
optional: true
- name: FlowerCore__Auth__Oidc__ClientId
valueFrom:
secretKeyRef:
name: dns-oidc-client
key: client_id
optional: true
- name: FlowerCore__Auth__Oidc__ClientSecret
valueFrom:
secretKeyRef:
name: dns-oidc-client
key: client_secret
optional: true
# Auth.Enabled is "false" while Auth.Oidc.Enabled is "true", the same values
# as the mounted appsettings.Production.json.
- name: FlowerCore__Auth__Enabled
value: "false"
- name: FlowerCore__Auth__Oidc__Enabled
value: "true"
- name: FlowerCore__Auth__Oidc__Audience
value: "dns"
volumeMounts:
# /data is the persistent volume; /tmp and /app/logs are scratch emptyDirs.
- name: data
mountPath: /data
- name: tmp
mountPath: /tmp
- name: logs
mountPath: /app/logs
# Single file projected from the ConfigMap via subPath, mounted read-only.
- name: config
mountPath: /app/appsettings.Production.json
subPath: appsettings.Production.json
readOnly: true
resources:
requests:
cpu: 50m
memory: 96Mi
limits:
cpu: 300m
memory: 384Mi
# Both probes call GET /healthz on 5320; readiness starts after 10 s (every 10 s),
# liveness after 20 s (every 30 s).
readinessProbe:
httpGet:
path: /healthz
port: 5320
initialDelaySeconds: 10
periodSeconds: 10
livenessProbe:
httpGet:
path: /healthz
port: 5320
initialDelaySeconds: 20
periodSeconds: 30
volumes:
- name: data
persistentVolumeClaim:
claimName: dns-web-data
- name: tmp
emptyDir: {}
- name: logs
emptyDir: {}
- name: config
configMap:
name: dns-web-config
---
apiVersion: v1
kind: Service
metadata:
name: dns-web
namespace: fc-dns
spec:
selector:
app.kubernetes.io/name: dns-web
ports:
- port: 5320
targetPort: 5320
type: ClusterIP
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: dns-web
namespace: fc-dns
---
# Cluster-wide read-only role (get/list/watch) named dns-web.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: dns-web
rules:
# Core API group: namespaces, pods, services, secrets and configmaps.
# NOTE(review): as a ClusterRole rule this makes Secret contents readable in
# every namespace once bound cluster-wide — confirm the service needs that,
# or narrow it to the namespaces/resources actually required.
- apiGroups: [""]
resources: ["namespaces", "pods", "services", "secrets", "configmaps"]
verbs: ["get", "list", "watch"]
# cert-manager Certificates, read-only.
- apiGroups: ["cert-manager.io"]
resources: ["certificates"]
verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: dns-web
subjects:
- kind: ServiceAccount
name: dns-web
namespace: fc-dns
roleRef:
kind: ClusterRole
name: dns-web
apiGroup: rbac.authorization.k8s.io
---
# TLS certificate for dns.iamworkin.lan, issued by the step-ca-dns01 ClusterIssuer
# and stored in Secret dns-web-tls.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: dns-web-cert
namespace: fc-dns
spec:
secretName: dns-web-tls
issuerRef:
name: step-ca-dns01
kind: ClusterIssuer
dnsNames:
- dns.iamworkin.lan
# 720h = 30-day lifetime; renewal starts 240h (10 days) before expiry.
duration: 720h
renewBefore: 240h
---
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
name: dns-web
namespace: fc-dns
spec:
entryPoints: [websecure]
routes:
- match: Host(`dns.iamworkin.lan`)
kind: Rule
services:
- name: dns-web
port: 5320
tls:
secretName: dns-web-tls
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: dns-acme-webhook
namespace: fc-dns
---
# dns-acme-webhook: HTTPS service on 9443 configured as the ACME DNS-01 webhook
# solver (group acme.flowercore.io, solver flowercore-dns, version v1alpha1).
apiVersion: apps/v1
kind: Deployment
metadata:
name: dns-acme-webhook
namespace: fc-dns
labels:
app.kubernetes.io/name: dns-acme-webhook
app.kubernetes.io/managed-by: flowercore
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: dns-acme-webhook
template:
metadata:
labels:
app.kubernetes.io/name: dns-acme-webhook
spec:
serviceAccountName: dns-acme-webhook
# Pod runs as non-root uid/gid 1654.
securityContext:
runAsNonRoot: true
runAsUser: 1654
runAsGroup: 1654
fsGroup: 1654
containers:
- name: dns-acme-webhook
# Image is expected to be present on the node already (imagePullPolicy: Never).
image: localhost/fc-dns-acme-webhook:v20260614-wave5-isolation-6124856
imagePullPolicy: Never
# Read-only root file system, no privilege escalation, all capabilities dropped.
securityContext:
readOnlyRootFilesystem: true
allowPrivilegeEscalation: false
capabilities:
drop: [ALL]
ports:
- containerPort: 9443
name: https
env:
# Kestrel serves HTTPS on 9443 with the certificate/key mounted at /tls.
- name: ASPNETCORE_URLS
value: https://+:9443
- name: Kestrel__Certificates__Default__Path
value: /tls/tls.crt
- name: Kestrel__Certificates__Default__KeyPath
value: /tls/tls.key
# Base URL of the dns-web Service in the same namespace (plain HTTP, port 5320).
- name: FlowerCore__Dns__AcmeWebhook__ServiceBaseUrl
value: http://dns-web:5320
# Group / solver / version identifiers for the webhook API.
- name: FlowerCore__Dns__AcmeWebhook__GroupName
value: acme.flowercore.io
- name: FlowerCore__Dns__AcmeWebhook__SolverName
value: flowercore-dns
- name: FlowerCore__Dns__AcmeWebhook__Version
value: v1alpha1
volumeMounts:
# Serving certificate (read-only) plus writable scratch directories.
- name: tls
mountPath: /tls
readOnly: true
- name: tmp
mountPath: /tmp
- name: logs
mountPath: /app/logs
resources:
requests:
cpu: 25m
memory: 64Mi
limits:
cpu: 200m
memory: 256Mi
# Probes use HTTPS on the named "https" port: /readyz for readiness, /healthz for liveness.
readinessProbe:
httpGet:
scheme: HTTPS
path: /readyz
port: https
initialDelaySeconds: 5
periodSeconds: 10
timeoutSeconds: 5
livenessProbe:
httpGet:
scheme: HTTPS
path: /healthz
port: https
initialDelaySeconds: 10
periodSeconds: 20
timeoutSeconds: 5
volumes:
# The pod cannot start until Secret dns-acme-webhook-tls exists.
- name: tls
secret:
secretName: dns-acme-webhook-tls
- name: tmp
emptyDir: {}
- name: logs
emptyDir: {}
---
apiVersion: v1
kind: Service
metadata:
name: dns-acme-webhook
namespace: fc-dns
spec:
selector:
app.kubernetes.io/name: dns-acme-webhook
ports:
- port: 443
targetPort: https
name: https
type: ClusterIP
---
# Namespace-local PKI for the webhook's serving certificate, in four steps:
#   1. a self-signed Issuer,
#   2. a CA Certificate signed by it (Secret dns-acme-webhook-ca),
#   3. a CA Issuer backed by that Secret,
#   4. the serving Certificate signed by the CA (Secret dns-acme-webhook-tls).
# Step 1: self-signed bootstrap issuer.
apiVersion: cert-manager.io/v1
kind: Issuer
metadata:
name: dns-acme-webhook-selfsigned
namespace: fc-dns
spec:
selfSigned: {}
---
# Step 2: CA certificate (isCA: true), valid 43800h (about 5 years).
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: dns-acme-webhook-ca
namespace: fc-dns
spec:
secretName: dns-acme-webhook-ca
duration: 43800h
issuerRef:
name: dns-acme-webhook-selfsigned
commonName: ca.dns-acme-webhook.fc-dns
isCA: true
---
# Step 3: issuer that signs with the CA key pair stored in dns-acme-webhook-ca.
apiVersion: cert-manager.io/v1
kind: Issuer
metadata:
name: dns-acme-webhook-ca-issuer
namespace: fc-dns
spec:
ca:
secretName: dns-acme-webhook-ca
---
# Step 4: serving certificate, valid 8760h (1 year), covering the short,
# namespace-qualified and .svc names of the dns-acme-webhook Service.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: dns-acme-webhook-serving-cert
namespace: fc-dns
spec:
secretName: dns-acme-webhook-tls
duration: 8760h
issuerRef:
name: dns-acme-webhook-ca-issuer
dnsNames:
- dns-acme-webhook
- dns-acme-webhook.fc-dns
- dns-acme-webhook.fc-dns.svc
---
# Registers API group acme.flowercore.io/v1alpha1 with the Kubernetes API
# aggregation layer, served by Service dns-acme-webhook in namespace fc-dns.
apiVersion: apiregistration.k8s.io/v1
kind: APIService
metadata:
name: v1alpha1.acme.flowercore.io
annotations:
# Asks cert-manager to inject the CA bundle taken from the named Certificate
# (format: <namespace>/<certificate name>).
cert-manager.io/inject-ca-from: fc-dns/dns-acme-webhook-serving-cert
spec:
group: acme.flowercore.io
groupPriorityMinimum: 1000
service:
name: dns-acme-webhook
namespace: fc-dns
version: v1alpha1
versionPriority: 15
---
# Permission to "create" flowercore-dns resources in API group acme.flowercore.io.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: dns-acme-webhook-solver
rules:
- apiGroups: ["acme.flowercore.io"]
resources: ["flowercore-dns"]
verbs: ["create"]
---
# Grants the role above to the cert-manager ServiceAccount in the cert-manager namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: dns-acme-webhook-solver
subjects:
- kind: ServiceAccount
name: cert-manager
namespace: cert-manager
roleRef:
kind: ClusterRole
name: dns-acme-webhook-solver
apiGroup: rbac.authorization.k8s.io
---
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
name: step-ca-dns01
spec:
acme:
caBundle: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJ4RENDQVdxZ0F3SUJBZ0lSQVBZMzU3RzZvdzZ6TUFMNSs0YlMya2t3Q2dZSUtvWkl6ajBFQXdJd1FERWEKTUJnR0ExVUVDaE1SU1VGdFYyOXlhMmx1SUVGRFRVVWdRMEV4SWpBZ0JnTlZCQU1UR1VsQmJWZHZjbXRwYmlCQgpRMDFGSUVOQklGSnZiM1FnUTBFd0hoY05Nall3TXpBNE1UZ3dOekV4V2hjTk16WXdNekExTVRnd056RXhXakJBCk1Sb3dHQVlEVlFRS0V4RkpRVzFYYjNKcmFXNGdRVU5OUlNCRFFURWlNQ0FHQTFVRUF4TVpTVUZ0VjI5eWEybHUKSUVGRFRVVWdRMEVnVW05dmRDQkRRVEJaTUJNR0J5cUdTTTQ5QWdFR0NDcUdTTTQ5QXdFSEEwSUFCSjJuMDRYMQpKWm81WmRxL2kxSWR2OCtmcXdaeUF6Qmg3d2hicWowU1dzSkw4VVdSYWJDTXFZQ3M3K2RYTzB4UlN6cWt3RkRMCngrdm9vT2FpOFJnUk5oYWpSVEJETUE0R0ExVWREd0VCL3dRRUF3SUJCakFTQmdOVkhSTUJBZjhFQ0RBR0FRSC8KQWdFQk1CMEdBMVVkRGdRV0JCUm51UFBRUjZpTS9INnZPbHVpVTNTeWdheXo4akFLQmdncWhrak9QUVFEQWdOSQpBREJGQWlFQXJRSzlkWVBHbUFac2RZbmp6aXVGVlZFNU5LWlVjY2VZdkdmR0MrdExYVXNDSUF1ZEYyekpyQ1JxCjNtSzUwWlpFVC9md1RrSndpRUY0ODI0bWpQOHAxQ0tNCi0tLS0tRU5EIENFUlRJRklDQVRFLS0tLS0K
privateKeySecretRef:
name: step-ca-dns01-account-key
server: https://10.0.56.10:9443/acme/acme/directory
solvers:
- dns01:
webhook:
groupName: acme.flowercore.io
solverName: flowercore-dns

View File

@@ -1,6 +0,0 @@
# ArgoCD's bluejay-infra ApplicationSet discovers apps/* directories on main.
# The kustomization is included for local previews and single-app validation.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- fc-dns.yaml

View File

@@ -1,195 +0,0 @@
# FlowerCore.Library.Web GitOps adoption manifest.
#
# Authored from the already-live fc-library resources on 2026-06-04.
# Keep the live image tag, Service ClusterIP, and PVC volumeName unchanged so
# ArgoCD adopts in place instead of replacing the workload or data volume.
---
# 1Gi ReadWriteOnce Longhorn claim backing library-web's /data.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: library-web-data
namespace: fc-library
labels:
app.kubernetes.io/name: library-web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
argocd.argoproj.io/instance: infra-fc-library
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 1Gi
storageClassName: longhorn
volumeMode: Filesystem
# Pinned to the already-bound volume so adoption keeps the existing data;
# do not change or remove.
volumeName: pvc-2690bae2-4ee0-417a-b95f-50ec5c632b63
---
# library-web: single-replica workload listening on container port 5000.
apiVersion: apps/v1
kind: Deployment
metadata:
name: library-web
namespace: fc-library
labels:
app.kubernetes.io/name: library-web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
argocd.argoproj.io/instance: infra-fc-library
spec:
progressDeadlineSeconds: 600
replicas: 1
revisionHistoryLimit: 3
selector:
matchLabels:
app.kubernetes.io/name: library-web
# Recreate: the old pod is stopped before the new one starts, so the
# ReadWriteOnce data volume is never claimed by two pods at once.
strategy:
type: Recreate
template:
metadata:
annotations:
fc.flowercore.io/healthz-anon: "true"
fc.flowercore.io/probe-path: "/health"
# Prometheus scrape hints: port 5000, path /metrics/prometheus.
prometheus.io/path: /metrics/prometheus
prometheus.io/port: "5000"
prometheus.io/scrape: "true"
labels:
app.kubernetes.io/name: library-web
app.kubernetes.io/part-of: flowercore
spec:
containers:
# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
# All environment variables come from ConfigMap library-web-config.
- envFrom:
- configMapRef:
name: library-web-config
# Image is expected to be present on the node already (imagePullPolicy: Never).
image: localhost/fc-library-web:v20260614-regroup-f20adc1
imagePullPolicy: Never
# Liveness: GET /health on 5000 after 30 s, every 30 s, 3 failures tolerated.
livenessProbe:
failureThreshold: 3
httpGet:
path: /health
port: 5000
scheme: HTTP
initialDelaySeconds: 30
periodSeconds: 30
successThreshold: 1
timeoutSeconds: 5
name: library-web
ports:
- containerPort: 5000
name: http
protocol: TCP
# Readiness: GET /health on 5000 after 10 s, every 10 s, 6 failures tolerated.
readinessProbe:
failureThreshold: 6
httpGet:
path: /health
port: 5000
scheme: HTTP
initialDelaySeconds: 10
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
# NOTE(review): no CPU/memory requests or limits are set — confirm this is
# intentional for the in-place adoption before tightening.
resources: {}
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
volumeMounts:
- mountPath: /data
name: data
dnsPolicy: ClusterFirst
restartPolicy: Always
schedulerName: default-scheduler
# NOTE(review): empty pod securityContext — no runAsNonRoot/uid settings are applied here.
securityContext: {}
terminationGracePeriodSeconds: 30
volumes:
- name: data
persistentVolumeClaim:
claimName: library-web-data
---
# ClusterIP Service exposing library-web: Service port 80 -> container port 5000.
apiVersion: v1
kind: Service
metadata:
name: library-web
namespace: fc-library
labels:
app.kubernetes.io/name: library-web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
argocd.argoproj.io/instance: infra-fc-library
spec:
# ClusterIP is pinned to the live address so adoption does not replace the
# Service; keep clusterIP and clusterIPs in sync and unchanged.
clusterIP: 10.43.179.63
clusterIPs:
- 10.43.179.63
internalTrafficPolicy: Cluster
ipFamilies:
- IPv4
ipFamilyPolicy: SingleStack
ports:
- name: http
port: 80
protocol: TCP
targetPort: 5000
selector:
app.kubernetes.io/name: library-web
sessionAffinity: None
type: ClusterIP
---
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: library-web-tls
namespace: fc-library
labels:
app.kubernetes.io/name: library-web-tls
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
argocd.argoproj.io/instance: infra-fc-library
spec:
dnsNames:
- library.iamworkin.lan
issuerRef:
kind: ClusterIssuer
name: step-ca-acme
secretName: library-web-tls
---
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
name: library-web
namespace: fc-library
labels:
app.kubernetes.io/name: library-web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
argocd.argoproj.io/instance: infra-fc-library
spec:
entryPoints:
- websecure
routes:
- kind: Rule
match: Host(`library.iamworkin.lan`)
services:
- name: library-web
port: 80
tls:
secretName: library-web-tls
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
# When the operator decides to expose library-web publicly, uncomment + update the host,
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
#
# --- IngressRoute ---
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: library-web-public
# namespace: fc-library
# spec:
# entryPoints: [websecure]
# routes:
# - match: Host(`library.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
# kind: Rule
# middlewares:
# - name: library-web-public-profile-header # injects entitlement profile
# services:
# - name: library-web
# port: 80
# tls: {}
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).

View File

@@ -83,8 +83,6 @@ spec:
app.kubernetes.io/name: fc-llm-bridge
app.kubernetes.io/part-of: flowercore
annotations:
fc.flowercore.io/healthz-anon: "true"
fc.flowercore.io/probe-path: "/healthz"
prometheus.io/scrape: "true"
prometheus.io/port: "8080"
prometheus.io/path: "/metrics"
@@ -118,7 +116,6 @@ spec:
ports:
- containerPort: 8080
name: http
# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
env:
- name: ASPNETCORE_URLS
value: "http://+:8080"
@@ -164,33 +161,11 @@ spec:
name: fc-llm-bridge-api-keys
key: spare-2
optional: true
# Shared.Chat — GX10 Ollama via the INFRA-VLAN NodePort (10.0.56.14:30976),
# NOT the PROD-VLAN MetalLB VIP (10.0.57.201:11434). The cross-VLAN path to
# the VIP MTU-black-holes LARGE requests: Agent Zero's full prompt (458-line
# system prompt + 108 MCP tool descriptions ~150KB) times out / resets mid-
# stream there ("Connection reset by peer" in OllamaClient.ChatStreamAsync),
# which made AZ loop on "you have sent the same message again". The NodePort is
# same-VLAN as the old cluster (no inter-VLAN hop) and carries 150KB fine.
# (Small chat/embed requests still work on the VIP; only big agentic prompts broke.)
# Shared.Chat — Ollama (edge1 Pi 5 + AI HAT+, matches bridge default)
- name: FlowerCore__Chat__OllamaBaseUrl
value: "http://10.0.56.14:30976"
value: "http://10.0.57.17:11434"
- name: FlowerCore__Chat__HttpTimeout
value: "00:05:00"
# Tier routing override (Wiring A, 2026-06-14): repoint Agent Zero's
# chat (Balanced) + util (Cheap) tiers to the GX10's tool-capable
# local qwen2.5. Balanced was Anthropic Sonnet (cloud/cost, and the
# Anthropic key is currently 401); Cheap was gemma3:4b which CANNOT
# call tools (400 does not support tools) — fatal for an agentic loop.
# qwen2.5 instruct supports the tool-calling loop; GX10 has the memory.
# OllamaBaseUrl above points at the GX10 NodePort (10.0.56.14:30976).
- name: FlowerCore__Chat__ModelRouter__DefaultRoutes__Balanced__Provider
value: "Ollama"
- name: FlowerCore__Chat__ModelRouter__DefaultRoutes__Balanced__Model
value: "qwen2.5:14b"
- name: FlowerCore__Chat__ModelRouter__DefaultRoutes__Cheap__Provider
value: "Ollama"
- name: FlowerCore__Chat__ModelRouter__DefaultRoutes__Cheap__Model
value: "qwen2.5:7b"
# Shared.Chat — Anthropic
- name: FlowerCore__Chat__Anthropic__Enabled
value: "true"
@@ -306,26 +281,3 @@ spec:
port: 8080
tls:
secretName: fc-llm-bridge-tls
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
# When the operator decides to expose fc-llm-bridge publicly, uncomment + update the host,
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
#
# --- IngressRoute ---
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: fc-llm-bridge-public
# namespace: fc-llm-bridge
# spec:
# entryPoints: [websecure]
# routes:
# - match: Host(`llm-bridge.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
# kind: Rule
# middlewares:
# - name: fc-llm-bridge-public-profile-header # injects entitlement profile
# services:
# - name: fc-llm-bridge
# port: 80
# tls: {}
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).

View File

@@ -1,296 +0,0 @@
---
apiVersion: v1
kind: Namespace
metadata:
name: fc-media
labels:
app.kubernetes.io/name: fc-media
app.kubernetes.io/part-of: flowercore
---
apiVersion: onepassword.com/v1
kind: OnePasswordItem
metadata:
name: media-oidc-client
namespace: fc-media
labels:
app.kubernetes.io/name: fc-media-web
app.kubernetes.io/part-of: flowercore
spec:
itemPath: "vaults/IAmWorkin/items/media-oidc-client"
---
apiVersion: v1
kind: ConfigMap
metadata:
name: fc-media-config
namespace: fc-media
labels:
app.kubernetes.io/name: fc-media-web
app.kubernetes.io/part-of: flowercore
data:
appsettings.Production.json: |
{
"DatabaseProvider": "Sqlite",
"ConnectionStrings": {
"Sqlite": "Data Source=/data/media.db"
},
"FlowerCore": {
"Auth": {
"Enabled": true,
"Oidc": {
"Authority": "https://id.iamworkin.lan/application/o/media/",
"ClientId": "media",
"ClientSecret": "",
"Audience": "media",
"RequireHttpsMetadata": true
}
},
"Tenant": {
"JwtClaimsEnabled": false,
"DefaultTenantHosts": [ "media.iamworkin.lan" ]
}
},
"Media": {
"LibraryRoot": "/media/library",
"Sources": [
{
"Name": "BlueJayNAS Video",
"Driver": "Nfs",
"MountedPath": "/media/library",
"RemotePath": "nfs://10.0.58.3/volume1/video",
"IsEnabled": true,
"IsDefault": true,
"Notes": "Synology NFS media share mounted read-only inside the cluster."
}
],
"GeneratedRoot": "/data/generated",
"TranscodeRoot": "/data/transcodes",
"InboxPath": "/media/inbox",
"InboxScanIntervalMinutes": 5,
"ScanOnStartup": false,
"ComputeChecksums": false,
"FfmpegCommand": "ffmpeg",
"FfprobeCommand": "ffprobe",
"Hls": {
"MaxConcurrentJobs": 1
},
"DefaultViewerName": "BlueJay",
"Dlna": {
"IsEnabled": true,
"MulticastAddress": "239.255.255.250",
"Port": 1900,
"DiscoveryTimeoutSeconds": 2,
"DescriptionFetchTimeoutSeconds": 2,
"MaxResponsesPerSearchTarget": 32,
"SearchTargets": [
"urn:schemas-upnp-org:device:MediaRenderer:1",
"urn:schemas-upnp-org:device:MediaServer:1"
]
}
}
}
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: fc-media-data
namespace: fc-media
labels:
app.kubernetes.io/name: fc-media-web
app.kubernetes.io/part-of: flowercore
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 20Gi
storageClassName: longhorn
---
# fc-media-web: single-replica media web workload listening on 5200.
apiVersion: apps/v1
kind: Deployment
metadata:
name: fc-media-web
namespace: fc-media
labels:
app: fc-media-web
app.kubernetes.io/name: fc-media-web
app.kubernetes.io/part-of: flowercore
spec:
replicas: 1
# Recreate: the old pod is stopped before the new one starts, so the
# ReadWriteOnce data volume is never claimed by two pods at once.
strategy:
type: Recreate
# Pods are selected by the plain "app" label, not app.kubernetes.io/name.
selector:
matchLabels:
app: fc-media-web
template:
metadata:
labels:
app: fc-media-web
app.kubernetes.io/name: fc-media-web
app.kubernetes.io/part-of: flowercore
annotations:
# Prometheus scrape hints: port 5200, path /metrics.
prometheus.io/scrape: "true"
prometheus.io/port: "5200"
prometheus.io/path: "/metrics"
flowercore.io/healthz-auth-policy: "allow-anonymous"
spec:
# Pinned to the node whose hostname label is rke2-server.
nodeSelector:
kubernetes.io/hostname: rke2-server
containers:
- name: fc-media-web
# Image is expected to be present on the node already (imagePullPolicy: Never).
image: localhost/fc-media-web:v20260604-oidc-proper
imagePullPolicy: Never
ports:
- containerPort: 5200
name: http
env:
- name: ASPNETCORE_ENVIRONMENT
value: Production
- name: ASPNETCORE_URLS
value: http://+:5200
# Auth and OIDC are both switched on for this service.
- name: FlowerCore__Auth__Enabled
value: "true"
- name: FlowerCore__Auth__Oidc__Enabled
value: "true"
- name: FlowerCore__Auth__Oidc__Audience
value: "media"
# OIDC client settings come from the media-oidc-client Secret. Each reference
# is optional, so the pod still starts when the Secret or a key is missing.
- name: FlowerCore__Auth__Oidc__ClientId
valueFrom:
secretKeyRef:
name: media-oidc-client
key: client_id
optional: true
- name: FlowerCore__Auth__Oidc__ClientSecret
valueFrom:
secretKeyRef:
name: media-oidc-client
key: client_secret
optional: true
- name: FlowerCore__Auth__Oidc__Authority
valueFrom:
secretKeyRef:
name: media-oidc-client
key: issuer_url
optional: true
resources:
requests:
cpu: 500m
memory: 1Gi
limits:
cpu: "4"
memory: 4Gi
volumeMounts:
# Single file projected from the ConfigMap via subPath, mounted read-only.
- name: config
mountPath: /app/appsettings.Production.json
subPath: appsettings.Production.json
readOnly: true
# /data is the persistent volume; /data/transcodes is a separate NFS mount nested inside it.
- name: data
mountPath: /data
- name: transcodes
mountPath: /data/transcodes
# Library share is mounted read-only; the inbox share is writable.
- name: media-library
mountPath: /media/library
readOnly: true
- name: media-inbox
mountPath: /media/inbox
# All three probes call GET /healthz on 5200 and send "X-Forwarded-Proto: https".
# NOTE(review): presumably the header keeps the app from treating the plain-HTTP
# probe as an insecure request — confirm against the app's forwarded-header handling.
# Startup: up to 18 attempts, 10 s apart, before the other probes take over.
startupProbe:
httpGet:
path: /healthz
port: 5200
httpHeaders:
- name: X-Forwarded-Proto
value: https
failureThreshold: 18
periodSeconds: 10
readinessProbe:
httpGet:
path: /healthz
port: 5200
httpHeaders:
- name: X-Forwarded-Proto
value: https
initialDelaySeconds: 5
periodSeconds: 10
livenessProbe:
httpGet:
path: /healthz
port: 5200
httpHeaders:
- name: X-Forwarded-Proto
value: https
initialDelaySeconds: 30
periodSeconds: 30
volumes:
- name: config
configMap:
name: fc-media-config
- name: data
persistentVolumeClaim:
claimName: fc-media-data
# The three NFS volumes below are all served by 10.0.58.3.
- name: transcodes
nfs:
server: 10.0.58.3
path: /volume1/kubernetes/fc-media-transcodes
- name: media-inbox
nfs:
server: 10.0.58.3
path: /volume1/kubernetes/fc-media-inbox
- name: media-library
nfs:
server: 10.0.58.3
path: /volume1/video
readOnly: true
---
apiVersion: v1
kind: Service
metadata:
name: fc-media-web
namespace: fc-media
labels:
app: fc-media-web
app.kubernetes.io/name: fc-media-web
app.kubernetes.io/part-of: flowercore
spec:
type: ClusterIP
selector:
app: fc-media-web
ports:
- port: 5200
targetPort: 5200
protocol: TCP
name: http
---
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: fc-media-tls
namespace: fc-media
labels:
app.kubernetes.io/name: fc-media-web
app.kubernetes.io/part-of: flowercore
spec:
secretName: fc-media-tls
issuerRef:
name: step-ca-acme
kind: ClusterIssuer
dnsNames:
- media.iamworkin.lan
---
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
name: fc-media-web
namespace: fc-media
labels:
app.kubernetes.io/name: fc-media-web
app.kubernetes.io/part-of: flowercore
spec:
entryPoints:
- websecure
routes:
- match: Host(`media.iamworkin.lan`)
kind: Rule
services:
- name: fc-media-web
port: 5200
tls:
secretName: fc-media-tls

View File

@@ -1,6 +0,0 @@
# ArgoCD's bluejay-infra ApplicationSet discovers apps/* directories on main.
# The kustomization is included for local previews and single-app validation.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- fc-media.yaml

View File

@@ -30,26 +30,3 @@ spec:
port: 80
tls:
secretName: menuboard-web-tls
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
# When the operator decides to expose menuboard-web publicly, uncomment + update the host,
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
#
# --- IngressRoute ---
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: menuboard-web-public
# namespace: fc-menuboard
# spec:
# entryPoints: [websecure]
# routes:
# - match: Host(`menuboard.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
# kind: Rule
# middlewares:
# - name: menuboard-web-public-profile-header # injects entitlement profile
# services:
# - name: menuboard-web
# port: 80
# tls: {}
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).

View File

@@ -41,8 +41,6 @@ spec:
labels:
app: messageboard-web
annotations:
fc.flowercore.io/healthz-anon: "true"
fc.flowercore.io/probe-path: "/health"
prometheus.io/scrape: "true"
prometheus.io/port: "8080"
prometheus.io/path: "/metrics/prometheus"
@@ -54,7 +52,6 @@ spec:
ports:
- containerPort: 8080
name: http
# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
envFrom:
- configMapRef:
name: messageboard-web-config
@@ -144,26 +141,3 @@ spec:
port: 80
tls:
secretName: messageboard-web-tls
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
# When the operator decides to expose messageboard-web publicly, uncomment + update the host,
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
#
# --- IngressRoute ---
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: messageboard-web-public
# namespace: fc-messageboard
# spec:
# entryPoints: [websecure]
# routes:
# - match: Host(`messageboard.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
# kind: Rule
# middlewares:
# - name: messageboard-web-public-profile-header # injects entitlement profile
# services:
# - name: messageboard-web
# port: 80
# tls: {}
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).

View File

@@ -30,26 +30,3 @@ spec:
port: 5300
tls:
secretName: mysql-web-tls
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
# When the operator decides to expose mysql-web publicly, uncomment + update the host,
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
#
# --- IngressRoute ---
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: mysql-web-public
# namespace: fc-mysql
# spec:
# entryPoints: [websecure]
# routes:
# - match: Host(`mysql.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
# kind: Rule
# middlewares:
# - name: mysql-web-public-profile-header # injects entitlement profile
# services:
# - name: mysql-web
# port: 80
# tls: {}
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).

View File

@@ -1,33 +0,0 @@
# Certificate for network.iamworkin.lan.
#
# Preflight gate: network.iamworkin.lan must resolve to 10.0.56.200 before this
# Certificate is synced. step-ca ACME cannot see the CoreDNS wildcard
# (*.iamworkin.lan -> 10.0.56.200) — it does an HTTP-01 challenge against the
# resolved host. The CoreDNS wildcard template covers network.iamworkin.lan, so
# resolution exists fleet-wide; do NOT add a pfSense DNS override (this plane is
# read-only and holds no pfSense creds). If ACME backs off, confirm the wildcard
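# resolves first (feedback_pfsense_dns_required_for_acme).
#
# NOTE(review): the text above says step-ca ACME cannot see the CoreDNS
# wildcard, yet relies on that same wildcard for resolution and forbids a
# pfSense override, while the referenced note is named
# "pfsense_dns_required_for_acme". Confirm which resolver step-ca actually uses
# and reword this preflight gate so the two statements agree.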
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: fc-network-web-tls
namespace: fc-network
labels:
app: fc-network-web
app.kubernetes.io/name: fc-network-web
app.kubernetes.io/component: web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
annotations:
flowercore.io/dns-preflight: "network.iamworkin.lan must resolve to 10.0.56.200 (CoreDNS wildcard) before ACME sync"
spec:
secretName: fc-network-web-tls
issuerRef:
name: step-ca-acme
kind: ClusterIssuer
dnsNames:
- network.iamworkin.lan
duration: 720h
renewBefore: 240h

View File

@@ -1,145 +0,0 @@
# FlowerCore.Network.Web — the pfSense automation plane (read-only Phase 0, ADR-189).
#
# Phase 0 is READ-ONLY: the service holds NO pfSense credentials and has no write
# path to pfSense anywhere. The only mutating endpoint is POST /api/v1/snapshots,
# which ingests a config.xml the noc1 exporter collected READ-ONLY and stores it
# (redacted projection) on the PVC. Auth ships gate-OFF.
#
# Image localhost/fc-network-web:<tag> is built by FlowerCore.Network
# scripts/deploy-k8s.sh and imported to all schedulable RKE2 nodes (rke2-server +
# rke2-agent1; agent2 retired). imagePullPolicy: Never — bump the tag here, sync
# ArgoCD, then scale 0->1 for the RWO PVC and verify the running pod imageID.
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: fc-network-web
namespace: fc-network
labels:
app: fc-network-web
app.kubernetes.io/name: fc-network-web
app.kubernetes.io/component: web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
annotations:
flowercore.io/traceability-standard: k8s-pod-ownership-and-traceability-standard
spec:
replicas: 1
revisionHistoryLimit: 3
# RWO PVC: a single replica can't be surged (the new pod can't mount the volume
# while the old one holds it). maxSurge 0 / maxUnavailable 1 is the rwo-safe shape;
# for image bumps scale 0->1 rather than rollout restart.
strategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 0
maxUnavailable: 1
selector:
matchLabels:
app: fc-network-web
template:
metadata:
labels:
app: fc-network-web
app.kubernetes.io/name: fc-network-web
app.kubernetes.io/component: web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
annotations:
fc.flowercore.io/healthz-anon: "true"
fc.flowercore.io/probe-path: "/healthz"
prometheus.io/scrape: "true"
prometheus.io/port: "5340"
prometheus.io/path: "/metrics/prometheus"
flowercore.io/audit-trace-id: "runtime-activity-trace"
spec:
# Pod-level fsGroup matches the container's runAsUser/runAsGroup (1654) below,
# so mounted volumes are group-accessible to the non-root process.
securityContext:
fsGroup: 1654
# Only re-own volume contents when the volume root's ownership does not match.
fsGroupChangePolicy: OnRootMismatch
containers:
- name: web
image: localhost/fc-network-web:v20260612-0b5b049
imagePullPolicy: Never
ports:
- name: http
containerPort: 5340
# fc-safe-to-expose: read-only plane, auth gate-OFF; X-Forwarded-Proto handled
# by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
env:
# Listen port must stay in sync with containerPort and the probe ports (5340).
- name: ASPNETCORE_URLS
value: "http://+:5340"
- name: ASPNETCORE_ENVIRONMENT
value: "Production"
- name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
value: "false"
# HOME points at the PVC mount (/data). The root filesystem is read-only (see
# the container securityContext), so presumably anything written under $HOME
# must land on a writable volume — confirm against the app's behavior.
- name: HOME
value: "/data"
- name: FlowerCore__Auth__Enabled
value: "false"
- name: FlowerCore__Database__Provider
value: "Sqlite"
- name: FlowerCore__Database__ConnectionStrings__Sqlite
value: "Data Source=/data/network.db"
# Snapshot store + intended-model paths MUST be absolute on the PVC —
# the default is relative to the read-only content root.
- name: FlowerCore__Network__SnapshotStore__RootDirectory
value: "/data/snapshots"
- name: FlowerCore__Network__SnapshotStore__UseGitHistory
value: "true"
- name: FlowerCore__Network__IntendedModel__FilePath
value: "/data/intended.json"
resources:
requests:
cpu: 50m
memory: 128Mi
limits:
cpu: 500m
memory: 512Mi
# Startup budget: first probe after 5s, then up to 30 failures spaced 5s apart
# before the kubelet restarts the container.
startupProbe:
httpGet:
path: /healthz
port: 5340
initialDelaySeconds: 5
periodSeconds: 5
failureThreshold: 30
# Readiness and liveness both use /healthz on 5340, matching the
# fc.flowercore.io/probe-path pod annotation.
readinessProbe:
httpGet:
path: /healthz
port: 5340
periodSeconds: 10
failureThreshold: 3
livenessProbe:
httpGet:
path: /healthz
port: 5340
initialDelaySeconds: 30
periodSeconds: 30
failureThreshold: 3
# Hardened container: non-root UID/GID 1654, no privilege escalation, read-only
# root filesystem, and every Linux capability dropped.
securityContext:
runAsNonRoot: true
runAsUser: 1654
runAsGroup: 1654
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
capabilities:
drop:
- ALL
# With a read-only root filesystem, every writable path is an explicit mount:
# /data is the persistent PVC, /tmp and /app/logs are emptyDir scratch space.
volumeMounts:
- name: data
mountPath: /data
- name: tmp
mountPath: /tmp
- name: logs
mountPath: /app/logs
volumes:
- name: data
persistentVolumeClaim:
claimName: fc-network-web-data
# emptyDir contents are discarded when the pod is removed.
- name: tmp
emptyDir: {}
- name: logs
emptyDir: {}

View File

@@ -1,32 +0,0 @@
# LAN ingress for FlowerCore.Network Web (network.iamworkin.lan).
#
# RKE2 Traefik has no built-in ACME resolver; TLS certificate ownership stays in
# cert-manager Certificate/fc-network-web-tls. Phase 0 is read-only but the POST
# ingest endpoint is genuinely needed by the noc1 exporter, so this route allows
# all methods (no GET/HEAD-only restriction like fc-dns) — the service itself has
# NO pfSense write path, so allowing POST here only reaches the local snapshot
# ingest.
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
name: fc-network-web
namespace: fc-network
labels:
app: fc-network-web
app.kubernetes.io/name: fc-network-web
app.kubernetes.io/component: web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
spec:
entryPoints:
- websecure
routes:
- match: Host(`network.iamworkin.lan`)
kind: Rule
services:
- name: fc-network-web
port: 80
tls:
secretName: fc-network-web-tls

View File

@@ -1,11 +0,0 @@
# ArgoCD's bluejay-infra ApplicationSet discovers apps/* directories on main.
# The kustomization is included for local previews and single-app validation.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- namespace.yaml
- pvc.yaml
- deployment-web.yaml
- service-web.yaml
- certificate-web.yaml
- ingressroute-web.yaml

View File

@@ -1,8 +0,0 @@
apiVersion: v1
kind: Namespace
metadata:
name: fc-network
labels:
app.kubernetes.io/part-of: flowercore
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra

View File

@@ -1,27 +0,0 @@
# Persistent store for FlowerCore.Network (read-only pfSense automation plane).
#
# Holds the SQLite snapshot INDEX db (network.db) AND the on-box snapshot store
# (data/snapshots): full-fidelity raw config.xml + redacted inventory sidecars +
# an on-box git history. Full-fidelity config is on-box ONLY (this PVC); the
# service DB / REST / MCP / UI only ever surface the REDACTED projection.
# RWO — single replica, scale 0->1 for updates (never rollout restart).
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: fc-network-web-data
namespace: fc-network
labels:
app: fc-network-web
app.kubernetes.io/name: fc-network-web
app.kubernetes.io/component: web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
spec:
accessModes:
- ReadWriteOnce
storageClassName: longhorn
resources:
requests:
storage: 2Gi

View File

@@ -1,21 +0,0 @@
apiVersion: v1
kind: Service
metadata:
name: fc-network-web
namespace: fc-network
labels:
app: fc-network-web
app.kubernetes.io/name: fc-network-web
app.kubernetes.io/component: web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
spec:
selector:
app: fc-network-web
ports:
- name: http
port: 80
targetPort: 5340
type: ClusterIP

View File

@@ -30,26 +30,3 @@ spec:
port: 5400
tls:
secretName: php-web-tls
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
# When the operator decides to expose php-web publicly, uncomment + update the host,
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
#
# --- IngressRoute ---
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: php-web-public
# namespace: fc-php
# spec:
# entryPoints: [websecure]
# routes:
# - match: Host(`php.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
# kind: Rule
# middlewares:
# - name: php-web-public-profile-header # injects entitlement profile
# services:
# - name: php-web
# port: 80
# tls: {}
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).

View File

@@ -30,26 +30,3 @@ spec:
port: 80
tls:
secretName: presentations-web-tls
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
# When the operator decides to expose presentations-web publicly, uncomment + update the host,
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
#
# --- IngressRoute ---
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: presentations-web-public
# namespace: fc-presentations
# spec:
# entryPoints: [websecure]
# routes:
# - match: Host(`presentations.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
# kind: Rule
# middlewares:
# - name: presentations-web-public-profile-header # injects entitlement profile
# services:
# - name: presentations-web
# port: 80
# tls: {}
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).

View File

@@ -1,196 +0,0 @@
# FlowerCore.Retail.Web GitOps adoption manifest.
#
# Authored from the already-live fc-retail resources on 2026-06-04.
# Keep the live image tag, Service ClusterIP, and PVC volumeName unchanged so
# ArgoCD adopts in place instead of replacing the workload or data volume.
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: retail-web-data
namespace: fc-retail
labels:
app.kubernetes.io/name: retail-web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
argocd.argoproj.io/instance: infra-fc-retail
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 1Gi
storageClassName: longhorn
volumeMode: Filesystem
volumeName: pvc-3d40b336-eab4-41b3-812c-d5e9413ce0ab
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: retail-web
namespace: fc-retail
labels:
app.kubernetes.io/name: retail-web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
argocd.argoproj.io/instance: infra-fc-retail
spec:
progressDeadlineSeconds: 600
replicas: 1
revisionHistoryLimit: 3
selector:
matchLabels:
app.kubernetes.io/name: retail-web
strategy:
type: Recreate
# Pod template for retail-web, captured from the live object for in-place
# ArgoCD adoption (see the header at the top of this manifest).
template:
metadata:
annotations:
fc.flowercore.io/healthz-anon: "true"
# NOTE(review): this annotation advertises /healthz, but the liveness and
# readiness probes below request /health — confirm which path the app serves
# and align the annotation and probes.
fc.flowercore.io/probe-path: "/healthz"
# NOTE(review): restartedAt is the stamp `kubectl rollout restart` writes; it was
# presumably carried over from the live object during adoption — confirm it
# should remain pinned in Git.
kubectl.kubernetes.io/restartedAt: "2026-06-02T01:34:08-05:00"
prometheus.io/path: /metrics/prometheus
prometheus.io/port: "5000"
prometheus.io/scrape: "true"
labels:
app.kubernetes.io/name: retail-web
app.kubernetes.io/part-of: flowercore
spec:
containers:
# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
# All application settings come from the retail-web-config ConfigMap.
- envFrom:
- configMapRef:
name: retail-web-config
image: localhost/fc-retail-web:v20260614-regroup-6d81424
imagePullPolicy: Never
livenessProbe:
failureThreshold: 3
httpGet:
path: /health
port: 5000
scheme: HTTP
initialDelaySeconds: 30
periodSeconds: 30
successThreshold: 1
timeoutSeconds: 5
name: retail-web
ports:
- containerPort: 5000
name: http
protocol: TCP
readinessProbe:
failureThreshold: 6
httpGet:
path: /health
port: 5000
scheme: HTTP
initialDelaySeconds: 10
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
# NOTE(review): no CPU/memory requests or limits are set for this container.
resources: {}
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
volumeMounts:
- mountPath: /data
name: data
dnsPolicy: ClusterFirst
restartPolicy: Always
schedulerName: default-scheduler
# NOTE(review): empty pod securityContext and no container securityContext —
# no non-root or read-only-rootfs settings are declared here.
securityContext: {}
terminationGracePeriodSeconds: 30
volumes:
- name: data
persistentVolumeClaim:
claimName: retail-web-data
---
apiVersion: v1
kind: Service
metadata:
name: retail-web
namespace: fc-retail
labels:
app.kubernetes.io/name: retail-web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
argocd.argoproj.io/instance: infra-fc-retail
spec:
clusterIP: 10.43.239.8
clusterIPs:
- 10.43.239.8
internalTrafficPolicy: Cluster
ipFamilies:
- IPv4
ipFamilyPolicy: SingleStack
ports:
- name: http
port: 80
protocol: TCP
targetPort: 5000
selector:
app.kubernetes.io/name: retail-web
sessionAffinity: None
type: ClusterIP
---
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: retail-web-tls
namespace: fc-retail
labels:
app.kubernetes.io/name: retail-web-tls
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
argocd.argoproj.io/instance: infra-fc-retail
spec:
dnsNames:
- retail.iamworkin.lan
issuerRef:
kind: ClusterIssuer
name: step-ca-acme
secretName: retail-web-tls
---
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
name: retail-web
namespace: fc-retail
labels:
app.kubernetes.io/name: retail-web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
argocd.argoproj.io/instance: infra-fc-retail
spec:
entryPoints:
- websecure
routes:
- kind: Rule
match: Host(`retail.iamworkin.lan`)
services:
- name: retail-web
port: 80
tls:
secretName: retail-web-tls
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
# When the operator decides to expose retail-web publicly, uncomment + update the host,
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
#
# --- IngressRoute ---
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: retail-web-public
# namespace: fc-retail
# spec:
# entryPoints: [websecure]
# routes:
# - match: Host(`retail.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
# kind: Rule
# middlewares:
# - name: retail-web-public-profile-header # injects entitlement profile
# services:
# - name: retail-web
# port: 80
# tls: {}
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).

View File

@@ -30,26 +30,3 @@ spec:
port: 80
tls:
secretName: scoreboard-web-tls
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
# When the operator decides to expose scoreboard-web publicly, uncomment + update the host,
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
#
# --- IngressRoute ---
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: scoreboard-web-public
# namespace: fc-scoreboard
# spec:
# entryPoints: [websecure]
# routes:
# - match: Host(`scoreboard.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
# kind: Rule
# middlewares:
# - name: scoreboard-web-public-profile-header # injects entitlement profile
# services:
# - name: scoreboard-web
# port: 80
# tls: {}
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).

View File

@@ -37,26 +37,3 @@ spec:
port: 80
tls:
secretName: segmentdisplay-web-tls
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
# When the operator decides to expose segmentdisplay-web publicly, uncomment + update the host,
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
#
# --- IngressRoute ---
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: segmentdisplay-web-public
# namespace: fc-segmentdisplay
# spec:
# entryPoints: [websecure]
# routes:
# - match: Host(`segmentdisplay.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
# kind: Rule
# middlewares:
# - name: segmentdisplay-web-public-profile-header # injects entitlement profile
# services:
# - name: segmentdisplay-web
# port: 80
# tls: {}
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).

View File

@@ -1,2 +1,3 @@
# On an HDMI change event, start the settle service: it waits 2s for DRM to settle, redeclares display capabilities, then restarts Chromium.
SUBSYSTEM=="drm", KERNEL=="card?-HDMI-A-?", ACTION=="change", RUN+="/usr/bin/systemctl start flowercore-signage-player-pi-hdmi.service"
# Restart kiosk and redeclare capabilities when HDMI connect/disconnect changes DRM state.
SUBSYSTEM=="drm", KERNEL=="card?-HDMI-A-?", ACTION=="change", RUN+="/usr/bin/systemctl restart flowercore-signage-player-pi.service"
SUBSYSTEM=="drm", KERNEL=="card?-HDMI-A-?", ACTION=="change", RUN+="/usr/bin/systemctl start flowercore-signage-detect-display.service"

View File

@@ -1,22 +0,0 @@
#!/usr/bin/env bats
# Static checks for the display-detection script: each test reads the script's
# source text and asserts that expected substrings are present. The script
# itself is never executed.
setup() {
# App root is the parent of the directory that holds this test file.
APP_ROOT="$(cd "$BATS_TEST_DIRNAME/.." && pwd)"
DETECT="$APP_ROOT/scripts/fc-signage-detect-display"
}
# The script must contain the disconnected-profile marker and its log message.
@test "display detection emits graceful disconnected profile when no hdmi connector is present" {
script="$(cat "$DETECT")"
[[ "$script" == *"displayConnected: false"* ]]
[[ "$script" == *"No HDMI display detected"* ]]
}
# Looks for the EDID parser, the kmsprint fallback, both capability endpoints,
# and the local log file name. \${NODE_ID} is escaped so the pattern matches the
# literal placeholder text in the script instead of expanding a variable here.
@test "display detection parses edid, falls back to kmsprint, and logs endpoint failures locally" {
script="$(cat "$DETECT")"
[[ "$script" == *"edid-decode"* ]]
[[ "$script" == *"HDR (Static|Dynamic) Metadata Block"* ]]
[[ "$script" == *"kmsprint"* ]]
[[ "$script" == *"/api/v1/nodes/\${NODE_ID}/capabilities"* ]]
[[ "$script" == *"/api/v1/displays/\${NODE_ID}/capability-profile"* ]]
[[ "$script" == *"capabilities.log"* ]]
}

View File

@@ -1,64 +0,0 @@
#!/usr/bin/env bats
# Static checks for the bootstrap and certificate-renewal scripts: every test
# inspects the script source text (via cat or grep) for expected fragments.
# Neither script is executed.
setup() {
# App root is the parent of the directory that holds this test file.
APP_ROOT="$(cd "$BATS_TEST_DIRNAME/.." && pwd)"
BOOTSTRAP="$APP_ROOT/scripts/flowercore-signage-bootstrap.sh"
RENEW="$APP_ROOT/scripts/flowercore-signage-renew-cert.sh"
}
# Expects a guard keyed on a non-empty node JSON and client.p12, plus an
# "already enrolled" message.
# NOTE(review): the "exit 0" match succeeds on any exit 0 in the script, not
# necessarily the one inside that guard.
@test "bootstrap is idempotent when node is already enrolled" {
script="$(cat "$BOOTSTRAP")"
[[ "$script" == *'[[ -s "$NODE_JSON" && -s "$CERT_DIR/client.p12" ]]'* ]]
[[ "$script" == *"already enrolled"* ]]
[[ "$script" == *"exit 0"* ]]
}
# Checks only that the identifiers and the 16-character cut appear in the source.
@test "bootstrap generates a stable node uuid and machine id" {
script="$(cat "$BOOTSTRAP")"
[[ "$script" == *"uuidgen"* ]]
[[ "$script" == *"nodeUuid"* ]]
[[ "$script" == *"machineId"* ]]
[[ "$script" == *"cut -c1-16"* ]]
}
# Uses grep -q on the file directly; the platform string is matched with its
# surrounding double quotes.
@test "bootstrap posts to the canonical register endpoint" {
grep -q '/api/v1/nodes/register' "$BOOTSTRAP"
grep -q '"linux-arm64-pi"' "$BOOTSTRAP"
}
# \$attempt is escaped so the literal text "$attempt" is matched in the source.
@test "bootstrap retries registration once for first-call races" {
script="$(cat "$BOOTSTRAP")"
[[ "$script" == *"for attempt in 1 2"* ]]
[[ "$script" == *"register attempt \$attempt returned"* ]]
[[ "$script" == *"sleep 5"* ]]
}
# "+ 1800" and "sleep 15" are presumably the approval deadline and poll interval
# in seconds — confirm against the bootstrap script.
@test "bootstrap supports setup-code approval with manual polling fallback" {
script="$(cat "$BOOTSTRAP")"
[[ "$script" == *"signage-setup-code"* ]]
[[ "$script" == *"approve-via-setup-code"* ]]
[[ "$script" == *"+ 1800"* ]]
[[ "$script" == *"sleep 15"* ]]
}
# The subject pattern is single-quoted, so ${NODE_ID} is matched literally.
@test "bootstrap generates an ecdsa p256 csr for the signage pi subject" {
script="$(cat "$BOOTSTRAP")"
[[ "$script" == *"ecparam -genkey -name prime256v1"* ]]
[[ "$script" == *'/CN=${NODE_ID}/O=FlowerCore/OU=SignagePlayer-Pi'* ]]
}
# Verifies that both chmod modes appear somewhere in the script; it does not
# check which file each mode is applied to.
@test "bootstrap writes pkcs12 bundle with restrictive permissions" {
script="$(cat "$BOOTSTRAP")"
[[ "$script" == *"openssl pkcs12 -export"* ]]
[[ "$script" == *"client.p12.pass"* ]]
[[ "$script" == *"chmod 0640"* ]]
[[ "$script" == *"chmod 0600"* ]]
}
# The -checkend pattern is single-quoted, so the arithmetic text $((30*24*3600))
# is matched literally rather than evaluated here.
@test "renewal only calls renew endpoint inside the thirty-day window and swaps atomically" {
script="$(cat "$RENEW")"
[[ "$script" == *'-checkend $((30*24*3600))'* ]]
[[ "$script" == *"/api/v1/nodes/\${NODE_ID}/renew"* ]]
[[ "$script" == *"client.key.new"* ]]
[[ "$script" == *'mv "$CERT_DIR/client.p12.new" "$CERT_DIR/client.p12"'* ]]
}

View File

@@ -1,68 +0,0 @@
#!/usr/bin/env bats
# Static checks for the player systemd unit, the HDMI udev rule, the Chromium
# policy file, and the launch/prelaunch scripts. Files are read and matched as
# text; no unit or script is started.
setup() {
# App root is the parent of the directory that holds this test file.
APP_ROOT="$(cd "$BATS_TEST_DIRNAME/.." && pwd)"
}
@test "player unit exists" {
[ -f "$APP_ROOT/systemd/flowercore-signage-player-pi.service" ]
}
# Substring checks for the service type and the restart/start-limit settings.
@test "player unit uses simple chromium service with restart backoff" {
unit="$(cat "$APP_ROOT/systemd/flowercore-signage-player-pi.service")"
[[ "$unit" == *"Type=simple"* ]]
[[ "$unit" == *"Restart=always"* ]]
[[ "$unit" == *"RestartSec=10s"* ]]
[[ "$unit" == *"StartLimitBurst=5"* ]]
[[ "$unit" == *"StartLimitIntervalSec=300s"* ]]
}
# Anchored (^...$) greps: each directive must be a whole line with exactly this value.
@test "player unit caps chromium memory at two gigabytes" {
grep -q '^MemoryMax=2G$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
grep -q '^MemoryHigh=1500M$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
}
@test "player unit condition-gates startup on identity and p12 certificate" {
grep -q '^ConditionPathExists=/etc/flowercore/signage-node.json$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
grep -q '^ConditionPathExists=/etc/fc-signage-player/client.p12$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
}
@test "player unit runs prelaunch checks before chromium" {
grep -q '^ExecStartPre=/usr/local/bin/flowercore-signage-prelaunch.sh$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
grep -q '^ExecStart=/usr/local/bin/flowercore-signage-launch.sh$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
}
# The rule must start the -hdmi settle service and must NOT restart the player
# service directly (the last check is a negative match).
@test "hdmi udev rule routes through the two-second settle service" {
rule="$(cat "$APP_ROOT/systemd/99-flowercore-signage-hdmi.rules")"
[[ "$rule" == *'KERNEL=="card?-HDMI-A-?"'* ]]
[[ "$rule" == *"systemctl start flowercore-signage-player-pi-hdmi.service"* ]]
[[ "$rule" != *"systemctl restart flowercore-signage-player-pi.service"* ]]
}
# NOTE(review): these checks confirm that the three commands are present in the
# responder, not that they appear in the order the test name describes.
@test "hdmi responder settles, declares display, then restarts chromium" {
responder="$(cat "$APP_ROOT/scripts/flowercore-signage-hdmi-respond.sh")"
[[ "$responder" == *"sleep 2"* ]]
[[ "$responder" == *"systemctl start flowercore-signage-detect-display.service"* ]]
[[ "$responder" == *"systemctl restart flowercore-signage-player-pi.service"* ]]
}
# Skipped when jq is unavailable; jq -e exits non-zero unless all three policy
# keys are exactly false (or if the file is not valid JSON).
@test "chromium policy json is valid and disables credential prompts" {
command -v jq >/dev/null || skip "jq not installed"
jq -e '.AutofillAddressEnabled == false and .AutofillCreditCardEnabled == false and .PasswordManagerEnabled == false' \
"$APP_ROOT/chromium-policies/flowercore-signage.json" >/dev/null
}
# URL patterns are single-quoted so ${NODE_ID} and ${CERT_THUMB} match literally.
@test "launch script tries embed URL and logs bare-player fallback" {
launch="$(cat "$APP_ROOT/scripts/flowercore-signage-launch.sh")"
[[ "$launch" == *'/player/${NODE_ID}/embed?token=${CERT_THUMB}'* ]]
[[ "$launch" == *"url-divergence.log"* ]]
[[ "$launch" == *'/player/${NODE_ID}?token=${CERT_THUMB}'* ]]
}
# Confirms that the prelaunch script mentions each required path and contains a
# failing exit; it does not verify which check triggers the exit.
@test "prelaunch script validates required node and cert files" {
prelaunch="$(cat "$APP_ROOT/scripts/flowercore-signage-prelaunch.sh")"
[[ "$prelaunch" == *"/etc/flowercore/signage-node.json"* ]]
[[ "$prelaunch" == *"/etc/fc-signage-player/client.p12"* ]]
[[ "$prelaunch" == *"/etc/fc-signage-player/client.p12.pass"* ]]
[[ "$prelaunch" == *"exit 1"* ]]
}

View File

@@ -46,26 +46,3 @@ spec:
services:
- name: signage-web
port: 5190
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
# When the operator decides to expose signage-web publicly, uncomment + update the host,
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
#
# --- IngressRoute ---
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: signage-web-public
# namespace: fc-signage
# spec:
# entryPoints: [websecure]
# routes:
# - match: Host(`signage.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
# kind: Rule
# middlewares:
# - name: signage-web-public-profile-header # injects entitlement profile
# services:
# - name: signage-web
# port: 80
# tls: {}
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).

View File

@@ -97,7 +97,6 @@ spec:
containers:
- name: piper
image: rhasspy/wyoming-piper:latest
# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
env:
- name: PYTHONHTTPSVERIFY
value: "0"
@@ -524,8 +523,6 @@ spec:
app.kubernetes.io/name: ttsreader-web
app.kubernetes.io/part-of: flowercore
annotations:
fc.flowercore.io/healthz-anon: "true"
fc.flowercore.io/probe-path: "/health"
prometheus.io/scrape: "true"
prometheus.io/port: "5217"
prometheus.io/path: "/metrics"
@@ -535,7 +532,7 @@ spec:
fsGroupChangePolicy: OnRootMismatch
containers:
- name: web
image: localhost/fc-ttsreader-web:v20260614-wave5-help-2f096e3
image: localhost/fc-ttsreader-web:v20260506-phase6
imagePullPolicy: Never
ports:
- containerPort: 5217
@@ -557,16 +554,10 @@ spec:
value: "/data/chapter-context.db"
- name: TtsReader__Jobs__Root
value: "/data/jobs"
- name: TtsReader__Export__LocalCasRoot
value: "/data/bundles/cas"
- name: TtsReader__Piper__Host
value: "10.0.57.17"
value: "ttsreader-piper.fc-ttsreader.svc.cluster.local."
- name: TtsReader__Piper__Port
value: "8500"
- name: TtsReader__Piper__Transport
value: "http"
- name: TtsReader__Piper__HttpPath
value: "/tts"
value: "10200"
- name: TtsReader__Kokoro__Enabled
value: "true"
- name: TtsReader__Kokoro__BaseUrl
@@ -605,7 +596,7 @@ spec:
- name: TtsReader__Transcription__TimeoutSeconds
value: "300"
- name: TtsReader__Ollama__BaseUrl
value: "http://10.0.57.201:11434"
value: "http://10.0.57.17:11434"
- name: TtsReader__Ollama__DefaultModel
value: "gemma3:4b"
- name: TtsReader__Ollama__TimeoutSeconds
@@ -765,26 +756,3 @@ spec:
port: 5217
tls:
secretName: ttsreader-tls
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
# When the operator decides to expose ttsreader-web publicly, uncomment + update the host,
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
#
# --- IngressRoute ---
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: ttsreader-web-public
# namespace: fc-ttsreader
# spec:
# entryPoints: [websecure]
# routes:
# - match: Host(`ttsreader.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
# kind: Rule
# middlewares:
# - name: ttsreader-web-public-profile-header # injects entitlement profile
# services:
# - name: ttsreader-web
# port: 80
# tls: {}
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).

View File

@@ -52,21 +52,17 @@ spec:
app: updatecenter-web
template:
metadata:
annotations:
fc.flowercore.io/healthz-anon: "true"
fc.flowercore.io/probe-path: "/"
labels:
app: updatecenter-web
spec:
nodeName: rke2-server
containers:
- name: web
image: localhost/fc-updater-web:v20260614-regroup-bdf4a4a
image: localhost/fc-updater-web:v20260509-4162dca-authgate
imagePullPolicy: Never
ports:
- containerPort: 8080
name: http
# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
env:
- name: ASPNETCORE_URLS
value: http://+:8080
@@ -92,8 +88,6 @@ spec:
value: Faith AI Mike Edition
- name: FlowerCore__Updater__PublicShares__Links__0__Description
value: Private release link for Mike's Faith AI bundle.
- name: FlowerCore__Audit__Sinks__Loki__Enabled
value: "false"
- name: FlowerCore__Updater__Auth__Bootstrap__Enabled
value: "true"
- name: FlowerCore__Updater__Auth__Bootstrap__Username

View File

@@ -1,2 +0,0 @@
*.sh text eol=lf
Dockerfile text eol=lf

View File

@@ -1,54 +0,0 @@
# GitHub Actions runner image with the internal root CA trusted and a Ruby
# toolcache pre-installed.
# NOTE(review): the base image uses the mutable `latest` tag, so rebuilds are
# not pinned to a specific upstream runner release.
FROM myoung34/github-runner:latest
# RUBY_VERSION and RUBY_MINOR are handed to install-ruby-toolcache.sh below;
# RUBY_BUILD_VERSION selects the rbenv/ruby-build release tarball to download.
ARG RUBY_VERSION=3.3.11
ARG RUBY_MINOR=3.3
ARG RUBY_BUILD_VERSION=v20260326
# UID/GID passed to install-ruby-toolcache.sh (presumably used for toolcache
# file ownership — confirm in that script).
ARG RUNNER_UID=1001
ARG RUNNER_GID=1001
# Two toolcache roots: the runner's own cache under /home/runner/_tool and the
# image-baked copy under /opt/runner-toolcache. Both Ruby bin directories are
# prepended to PATH, with the /home/runner/_tool entry first.
ENV RUNNER_TOOL_CACHE=/home/runner/_tool
ENV RUNNER_RUBY_TOOLCACHE=/opt/runner-toolcache
ENV PATH="/home/runner/_tool/Ruby/${RUBY_MINOR}/x64/bin:/opt/runner-toolcache/Ruby/${RUBY_MINOR}/x64/bin:${PATH}"
USER root
# Bake the IAmWorkin step-ca root CA into the system trust store. Without
# this, .NET HttpClient calls from CI tests against *.iamworkin.lan
# (e.g. https://selenium.iamworkin.lan/session) fail with `PartialChain`
# because the runner image's default Ubuntu trust bundle doesn't include
# our internal Root CA. update-ca-certificates regenerates
# /etc/ssl/certs/ca-certificates.crt, which OpenSSL + .NET on Linux read
# automatically — no SSL_CERT_FILE env var needed.
COPY step-ca-root.crt /usr/local/share/ca-certificates/iamworkin-step-ca-root.crt
# Single layer: install the Ruby build dependencies, refresh the CA bundle so the
# copied root CA is trusted, install ruby-build from the pinned release tarball,
# then delete the temporary ruby-build files and the apt package lists.
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
autoconf \
bison \
build-essential \
ca-certificates \
curl \
libdb-dev \
libffi-dev \
libgdbm-dev \
libgmp-dev \
libncurses-dev \
libreadline-dev \
libssl-dev \
libyaml-dev \
patch \
pkg-config \
uuid-dev \
zlib1g-dev \
&& update-ca-certificates \
&& curl -fsSL "https://github.com/rbenv/ruby-build/archive/refs/tags/${RUBY_BUILD_VERSION}.tar.gz" -o /tmp/ruby-build.tar.gz \
&& mkdir -p /tmp/ruby-build \
&& tar -xzf /tmp/ruby-build.tar.gz --strip-components=1 -C /tmp/ruby-build \
&& /tmp/ruby-build/install.sh \
&& rm -rf /tmp/ruby-build /tmp/ruby-build.tar.gz /var/lib/apt/lists/*
COPY install-ruby-toolcache.sh /usr/local/bin/install-ruby-toolcache.sh
# Make the helper executable, run it with the build args and the baked toolcache
# root, then run `ruby -v` so the build fails if no ruby resolves on PATH.
RUN chmod +x /usr/local/bin/install-ruby-toolcache.sh \
&& RUBY_VERSION="${RUBY_VERSION}" RUBY_MINOR="${RUBY_MINOR}" TOOLCACHE_ROOT="${RUNNER_RUBY_TOOLCACHE}" RUNNER_UID="${RUNNER_UID}" RUNNER_GID="${RUNNER_GID}" /usr/local/bin/install-ruby-toolcache.sh \
&& ruby -v

View File

@@ -1,139 +1,38 @@
# GitHub Runner Fleet
# github-runner
ArgoCD owns `apps/github-runner/github-runner.yaml`. Do not patch live runner
Deployments with `kubectl`; update this manifest and let ArgoCD reconcile.
ArgoCD-managed repo-scoped Linux GitHub Actions runners for FlowerCore.
## Runner Shape
`astoltz` is a GitHub user account, not an organization, so each repository
needs its own runner registration. The existing Common runner remains
`Deployment/github-runner`; Sprint 29 adds one single-replica Deployment for
each top Linux-cost repo:
All repo-scoped Linux runners use:
- `FlowerCore.Puppet`
- `FlowerCore.Signage`
- `FlowerCore.DMS`
- `FlowerCore.Telephony`
- `FlowerCore.Print.Web`
- `FlowerCore.Chat`
- `FlowerCore.MySQL`
- `FlowerCore.Kiosk.Linux`
- `localhost/fc-github-runner:v20260525-ruby3.3.11-stepca`, derived from
`myoung34/github-runner:latest`
- `ACCESS_TOKEN` from the `github-runner-token` Secret
- `RUN_AS_ROOT=false`
- `EPHEMERAL=true`
- `DISABLE_AUTO_UPDATE=true` so the runner does not self-update and exit inside
the immutable Kubernetes pod
- `LABELS=self-hosted,linux,fc-build-linux`
- writable non-root paths under `/home/runner` for .NET, NuGet, XDG cache, and
Actions tool cache
- Ruby 3.3.11 seeded into `/home/runner/_tool/Ruby/3.3/x64` from the baked
`/opt/runner-toolcache` copy so `ruby/setup-ruby@v1` can discover it on
self-hosted `ubuntu-20.04-x64` runners
Each runner uses `myoung34/github-runner:latest`, `EPHEMERAL=true`, and labels
`self-hosted,linux,fc-build-linux`. The shared `github-runner-token` Secret is
synced from the existing 1Password item `GitHub PAT (Runner Registration)` and
is consumed as `ACCESS_TOKEN`.
`github-runner` for `FlowerCore.Common` is single-replica because it retains the
original Longhorn ReadWriteOnce NuGet PVC. Every other repo-scoped runner uses
two replicas with per-pod `emptyDir` caches. That is the safe backlog-drain
strategy: no two pods share one RWO PVC.
Sprint 32 final long-tail wave adds 16 two-replica Deployments:
`FlowerCore.Knowledge`, `FlowerCore.LlmBridge`, `FlowerCore.Media`,
`FlowerCore.Presentations`, `FlowerCore.RemoteDesktop`, `FlowerCore.DNS`,
`FlowerCore.Distribution`, `FlowerCore.Scoreboard`,
`FlowerCore.SegmentDisplay`, `FlowerCore.Signage.Contracts`,
`FlowerCore.SignalControl`, `FlowerCore.Intranet.Web`,
`FlowerCore.Provisioning`, `FlowerCore.Redis`, `FlowerCore.MessageBoard`, and
`FlowerCore.MenuBoard`.
## Image Build
Ruby is baked with a pinned `ruby-build` release and Ruby patch version. The pod
still mounts an `emptyDir` over `/home/runner`, so the `setup-runner-home` init
container copies the baked toolcache from `/opt/runner-toolcache/Ruby` into
`/home/runner/_tool/Ruby` before the runner container starts.
The IAmWorkin step-ca root CA is also baked into the system trust store
(`/usr/local/share/ca-certificates/iamworkin-step-ca-root.crt`, registered by
`update-ca-certificates`). Without it, .NET HttpClient calls from CI tests
against `*.iamworkin.lan` (e.g. `https://selenium.iamworkin.lan/session`)
fail with `PartialChain`. To refresh the bundled cert when the root rotates,
re-extract from the cluster and overwrite `step-ca-root.crt`:
```bash
kubectl get secret -n cert-manager step-ca-root \
-o jsonpath='{.data.ca\.crt}' | base64 -d > step-ca-root.crt
```
```bash
cd apps/github-runner
podman build -t localhost/fc-github-runner:v20260525-ruby3.3.11-stepca .
podman run --rm localhost/fc-github-runner:v20260525-ruby3.3.11-stepca ruby -v
podman run --rm localhost/fc-github-runner:v20260525-ruby3.3.11-stepca \
test -f /opt/runner-toolcache/Ruby/3.3/x64.complete
podman save localhost/fc-github-runner:v20260525-ruby3.3.11-stepca \
-o fc-github-runner-v20260525-ruby3.3.11-stepca.tar
```
Import the saved image on every schedulable RKE2 node before ArgoCD rolls the
Deployments:
```bash
for node in rke2-server rke2-agent1 rke2-agent2; do
scp fc-github-runner-v20260525-ruby3.3.11-stepca.tar "$node:/tmp/"
ssh "$node" 'sudo ctr -a /run/k3s/containerd/containerd.sock -n k8s.io images rm localhost/fc-github-runner:v20260525-ruby3.3.11-stepca || true'
ssh "$node" 'sudo ctr -a /run/k3s/containerd/containerd.sock -n k8s.io images import /tmp/fc-github-runner-v20260525-ruby3.3.11-stepca.tar'
done
```
## Post-Merge Proof
After the PR is merged and ArgoCD syncs, verify the runner fleet:
Do not `kubectl apply` this app over ArgoCD. Merge to `main`, let
`infra-github-runner` sync, then verify from `noc1`:
```bash
kubectl -n github-runner get deploy,pods,pvc
```
Verify the Ruby toolcache in a fresh pod:
```bash
kubectl -n github-runner exec deploy/github-runner-puppet -c runner -- ruby -v
kubectl -n github-runner exec deploy/github-runner-puppet -c runner -- sh -c \
'echo "$RUNNER_TOOL_CACHE" && test -f "$RUNNER_TOOL_CACHE/Ruby/3.3/x64.complete"'
```
Verify GitHub registration for the repo-scoped runners:
```bash
for repo in FlowerCore.Common FlowerCore.Shared.Pos FlowerCore.Puppet FlowerCore.Signage \
FlowerCore.DMS FlowerCore.Telephony FlowerCore.Print.Web FlowerCore.Chat \
FlowerCore.MySQL FlowerCore.Kiosk.Linux FlowerCore.Marquee FlowerCore.TtsReader \
FlowerCore.Knowledge FlowerCore.LlmBridge FlowerCore.Media \
FlowerCore.Presentations FlowerCore.RemoteDesktop FlowerCore.DNS \
FlowerCore.Distribution FlowerCore.Scoreboard FlowerCore.SegmentDisplay \
FlowerCore.Signage.Contracts FlowerCore.SignalControl FlowerCore.Intranet.Web \
FlowerCore.Provisioning FlowerCore.Redis FlowerCore.MessageBoard \
FlowerCore.MenuBoard; do
echo "=== $repo ==="
for repo in FlowerCore.Puppet FlowerCore.Signage FlowerCore.DMS FlowerCore.Telephony FlowerCore.Print.Web FlowerCore.Chat FlowerCore.MySQL FlowerCore.Kiosk.Linux; do
gh api "/repos/astoltz/$repo/actions/runners" \
--jq '.runners[] | select(.labels[].name == "fc-build-linux") | {name,status,busy,labels:[.labels[].name]}'
--jq '.runners[] | select((.labels[].name == "fc-build-linux") and (.status == "online")) | {name,status,busy,labels:[.labels[].name]}'
done
```
Shared.Pos publish proof after the runner pod is online:
```bash
gh run list --repo astoltz/FlowerCore.Shared.Pos \
--workflow "Build, Test & Publish" --branch main --limit 5
```
If the latest run is still queued after runner registration, rerun the workflow
from GitHub Actions and verify it lands on an `rke2-linux-*` runner.
## Failure Notes
- `actions/setup-dotnet` permission error at `/usr/share/dotnet`: check that
`DOTNET_INSTALL_DIR=/home/runner/.dotnet` and related cache env vars are
present on the runner pod.
- `ruby/setup-ruby@v1` says self-hosted runners must install Ruby in
`$RUNNER_TOOL_CACHE`: check that the init container copied
`/opt/runner-toolcache/Ruby` into `/home/runner/_tool/Ruby` and that
`/home/runner/_tool/Ruby/3.3/x64.complete` exists.
- `404` during runner registration: the fine-grained PAT is valid but missing
repository access for that repo. Add the repo to the PAT access list; the PAT
value does not change.
- `Multi-Attach` volume error: only the Common runner uses a RWO PVC and it must
stay single-replica. New multi-replica runners use `emptyDir`.
- Runner pods repeatedly registering, downloading a newer Actions runner, then
exiting with code 4: verify `DISABLE_AUTO_UPDATE=true` is present. The image
translates that into `config.sh --disableupdate`; without it, the Deployment
controller sees the expected self-update exit as CrashLoopBackOff.
`LinuxRunnerOffline` is declared in `apps/monitoring/noc-monitoring.yaml` and
fires when any Common or top-8 Linux runner deployment has no available replica
for 10 minutes.

File diff suppressed because it is too large Load Diff

View File

@@ -1,19 +0,0 @@
#!/usr/bin/env bash
#
# Compile one Ruby release with ruby-build into a toolcache-style tree:
#
#   <TOOLCACHE_ROOT>/Ruby/<RUBY_VERSION>/x64            install prefix
#   <TOOLCACHE_ROOT>/Ruby/<RUBY_VERSION>/x64.complete   completion marker
#   <TOOLCACHE_ROOT>/Ruby/<RUBY_MINOR> -> <RUBY_VERSION> (relative symlink)
#
# Every input below can be overridden from the environment.
set -euo pipefail

# Fill in defaults for anything the caller left unset or empty.
: "${RUBY_VERSION:=3.3.11}"
: "${RUBY_MINOR:=3.3}"
: "${TOOLCACHE_ROOT:=/opt/runner-toolcache}"
: "${RUNNER_UID:=1001}"
: "${RUNNER_GID:=1001}"

ruby_root="${TOOLCACHE_ROOT}/Ruby"
version_dir="${ruby_root}/${RUBY_VERSION}"
install_prefix="${version_dir}/x64"
# Configure flags are only defaulted when the caller supplied none.
configure_opts="${RUBY_CONFIGURE_OPTS:---disable-install-doc --disable-yjit}"

mkdir -p "${ruby_root}"

# The options are handed to ruby-build alone via a per-command assignment, so
# they are not exported to the commands that follow.
RUBY_CONFIGURE_OPTS="${configure_opts}" ruby-build "${RUBY_VERSION}" "${install_prefix}"

# Drop the completion marker next to the prefix, then point the major.minor
# alias at the full version directory.
touch "${version_dir}/x64.complete"
ln -sfn "${RUBY_VERSION}" "${ruby_root}/${RUBY_MINOR}"

# Smoke test: the interpreter that was just built must start.
"${install_prefix}/bin/ruby" -v

# Give the tree to the runner account and make it readable (and traversable)
# for all users.
chown -R "${RUNNER_UID}:${RUNNER_GID}" "${TOOLCACHE_ROOT}"
chmod -R a+rX "${TOOLCACHE_ROOT}"

View File

@@ -1,12 +0,0 @@
-----BEGIN CERTIFICATE-----
MIIBxDCCAWqgAwIBAgIRAPY357G6ow6zMAL5+4bS2kkwCgYIKoZIzj0EAwIwQDEa
MBgGA1UEChMRSUFtV29ya2luIEFDTUUgQ0ExIjAgBgNVBAMTGUlBbVdvcmtpbiBB
Q01FIENBIFJvb3QgQ0EwHhcNMjYwMzA4MTgwNzExWhcNMzYwMzA1MTgwNzExWjBA
MRowGAYDVQQKExFJQW1Xb3JraW4gQUNNRSBDQTEiMCAGA1UEAxMZSUFtV29ya2lu
IEFDTUUgQ0EgUm9vdCBDQTBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABJ2n04X1
JZo5Zdq/i1Idv8+fqwZyAzBh7whbqj0SWsJL8UWRabCMqYCs7+dXO0xRSzqkwFDL
x+vooOai8RgRNhajRTBDMA4GA1UdDwEB/wQEAwIBBjASBgNVHRMBAf8ECDAGAQH/
AgEBMB0GA1UdDgQWBBRnuPPQR6iM/H6vOluiU3Sygayz8jAKBggqhkjOPQQDAgNI
ADBFAiEArQK9dYPGmAZsdYnjziuFVVE5NKZUcceYvGfGC+tLXUsCIAudF2zJrCRq
3mK50ZZET/fwTkJwiEF4824mjP8p1CKM
-----END CERTIFICATE-----

View File

@@ -44,32 +44,9 @@ spec:
labels:
app: intranet-web
spec:
# notes-corpus-clone: shallow-clones the Notes docs corpus into an emptyDir so
# the IntranetSearch indexer has /srv/flowercore-notes/docs to index. Uses the
# trailing-dot FQDN (gitea-clusterip.gitea.svc.cluster.local.) to bypass the
# CoreDNS *.iamworkin.lan template that otherwise resolves the in-cluster service
# name to the Traefik VIP for musl / ndots:5 pods (search-domain appending).
# Cred: gitea-corpus-cred (in-ns secret with the canonical 1P bluejay read cred;
# mirrors the imperative gitea-flowercore-notes argocd repo-cred pattern).
initContainers:
- name: notes-corpus-clone
image: alpine/git:2.45.2
imagePullPolicy: IfNotPresent
envFrom:
- secretRef:
name: gitea-corpus-cred
env:
- name: GIT_LFS_SKIP_SMUDGE
value: "1"
command: ["/bin/sh", "-c"]
args:
- 'git clone --depth 1 http://$username:$password@gitea-clusterip.gitea.svc.cluster.local.:3000/bluejay/FlowerCore.Notes.git /srv/flowercore-notes && echo "notes corpus cloned; docs entries:" && ls /srv/flowercore-notes/docs | wc -l'
volumeMounts:
- name: notes-corpus
mountPath: /srv/flowercore-notes
containers:
- name: intranet-web
image: localhost/fc-intranet-web:v20260614-wave5-knowledgefleet-1458b4d
image: localhost/fc-intranet-web:v20260508-brochure-w1
imagePullPolicy: Never
ports:
- containerPort: 5300
@@ -79,32 +56,18 @@ spec:
value: Production
- name: ASPNETCORE_URLS
value: "http://+:5300"
# Embed backend = edge1 Ollama BY IPv4 (10.0.57.17:11434; has
# nomic-embed-text). The hostname edge1.iamworkin.lan is UNUSABLE from
# cluster pods: it resolves to an unroutable IPv6 (fdbc:56:*) and the
# CoreDNS *.iamworkin.lan template maps the name to the Traefik VIP, so
# embeds failed with "No route to host". Use a bare pod-routable IPv4.
# Backend is BLUEJAY-AI's GPU node (Ollama / Vulkan Iris Xe, INFRA VLAN
# 10.0.56.132) which embeds nomic-embed-text in ~160ms vs the edge1 Pi 5's
# ~3.2s for the same ~512-token chunk (~20x faster bulk embed), proven
# pod-routable from the intranet namespace 2026-06-13. The prior edge1 Pi 5
# backend (10.0.57.17:11434) remains a working fallback if BLUEJAY-AI is
# down. Bulk embed runs in the background; /health does not depend on it.
# Memory: feedback_pi5_nomic_embed_slow.
# Bulk corpus indexing on edge1 Pi 5 takes ~6s/chunk × 5665 chunks
# ≈ 9 hours. BLUEJAY-WS GPU (R9700, 32GB VRAM) does the same work
# in minutes. Memory: feedback_pi5_nomic_embed_slow.
- name: IntranetSearch__OllamaBaseUrl
value: "http://10.0.57.201:11434"
# Notes docs corpus IS now mounted at /srv/flowercore-notes (see the
# notes-corpus-clone initContainer + notes-corpus-sync sidecar), so the
# IntranetSearch indexer is ENABLED. First-boot bulk embed of the corpus
# runs in the background via the edge1 Ollama backend above (~6s/chunk on
# the Pi 5); /health readiness does not depend on it, so the pod stays Ready.
- name: IntranetSearch__Enabled
value: "true"
# Page-reading override SQLite persistence on the writable PVC at
# /data. This backs pronunciation, notes, corrections, and
# page-profile metadata across pod restarts.
- name: PageReadingOverrides__DatabasePath
value: "/data/page-reading-overrides.db"
value: "http://10.0.56.20:11434"
# Sprint E Phase 2α — JSON-file-backed PageReadingOverride persistence
# on the writable PVC at /data. Without this env var the
# intranet falls back to the in-memory store (loses state on
# pod restart). Master's PageReadingOverrideOptions binds
# PageReadingOverrides:FilePath.
- name: PageReadingOverrides__FilePath
value: "/data/page-reading-overrides.json"
- name: KnowledgeFleetSearch__BaseUrl
value: "https://knowledge.iamworkin.lan"
- name: KnowledgeFleetSearch__ApiKey
@@ -141,40 +104,10 @@ spec:
volumeMounts:
- name: vector-store
mountPath: /data
- name: notes-corpus
mountPath: /srv/flowercore-notes
readOnly: true
# notes-corpus-sync: keeps the mounted corpus fresh between pod restarts by
# pulling the Notes repo every 30 min (best-effort; the initContainer guarantees
# a fresh clone at pod start). Reuses the clone's origin (trailing-dot host + creds).
- name: notes-corpus-sync
image: alpine/git:2.45.2
imagePullPolicy: IfNotPresent
envFrom:
- secretRef:
name: gitea-corpus-cred
env:
- name: GIT_LFS_SKIP_SMUDGE
value: "1"
command: ["/bin/sh", "-c"]
args:
- 'while true; do sleep 1800; git -C /srv/flowercore-notes pull --depth 1 2>&1 | sed "s/^/[notes-corpus-sync] /" || true; done'
resources:
requests:
memory: "32Mi"
cpu: "10m"
limits:
memory: "128Mi"
cpu: "200m"
volumeMounts:
- name: notes-corpus
mountPath: /srv/flowercore-notes
volumes:
- name: vector-store
persistentVolumeClaim:
claimName: intranet-vector-store
- name: notes-corpus
emptyDir: {}
---
apiVersion: v1
kind: Service

View File

@@ -90,12 +90,9 @@ spec:
app.kubernetes.io/name: knowledge-web
app.kubernetes.io/part-of: bluejay-infra
annotations:
fc.flowercore.io/healthz-anon: "true"
fc.flowercore.io/probe-path: "/healthz"
prometheus.io/scrape: "true"
prometheus.io/port: "8080"
prometheus.io/path: "/metrics"
flowercore.io/healthz-auth-policy: "allow-anonymous"
spec:
securityContext:
runAsNonRoot: true
@@ -105,7 +102,7 @@ spec:
- name: web
# Placeholder tag — bump to the image you built + imported to ALL
# RKE2 nodes via scripts/deploy-knowledge.sh before applying.
image: localhost/fc-knowledge-web:v20260603-oidc-authentik-auditfix
image: localhost/fc-knowledge-web:v20260429232635
imagePullPolicy: Never
command:
- /bin/sh
@@ -119,7 +116,6 @@ spec:
ports:
- containerPort: 8080
name: http
# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
env:
- name: ASPNETCORE_URLS
value: "http://+:8080"
@@ -127,25 +123,6 @@ spec:
value: "Production"
- name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
value: "false"
# AuthentiK/OIDC is enforced. /healthz stays anonymous by contract;
# see flowercore.io/healthz-auth-policy above and the Sprint 58
# OIDC readiness probe audit.
- name: FlowerCore__Auth__Enabled
value: "true"
- name: FlowerCore__Auth__Oidc__Enabled
value: "true"
- name: FlowerCore__Auth__Oidc__Authority
value: "https://id.iamworkin.lan/application/o/knowledge/"
- name: FlowerCore__Auth__Oidc__Audience
value: "knowledge"
- name: FlowerCore__Auth__Oidc__ClientId
value: "knowledge"
- name: FlowerCore__Auth__Oidc__ClientSecret
valueFrom:
secretKeyRef:
name: knowledge-oidc-client
key: client_secret
optional: true
# Vector-store directory + embedding model + edition profile dir.
# Profile JSON is baked into the image at /home/app/editions via the
# csproj Content-link from FlowerCore.Common/editions/.
@@ -157,8 +134,6 @@ spec:
value: "5"
- name: Knowledge__MaxLimit
value: "50"
- name: Knowledge__Federation__DatabasePath
value: "/data/vector-stores/knowledge-federation.db"
- name: FlowerCore__Editions__ProfileDirectory
value: "/home/app/editions"
# Embed via edge1 Pi 5 + AI HAT+ (10.0.57.17:11434). Cluster
@@ -168,7 +143,7 @@ spec:
# need a separate ingestion lane that can opt into the
# workstation GPU when present.
- name: FlowerCore__Ollama__BaseUrl
value: "http://10.0.57.201:11434"
value: "http://10.0.57.17:11434"
- name: FlowerCore__Mcp__ApiKey__Key
valueFrom:
secretKeyRef:
@@ -289,26 +264,3 @@ spec:
port: 80
tls:
secretName: knowledge-tls
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
# When the operator decides to expose knowledge-web publicly, uncomment + update the host,
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
#
# --- IngressRoute ---
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: knowledge-web-public
# namespace: knowledge
# spec:
# entryPoints: [websecure]
# routes:
# - match: Host(`knowledge.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
# kind: Rule
# middlewares:
# - name: knowledge-web-public-profile-header # injects entitlement profile
# services:
# - name: knowledge-web
# port: 80
# tls: {}
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).

View File

@@ -25,7 +25,7 @@ metadata:
role: github-actions-runner
flowercore.io/managed-by: bluejay-infra
spec:
runStrategy: Halted
runStrategy: Always
template:
metadata:
labels:

View File

@@ -207,13 +207,20 @@ spec:
- port: 993
targetPort: 993
name: imaps
# --- mail-tls Certificate REMOVED 2026-06-01 ---
# mail-tls is now managed OUTSIDE cert-manager: issued from step-ca's JWK 'admin'
# provisioner and auto-renewed by a systemd timer on noc1 (step ca renew), which
# writes the mail-tls secret directly. step-ca-acme only has an HTTP-01 (Traefik)
# solver, but mail.iamworkin.lan must resolve to the dedicated MetalLB IP 10.0.56.202
# (SMTP/IMAP), so HTTP-01 cannot validate. Do NOT re-add a cert-manager Certificate
# here unless a DNS-01 solver is deployed for step-ca-acme.
---
# TLS Certificate via cert-manager
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: mail-tls
namespace: mail
spec:
secretName: mail-tls
issuerRef:
name: step-ca-acme
kind: ClusterIssuer
dnsNames:
- mail.iamworkin.lan
---
# Traefik IngressRoute - Webmail placeholder
apiVersion: traefik.io/v1alpha1

View File

@@ -75,20 +75,6 @@ data:
cluster: "rke2"
role: "agent"
# Mac mini macOS runner node (INFRA VLAN)
- job_name: "macmini-node"
scrape_timeout: 15s
static_configs:
- targets: ["10.0.56.115:9100"]
labels:
instance: "macmini"
host: "macmini.iamworkin.lan"
vlan: "infra"
arch: "arm64"
role: "macos-runner"
puppet_managed: "true"
puppet_server: "puppet.iamworkin.lan"
# In-cluster node-exporter DaemonSet
- job_name: "k8s-node-exporter"
kubernetes_sd_configs:
@@ -216,24 +202,19 @@ data:
- job_name: "pimanager-app"
scrape_interval: 15s
metrics_path: /metrics
scheme: https
tls_config:
insecure_skip_verify: true
static_configs:
- targets: ["piez.iamworkin.lan"]
- targets: ["10.0.58.25:5000"]
labels:
instance: "piez"
service: "signalcontrol"
service: "pimanager"
vlan: "home"
device: "pi4-ezconnect"
rig: "signal-b"
- targets: ["pirelay.iamworkin.lan"]
- targets: ["10.0.58.113:5100"]
labels:
instance: "pirelay"
service: "signalcontrol"
service: "pimanager"
vlan: "home"
device: "pi3-ks0212"
rig: "signal-a"
# Epson ET-3750 EcoTank Printer SNMP
- job_name: "snmp-printer"
@@ -285,14 +266,13 @@ data:
printer_model: "NuPrint 210"
# Print.Web health (Blazor app on edge2:5200)
# Target `/health` (anonymous) — root path requires API key auth and returns 401.
- job_name: "probe-printweb"
metrics_path: /probe
params:
module: [http_2xx]
scrape_interval: 30s
static_configs:
- targets: ["http://10.0.57.16:5200/health"]
- targets: ["http://10.0.57.16:5200/"]
labels:
instance: "print-web"
service: "print-web"
@@ -484,33 +464,24 @@ data:
- "https://gitea.iamworkin.lan/"
- "https://argocd.iamworkin.lan/"
- "https://intranet.iamworkin.lan/"
- "https://signage.iamworkin.lan/healthz" # root 401 auth-gated 2026-06-01; /healthz anon 200
- "https://signage.iamworkin.lan/"
- "https://kiosk.iamworkin.lan/"
- "https://media.iamworkin.lan/healthz" # root auth-gated by OIDC; /healthz anonymous 200
- "https://mysql.iamworkin.lan/healthz" # root 401 auth-gated 2026-06-01; /healthz anon 200
- "https://php.iamworkin.lan/healthz" # root 401 auth-gated 2026-06-01; /healthz anon 200
- "https://media.iamworkin.lan/"
- "https://mysql.iamworkin.lan/"
- "https://php.iamworkin.lan/"
- "https://zabbix.iamworkin.lan/"
- "https://desktop.iamworkin.lan/"
- "https://print.iamworkin.lan/healthz" # root 401 behind API key auth; /healthz anonymous 200
- "https://dns.iamworkin.lan/healthz" # root auth-gated by OIDC; /healthz anonymous 200
- "https://signalcontrol.iamworkin.lan/health" # FlowerCore.SignalControl Pi control plane
- "https://flowercore.iamworkin.lan/healthz" # FlowerCore landing
- "https://replay.iamworkin.lan/healthz" # FlowerCore.Signage replay surface
- "https://worldbuilder.iamworkin.lan/healthz" # FlowerCore.WorldBuilder
- "https://updates.iamworkin.lan/api/v1/manifests/_schema" # UpdateCenter plural LAN alias
- "https://updatecenter-internal.iamworkin.lan/api/v1/manifests/_schema" # internal UC schema route
- "https://chat.iamworkin.lan/healthz" # OIDC staged; keep blackbox off root before enforcement flips
- "https://dist.iamworkin.lan/healthz" # root/admin auth-gated by OIDC; /healthz anonymous 200
- "https://dms.iamworkin.lan/healthz" # future OIDC posture; health route is already anonymous/live
- "https://print.iamworkin.lan/"
- "https://dns.iamworkin.lan/"
- "https://chat.iamworkin.lan/"
- "https://dist.iamworkin.lan/"
- "https://dms.iamworkin.lan/"
- "https://menuboard.iamworkin.lan/"
- "https://messageboard.iamworkin.lan/"
- "https://presentations.iamworkin.lan/"
- "https://retail.iamworkin.lan/"
- "https://ttsreader.iamworkin.lan/"
# Explicit healthcheck paths
- "https://library.iamworkin.lan/health"
- "https://aistation.iamworkin.lan/healthz"
- "https://knowledge.iamworkin.lan/healthz"
- "https://fc-llm-bridge.iamworkin.lan/healthz"
- "https://acme.iamworkin.lan/health"
# NOTE: services intentionally NOT in this probe surface
@@ -726,36 +697,6 @@ data:
summary: "Print.Web Ollama runner held for >10m ({{ $labels.model }})"
description: "Print.Web reports model {{ $labels.model }} with {{ $value | printf \"%.0f\" }}s of keep-alive remaining. Check concurrent requests before the Pi 5 Ollama lane thrashes."
- name: macmini-runners
rules:
- alert: MacMiniRunnerOffline
expr: (flowercore_github_runner_online{runner=~"macmini-.*"} == 0) or absent(flowercore_github_runner_online{runner=~"macmini-.*"})
for: 10m
labels:
severity: warning
service: github-runner
annotations:
summary: "Mac mini GitHub runner offline ({{ $labels.runner }})"
description: "A macmini-* GitHub Actions runner has not reported online for more than 10 minutes. Puppet manages its LaunchDaemon under /Library/LaunchDaemons/io.flowercore.github-runner-<slug>.plist; runners survive reboot and do not require a GUI session."
- name: linux-runners
rules:
- alert: LinuxRunnerOffline
expr: |
kube_deployment_status_replicas_ready{
namespace="github-runner",
deployment=~"github-runner(|-(sharedpos|puppet|signage|dms|telephony|print-web|chat|mysql|kiosk-linux))"
} == 0
for: 5m
labels:
severity: warning
alert_channel: irc
service: github-runner
team: ci
annotations:
summary: "Linux CI runner offline: {{ $labels.deployment }}"
description: "Deployment {{ $labels.deployment }} in namespace github-runner has 0 ready replicas for more than 5 minutes. CI jobs targeting this repo will queue until the runner pod restarts and re-registers with GitHub. Check pods with: kubectl -n github-runner get pods -l app.kubernetes.io/name={{ $labels.deployment }}. Check logs with: kubectl -n github-runner logs -l app.kubernetes.io/name={{ $labels.deployment }} --tail=50. Common causes: PAT missing repo access, runner CrashLoopBackOff, or node/resource pressure."
- name: remote-desktop
rules:
- alert: RemoteDesktopWebDown
@@ -922,13 +863,12 @@ data:
# of idle and SNMP times out, so 5m for: would page nightly. A
# genuine printer outage (jam, disconnected) lasts well over 30m.
- alert: EpsonPrinterDown
expr: (max_over_time(up{job="snmp-printer"}[35m]) == bool 0) == 1 and (hour() >= 13 or hour() < 1)
expr: up{job="snmp-printer"} == 0
for: 30m
labels:
severity: info
alert_channel: irc
severity: warning
annotations:
summary: "Epson ET-3750 SNMP unreachable during waking hours (30m)"
summary: "Epson ET-3750 SNMP unreachable for >30m (likely actual fault, not sleep)"
- alert: SynologyDiskLow
expr: hrStorageUsed{job="snmp-nas"} / hrStorageSize{job="snmp-nas"} * 100 > 85
@@ -982,52 +922,6 @@ data:
annotations:
summary: "Disk usage high on {{ $labels.instance }} ({{ $value | printf \"%.1f\" }}%)"
# Puppet agent + service alerts.
# Mirror of FlowerCore.Notes/scripts/monitoring/alerts.yml `puppet` group
# so a future migration to in-cluster Prometheus inherits the ruleset.
# Source-of-truth for the live Podman Prometheus on noc1 is the Notes file.
# See feedback_monitoring_k8s_target_vs_live_podman.
- name: puppet
rules:
- alert: PuppetAgentReportStale
expr: puppet_last_run_age_seconds > 7200
for: 30m
labels:
severity: warning
alert_channel: irc
annotations:
summary: "Puppet agent {{ $labels.instance }} hasn't reported in over 2h"
description: "Last run age: {{ $value | humanizeDuration }}. The puppet agent on {{ $labels.instance }} may be stopped, the node may be powered off, or noc1 may be unreachable from this node."
runbook: "1. SSH to node (via noc1 jumpbox if needed) 2. sudo systemctl status puppet 3. sudo puppet agent -t --noop to force a run 4. Check r10k: ssh fcadmin@10.0.56.10 'sudo podman logs openvoxserver --tail 50' 5. Verify noc1 reachability: ping puppet.iamworkin.lan"
- alert: PuppetAgentReportCritical
expr: puppet_last_run_age_seconds > 86400
for: 1h
labels:
severity: critical
alert_channel: irc
annotations:
summary: "Puppet agent {{ $labels.instance }} silent for over 24h — node is unmanaged"
description: "Last run age: {{ $value | humanizeDuration }}. Node {{ $labels.instance }} has not submitted a Puppet report in over 24 hours. Config drift is accumulating — investigate immediately. If intentional (maintenance), add to the exclusion filter or silence in Grafana."
runbook: "URGENT: 1. Check node power state 2. SSH via noc1 jumpbox: ssh fcadmin@10.0.56.10 then ssh <node> 3. sudo systemctl status puppet 4. sudo systemctl start puppet + sudo puppet agent -t 5. Check for network partitions (VLAN connectivity to 10.0.56.10) 6. If node was recently reimaged: sudo puppet agent -t to re-register with new SSL cert"
# Sprint 33 Cx-7 Phase B (2026-05-25 postmortem follow-up):
# Detects puppet.service in failed state — distinct from PuppetAgentReportStale
# which catches "agent hasn't run." This catches "systemd gave up restarting it"
# (CA-verify loop or other fatal exit). Requires node-exporter systemd collector
# enabled with --collector.systemd. If `node_systemd_unit_state` has no series
# for a node, the collector is disabled there — flag in postmortem follow-up.
- alert: PuppetServiceFailed
expr: node_systemd_unit_state{name="puppet.service",state="failed"} == 1
for: 5m
labels:
severity: warning
alert_channel: irc
annotations:
summary: "Puppet service failed on {{ $labels.instance }}"
description: "puppet.service on {{ $labels.instance }} has been in failed state for 5+ minutes. systemd has stopped auto-restarting (CA-verify-loop or other exit). Manual `systemctl status puppet` confirms. Run `sudo systemctl start puppet` to recover; investigate journal for root cause."
runbook_url: "https://github.com/astoltz/FlowerCore.Notes/blob/master/memory/feedback_puppet_service_dead_after_ca_loop_alert_misreads.md"
# K8s pod-state alerts. Require kube-state-metrics scrape (added
# 2026-04-26 — see scrape_configs above). Would have surfaced the
# agent-zero ollama-proxy 172x crash-loop instead of letting it
@@ -1035,12 +929,7 @@ data:
- name: kubernetes-state
rules:
- alert: KubeContainerRestartingFrequently
# Exclude github-runner: ephemeral runners register, run one job,
# exit cleanly, and restart by design. Also require kube_pod_info so
# deleted rollout pods do not keep firing from retained restart series.
expr: |
increase(kube_pod_container_status_restarts_total{namespace!="github-runner"}[1h]) > 5
and on(namespace, pod) kube_pod_info
expr: increase(kube_pod_container_status_restarts_total[1h]) > 5
for: 15m
labels:
severity: warning
@@ -1049,12 +938,7 @@ data:
description: "Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} has restarted {{ $value | printf \"%.0f\" }} times in the last hour. Check 'kubectl describe pod' + last-state termination reason."
- alert: KubeContainerCrashLooping
# Same github-runner/delete-retention exclusions as the hourly
# restart rule above; real runner failures are covered by the
# dedicated LinuxRunnerOffline/MacMiniRunnerOffline alerts.
expr: |
increase(kube_pod_container_status_restarts_total{namespace!="github-runner"}[15m]) > 3
and on(namespace, pod) kube_pod_info
expr: increase(kube_pod_container_status_restarts_total[15m]) > 3
for: 5m
labels:
severity: critical
@@ -1082,10 +966,7 @@ data:
description: "Pod can't pull image. Check the image ref (often a stale tag or unreachable registry) and clean up if it's an orphan."
- alert: KubeDeploymentReplicasMismatch
# github-runner has explicit runner-offline alerts; the generic
# replica-mismatch rule should not page on intentionally ephemeral
# 0/1 runner churn between CI jobs.
expr: kube_deployment_spec_replicas{namespace!="github-runner"} != kube_deployment_status_replicas_available{namespace!="github-runner"}
expr: kube_deployment_spec_replicas != kube_deployment_status_replicas_available
for: 15m
labels:
severity: warning
@@ -1093,6 +974,19 @@ data:
summary: "Deployment {{ $labels.namespace }}/{{ $labels.deployment }} replica mismatch"
description: "Spec wants {{ $labels.spec_replicas }} but only {{ $value }} available. Likely a rollout stuck on probe failure, scheduling, or PVC."
- alert: LinuxRunnerOffline
expr: |
kube_deployment_status_replicas_available{namespace="github-runner",deployment=~"github-runner(|-(puppet|signage|dms|telephony|print-web|chat|mysql|kiosk-linux))"} < 1
for: 10m
labels:
severity: warning
service: github-runner
alert_channel: thermal_print
annotations:
summary: "Linux GitHub Actions runner offline: {{ $labels.deployment }}"
description: "{{ $labels.deployment }} has no available runner pod for 10 minutes. GitHub jobs using [self-hosted, linux, fc-build-linux] for its repo will queue at $0 until the runner returns."
runbook_url: "https://gitea.iamworkin.lan/bluejay/FlowerCore.Notes/src/branch/master/docs/infrastructure/self-hosted-runner-fleet.md"
# Q-MR-3 (2026-05-11): multus memory pressure — catches the next OOM
# cascade BEFORE multus is OOM-killed cluster-wide. The 2026-05-10
# outage (21h) hit because no alert fired on the rising multus working
@@ -1302,55 +1196,24 @@ metadata:
data:
notify.py: |
#!/usr/bin/env python3
"""HTTP->IRC alert relay with thermal-printer DIGEST forwarding.
Listens on :9119, posts to #alerts on UnrealIRCd, forwards to Print.Web
/api/print/alert. Thermal printing is BATCHED into hourly digests by
default so the printer no longer spam-fires per Grafana webhook.
Routing (per Grafana webhook alert):
- IRC: always per-event (operator likes the stream)
- Thermal printer:
* severity in {critical,disaster,page} OR
label alert_channel=thermal_print_immediate -> print NOW
* label alert_channel=thermal_print -> enqueue into hourly digest
* everything else -> IRC only
- RESOLVED webhooks remove the alert from the digest buffer
Env vars (defaults preserve old behavior on first deploy):
THERMAL_PRINT_ENABLED default "true" - master kill switch
BATCH_INTERVAL_MIN default "60" - minutes between digest prints
BATCH_MAX_PENDING default "50" - force-flush threshold
HTTP surface:
POST / - Grafana webhook entry
POST /flush - manual digest flush (idempotent)
GET / - status + config + buffer depth + stats
"""HTTP->IRC alert relay with thermal printer forwarding for Grafana webhooks.
Listens on :9119, posts to #alerts on UnrealIRCd via raw IRC protocol.
Alerts tagged alert_channel=thermal_print also POST to Print.Web /api/print/alert.
"""
import json, os, socket, sys, threading, time
from collections import defaultdict
from datetime import datetime, timezone
import json, socket, sys, time
from http.server import HTTPServer, BaseHTTPRequestHandler
from urllib.request import Request, urlopen
from urllib.error import URLError
THERMAL_PRINT_ENABLED = os.environ.get("THERMAL_PRINT_ENABLED", "true").lower() == "true"
BATCH_INTERVAL_MIN = int(os.environ.get("BATCH_INTERVAL_MIN", "60"))
BATCH_MAX_PENDING = int(os.environ.get("BATCH_MAX_PENDING", "50"))
IRC_HOST = os.environ.get("IRC_HOST", "unrealircd.irc.svc")
IRC_PORT = int(os.environ.get("IRC_PORT", "6667"))
IRC_NICK = os.environ.get("IRC_NICK", "grafana-bot")
IRC_CHANNEL = os.environ.get("IRC_CHANNEL", "#alerts")
PRINT_WEB_URL = os.environ.get("PRINT_WEB_URL", "http://10.0.57.16:5200/api/print/alert")
_buffer_lock = threading.Lock()
_buffer = {} # fingerprint -> {"alert": dict, "first_seen": float, "last_seen": float}
_last_flush_time = time.time()
_stats = {"webhooks_received": 0, "irc_sent": 0, "print_immediate": 0,
"digest_flushed": 0, "buffer_dedup": 0, "buffer_added": 0,
"buffer_resolved": 0, "started_at": time.time()}
IRC_HOST = "unrealircd.irc.svc" # short name: CoreDNS ndots:5 + iamworkin.lan template hijacks full .cluster.local (see memory)
IRC_PORT = 6667
IRC_NICK = "grafana-bot"
IRC_CHANNEL = "#alerts"
PRINT_WEB_URL = "http://10.0.57.16:5200/api/print/alert"
PRINT_ENABLED = True
def send_irc(message):
"""Connect, handle PING, join, send, quit."""
try:
sock = socket.create_connection((IRC_HOST, IRC_PORT), timeout=15)
sock.sendall(f"NICK {IRC_NICK}\r\n".encode())
@@ -1383,137 +1246,52 @@ data:
time.sleep(0.5)
sock.sendall(b"QUIT :alert delivered\r\n")
sock.close()
_stats["irc_sent"] += 1
return True
except Exception as e:
print(f"[irc-notify] IRC send failed: {e}", file=sys.stderr)
return False
def post_thermal(payload, kind):
if not THERMAL_PRINT_ENABLED:
print(f"[irc-notify] thermal disabled; skip {kind} ({payload.get('title','?')[:40]})", file=sys.stderr)
return False
def send_thermal_print(alert):
if not PRINT_ENABLED: return
labels = alert.get("labels", {})
annotations = alert.get("annotations", {})
status = alert.get("status", "firing").upper()
summary = annotations.get("summary", "")
description = annotations.get("description", "")
runbook = annotations.get("runbook", "")
# Build a useful message: summary + description + runbook steps
parts = []
if summary: parts.append(summary)
if description and description != summary: parts.append(description)
if runbook: parts.append("STEPS: " + runbook)
message = " | ".join(parts) if parts else labels.get("alertname", "Unknown alert")
payload = {
"title": labels.get("alertname", "Unknown"),
"severity": labels.get("severity", "warning").capitalize(),
"host": labels.get("instance", labels.get("host", "unknown")),
"message": message,
"eventId": alert.get("fingerprint", ""),
"source": "Grafana",
"status": "RESOLVED" if status == "RESOLVED" else "PROBLEM",
"acknowledged": False
}
try:
req = Request(PRINT_WEB_URL, data=json.dumps(payload).encode("utf-8"),
headers={"Content-Type": "application/json"}, method="POST")
resp = urlopen(req, timeout=10)
if kind == "immediate": _stats["print_immediate"] += 1
print(f"[irc-notify] thermal {kind} sent: {payload.get('title','?')[:50]}", file=sys.stderr)
return True
print(f"[irc-notify] Thermal print sent: {resp.read().decode()}", file=sys.stderr)
except Exception as e:
print(f"[irc-notify] thermal {kind} failed: {e}", file=sys.stderr)
return False
print(f"[irc-notify] Thermal print failed: {e}", file=sys.stderr)
def fingerprint_of(alert):
fp = alert.get("fingerprint", "")
if fp: return fp
def should_print(alert):
labels = alert.get("labels", {})
target = labels.get("pod") or labels.get("instance") or labels.get("deployment") or labels.get("statefulset") or labels.get("namespace") or ""
return f"{labels.get('alertname','?')}/{labels.get('namespace','')}/{target}"
def is_critical(alert):
    """Return True when the alert's `severity` label is critical, disaster or page.

    The comparison is case-insensitive; an alert with no labels or no
    severity label is not critical.
    """
    severity = alert.get("labels", {}).get("severity", "")
    return severity.lower() in ("critical", "disaster", "page")
def is_immediate_label(alert):
    """Return True when the alert carries alert_channel=thermal_print_immediate."""
    labels = alert.get("labels", {})
    return labels.get("alert_channel") == "thermal_print_immediate"
def is_batched_label(alert):
    """Return True when the alert carries alert_channel=thermal_print."""
    labels = alert.get("labels", {})
    return labels.get("alert_channel") == "thermal_print"
def add_to_digest(alert):
    """Record one webhook alert in the digest buffer.

    Returns True only when the buffer GREW (a fingerprint not seen before).
    Returns False for every other outcome: thermal printing disabled, a
    resolved alert (which is removed from the buffer if present), or a
    repeat of a fingerprint already buffered (which only refreshes it).
    """
    if not THERMAL_PRINT_ENABLED:
        return False

    key = fingerprint_of(alert)
    resolved = alert.get("status", "firing").lower() == "resolved"

    with _buffer_lock:
        entry = _buffer.get(key)

        if resolved:
            # A resolution never grows the buffer; it only evicts.
            if entry is not None:
                del _buffer[key]
                _stats["buffer_resolved"] += 1
            return False

        if entry is not None:
            # Same fingerprint again: keep first_seen, refresh the rest.
            entry["last_seen"] = time.time()
            entry["alert"] = alert
            _stats["buffer_dedup"] += 1
            return False

        _buffer[key] = {"alert": alert, "first_seen": time.time(), "last_seen": time.time()}
        _stats["buffer_added"] += 1
        return True
def build_digest_payload():
    """Render the current digest buffer as one Print.Web alert payload.

    Returns None when the buffer is empty. Otherwise returns a dict whose
    "message" has one line per alertname (sorted), showing the severities
    seen, the number of buffered alerts, and up to five targets with a
    "(+N)" suffix for the remainder. The buffer itself is not modified.
    """
    with _buffer_lock:
        pending = list(_buffer.values())
    if not pending:
        return None

    def _labels(entry):
        return entry["alert"].get("labels", {})

    def _target(entry):
        # First non-empty identifying label wins; "?" when none is set.
        lbl = _labels(entry)
        return (lbl.get("pod") or lbl.get("instance") or lbl.get("deployment")
                or lbl.get("statefulset") or lbl.get("namespace") or "?")

    grouped = defaultdict(list)
    for entry in pending:
        grouped[_labels(entry).get("alertname", "Unknown")].append(entry)

    summary_lines = []
    for alertname in sorted(grouped):
        members = grouped[alertname]
        shown = ", ".join(_target(m) for m in members[:5])
        overflow = f" (+{len(members)-5})" if len(members) > 5 else ""
        severities = "/".join(sorted({_labels(m).get("severity", "warning") for m in members}))
        summary_lines.append(f"[{severities}] {alertname} x{len(members)}: {shown}{overflow}")

    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    title = f"Alert digest: {len(pending)} firing"
    header = [f"=== {title} ===", f"as of {stamp}", ""]
    footer = [
        "",
        "Stream: #alerts (IRC) | Triage: grafana-noc1.iamworkin.lan",
        "Force-flush: POST irc-notify.monitoring.svc:9119/flush",
    ]
    return {
        "title": title,
        "severity": "Warning",
        "host": "monitoring",
        "message": "\n".join(header + summary_lines + footer),
        "eventId": f"digest-{int(time.time())}",
        "source": "Grafana digest",
        "status": "PROBLEM",
        "acknowledged": False,
    }
def flush_digest():
    """Print the buffered digest and drop the entries that were flushed.

    Returns True when a digest was handed to the printer successfully,
    False when the buffer was empty or the print request failed.

    Only fingerprints that were already buffered before the payload was
    built are removed afterwards. The previous implementation cleared the
    whole buffer after the (blocking) print request, so any alert enqueued
    by another thread while the request was in flight was discarded without
    ever appearing on a digest. With this version such an alert simply
    stays buffered for the next flush; at worst an alert that arrives in
    the tiny window between the key snapshot and the payload build is
    printed on two consecutive digests instead of being lost.

    As before, flushed entries are dropped even when the print request
    fails, so a dead printer cannot make the same digest retry forever.
    """
    with _buffer_lock:
        flushed = list(_buffer)  # fingerprints eligible for removal
    payload = build_digest_payload()
    if payload is None:
        print("[irc-notify] flush: buffer empty, no digest sent", file=sys.stderr)
        return False
    sent = post_thermal(payload, "digest")
    with _buffer_lock:
        for fp in flushed:
            # pop(): the entry may already be gone (resolved meanwhile).
            _buffer.pop(fp, None)
    if sent:
        _stats["digest_flushed"] += 1
    return sent
def digest_loop():
    """Background loop that decides when the digest buffer is flushed.

    Every 15 seconds it flushes when BATCH_INTERVAL_MIN minutes have passed
    since the last flush, or otherwise when the buffer holds at least
    BATCH_MAX_PENDING entries. Any exception is logged and the loop backs
    off for 60 seconds; the loop itself never returns.
    """
    global _last_flush_time
    while True:
        try:
            tick = time.time()
            notice = None
            # The interval check takes precedence over the size check.
            if tick - _last_flush_time >= BATCH_INTERVAL_MIN * 60:
                notice = f"[irc-notify] digest tick: interval reached ({BATCH_INTERVAL_MIN}m); buffer={len(_buffer)}"
            elif len(_buffer) >= BATCH_MAX_PENDING:
                notice = f"[irc-notify] digest tick: buffer full ({len(_buffer)}); force flush"

            if notice is not None:
                print(notice, file=sys.stderr)
                flush_digest()
                _last_flush_time = tick

            time.sleep(15)
        except Exception as err:
            print(f"[irc-notify] digest loop error: {err}", file=sys.stderr)
            time.sleep(60)
if labels.get("alert_channel") == "thermal_print": return True
if labels.get("severity", "").lower() in ("critical", "disaster"): return True
if alert.get("status", "").upper() == "RESOLVED": return False
return False
class Handler(BaseHTTPRequestHandler):
def do_POST(self):
if self.path == "/flush":
ok = flush_digest()
self.send_response(200); self.send_header("Content-Type", "application/json"); self.end_headers()
self.wfile.write(json.dumps({"flushed": ok, "buffer_after": len(_buffer)}).encode())
return
_stats["webhooks_received"] += 1
length = int(self.headers.get("Content-Length", 0))
body = json.loads(self.rfile.read(length)) if length else {}
for alert in body.get("alerts", []):
@@ -1528,56 +1306,22 @@ data:
msg = f"{icon}{sev_tag} {name}: {summary}"
if desc: msg += f"\n {desc}"
send_irc(msg)
# Thermal routing — EVERYTHING (including criticals) goes into
# the hourly digest. Only the explicit `alert_channel=thermal_print_immediate`
# label bypasses, and even that flushes-the-current-digest rather
# than printing a standalone job, so the same fingerprint can't
# spam the printer per webhook cycle.
if status == "RESOLVED":
add_to_digest(alert) # removes from buffer
continue
if is_immediate_label(alert):
# Explicit opt-in for "paper this NOW" — first arrival of a
# new fingerprint triggers an immediate digest flush; repeat
# webhooks for the same fingerprint dedupe in the buffer
# until the next interval or until the alert resolves.
new_in_buffer = add_to_digest(alert)
if new_in_buffer:
global _last_flush_time
flush_digest()
_last_flush_time = time.time()
elif is_critical(alert) or is_batched_label(alert):
add_to_digest(alert)
# else: IRC-only (warnings without thermal_print label)
self.send_response(200); self.send_header("Content-Type", "application/json"); self.end_headers()
if should_print(alert): send_thermal_print(alert)
self.send_response(200)
self.send_header("Content-Type", "application/json")
self.end_headers()
self.wfile.write(b'{"status":"ok"}')
def do_GET(self):
self.send_response(200); self.send_header("Content-Type", "application/json"); self.end_headers()
with _buffer_lock:
alertnames = sorted({it["alert"].get("labels", {}).get("alertname", "?") for it in _buffer.values()})
depth = len(_buffer)
info = {
"service": "irc-notify",
"config": {"thermal_print_enabled": THERMAL_PRINT_ENABLED,
"batch_interval_min": BATCH_INTERVAL_MIN,
"batch_max_pending": BATCH_MAX_PENDING,
"irc_target": f"{IRC_HOST}:{IRC_PORT} {IRC_CHANNEL}",
"print_web_url": PRINT_WEB_URL},
"buffer": {"depth": depth, "alertnames": alertnames,
"seconds_since_last_flush": int(time.time() - _last_flush_time),
"seconds_until_next_flush": max(0, int(BATCH_INTERVAL_MIN*60 - (time.time() - _last_flush_time)))},
"stats": _stats,
}
self.wfile.write(json.dumps(info, indent=2).encode())
self.send_response(200)
self.send_header("Content-Type", "application/json")
self.end_headers()
self.wfile.write(json.dumps({"service":"irc-notify","thermal_print":PRINT_ENABLED}).encode())
def log_message(self, format, *args):
print(f"[irc-notify] {args[0]}", file=sys.stderr)
if __name__ == "__main__":
threading.Thread(target=digest_loop, daemon=True).start()
server = HTTPServer(("0.0.0.0", 9119), Handler)
print(f"[irc-notify] :9119 -> IRC {IRC_HOST}:{IRC_PORT} {IRC_CHANNEL} | thermal={'ON' if THERMAL_PRINT_ENABLED else 'OFF'} | digest={BATCH_INTERVAL_MIN}m max={BATCH_MAX_PENDING}", file=sys.stderr)
print(f"IRC alert relay :9119 -> {IRC_HOST}:{IRC_PORT} {IRC_CHANNEL} (thermal: {PRINT_ENABLED})")
server.serve_forever()
# =============================================================================
@@ -3664,39 +3408,6 @@ data:
relativeTimeRange: {from: 120, to: 0}
datasourceUid: __expr__
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [600], type: gt}}], refId: C}
- orgId: 1
name: CI Runners
folder: CI Alerts
interval: 1m
rules:
- uid: linux-runner-offline
title: LinuxRunnerOffline
condition: C
for: 5m
noDataState: OK
execErrState: Error
annotations:
summary: "Linux CI runner offline: {{ $labels.deployment }}"
description: "A github-runner namespace Deployment has 0 ready replicas for more than 5 minutes. CI jobs targeting that repo will queue until the runner pod restarts and re-registers."
runbook: "1. kubectl -n github-runner get pods -l app.kubernetes.io/name={{ $labels.deployment }} 2. kubectl -n github-runner logs -l app.kubernetes.io/name={{ $labels.deployment }} --tail=50 3. Verify PAT repo access if registration returns 404 4. Verify no RWO PVC is shared by scaled runners"
labels:
severity: warning
service: github-runner
alert_channel: irc
team: ci
data:
- refId: A
relativeTimeRange: {from: 300, to: 0}
datasourceUid: prometheus
model: {expr: 'kube_deployment_status_replicas_ready{namespace="github-runner",deployment=~"github-runner(|-(sharedpos|puppet|signage|dms|telephony|print-web|chat|mysql|kiosk-linux))"} == 0', instant: true, refId: A}
- refId: B
relativeTimeRange: {from: 300, to: 0}
datasourceUid: __expr__
model: {type: reduce, expression: A, reducer: last, refId: B}
- refId: C
relativeTimeRange: {from: 300, to: 0}
datasourceUid: __expr__
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [0], type: gt}}], refId: C}
- orgId: 1
name: Infrastructure
folder: AI Stack Alerts
@@ -3729,24 +3440,25 @@ data:
relativeTimeRange: {from: 120, to: 0}
datasourceUid: __expr__
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [1], type: lt}}], refId: C}
- uid: macmini-runner-offline
title: MacMiniRunnerOffline
- uid: linux-runner-offline
title: LinuxRunnerOffline
condition: C
for: 10m
noDataState: Alerting
execErrState: OK
annotations:
summary: Mac mini GitHub runner offline
description: "One or more macmini-* GitHub Actions runners have not reported online for more than 10 minutes. LaunchDaemons survive reboot and do not require the bluejay GUI session."
runbook: "1. ssh fcadmin@macmini.iamworkin.lan 2. launchctl print system/io.flowercore.github-runner-<slug> 3. Check /Users/fcadmin/Library/Logs/github-runners/<slug>/stderr.log 4. Re-register the repo runner if .runner is missing"
summary: Linux GitHub Actions runner offline
description: "A repo-scoped fc-build-linux runner deployment has no available pod. Jobs will queue at $0 until ArgoCD/K8s returns the runner."
runbook_url: "https://gitea.iamworkin.lan/bluejay/FlowerCore.Notes/src/branch/master/docs/infrastructure/self-hosted-runner-fleet.md"
labels:
severity: warning
service: github-runner
alert_channel: thermal_print
data:
- refId: A
relativeTimeRange: {from: 600, to: 0}
datasourceUid: prometheus
model: {expr: 'min(flowercore_github_runner_online{runner=~"macmini-.*"} or vector(0))', instant: true, refId: A}
model: {expr: 'min by(deployment) (kube_deployment_status_replicas_available{namespace="github-runner",deployment=~"github-runner(|-(puppet|signage|dms|telephony|print-web|chat|mysql|kiosk-linux))"})', instant: true, refId: A}
- refId: B
relativeTimeRange: {from: 600, to: 0}
datasourceUid: __expr__

View File

@@ -24,16 +24,7 @@
# (10.0.57.16:5200), public internet 80/443 (excluding RFC1918), and
# fc-signage:5190 for the signage AAT lane.
# - Ingress: Traefik (4444 + 8089 ACME-solver-style), intra-pod,
# telephony / gitea / fc-system / fc-signage / github-runner namespaces
# on 4444.
#
# 2026-05-25: added github-runner ingress on 4444 so CI jobs running in
# self-hosted runner pods (e.g. FlowerCore.Print.Web `help-screenshots`)
# can reach the grid. Without this allow, the session POST to
# `selenium-hub.selenium.svc.cluster.local:4444` was DNAT'd to the hub
# pod IP and then dropped at the Calico ingress hook — Selenium UI showed
# 0/4 sessions while the .NET HTTP client timed out at 60s. Same family
# as `feedback_netpol_dnat_backend_port`, wrong-source-namespace flavor.
# telephony / gitea / fc-system / fc-signage namespaces on 4444.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
@@ -212,13 +203,6 @@ spec:
ports:
- port: 4444
protocol: TCP
- from:
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: github-runner
ports:
- port: 4444
protocol: TCP
podSelector: {}
policyTypes:
- Ingress

View File

@@ -1,427 +0,0 @@
# Selenium Grid 4 — RKE2 deployment
#
# Hub + chrome + firefox + edge browser nodes serving fleet-wide AAT runs from
# the GitHub Actions self-hosted runners. ArgoCD owns this namespace from
# 2026-05-25 (`infra-selenium` Application; previously these resources were
# orphan kubectl-applied since 2026-03-15).
#
# Endpoints:
# - Internal cluster: http://selenium-hub.selenium.svc.cluster.local:4444
# - LAN LoadBalancer (MetalLB): http://10.0.56.208:4444
# - Traefik public: https://selenium.iamworkin.lan
#
# Browser maxSessions:
# - chrome 2 (bumped from 1 on 2026-05-25 morning-routine — AAT-heavy
# Print.Web help-screenshots was the global bottleneck;
# see commit history for ops/runner-replica-rightsize)
# - firefox 1
# - edge 1
#
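# Screenshots + video recordings are described as landing on NFS, but the only
# recording volume declared in this file is the chrome pod's `selenium-videos`
# emptyDir (5Gi, mounted at /videos by the `video` sidecar) — no NFS volume is
# mounted here. NOTE(review): confirm where the copy to NFS happens.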
# See: CLAUDE.md "Selenium Grid & Visual AAT Testing" + bluejay-infra ADR notes.
---
apiVersion: v1
kind: Service
metadata:
labels:
app: selenium-hub
app.kubernetes.io/name: selenium-hub
app.kubernetes.io/part-of: selenium-grid
name: selenium-hub
namespace: selenium
spec:
ports:
- name: web
port: 4444
targetPort: 4444
- name: publish
port: 4442
targetPort: 4442
- name: subscribe
port: 4443
targetPort: 4443
selector:
app: selenium-hub
type: ClusterIP
---
apiVersion: v1
kind: Service
metadata:
annotations:
metallb.io/ip-allocated-from-pool: bluejay-pool
metallb.universe.tf/loadBalancerIPs: 10.0.56.208
labels:
app: selenium-hub
component: external-access
name: selenium-hub-external
namespace: selenium
spec:
clusterIP: 10.43.90.147
clusterIPs:
- 10.43.90.147
externalTrafficPolicy: Local
healthCheckNodePort: 32213
ports:
- name: web
nodePort: 32411
port: 4444
targetPort: 4444
- name: publish
nodePort: 32068
port: 4442
targetPort: 4442
- name: subscribe
nodePort: 31000
port: 4443
targetPort: 4443
selector:
app: selenium-hub
type: LoadBalancer
---
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app: selenium-hub
app.kubernetes.io/name: selenium-hub
app.kubernetes.io/part-of: selenium-grid
name: selenium-hub
namespace: selenium
spec:
replicas: 1
selector:
matchLabels:
app: selenium-hub
template:
metadata:
labels:
app: selenium-hub
app.kubernetes.io/name: selenium-hub
app.kubernetes.io/part-of: selenium-grid
spec:
containers:
- env:
- name: SE_NODE_SESSION_TIMEOUT
value: '300'
- name: SE_SESSION_REQUEST_TIMEOUT
value: '300'
- name: SE_SESSION_RETRY_INTERVAL
value: '5'
- name: JAVA_OPTS
value: -Xmx512m
image: selenium/hub:4.27.0
livenessProbe:
httpGet:
path: /wd/hub/status
port: 4444
initialDelaySeconds: 30
periodSeconds: 15
timeoutSeconds: 5
name: selenium-hub
ports:
- containerPort: 4444
name: web
- containerPort: 4442
name: publish
- containerPort: 4443
name: subscribe
readinessProbe:
httpGet:
path: /wd/hub/status
port: 4444
initialDelaySeconds: 10
periodSeconds: 5
timeoutSeconds: 5
# Hub baseline working set ~766Mi on 2026-05-25 (75% of prior 1Gi
# limit). Bump to 1.5Gi / 1Gi to keep ~50% headroom; matches the
# stampede-buffer pattern documented for multus
# (feedback_k8s_cni_multus_sizing). CPU left alone — observed 54m
# against a 500m limit, no contention.
resources:
limits:
cpu: 500m
memory: 1536Mi
requests:
cpu: 250m
memory: 1Gi
---
# Chrome browser node for the Selenium Grid, plus an ffmpeg `video` sidecar.
# The node registers with the hub over the event bus (publish 4442 /
# subscribe 4443) and serves its own status endpoint on 5555.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: selenium-node-chrome
    app.kubernetes.io/name: selenium-node-chrome
    app.kubernetes.io/part-of: selenium-grid
  name: selenium-node-chrome
  namespace: selenium
spec:
  replicas: 1
  selector:
    matchLabels:
      app: selenium-node-chrome
  template:
    metadata:
      labels:
        app: selenium-node-chrome
        app.kubernetes.io/name: selenium-node-chrome
        app.kubernetes.io/part-of: selenium-grid
    spec:
      containers:
      - env:
        - name: SE_EVENT_BUS_HOST
          value: selenium-hub
        - name: SE_EVENT_BUS_PUBLISH_PORT
          value: '4442'
        - name: SE_EVENT_BUS_SUBSCRIBE_PORT
          value: '4443'
        - name: SE_NODE_MAX_SESSIONS
          value: '2'
        # Must be 'true' for the value above to take effect: Selenium caps
        # max sessions at the number of available processors unless the
        # override is set, and this container is limited to 1 CPU. With
        # 'false' the node would still advertise a single slot. The firefox
        # and edge nodes already set 'true'.
        - name: SE_NODE_OVERRIDE_MAX_SESSIONS
          value: 'true'
        - name: SE_VNC_NO_PASSWORD
          value: '1'
        - name: SE_SCREEN_WIDTH
          value: '1920'
        - name: SE_SCREEN_HEIGHT
          value: '1080'
        - name: SE_NODE_SESSION_TIMEOUT
          value: '300'
        image: selenium/node-chrome:4.27.0
        livenessProbe:
          httpGet:
            path: /status
            port: 5555
          initialDelaySeconds: 30
          periodSeconds: 15
        name: selenium-chrome
        ports:
        - containerPort: 5555
          name: node
        readinessProbe:
          httpGet:
            path: /status
            port: 5555
          initialDelaySeconds: 15
          periodSeconds: 5
        # Chromium-based browser node. Bumped from 1Gi -> 2Gi (req 512Mi
        # -> 1Gi) on 2026-05-25 — Edge had 51 OOMKills in 5d on the
        # original 1Gi cap (~1 OOM every 2.4h), and Chrome at maxSessions=2
        # was running 684Mi idle on the same cap. Matches the Firefox node's
        # tested-stable 2Gi limit. CPU unchanged.
        resources:
          limits:
            cpu: '1'
            memory: 2Gi
          requests:
            cpu: 500m
            memory: 1Gi
        volumeMounts:
        - mountPath: /dev/shm
          name: dshm
      # Recording sidecar; writes into the pod-local `selenium-videos` volume.
      - env:
        - name: DISPLAY_CONTAINER_NAME
          value: localhost
        - name: SE_SCREEN_WIDTH
          value: '1920'
        - name: SE_SCREEN_HEIGHT
          value: '1080'
        - name: SE_VIDEO_FILE_NAME
          value: auto
        - name: SE_VIDEO_UPLOAD_ENABLED
          value: 'false'
        image: selenium/video:ffmpeg-7.1-20250101
        name: video
        resources:
          limits:
            cpu: 500m
            memory: 768Mi
          requests:
            cpu: 250m
            memory: 384Mi
        volumeMounts:
        - mountPath: /videos
          name: selenium-videos
      volumes:
      # Memory-backed /dev/shm for the browser.
      - emptyDir:
          medium: Memory
          sizeLimit: 2Gi
        name: dshm
      - emptyDir:
          sizeLimit: 5Gi
        name: selenium-videos
---
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app: selenium-node-firefox
app.kubernetes.io/name: selenium-node-firefox
app.kubernetes.io/part-of: selenium-grid
name: selenium-node-firefox
namespace: selenium
spec:
replicas: 1
selector:
matchLabels:
app: selenium-node-firefox
template:
metadata:
labels:
app: selenium-node-firefox
app.kubernetes.io/name: selenium-node-firefox
app.kubernetes.io/part-of: selenium-grid
spec:
containers:
- env:
- name: SE_EVENT_BUS_HOST
value: selenium-hub
- name: SE_EVENT_BUS_PUBLISH_PORT
value: '4442'
- name: SE_EVENT_BUS_SUBSCRIBE_PORT
value: '4443'
- name: SE_NODE_MAX_SESSIONS
value: '1'
- name: SE_NODE_OVERRIDE_MAX_SESSIONS
value: 'true'
- name: SE_VNC_NO_PASSWORD
value: '1'
- name: SE_START_VNC
value: 'false'
- name: SE_SCREEN_WIDTH
value: '1920'
- name: SE_SCREEN_HEIGHT
value: '1080'
- name: SE_NODE_SESSION_TIMEOUT
value: '300'
image: selenium/node-firefox:4.27.0
livenessProbe:
failureThreshold: 5
httpGet:
path: /status
port: 5555
initialDelaySeconds: 30
periodSeconds: 15
timeoutSeconds: 5
name: selenium-firefox
ports:
- containerPort: 5555
name: node
readinessProbe:
failureThreshold: 5
httpGet:
path: /status
port: 5555
initialDelaySeconds: 15
periodSeconds: 5
timeoutSeconds: 5
resources:
limits:
cpu: '1'
memory: 2Gi
requests:
cpu: 500m
memory: 1Gi
volumeMounts:
- mountPath: /dev/shm
name: dshm
volumes:
- emptyDir:
medium: Memory
sizeLimit: 2Gi
name: dshm
---
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app: selenium-node-edge
app.kubernetes.io/name: selenium-node-edge
app.kubernetes.io/part-of: selenium-grid
name: selenium-node-edge
namespace: selenium
spec:
replicas: 1
selector:
matchLabels:
app: selenium-node-edge
template:
metadata:
labels:
app: selenium-node-edge
app.kubernetes.io/name: selenium-node-edge
app.kubernetes.io/part-of: selenium-grid
spec:
containers:
- env:
- name: SE_EVENT_BUS_HOST
value: selenium-hub
- name: SE_EVENT_BUS_PUBLISH_PORT
value: '4442'
- name: SE_EVENT_BUS_SUBSCRIBE_PORT
value: '4443'
- name: SE_NODE_MAX_SESSIONS
value: '1'
- name: SE_NODE_OVERRIDE_MAX_SESSIONS
value: 'true'
- name: SE_VNC_NO_PASSWORD
value: '1'
- name: SE_SCREEN_WIDTH
value: '1920'
- name: SE_SCREEN_HEIGHT
value: '1080'
- name: SE_NODE_SESSION_TIMEOUT
value: '300'
image: selenium/node-edge:4.27.0
livenessProbe:
httpGet:
path: /status
port: 5555
initialDelaySeconds: 30
periodSeconds: 15
name: selenium-edge
ports:
- containerPort: 5555
name: node
readinessProbe:
httpGet:
path: /status
port: 5555
initialDelaySeconds: 15
periodSeconds: 5
# Chromium-based browser node. Bumped from 1Gi -> 2Gi (req 512Mi
# -> 1Gi) on 2026-05-25 — Edge had 51 OOMKills in 5d on the
# original 1Gi cap (~1 OOM every 2.4h), and Chrome at maxSessions=2
# was running 684Mi idle on the same cap. Matches the Firefox node's
# tested-stable 2Gi limit. CPU unchanged.
resources:
limits:
cpu: '1'
memory: 2Gi
requests:
cpu: 500m
memory: 1Gi
volumeMounts:
- mountPath: /dev/shm
name: dshm
volumes:
- emptyDir:
medium: Memory
sizeLimit: 2Gi
name: dshm
---
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
name: selenium-hub
namespace: selenium
spec:
entryPoints:
- websecure
routes:
- kind: Rule
match: Host(`selenium.iamworkin.lan`)
services:
- name: selenium-hub
port: 4444
tls:
secretName: selenium-tls

View File

@@ -1,8 +1,7 @@
# FlowerCore.Telephony - Blazor Server + REST API + Twilio IVR
# ArgoCD managed - BlueJay Lab
# Credentials: 1Password → OnePasswordItem CRD → K8s Secret (twilio-credentials)
# TTS: Piper on GX10 (10.0.56.14:30850, en_US-amy-medium) — endpoint /tts with {"text":"..."}
# edge1 (10.0.57.17:8500, amy-low) kept as warm fallback (revert PiperUrl to roll back)
# TTS: Piper on edge1 (10.0.57.17:8500) — endpoint /tts with {"text":"..."}
# Public: telephony.flowercore.io via Cloudflare origin cert
---
apiVersion: v1
@@ -63,8 +62,7 @@ data:
"Password": "bluejay-asterisk-ari",
"Application": "flowercore-pbx",
"ReconnectDelaySeconds": 5,
"MaxReconnectDelaySeconds": 60,
"WebSocketKeepAliveIntervalSeconds": 30
"MaxReconnectDelaySeconds": 60
},
"Sip": {
"Domain": "10.0.56.207",
@@ -72,7 +70,7 @@ data:
"Transport": "udp"
},
"Tts": {
"PiperUrl": "http://10.0.56.14:30850",
"PiperUrl": "http://10.0.57.17:8500",
"DefaultEngine": "piper",
"SampleRate": 8000
},
@@ -116,9 +114,6 @@ spec:
app: telephony-web
template:
metadata:
annotations:
fc.flowercore.io/healthz-anon: "true"
fc.flowercore.io/probe-path: "/health"
labels:
app: telephony-web
spec:
@@ -156,7 +151,7 @@ spec:
topologyKey: kubernetes.io/hostname
containers:
- name: telephony-web
image: localhost/fc-telephony-web:v20260614-arifix
image: localhost/fc-telephony-web:v202604252156
imagePullPolicy: Never
securityContext:
readOnlyRootFilesystem: true
@@ -166,7 +161,6 @@ spec:
ports:
- containerPort: 5100
name: http
# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
env:
- name: Telephony__Twilio__AccountSid
valueFrom:
@@ -186,16 +180,6 @@ spec:
name: twilio-credentials
key: DefaultFromNumber
optional: true
# Env vars OVERRIDE appsettings.Production.json in ASP.NET Core config.
# These were previously applied live-only (kubectl) and drifted from git;
# codified here so git is the source of truth. Tts__PiperUrl is the real
# TTS cutover lever (the configmap "Tts" block is shadowed by this env).
- name: Tts__PiperUrl
value: "http://10.0.56.14:30850" # GX10 amy-medium; edge1 10.0.57.17:8500 = rollback
- name: Ari__Username
value: "flowercore"
- name: Ari__Password
value: "bluejay-asterisk-ari"
volumeMounts:
- name: telephony-config
mountPath: /app/appsettings.Production.json
@@ -332,14 +316,7 @@ spec:
protocol: UDP
- port: 53
protocol: TCP
# Allow Piper TTS on GX10 (10.0.56.14:30850) — primary
- to:
- ipBlock:
cidr: 10.0.56.14/32
ports:
- port: 30850
protocol: TCP
# Allow Piper TTS on edge1 (10.0.57.17:8500) — warm fallback / rollback target
# Allow Piper TTS on edge1 (10.0.57.17:8500)
- to:
- ipBlock:
cidr: 10.0.57.17/32
@@ -410,3 +387,4 @@ spec:

View File

@@ -12,27 +12,25 @@ Source: `D:\git\FlowerCore\FlowerCore.WorldBuilder` (master)
in pfSense Unbound before this manifest is applied, or cert-manager
HTTP-01 silently exponential-backs-off ~2h.
Memory: `feedback_pfsense_dns_required_for_acme`.
2. **Image import to ALL Ready RKE2 nodes** — pod can currently schedule to
`rke2-server` (10.0.56.11) and `rke2-agent1` (10.0.56.12). Build with:
2. **Image import to ALL RKE2 nodes** — pod can schedule to any of
`rke2-server` (10.0.56.11), `rke2-agent1` (10.0.56.12),
`rke2-agent2` (10.0.56.13). Build with:
```bash
bash deploy/build.sh # in FlowerCore.WorldBuilder repo
mkdir -p artifacts/deploy
podman save localhost/fc-worldbuilder:v<TAG> -o artifacts/deploy/fc-worldbuilder-v<TAG>.tar
for h in 10.0.56.11 10.0.56.12; do
ssh fcadmin@$h "mkdir -p /home/fcadmin/.fcv"
scp artifacts/deploy/fc-worldbuilder-v<TAG>.tar fcadmin@$h:/home/fcadmin/.fcv/
podman save localhost/fc-worldbuilder:v<TAG> -o /tmp/fc-worldbuilder-v<TAG>.tar
for h in 10.0.56.11 10.0.56.12 10.0.56.13; do
scp /tmp/fc-worldbuilder-v<TAG>.tar fcadmin@$h:/tmp/
ssh fcadmin@$h \
"sudo /var/lib/rancher/rke2/bin/ctr -a /run/k3s/containerd/containerd.sock \
-n k8s.io images import /home/fcadmin/.fcv/fc-worldbuilder-v<TAG>.tar"
-n k8s.io images import /tmp/fc-worldbuilder-v<TAG>.tar"
done
```
Memory: `feedback_rke2_image_import_per_node_scp`.
3. **Bump image tag** in `worldbuilder.yaml` and git push.
ArgoCD ApplicationSet picks up within ~3 minutes.
4. **First production render** — verify
`https://worldbuilder.iamworkin.lan/healthz`, open
`https://worldbuilder.iamworkin.lan/settings`, and confirm the image backend
reports ComfyUI before running an operator-owned render lane.
4. **First production render** — open `https://worldbuilder.iamworkin.lan`,
create World → Character → Storyboard → ExportJob, confirm artifact
downloads. ComfyUI lives on BLUEJAY-WS at `http://10.0.56.20:8188`.
## Health probes
@@ -55,8 +53,8 @@ Source: `D:\git\FlowerCore\FlowerCore.WorldBuilder` (master)
## Image generation backend
The live internal profile now uses
`FlowerCore:WorldBuilder:ImageGeneration:ClientMode=comfyui` with
`BaseUrl=http://10.0.56.20:8188` on BLUEJAY-WS (R9700 / gfx1201 / ROCm 7.2).
Keep the public host pre-staging disabled unless the five safe-to-expose gates
are rechecked; the live GPU lane is operator-owned and internal-only.
`FlowerCore:WorldBuilder:ImageGeneration:BaseUrl=http://10.0.56.20:8188` —
ComfyUI runs on BLUEJAY-WS Windows (R9700 / gfx1201 / ROCm 7.2.1). Pod reaches
the workstation directly across the 10.0.56.0/24 VLAN (no Podman-style host-
filter issues — K8s pods route via Calico, which is L3-routed across the
VLAN).

View File

@@ -5,10 +5,10 @@
#
# Image build (BLUEJAY-WS):
# bash deploy/build.sh # in FlowerCore.WorldBuilder repo
# podman save localhost/fc-worldbuilder:v<TAG> -o artifacts/deploy/fc-worldbuilder-v<TAG>.tar
# for h in 10.0.56.11 10.0.56.12; do
# scp artifacts/deploy/fc-worldbuilder-v<TAG>.tar fcadmin@$h:/home/fcadmin/.fcv/
# ssh fcadmin@$h "sudo /var/lib/rancher/rke2/bin/ctr -a /run/k3s/containerd/containerd.sock -n k8s.io images import /home/fcadmin/.fcv/fc-worldbuilder-v<TAG>.tar"
# podman save localhost/fc-worldbuilder:v<TAG> -o /tmp/fc-worldbuilder-v<TAG>.tar
# for h in 10.0.56.11 10.0.56.12 10.0.56.13; do
# scp /tmp/fc-worldbuilder-v<TAG>.tar fcadmin@$h:/tmp/
# ssh fcadmin@$h "sudo /var/lib/rancher/rke2/bin/ctr -a /run/k3s/containerd/containerd.sock -n k8s.io images import /tmp/fc-worldbuilder-v<TAG>.tar"
# done
---
apiVersion: v1
@@ -16,11 +16,7 @@ kind: Namespace
metadata:
name: fc-worldbuilder
labels:
app.kubernetes.io/name: fc-worldbuilder
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
---
# SQLite DB + generated image gallery + PDF/PNG exports.
# Longhorn RWO — single replica with `Recreate` rollout strategy keeps it safe.
@@ -29,13 +25,6 @@ kind: PersistentVolumeClaim
metadata:
name: worldbuilder-data
namespace: fc-worldbuilder
labels:
app.kubernetes.io/name: worldbuilder-data
app.kubernetes.io/component: storage
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
spec:
accessModes:
- ReadWriteOnce
@@ -51,13 +40,7 @@ metadata:
namespace: fc-worldbuilder
labels:
app.kubernetes.io/name: worldbuilder-web
app.kubernetes.io/component: web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
annotations:
flowercore.io/traceability-standard: k8s-pod-ownership-and-traceability-standard
spec:
replicas: 1
revisionHistoryLimit: 3
@@ -71,18 +54,11 @@ spec:
metadata:
labels:
app.kubernetes.io/name: worldbuilder-web
app.kubernetes.io/component: web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
annotations:
fc.flowercore.io/healthz-anon: "true"
fc.flowercore.io/probe-path: "/healthz"
prometheus.io/scrape: "true"
prometheus.io/port: "8080"
prometheus.io/path: "/metrics/prometheus"
flowercore.io/audit-trace-id: "worldbuilder-runtime-demo"
spec:
securityContext:
fsGroup: 1654
@@ -90,12 +66,11 @@ spec:
containers:
- name: web
# Bump tag for each rebuild. Initial deploy: v202605062048
image: localhost/fc-worldbuilder:v20260613-e4-about-edd6efc
image: localhost/fc-worldbuilder:v202605062048
imagePullPolicy: Never
ports:
- containerPort: 8080
name: http
# fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178) before any future public/OIDC flip.
env:
- name: ASPNETCORE_URLS
value: "http://+:8080"
@@ -117,16 +92,11 @@ spec:
value: "/data/gallery"
- name: FlowerCore__WorldBuilder__Export__RootPath
value: "/data/exports"
# Operator-approved live GPU lane. Internal-only host targets
# BLUEJAY-WS ComfyUI; keep public host pre-staging disabled below.
# ComfyUI on BLUEJAY-WS (R9700 / gfx1201 / ROCm 7.2.1).
- name: FlowerCore__WorldBuilder__ImageGeneration__BaseUrl
value: "http://10.0.56.20:8188"
- name: FlowerCore__WorldBuilder__ImageGeneration__ClientMode
value: "comfyui"
- name: FlowerCore__WorldBuilder__ImageGeneration__BackendId
value: "comfyui"
- name: FlowerCore__WorldBuilder__ImageGeneration__VisitorSafe
value: "false"
resources:
# Cluster CPU-request budget runs hot (99% on all 3 nodes at deploy
# time) while actual CPU usage is well below capacity. Idle Blazor
@@ -195,11 +165,7 @@ metadata:
namespace: fc-worldbuilder
labels:
app.kubernetes.io/name: worldbuilder-web
app.kubernetes.io/component: web
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
spec:
type: ClusterIP
selector:
@@ -214,13 +180,6 @@ kind: Certificate
metadata:
name: worldbuilder-web-tls
namespace: fc-worldbuilder
labels:
app.kubernetes.io/name: worldbuilder-web-tls
app.kubernetes.io/component: ingress
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
spec:
secretName: worldbuilder-web-tls
issuerRef:
@@ -241,13 +200,6 @@ kind: IngressRoute
metadata:
name: worldbuilder-web
namespace: fc-worldbuilder
labels:
app.kubernetes.io/name: worldbuilder-web
app.kubernetes.io/component: ingress
app.kubernetes.io/part-of: flowercore
app.kubernetes.io/managed-by: argocd
flowercore.io/tenant-id: system
flowercore.io/created-by: bluejay-infra
spec:
entryPoints:
- websecure
@@ -259,26 +211,3 @@ spec:
port: 80
tls:
secretName: worldbuilder-web-tls
# ---- PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only) ----
# When the operator decides to expose worldbuilder-web publicly, uncomment + update the host,
# then verify the five safe-to-expose gates (authentik-safe-to-expose-readiness-2026-06-07.md section 2).
#
# --- IngressRoute ---
# apiVersion: traefik.io/v1alpha1
# kind: IngressRoute
# metadata:
# name: worldbuilder-web-public
# namespace: fc-worldbuilder
# spec:
# entryPoints: [websecure]
# routes:
# - match: Host(`worldbuilder.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
# kind: Rule
# middlewares:
# - name: worldbuilder-web-public-profile-header # injects entitlement profile
# services:
# - name: worldbuilder-web
# port: 80
# tls: {}
# # POST/PUT/PATCH/DELETE miss every route -> Traefik 404 -> no admin writes on the public surface.
# # Reference pattern: dist.flowercore.io (already live + method-gated; do not edit that one).

View File

@@ -1,84 +0,0 @@
# openvoxserver Quadlet Durability
This runbook documents the noc1 `openvoxserver` durability fix for the Puppet control-repo deploy path. The service is a noc1 host artifact, not an ArgoCD application, so discovery always starts on noc1 rather than in `apps/*`.
## Current State
As of the Sprint 32 Cx-12 apply on 2026-05-17:
- `/etc/containers/systemd/openvoxserver.container` has a `GIT_SSH_COMMAND` environment entry that points at the persisted serverdata deploy key.
- `/etc/systemd/system/openvoxserver-safeconfig.service` is enabled and active, and reapplies `git config --global --add safe.directory *` inside the running container.
- `/opt/puppet/r10k-deploy.sh` self-heals before each fetch by setting `safe.directory`, the repo-local `core.sshCommand`, and the persisted `known_hosts` file when needed.
- `puppet-deploy.service` exits `0/SUCCESS` after the apply and the control repo reports `HEAD == origin/master`.
- `systemctl cat openvoxserver` does not currently resolve to a generated unit on noc1. The container is running through Podman with `restart=always`, so destructive recreate smoke must not run until the generated unit is present.
## Discovery
Run every command through noc1 as `fcadmin`; do not assume BLUEJAY-WS can reach container-local surfaces directly.
```bash
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "hostname && sudo -n true"
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo find /etc/containers/systemd /usr/share/containers/systemd /etc/systemd/system -name 'openvoxserver*' 2>/dev/null"
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo sed -n '1,220p' /etc/containers/systemd/openvoxserver.container"
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo systemctl cat puppet-deploy.service"
```
If a future noc1 profile manages these files, update the Puppet control repo and let `puppet-deploy.service` apply the change. On 2026-05-17, host `puppet` was not installed, so Cx-12 used a direct noc1 host edit.
## Durable Fix Shape
The Quadlet keeps the deploy key as a path reference only:
```ini
Environment=GIT_SSH_COMMAND=ssh -i /opt/puppetlabs/server/data/puppetserver/.puppet-deploy-key -o StrictHostKeyChecking=yes -o IdentitiesOnly=yes -o UserKnownHostsFile=/opt/puppetlabs/server/data/puppetserver/.known_hosts
```
The safeconfig service is intentionally independent of `openvoxserver.service` until the generated unit exists. It waits for the `openvoxserver` container name and then runs:
```bash
/usr/bin/podman exec openvoxserver git config --global --add safe.directory '*'
```
The deploy script self-heals inside the container before it fetches the control repo:
```bash
git config --global --add safe.directory "*" 2>/dev/null || true
DEPLOY_KEY="/opt/puppetlabs/server/data/puppetserver/.puppet-deploy-key"
KNOWN_HOSTS="/opt/puppetlabs/server/data/puppetserver/.known_hosts"
REPO="/etc/puppetlabs/code/environments/production"
export GIT_SSH_COMMAND="ssh -i $DEPLOY_KEY -o StrictHostKeyChecking=yes -o IdentitiesOnly=yes -o UserKnownHostsFile=$KNOWN_HOSTS"
git -C "$REPO" config core.sshCommand "$GIT_SSH_COMMAND" 2>/dev/null || true
```
## Validation
Non-destructive validation:
```bash
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo grep -n 'GIT_SSH_COMMAND' /etc/containers/systemd/openvoxserver.container"
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo systemctl status openvoxserver-safeconfig.service --no-pager -l"
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo systemctl start puppet-deploy.service && sudo systemctl status puppet-deploy.service --no-pager -l"
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo podman exec openvoxserver git -C /etc/puppetlabs/code/environments/production config --get core.sshCommand"
```
Destructive recreate smoke is opt-in only:
```bash
scp scripts/monitoring/openvox-recreate-smoke.sh fcadmin@10.0.56.10:/tmp/openvox-recreate-smoke.sh
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "chmod +x /tmp/openvox-recreate-smoke.sh && sudo OPENVOX_RECREATE_SMOKE=1 /tmp/openvox-recreate-smoke.sh"
```
Do not run the smoke during normal sprint work. It stops and removes the production container before starting it again through systemd, and it now refuses to continue unless `systemctl cat openvoxserver` succeeds.
## Credential Rotation Note
When rotating the Puppet deploy key, update the persisted serverdata copy on noc1:
```bash
sudo install -m 0600 -o root -g root <new-deploy-key> /opt/puppet/serverdata/.puppet-deploy-key
sudo podman exec openvoxserver sh -c "ssh-keyscan github.com > /opt/puppetlabs/server/data/puppetserver/.known_hosts"
sudo systemctl start openvoxserver-safeconfig.service
sudo systemctl start puppet-deploy.service
```
Never commit the deploy key or print it in logs.

View File

@@ -1,129 +0,0 @@
# authentik-tenant-mapping-sync — GATED manifest staging
**Status:** GATED (suspended). **ADR:** ADR-198 §2.A P1 (Au-1 / Au-3 substrate). **Pairs:** Codex **Cx2-7**.
This directory is a **Notes staging area**, NOT a deploy target. The orchestrator relocates
`cronjob.yaml` into a `gated/` path **outside** `bluejay-infra/apps/` so ArgoCD's `apps/*`
directory generator never picks it up. Nothing here runs until the activation steps below.
## What this is
A nightly Kubernetes `CronJob` that runs
[`scripts/authentik/authentik-tenant-mapping-sync.py`](../../../scripts/authentik/authentik-tenant-mapping-sync.py)
(Notes repo). The script:
- reads the 1Password Document **`flowercore-tenant-mapping`** (vault `IAmWorkin`, field
`mapping`) via **1Password Connect REST** — never the 1Password CLI/desktop (operator hard rule);
- parses + light-validates the mapping JSON (schema: [`authentik-oidc-tenant-mapping-schema.md`](../../standards/authentik-oidc-tenant-mapping-schema.md) — `version==1`, `mappings[]` with `authentikGroup` / `fcTenantId` / `fcRole`);
- reconciles each distinct `authentikGroup` into Authentik `/api/v3/core/groups/`:
create-if-missing, PATCH-managed-markers-on-drift, **never delete or disable unmanaged groups**;
- emits structured (Serilog-shaped JSON) logs and exits 0 on success.
It is the **slow nightly fix-up path**. The **<1s hot path** stays the MCP tool
`authentik_sync_tenant_mapping` (schema doc §6.2 force-broadcast). This CronJob does NOT
broadcast SignalR — group reconcile is its only side effect; services pick up mapping changes
on their own 5-minute 1P refresh.
## Why it is GATED (two locks)
1. **`spec.suspend: true`** in `cronjob.yaml` — belt-and-suspenders so even if applied it never fires.
2. **Lives outside `apps/`** — staged here in Notes; ArgoCD does not manage it.
Both must be cleared to go live. This pairs Codex **Cx2-7**: do not activate ahead of the Au-3
public-go for tenant self-registration.
## Files
| File | Purpose |
|------|---------|
| `cronjob.yaml` | The suspended `CronJob` + the script-delivery `ConfigMap` (placeholder body). |
| `README.md` | This file. |
| `scripts/authentik/authentik-tenant-mapping-sync.py` | The reconcile script (canonical source; NOT in this dir). |
## Secrets (referenced, not invented)
No secret **values** appear in `cronjob.yaml` — only `secretKeyRef`s:
- **`AUTHENTIK_TOKEN`** ← `Secret authentik/authentik-credentials` key `BOOTSTRAP_ADMIN_TOKEN`
(already exists; the same token `provision-oidc-client.py` reads). **Au-9 caveat:** this is the
never-rotated bootstrap token — when `/rotate-password rotate authentik` (Au-9) lands, this
CronJob is one of its fan-out consumers.
- **`OP_TOKEN`** ← `Secret authentik/tenant-mapping-sync-op-token` key `token`.
### OP_TOKEN cross-namespace
The canonical 1P Connect token Secret is `onepassword-system/onepassword-token`, but this
CronJob runs in the `authentik` namespace and K8s Secrets are namespace-scoped. Pick one at
activation:
- **Option A (copy, simplest).** Mint a same-namespace copy right before un-suspending:
```sh
kubectl get secret onepassword-token -n onepassword-system -o jsonpath='{.data.token}' \
| base64 -d \
| kubectl create secret generic tenant-mapping-sync-op-token -n authentik \
--from-file=token=/dev/stdin --dry-run=client -o yaml | kubectl apply -f -
```
(Re-run whenever the Connect token rotates — add this CronJob to the **Au-10** Connect-token
fan-out checklist so the copy can't go stale.)
- **Option B (CRD, preferred long-term).** Use an `OnePasswordItem` CRD
(`feedback_1password_operator_pattern`) so the 1P operator mints/refreshes
`authentik/tenant-mapping-sync-op-token` automatically — no manual copy, rotation-safe.
> If neither secret exists yet, that's fine **while suspended** — the job never schedules.
## How to ACTIVATE (at Au-3 public-go)
1. **Pre-flight (workstation dry-run, writes nothing):**
```sh
export AUTHENTIK_TOKEN=... # or let it read authentik/authentik-credentials via kubectl
export OP_TOKEN=... # or rely on credential-helper.sh get_op_token (fcadmin@noc1)
python scripts/authentik/authentik-tenant-mapping-sync.py --dry-run --verbose
```
Confirm the planned create/update set matches the 1P mapping document.
2. **Provide `OP_TOKEN` in-cluster** — Option A or B above.
3. **Materialize the script ConfigMap from the canonical file** (do NOT hand-edit a copy into
`cronjob.yaml` — the embedded body is a deliberate placeholder):
```sh
kubectl create configmap authentik-tenant-mapping-sync-script -n authentik \
--from-file=authentik-tenant-mapping-sync.py=scripts/authentik/authentik-tenant-mapping-sync.py \
--dry-run=client -o yaml | kubectl apply -f -
```
(Or, in the imaged future per ADR-198 §2.B P3, bake the script into `fc-runtime-base` and
drop the ConfigMap volume.)
4. **Relocate into bluejay-infra** — move `cronjob.yaml` into a `gated/` (or `apps/`) path in
`bluejay-infra` per the orchestrator's placement decision. If under `apps/`, ArgoCD will sync it.
5. **Un-suspend** — set `spec.suspend: false` (commit in `bluejay-infra` so ArgoCD selfHeal
doesn't revert), or one-off:
```sh
kubectl patch cronjob authentik-tenant-mapping-sync -n authentik \
-p '{"spec":{"suspend":false}}'
```
6. **Smoke (VG-A1):** trigger an immediate run and check the structured logs:
```sh
kubectl create job --from=cronjob/authentik-tenant-mapping-sync tms-smoke -n authentik
kubectl logs -n authentik job/tms-smoke
```
Then edit a mapping entry in 1P and confirm the next run reconciles the group; the <1s
propagation still comes from the MCP `authentik_sync_tenant_mapping` force-broadcast.
## Rollback
Re-suspend (`spec.suspend: true`) or delete the CronJob. The script never deletes Authentik
groups, so a bad run can only over-create groups present in the mapping — remove any unwanted
group by hand in the Authentik admin UI. No data loss path.
## Idempotency / safety summary
- Re-running is a no-op when groups already match (mirrors `provision-oidc-client.py`).
- Only the managed attribute block (`fc:managed-by` / `fc:tenant` / `fc:role` / optional
`fc:label` / `fc:regulated` / `fc:strict-mode`) is asserted; group parent/users/roles are
never touched.
- Wildcard SuperAdmin entries (`fcTenantId: "*"`) do not create a per-tenant group.
- `--dry-run` prints the plan and writes nothing — always run it first.
## Cross-links
- [`docs/standards/auth-acl-unattended-lifecycle-plan.md`](../../standards/auth-acl-unattended-lifecycle-plan.md) — ADR-198; Au-1/Au-3 lanes, VG-A1/A2.
- [`docs/standards/authentik-oidc-tenant-mapping-schema.md`](../../standards/authentik-oidc-tenant-mapping-schema.md) — the mapping JSON shape + 1P item layout (§2/§3).
- [`scripts/authentik/provision-oidc-client.py`](../../../scripts/authentik/provision-oidc-client.py) — sibling idempotent provisioner (same API + posture).
- [`scripts/credential-helper.sh`](../../../scripts/credential-helper.sh) — `get_op_token` 1P Connect bootstrap (fcadmin@noc1).

View File

@@ -1,151 +0,0 @@
# =====================================================================================
# authentik-tenant-mapping-sync — GATED nightly CronJob (Au-3 / ADR-198 §2.A P1)
#
# STATUS: GATED. spec.suspend: true (belt-and-suspenders). This manifest lives in a Notes
#   STAGING path (docs/gated-manifests/) and is NOT under bluejay-infra apps/, so ArgoCD
#   does not deploy it. It does NOTHING until Au-3 public-go (see README.md in this dir).
#
# WHAT IT RUNS: scripts/authentik/authentik-tenant-mapping-sync.py (Notes repo) — reads the
#   1Password Document `flowercore-tenant-mapping` via Connect REST and reconciles its
#   mappings[].authentikGroup entries into Authentik groups (idempotent; never deletes
#   unmanaged groups). Pairs Codex Cx2-7.
#
# SECRETS (referenced, NOT invented — no secret VALUES in this file):
#   AUTHENTIK_TOKEN <- Secret authentik/authentik-credentials key BOOTSTRAP_ADMIN_TOKEN (exists)
#   OP_TOKEN        <- Secret authentik/tenant-mapping-sync-op-token key token
#                      (a copy of onepassword-system/onepassword-token — see README "OP_TOKEN
#                      cross-namespace" for the one-liner that mints it; OR mint via the
#                      OnePasswordItem CRD per feedback_1password_operator_pattern).
#
# The script is delivered via the ConfigMap below (same pattern as guacamole guac-k8s-sync).
# When this lane is libraryized/imaged later (ADR-198 §2.B P3) this ConfigMap can be replaced
# by a baked image; for now ConfigMap-delivery keeps the script the single source of truth.
# =====================================================================================
apiVersion: batch/v1
kind: CronJob
metadata:
  name: authentik-tenant-mapping-sync
  namespace: authentik
  labels:
    app.kubernetes.io/name: authentik-tenant-mapping-sync
    app.kubernetes.io/component: sync
    app.kubernetes.io/part-of: flowercore-identity
    flowercore.io/adr: "198"
    flowercore.io/gated: "true"
  annotations:
    flowercore.io/gate: "Au-3 public-go — suspended until tenant self-registration goes live"
    flowercore.io/pairs-with: "Codex Cx2-7"
spec:
  # GATE: suspended so it never fires until an operator un-suspends at Au-3 public-go.
  suspend: true
  # Nightly at 03:17 (off-peak; jittered minute to avoid colliding with other 03:00 jobs).
  schedule: "17 3 * * *"
  # Forbid: a new run is skipped while the previous run is still active.
  concurrencyPolicy: Forbid
  startingDeadlineSeconds: 600
  successfulJobsHistoryLimit: 3
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      backoffLimit: 2
      activeDeadlineSeconds: 600
      template:
        metadata:
          labels:
            app.kubernetes.io/name: authentik-tenant-mapping-sync
            app.kubernetes.io/component: sync
        spec:
          restartPolicy: OnFailure
          # Pod-level hardening: non-root UID/GID and the runtime's default seccomp profile.
          securityContext:
            runAsNonRoot: true
            runAsUser: 65532
            runAsGroup: 65532
            fsGroup: 65532
            seccompProfile:
              type: RuntimeDefault
          containers:
            - name: sync
              # python:3.12-slim is sufficient: the script uses only the stdlib (urllib/json/ssl).
              # No pip install needed. Pin a digest at activation time for air-gap reproducibility.
              image: python:3.12-slim
              imagePullPolicy: IfNotPresent
              command:
                - python3
                - /scripts/authentik-tenant-mapping-sync.py
              # NOTE: no --dry-run here -> this is the real reconcile. Operators wanting a
              # dry-run first should `kubectl create job --from=cronjob/... ` with the arg
              # appended, or run the script from a workstation. See README.
              env:
                - name: AUTHENTIK_URL
                  value: "https://id.iamworkin.lan"
                - name: OP_CONNECT_URL
                  value: "http://10.0.56.10:8180/v1" # port 8180, NOT 8443
                - name: OP_VAULT_ID
                  value: "qaphopopkryhbg353ukzhhuqoq" # IAmWorkin
                - name: TENANT_MAPPING_ITEM
                  value: "flowercore-tenant-mapping"
                - name: TENANT_MAPPING_FIELD
                  value: "mapping"
                - name: AUTHENTIK_TOKEN
                  valueFrom:
                    secretKeyRef:
                      name: authentik-credentials
                      key: BOOTSTRAP_ADMIN_TOKEN
                - name: OP_TOKEN
                  valueFrom:
                    secretKeyRef:
                      # A same-namespace copy of onepassword-system/onepassword-token.
                      # See README "OP_TOKEN cross-namespace". Until Au-3 this Secret need
                      # not exist (the job is suspended).
                      name: tenant-mapping-sync-op-token
                      key: token
              resources:
                requests:
                  cpu: 25m
                  memory: 64Mi
                limits:
                  cpu: 250m
                  memory: 128Mi
              # Container-level hardening: no privilege escalation, read-only rootfs, no capabilities.
              securityContext:
                allowPrivilegeEscalation: false
                readOnlyRootFilesystem: true
                capabilities:
                  drop: ["ALL"]
              volumeMounts:
                - name: script
                  mountPath: /scripts
                  readOnly: true
          volumes:
            - name: script
              configMap:
                name: authentik-tenant-mapping-sync-script
                # Octal 0555: read + execute for all, no write.
                defaultMode: 0555
---
# The reconcile script, delivered as a ConfigMap (single source of truth = the Notes repo
# scripts/authentik/authentik-tenant-mapping-sync.py). At activation, regenerate this
# ConfigMap from the live script so the two never drift, e.g.:
#   kubectl create configmap authentik-tenant-mapping-sync-script -n authentik \
#     --from-file=authentik-tenant-mapping-sync.py=scripts/authentik/authentik-tenant-mapping-sync.py \
#     --dry-run=client -o yaml > docs/gated-manifests/authentik-tenant-sync/configmap.script.yaml
# (kept as a placeholder body here so the manifest set is self-describing; the real body is
# the script file — DO NOT hand-edit a divergent copy into this ConfigMap.)
apiVersion: v1
kind: ConfigMap
metadata:
  name: authentik-tenant-mapping-sync-script
  namespace: authentik
  labels:
    app.kubernetes.io/name: authentik-tenant-mapping-sync
    app.kubernetes.io/component: sync
    flowercore.io/gated: "true"
  annotations:
    flowercore.io/source: "scripts/authentik/authentik-tenant-mapping-sync.py (Notes repo) — regenerate at activation, do not hand-edit"
data:
  authentik-tenant-mapping-sync.py: |
    # PLACEHOLDER — regenerate from the canonical script at activation (see annotation above).
    # The Notes repo file scripts/authentik/authentik-tenant-mapping-sync.py is the source of
    # truth; embedding a hand-copy here would drift. The orchestrator (or the activation
    # runbook) materializes this ConfigMap from the live script via `kubectl create configmap
    # ... --from-file=...` before un-suspending the CronJob.
    import sys
    sys.exit("authentik-tenant-mapping-sync ConfigMap not materialized from the canonical "
             "script — regenerate with kubectl create configmap --from-file before activation.")

View File

@@ -1,39 +0,0 @@
# Public-TLS substrate (gated)
**Lane:** Cl-infra-2 (deep-regroup 2026-06-13). **Status:** authored, **NOT applied** — operator-gated.
This directory holds the Let's Encrypt + isolation substrate for **public** multi-tenant
web hosting. It lives **outside `apps/`** on purpose: the bluejay-infra ApplicationSet only
reconciles `apps/*`, so nothing here is auto-applied. Applying a cert-manager ACME
`ClusterIssuer` registers an ACME account immediately, so these stay inert until the
operator opens the web-hosting public-exposure gate (**R-1**).
## What's here
| File | What | Activate when |
|---|---|---|
| `letsencrypt-issuers.yaml` | `letsencrypt-staging` + `letsencrypt-prod` ClusterIssuers (HTTP-01 via Traefik; DNS-01 stub for wildcards) | Public-go. Move to `apps/cluster-issuers/`, **staging first**. |
| `tenant-networkpolicy-template.yaml` | Per-tenant default-deny + allowlist NetworkPolicy (Traefik ingress, CoreDNS, own-DB egress only) | Rendered per tenant at provision time (Wh-C2 isolation). |
## The gate
Public exposure is **NO-GO** until the §6 go/no-go checklist in
[`docs/standards/web-hosting-production-readiness-plan.md`](../../../FlowerCore.Notes/docs/standards/web-hosting-production-readiness-plan.md)
is green (currently 14/14 red) **and** the operator explicitly opens R-1. Internal
`*.iamworkin.lan` TLS stays on **step-ca** (`apps/fc-dns/fc-dns.yaml` → ClusterIssuer `step-ca-dns01`);
these LE issuers are **only** for public tenant domains.
## Pairing
- **Codex Wh-C1** consumes `letsencrypt-staging`/`-prod` for hybrid public TLS on
FlowerCore.PHP/MySQL/DNS.
- **Codex Wh-C2** consumes the NetworkPolicy template for cross-tenant isolation suites.
## Activation checklist (public-go)
1. Wire a public DNS-01 solver (Cloudflare/Namecheap webhook) **or** confirm public tenant
domains route HTTP-01 to the cluster ingress.
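2. `git mv gated/public-tls/letsencrypt-issuers.yaml apps/cluster-issuers/` — the file defines both issuers, so both ACME accounts register on apply; issue from `letsencrypt-staging` only at this step.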
3. Issue one **staging** cert for a throwaway public domain; verify the chain in a browser.
4. Flip that tenant's Certificate `issuerRef` to `letsencrypt-prod`; mind LE rate limits.
5. Render `tenant-networkpolicy-template.yaml` per tenant; run the Wh-C2 negative suites.

View File

@@ -1,78 +0,0 @@
# ============================================================================
# Let's Encrypt ClusterIssuers — PUBLIC TLS substrate (Cl-infra-2, deep-regroup 2026-06-13)
# ============================================================================
# GATED. This file lives OUTSIDE apps/ on purpose, so the bluejay-infra
# ApplicationSet does NOT auto-apply it. Applying a cert-manager ACME
# ClusterIssuer registers an ACME account immediately, so we keep these inert
# until the operator opens the web-hosting public-exposure gate (R-1; the §6
# go/no-go checklist in docs/standards/web-hosting-production-readiness-plan.md
# is currently 14/14 red).
#
# Pairs with Codex Wh-C1 (FlowerCore.PHP/MySQL/DNS hybrid public TLS) and
# Wh-C2 (isolation). Internal *.iamworkin.lan certs STAY on step-ca
# (apps/fc-dns/fc-dns.yaml: ClusterIssuer step-ca-dns01) — these LE issuers are
# ONLY for public tenant domains.
#
# TO ACTIVATE (operator public-go):
#   1. Confirm a public DNS-01 solver is wired (Cloudflare/Namecheap webhook) OR
#      that public tenant domains route HTTP-01 to the cluster's public ingress.
#   2. Move this file to apps/cluster-issuers/ (the ApplicationSet will create
#      infra-cluster-issuers and apply it), staging FIRST.
#   3. Issue ONE staging cert for a throwaway public domain, verify the chain,
#      THEN switch that tenant's Certificate issuerRef to letsencrypt-prod.
#   4. Mind LE prod rate limits (50 certs/registered-domain/week, 5 dupes/week).
#
# Registration email is for expiry notices only — adjust to a role address if
# desired (astoltz@iamwork.in is the current operator contact).
# ----------------------------------------------------------------------------
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: letsencrypt-staging
  labels:
    app.kubernetes.io/part-of: flowercore
    flowercore.io/created-by: bluejay-infra
    flowercore.io/gate: public-tls
spec:
  acme:
    # LE STAGING — untrusted certs, generous limits. Use this first, always.
    server: https://acme-staging-v02.api.letsencrypt.org/directory
    email: astoltz@iamwork.in
    # Secret in which cert-manager stores this issuer's ACME account private key.
    privateKeySecretRef:
      name: letsencrypt-staging-account-key
    solvers:
      # HTTP-01 via Traefik. Requires the public tenant domain's :80 traffic to
      # reach the cluster ingress. For wildcard / apex without inbound :80, swap
      # to the dns01 solver block below (needs a public DNS provider webhook).
      - http01:
          ingress:
            class: traefik
      # --- DNS-01 alternative for wildcards (uncomment + wire a public DNS webhook) ---
      # - dns01:
      #     webhook:
      #       groupName: acme.flowercore.io # or the cloudflare/namecheap solver
      #       solverName: <public-dns-solver>
---
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: letsencrypt-prod
  labels:
    app.kubernetes.io/part-of: flowercore
    flowercore.io/created-by: bluejay-infra
    flowercore.io/gate: public-tls
spec:
  acme:
    # LE PRODUCTION — trusted certs, strict rate limits. Only after staging proves out.
    server: https://acme-v02.api.letsencrypt.org/directory
    email: astoltz@iamwork.in
    # Separate account-key Secret from staging; the two ACME accounts are independent.
    privateKeySecretRef:
      name: letsencrypt-prod-account-key
    solvers:
      - http01:
          ingress:
            class: traefik
      # - dns01:
      #     webhook:
      #       groupName: acme.flowercore.io
      #       solverName: <public-dns-solver>

View File

@@ -1,59 +0,0 @@
# ============================================================================
# Per-tenant NetworkPolicy TEMPLATE — web-hosting isolation (Cl-infra-2 / Wh-C2)
# ============================================================================
# GATED substrate (outside apps/, not auto-applied). Modeled on the canonical
# default-deny + allowlist shape in apps/fc-devicemgmt/network-policy.yaml.
#
# Purpose: when a public multi-tenant site is provisioned, each tenant's pods
# get a NetworkPolicy that (a) default-denies all ingress/egress, then allows
# only Traefik ingress + CoreDNS + that tenant's own DB. This enforces the
# cross-tenant isolation Wh-C2 verifies with negative suites.
#
# Replace the {{TENANT}} placeholders and apply alongside the tenant's workload
# (the MySQL/PHP managers should emit this when they create a tenant, or a
# templating step in apps/ should render it). Kept here as the reference shape.
# ----------------------------------------------------------------------------
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: tenant-{{TENANT}}-isolation
  namespace: fc-tenant-{{TENANT}}
  labels:
    app.kubernetes.io/part-of: flowercore
    flowercore.io/tenant-id: "{{TENANT}}"
    flowercore.io/created-by: bluejay-infra
    flowercore.io/gate: public-tls
spec:
  podSelector: {} # all pods in the tenant namespace
  # Listing both types makes anything not matched below denied in both directions.
  policyTypes: [Ingress, Egress]
  ingress:
    # Only Traefik may reach tenant pods (public traffic terminates at Traefik).
    - from:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: traefik-system
      ports:
        - { protocol: TCP, port: 80 }
        - { protocol: TCP, port: 443 }
        - { protocol: TCP, port: 8080 }
  egress:
    # CoreDNS resolution.
    # namespaceSelector + podSelector sit in ONE peer entry, so they are ANDed:
    # pods labelled k8s-app=kube-dns, in any namespace.
    - to:
        - namespaceSelector: {}
          podSelector:
            matchLabels:
              k8s-app: kube-dns
      ports:
        - { protocol: UDP, port: 53 }
        - { protocol: TCP, port: 53 }
    # This tenant's OWN MySQL only (NOT other tenants' DBs — that's the isolation).
    # A bare podSelector (no namespaceSelector) matches pods in this policy's namespace only.
    - to:
        - podSelector:
            matchLabels:
              flowercore.io/tenant-id: "{{TENANT}}"
              app.kubernetes.io/name: mysql
      ports:
        - { protocol: TCP, port: 3306 }
  # NOTE: deliberately NO blanket egress. Add per-tenant allowances explicitly
  # (object storage, mail relay, etc.) so a compromised tenant pod cannot reach
  # the rest of the fleet or other tenants.

View File

@@ -1,15 +0,0 @@
# GX10 cluster platform layer (NOT old-cluster ArgoCD)
These manifests bootstrap the GX10 RKE2 cluster's platform layer for the NUC→GX10
migration. They are **direct-applied** to the GX10 (its own kubectl) during
bootstrap, and live under `gx10/` (NOT `apps/`) so the OLD cluster's bluejay-infra
ApplicationSet (whose `apps/*` generator targets the OLD cluster) does NOT
auto-deploy them there. Once ArgoCD is stood up on the GX10, a GX10-only
ApplicationSet (`apps-gx10/*`) will own these.
- `step-ca-acme.yaml` — cert-manager ClusterIssuer (ACME → noc1 step-ca, in-spec caBundle). APPLIED + Ready.
- `traefik-helmchart.yaml` — Traefik v3.6.10 (chart 39.0.5) via the RKE2 HelmChart CRD, LoadBalancer VIP 10.0.57.202 (prod-pool; temp parallel-run VIP — canonical .200 reclaimed at cutover). APPLIED.
cert-manager v1.17.2 was installed separately (upstream static manifest). See
`docs/ai-agents/gx10-migration-continuation-2026-06-14.md` + memory
`project_gx10_ai_node_2026_06_13`.

View File

@@ -1,14 +0,0 @@
# cert-manager ClusterIssuer that speaks ACME to the step-ca directory at
# 10.0.56.10:9443. The CA certificate is embedded in-spec (caBundle, base64 PEM)
# so cert-manager can verify the ACME server's TLS without a separate trust bundle.
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: step-ca-acme
spec:
  acme:
    server: https://10.0.56.10:9443/acme/acme/directory
    caBundle: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJ4RENDQVdxZ0F3SUJBZ0lSQVBZMzU3RzZvdzZ6TUFMNSs0YlMya2t3Q2dZSUtvWkl6ajBFQXdJd1FERWEKTUJnR0ExVUVDaE1SU1VGdFYyOXlhMmx1SUVGRFRVVWdRMEV4SWpBZ0JnTlZCQU1UR1VsQmJWZHZjbXRwYmlCQgpRMDFGSUVOQklGSnZiM1FnUTBFd0hoY05Nall3TXpBNE1UZ3dOekV4V2hjTk16WXdNekExTVRnd056RXhXakJBCk1Sb3dHQVlEVlFRS0V4RkpRVzFYYjNKcmFXNGdRVU5OUlNCRFFURWlNQ0FHQTFVRUF4TVpTVUZ0VjI5eWEybHUKSUVGRFRVVWdRMEVnVW05dmRDQkRRVEJaTUJNR0J5cUdTTTQ5QWdFR0NDcUdTTTQ5QXdFSEEwSUFCSjJuMDRYMQpKWm81WmRxL2kxSWR2OCtmcXdaeUF6Qmg3d2hicWowU1dzSkw4VVdSYWJDTXFZQ3M3K2RYTzB4UlN6cWt3RkRMCngrdm9vT2FpOFJnUk5oYWpSVEJETUE0R0ExVWREd0VCL3dRRUF3SUJCakFTQmdOVkhSTUJBZjhFQ0RBR0FRSC8KQWdFQk1CMEdBMVVkRGdRV0JCUm51UFBRUjZpTS9INnZPbHVpVTNTeWdheXo4akFLQmdncWhrak9QUVFEQWdOSQpBREJGQWlFQXJRSzlkWVBHbUFac2RZbmp6aXVGVlZFNU5LWlVjY2VZdkdmR0MrdExYVXNDSUF1ZEYyekpyQ1JxCjNtSzUwWlpFVC9md1RrSndpRUY0ODI0bWpQOHAxQ0tNCi0tLS0tRU5EIENFUlRJRklDQVRFLS0tLS0K
    # Secret in which cert-manager stores the ACME account private key.
    privateKeySecretRef:
      name: step-ca-acme-account-key
    solvers:
      # HTTP-01 challenges are served through Ingresses of class "traefik".
      - http01:
          ingress:
            ingressClassName: traefik

View File

@@ -1,81 +0,0 @@
# Traefik installed through the RKE2 HelmChart CRD. The HelmChart object lives in
# kube-system; the chart itself is rendered into traefik-system (created on demand).
apiVersion: helm.cattle.io/v1
kind: HelmChart
metadata:
  name: traefik
  namespace: kube-system
spec:
  chart: traefik
  repo: https://traefik.github.io/charts
  version: "39.0.5"
  targetNamespace: traefik-system
  createNamespace: true
  # Helm values, passed verbatim as a block scalar (indentation inside is significant).
  valuesContent: |
    deployment:
      replicas: 1
    additionalArguments:
      - "--api.dashboard=true"
      - "--log.level=INFO"
      - "--providers.kubernetescrd"
      - "--providers.kubernetesingress"
      - "--providers.kubernetescrd.allowEmptyServices=true"
      - "--providers.kubernetesingress.allowEmptyServices=true"
      - "--providers.kubernetesingress.ingressendpoint.publishedservice=traefik-system/traefik"
    ingressRoute:
      dashboard:
        enabled: false
    rbac:
      enabled: true
    service:
      type: LoadBalancer
      annotations:
        metallb.io/loadBalancerIPs: "10.0.57.202"
        metallb.io/address-pool: "prod-pool"
    # Entry points: `port` is the container listen port, `exposedPort` the Service port.
    ports:
      web:
        port: 8000
        exposedPort: 80
        protocol: TCP
      websecure:
        port: 8443
        exposedPort: 443
        protocol: TCP
        tls:
          enabled: true
      irc:
        port: 6667
        exposedPort: 6667
        protocol: TCP
        expose:
          default: true
      irctls:
        port: 6697
        exposedPort: 6697
        protocol: TCP
        expose:
          default: true
      # Internal entry points: defined but not exposed on the LoadBalancer Service.
      traefik:
        port: 8080
        exposedPort: 8080
        protocol: TCP
        expose:
          default: false
      metrics:
        port: 9100
        exposedPort: 9100
        protocol: TCP
        expose:
          default: false
    # Top-level metrics config (distinct from ports.metrics above): Prometheus
    # metrics are served on the "metrics" entry point.
    metrics:
      prometheus:
        entryPoint: metrics
    resources:
      requests:
        cpu: "100m"
        memory: "128Mi"
      limits:
        cpu: "500m"
        memory: "256Mi"
    tolerations:
      - key: "node-role.kubernetes.io/control-plane"
        operator: "Exists"
        effect: "NoSchedule"

View File

@@ -1,31 +0,0 @@
# GX10 Piper TTS — linux/arm64 (built natively on the GX10 / DGX Spark, aarch64).
# Serves the telephony /tts contract: POST {"text"} -> 16 kHz/16-bit/mono WAV.
# Voice baked into the image so there is no runtime HuggingFace dependency.
FROM python:3.12-slim

# espeak-ng is the phonemizer backend piper-tts uses at synthesis time.
# curl (with ca-certificates for HTTPS) is used by the voice download step below.
RUN apt-get update \
    && apt-get install -y --no-install-recommends espeak-ng ca-certificates curl \
    && rm -rf /var/lib/apt/lists/*

RUN pip install --no-cache-dir piper-tts flask numpy

# Bake the voice model (en_US-amy-medium, 22.05 kHz native) into the image.
ARG PIPER_VOICE=en_US-amy-medium
ARG VOICE_BASE=https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/amy/medium

# -f/--fail makes curl exit non-zero on an HTTP error status. Without it a 404/5xx
# error page would be written to the .onnx path and still satisfy `test -s`
# (non-empty file), producing an image whose "model" cannot be loaded.
RUN mkdir -p /voices \
    && curl -fsSL -o "/voices/${PIPER_VOICE}.onnx" "${VOICE_BASE}/${PIPER_VOICE}.onnx" \
    && curl -fsSL -o "/voices/${PIPER_VOICE}.onnx.json" "${VOICE_BASE}/${PIPER_VOICE}.onnx.json" \
    && test -s "/voices/${PIPER_VOICE}.onnx" \
    && test -s "/voices/${PIPER_VOICE}.onnx.json"

COPY tts_service.py /app/tts_service.py
WORKDIR /app

# PIPER_VOICE follows the build arg so that a `--build-arg PIPER_VOICE=...` build
# defaults the service to the voice that was actually downloaded above. With the
# default arg this resolves to en_US-amy-medium, same as before.
ENV TTS_PORT=8500 \
    PIPER_VOICE=${PIPER_VOICE} \
    VOICES_DIR=/voices \
    TARGET_RATE=16000

EXPOSE 8500
CMD ["python", "tts_service.py"]

View File

@@ -1,59 +0,0 @@
# GX10 Piper TTS — telephony `/tts` endpoint
CPU Piper TTS serving the telephony `/tts` contract on the **GX10 RKE2 cluster**
(ASUS Ascent GX10 / NVIDIA DGX Spark, ARM64, `10.0.56.14`). This is the
telephony-TTS-port-to-GX10 (P1) baseline: edge1 parity at higher quality, zero
GPU/aarch64 risk, frees telephony off the slow edge1 Pi 5.
## What it is
- `tts_service.py` — Flask app: `POST /tts {"text"}` → **16 kHz / 16-bit / mono WAV**
(canonical 44-byte header) + `GET /health`. Voice `en_US-amy-medium` (22.05 kHz
native) is numpy-resampled to 16 kHz so it drops straight onto Asterisk's
`.sln16` path (telephony strips the 44-byte header). Same wire contract as the
edge1 `speech-pipeline` `/tts`, just the TTS half (no STT/Wyoming).
- `Dockerfile` — `linux/arm64`, voice baked in (no runtime HuggingFace dep).
- `gx10-tts.yaml` — Namespace `tts` + Deployment (CPU-only, **no GPU request** so it
co-resides with the GPU-holding Ollama pod) + NodePort Service.
## This cluster is NOT under the old-cluster ArgoCD (yet)
Apply manually with the GX10's own kubectl:
```bash
ssh -J noc1 -i ~/.ssh/fcadmin_ed25519 bluejay@10.0.56.14
export KUBECONFIG=/etc/rancher/rke2/rke2.yaml
K=/var/lib/rancher/rke2/bin/kubectl
$K apply -f gx10-tts.yaml
```
## Build + import (native arm64 on the GX10)
```bash
docker build -t localhost/fc-gx10-tts:v20260614 .
docker save localhost/fc-gx10-tts:v20260614 -o /tmp/t.tar
sudo /var/lib/rancher/rke2/bin/ctr -a /run/k3s/containerd/containerd.sock -n k8s.io images import /tmp/t.tar
# manifest uses imagePullPolicy: Never (image lives in containerd, no registry)
```
## Telephony cutover (reversible)
Endpoint telephony hits: **`http://10.0.56.14:30850`** (NodePort, MGMT VLAN 56).
In `apps/telephony/telephony.yaml`:
1. Deployment env `Tts__PiperUrl=http://10.0.56.14:30850` — **this is the real lever**;
env vars override `appsettings.Production.json`, so the configmap `Tts` block alone
is inert (it was shadowed by a drifted live env `Tts__PiperUrl=edge1`).
2. NetworkPolicy egress to `10.0.56.14/32:30850` (telephony-web is `hostNetwork`, so this
only matters for non-hostNetwork pods; harmless either way).
3. edge1 (`10.0.57.17:8500`) stays warm — **rollback = set `Tts__PiperUrl` back to it**.
The TTS circuit breaker + `MapTextToSound` canned-prompt fallback mean a bad endpoint
degrades gracefully, never to silence.
## Verify (not a manual call)
```bash
FLOWERCORE_SIP_TEST_MODE=required dotnet.exe test \
FlowerCore.Telephony/tests/FlowerCore.Telephony.SipTests/FlowerCore.Telephony.SipTests.csproj \
--filter FullyQualifiedName~Call_Star100_ReceivesAudibleAudioStream
```
A passing audible test alone is NOT sufficient (edge1 also produces audible audio) —
confirm the **GX10 TTS pod's own access log** (`kubectl -n tts logs deploy/gx10-tts`)
shows `POST /tts 200` during the call, and telephony-web logs target `10.0.56.14:30850`.
## Voice upgrade (follow-on)
Operator's pick is **Kokoro**; needs GPU time-slicing (Ollama holds the GB10 GPU; MPS is
refuted on GB10) OR Kokoro-CPU behind a `/tts` shim. This Piper baseline stays as the floor.

View File

@@ -1,81 +0,0 @@
# GX10 Piper TTS — telephony /tts endpoint on the GX10 RKE2 cluster.
# Applied DIRECTLY via the GX10's own kubectl (KUBECONFIG=/etc/rancher/rke2/rke2.yaml);
# the GX10 cluster is NOT yet under the old-cluster ArgoCD. CPU-only (no GPU request)
# so it co-resides with the GPU-holding Ollama pod without contending for the GB10.
# Image is imported into RKE2 containerd (imagePullPolicy: Never).
# Telephony reaches it at http://10.0.56.14:30850 (NodePort, MGMT VLAN 56).
apiVersion: v1
kind: Namespace
metadata:
name: tts
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: gx10-tts
namespace: tts
labels:
app: gx10-tts
spec:
replicas: 1
selector:
matchLabels:
app: gx10-tts
template:
metadata:
labels:
app: gx10-tts
spec:
containers:
- name: tts
image: localhost/fc-gx10-tts:v20260614
imagePullPolicy: Never
ports:
- containerPort: 8500
name: http
env:
- name: TTS_PORT
value: "8500"
- name: PIPER_VOICE
value: "en_US-amy-medium"
- name: TARGET_RATE
value: "16000"
readinessProbe:
httpGet:
path: /health
port: 8500
initialDelaySeconds: 3
periodSeconds: 5
timeoutSeconds: 3
livenessProbe:
httpGet:
path: /health
port: 8500
initialDelaySeconds: 10
periodSeconds: 20
timeoutSeconds: 5
resources:
requests:
cpu: "500m"
memory: "512Mi"
limits:
cpu: "4"
memory: "2Gi"
---
apiVersion: v1
kind: Service
metadata:
name: gx10-tts
namespace: tts
labels:
app: gx10-tts
spec:
type: NodePort
selector:
app: gx10-tts
ports:
- name: http
port: 8500
targetPort: 8500
nodePort: 30850
protocol: TCP

View File

@@ -1,153 +0,0 @@
#!/usr/bin/env python3
"""GX10 Piper TTS microservice — telephony /tts contract.
POST /tts {"text": "..."} -> 16 kHz / 16-bit / mono WAV (canonical 44-byte header)
GET /health -> JSON status
The telephony AsteriskProvider strips the 44-byte WAV header and writes the
remainder as a `.sln16` (signed-linear 16 kHz) file that Asterisk transcodes to
any codec. So the response MUST be 16 kHz / 16-bit / mono. The en_US-amy-medium
voice is 22.05 kHz native, so we resample to 16 kHz (a 22.05 kHz stream treated
as 16 kHz plays ~1.38x too fast). This is a drop-in upgrade over edge1's
en_US-amy-low (16 kHz native, lower quality), keeping the exact wire contract.
"""
import io
import logging
import os
import sys
import threading
import wave
import numpy as np
from flask import Flask, Response, jsonify, request
# Runtime configuration, read once from the environment when the module is imported.
API_PORT = int(os.environ.get("TTS_PORT", "8500"))  # port handed to app.run()
PIPER_VOICE = os.environ.get("PIPER_VOICE", "en_US-amy-medium")  # voice file stem under VOICES_DIR
VOICES_DIR = os.environ.get("VOICES_DIR", "/voices")  # directory holding <voice>.onnx
TARGET_RATE = int(os.environ.get("TARGET_RATE", "16000"))  # sample rate of the WAV returned by /tts

# Log to stdout at INFO level.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    stream=sys.stdout,
)
log = logging.getLogger("gx10-tts")

# Shared model state: written by load_piper(), read by the request handlers.
piper_voice_obj = None
piper_loaded = False
piper_lock = threading.Lock()  # held around synthesis on the shared voice object
native_rate = None  # sample rate reported by the voice; stays None until a /tts call sets it

app = Flask(__name__)
def load_piper():
    """Load the configured Piper voice into the module-level shared state.

    Sets ``piper_voice_obj`` and ``piper_loaded``. Never raises: a missing model
    file or any exception during import/load is logged and leaves
    ``piper_loaded`` False so the handlers can report the condition.
    """
    global piper_voice_obj, piper_loaded
    try:
        # Imported lazily so a broken piper install is caught by the handler below.
        from piper import PiperVoice

        onnx_file = os.path.join(VOICES_DIR, f"{PIPER_VOICE}.onnx")
        if os.path.isfile(onnx_file):
            log.info("Loading Piper voice %s from %s", PIPER_VOICE, onnx_file)
            piper_voice_obj = PiperVoice.load(onnx_file)
            piper_loaded = True
            log.info("Piper voice loaded")
        else:
            log.error("Piper voice model not found at %s — TTS disabled", onnx_file)
            piper_loaded = False
    except Exception as exc:  # noqa: BLE001 — fail-soft, /health reports it
        log.error("Failed to load Piper: %s", exc)
        piper_loaded = False
def synthesize_chunks(text):
    """Synthesize ``text`` with the shared voice and return the chunks as a list.

    The whole synthesis, including draining the result iterable, happens while
    ``piper_lock`` is held because the loaded voice object is shared.
    """
    with piper_lock:
        produced = [chunk for chunk in piper_voice_obj.synthesize(text)]
    return produced
def resample_i16(pcm_i16, src_rate, dst_rate):
    """Resample int16 PCM from ``src_rate`` to ``dst_rate`` by linear interpolation.

    Returns the input array object unchanged when it is empty or the rates are
    equal, an empty int16 array when the computed output length is not positive,
    and otherwise a new int16 array whose values are rounded and clipped to the
    int16 range.
    """
    sample_count = len(pcm_i16)
    if sample_count == 0 or src_rate == dst_rate:
        return pcm_i16

    samples = pcm_i16.astype(np.float32)
    out_count = int(round(sample_count * dst_rate / src_rate))
    if out_count <= 0:
        return np.zeros(0, dtype=np.int16)

    # Evenly spaced fractional positions spanning the first to the last source sample.
    src_positions = np.arange(sample_count)
    dst_positions = np.linspace(0, sample_count - 1, out_count)
    interpolated = np.interp(dst_positions, src_positions, samples)

    clipped = np.clip(np.round(interpolated), -32768, 32767)
    return clipped.astype(np.int16)
@app.route("/health", methods=["GET"])
def health():
    """Return a JSON snapshot of the service configuration and model state.

    ``status`` is always the literal "ok"; whether the voice actually loaded is
    reported separately in ``loaded``.
    """
    snapshot = {
        "status": "ok",
        "voice": PIPER_VOICE,
        "loaded": piper_loaded,
        "target_rate": TARGET_RATE,
        "native_rate": native_rate,
    }
    return jsonify(snapshot)
@app.route("/tts", methods=["POST"])
def tts():
    """Text -> 16 kHz/16-bit/mono WAV. Mirrors the edge1 speech-pipeline contract.

    Request body: JSON object with a string ``text`` field.

    Responses:
        200 audio/wav  synthesized speech at TARGET_RATE, 16-bit mono.
        400 JSON       body is not a JSON object, ``text`` is missing, not a
                       string, empty after stripping, or longer than 10000 chars.
        500 JSON       synthesis produced no audio, an unexpected PCM format,
                       or raised.
        503 JSON       the Piper voice is not loaded.
    """
    global native_rate

    if not piper_loaded:
        return jsonify({"error": "Piper TTS model not loaded"}), 503

    data = request.get_json(silent=True)
    # A JSON array, string or number parses successfully but is not an object;
    # indexing it with "text" would raise outside the try block below and
    # surface as an unhandled 500 instead of a client error.
    if not isinstance(data, dict) or "text" not in data:
        return jsonify({"error": "Missing required field: text"}), 400

    raw_text = data["text"]
    # {"text": 123} or {"text": null} has no .strip(); reject it as a bad request.
    if not isinstance(raw_text, str):
        return jsonify({"error": "Field 'text' must be a string"}), 400

    text = raw_text.strip()
    if not text:
        return jsonify({"error": "Text field is empty"}), 400
    if len(text) > 10000:
        return jsonify({"error": "Text too long (max 10000 characters)"}), 400

    try:
        chunks = synthesize_chunks(text)
        if not chunks:
            return jsonify({"error": "No audio produced"}), 500

        # The first chunk's format is taken as the format of the whole utterance.
        first = chunks[0]
        native_rate = first.sample_rate
        if first.sample_width != 2 or first.sample_channels != 1:
            return jsonify({
                "error": f"Unexpected PCM format: width={first.sample_width} "
                         f"channels={first.sample_channels} (need 16-bit mono)"
            }), 500

        pcm = np.frombuffer(
            b"".join(c.audio_int16_bytes for c in chunks), dtype=np.int16
        )
        out = resample_i16(pcm, native_rate, TARGET_RATE)

        wav_buffer = io.BytesIO()
        with wave.open(wav_buffer, "wb") as wav_file:
            wav_file.setnchannels(1)
            wav_file.setsampwidth(2)
            wav_file.setframerate(TARGET_RATE)
            wav_file.writeframes(out.tobytes())

        return Response(
            wav_buffer.getvalue(),
            mimetype="audio/wav",
            headers={"Content-Disposition": 'inline; filename="speech.wav"'},
        )
    except Exception as exc:  # noqa: BLE001
        # log.exception keeps the traceback alongside the original message.
        log.exception("TTS synthesis failed: %s", exc)
        return jsonify({"error": f"Synthesis failed: {exc}"}), 500
# Script entry point: log the effective settings, load the voice once, then serve.
if __name__ == "__main__":
    log.info(
        "GX10 TTS starting on port %d (voice=%s -> %d Hz)",
        API_PORT, PIPER_VOICE, TARGET_RATE,
    )
    # Load before serving so piper_loaded is already settled when requests arrive.
    load_piper()
    # Listens on all interfaces; threaded=True is why synthesis is lock-guarded.
    app.run(host="0.0.0.0", port=API_PORT, threaded=True)

View File

@@ -1,48 +0,0 @@
#!/usr/bin/env bash
# Destructive smoke test: stop and remove the openvoxserver container, let systemd
# recreate it, run puppet-deploy.service, then verify the production environment
# checkout and its deploy-key git configuration are still in place.
#
# Environment:
#   OPENVOX_RECREATE_SMOKE=1        required opt-in; anything else exits 64 (SKIP)
#   SUDO                            privilege wrapper command (default: sudo)
#   OPENVOX_RECREATE_WAIT_SECONDS   wait after starting openvoxserver (default: 50)
#
# Exit codes: 0 pass, 1 verification failure, 64 not opted in, 65 no systemd unit;
# any other failing command aborts with that command's status (set -e).
set -euo pipefail

if [ "${OPENVOX_RECREATE_SMOKE:-}" != "1" ]; then
  echo "SKIP: set OPENVOX_RECREATE_SMOKE=1 to run the destructive openvoxserver recreate smoke." >&2
  exit 64
fi

SUDO="${SUDO:-sudo}"
REPO="/etc/puppetlabs/code/environments/production"
CORE_SSH_COMMAND_FRAGMENT=".puppet-deploy-key"
# Previously a hard-coded `sleep 50`; the default keeps that behavior.
RECREATE_WAIT_SECONDS="${OPENVOX_RECREATE_WAIT_SECONDS:-50}"

# $SUDO is intentionally unquoted throughout so it word-splits into a command prefix.
if ! $SUDO systemctl cat openvoxserver >/dev/null 2>&1; then
  echo "SKIP: systemctl cat openvoxserver failed; refusing to remove a container without a verified systemd recreate path." >&2
  exit 65
fi

before="$($SUDO podman exec openvoxserver git -C "$REPO" rev-parse --short HEAD)"
echo "Before recreate: $before"

$SUDO systemctl stop openvoxserver
# The container may already be gone after the stop; that is not an error.
$SUDO podman rm openvoxserver 2>/dev/null || true
$SUDO systemctl start openvoxserver
sleep "$RECREATE_WAIT_SECONDS"

$SUDO systemctl start puppet-deploy.service
sleep 5

# `systemctl status` is diagnostic output only. It exits non-zero for any unit
# that is not active (including one that ran to completion and went inactive),
# which under `set -e` would abort the smoke before the real checks below.
# NOTE(review): assumes puppet-deploy.service may be inactive after a successful
# run — confirm against the unit file.
$SUDO systemctl status puppet-deploy.service --no-pager -l || true
# Keep failing the smoke when the unit actually ended in the failed state.
if $SUDO systemctl is-failed --quiet puppet-deploy.service; then
  echo "FAIL: puppet-deploy.service is in a failed state." >&2
  exit 1
fi

after="$($SUDO podman exec openvoxserver git -C "$REPO" rev-parse --short origin/master)"
echo "After recreate origin/master: $after"

$SUDO test -d /opt/puppet/code/environments/production/site-modules/profile/manifests

core_ssh="$($SUDO podman exec openvoxserver git -C "$REPO" config --get core.sshCommand)"
case "$core_ssh" in
  *"$CORE_SSH_COMMAND_FRAGMENT"*) ;;
  *)
    echo "FAIL: core.sshCommand does not reference the persisted deploy key." >&2
    exit 1
    ;;
esac

$SUDO podman exec openvoxserver git -C "$REPO" status --short --branch
echo "PASS: openvoxserver recreate smoke completed without git safety or deploy-key failure."

View File

@@ -1,206 +0,0 @@
using FluentAssertions;
using Xunit;
namespace BluejayInfraLint.Tests;
/// <summary>
/// Lints the Divoom Pi artifact bundles under <c>apps/fc-divoom-dm-pi-device</c> and
/// <c>apps/fc-divoom-tv-pi</c>: required files exist, specific Puppet/systemd/Hiera
/// text is present (or absent), and no Kubernetes workload kinds appear in either bundle.
/// </summary>
[Trait("Category", "Unit")]
public sealed class DivoomPiDeployArtifactTests
{
    // Declaration order matters: DmRoot and TvRoot are built from Root.
    private static readonly string Root = FindRepoRoot();
    private static readonly string DmRoot = Path.Combine(Root, "apps", "fc-divoom-dm-pi-device");
    private static readonly string TvRoot = Path.Combine(Root, "apps", "fc-divoom-tv-pi");

    // Resource kinds that must not appear anywhere in the two bundles.
    private static readonly string[] ForbiddenKubernetesKinds =
    {
        "kind: Deployment",
        "kind: IngressRoute",
        "kind: Certificate",
        "kind: OnePasswordItem",
    };

    /// <summary>Bundle-relative paths that must exist under the DM device bundle.</summary>
    public static TheoryData<string> DmRequiredArtifacts => new()
    {
        "README.md",
        "hiera/edge2-divoom-dm-device.overlay.yaml",
        "puppet/profile/pi/service/divoom_dm_device.pp",
        "puppet/templates/divoom-device-registration.json.epp",
        "puppet/templates/flowercore-divoom-dm-agent.service.epp",
    };

    /// <summary>Bundle-relative paths that must exist under the TV Pi bundle.</summary>
    public static TheoryData<string> TvRequiredArtifacts => new()
    {
        "README.md",
        "hiera/example-divoom-tv-pi.iamworkin.lan.yaml",
        "puppet/profile/pi/service/divoom_tv.pp",
        "systemd/flowercore-divoom-tv.service",
        "systemd/flowercore-divoom-tv-hdmi.service",
        "systemd/99-flowercore-divoom-tv-hdmi.rules",
        "scripts/flowercore-divoom-tv-prelaunch.sh",
        "scripts/flowercore-divoom-tv-launch.sh",
        "scripts/flowercore-divoom-tv-hdmi-respond.sh",
    };

    [Theory]
    [MemberData(nameof(DmRequiredArtifacts))]
    public void DmDeviceArtifacts_ArePresent(string relativePath)
    {
        File.Exists(ResolvePath(DmRoot, relativePath)).Should().BeTrue(relativePath);
    }

    [Theory]
    [MemberData(nameof(TvRequiredArtifacts))]
    public void TvPiArtifacts_ArePresent(string relativePath)
    {
        File.Exists(ResolvePath(TvRoot, relativePath)).Should().BeTrue(relativePath);
    }

    [Fact]
    public void DmDeviceReadme_DeclaresPuppetSystemdNotKubernetes()
    {
        var readme = ReadDm("README.md");

        readme.Should().Contain("not a Kubernetes application");
        readme.Should().Contain("profile::pi::service::divoom");
        readme.Should().Contain("no K8s surface");
    }

    [Fact]
    public void DmHieraOverlay_PreservesExistingEdge2DivoomService()
    {
        var hiera = ReadDm("hiera/edge2-divoom-dm-device.overlay.yaml");

        hiera.Should().Contain("fc-pimanager:");
        hiera.Should().Contain("fc-divoom:");
        hiera.Should().Contain("enabled: true");
        hiera.Should().Contain("profile::pi::service::divoom_dm_device::service_enabled: false");
        hiera.Should().Contain("profile::pi::service::divoom_dm_device::service_ensure: 'stopped'");
    }

    [Fact]
    public void DmPuppetProfile_DefaultsToStoppedDisabledService()
    {
        var profile = ReadDm("puppet/profile/pi/service/divoom_dm_device.pp");

        profile.Should().Contain("Boolean $service_enabled = false");
        profile.Should().Contain("Enum['running', 'stopped'] $service_ensure = 'stopped'");
        profile.Should().Contain("service { $service_name:");
        profile.Should().Contain("ensure => $service_ensure");
        profile.Should().Contain("enable => $service_enabled");
    }

    [Fact]
    public void DmPuppetProfile_DoesNotManageLiveDivoomWebUnit()
    {
        var profile = ReadDm("puppet/profile/pi/service/divoom_dm_device.pp");

        profile.Should().NotContain("Service['flowercore-divoom.service']");
        profile.Should().NotContain("service { 'flowercore-divoom.service'");
        profile.Should().NotContain("notify => Service");
    }

    [Fact]
    public void DmAgentUnit_IsSeparateAndGatedByExistingWrappers()
    {
        var unit = ReadDm("puppet/templates/flowercore-divoom-dm-agent.service.epp");

        unit.Should().Contain("ConditionPathExists=<%= $divoom_install_dir %>/bt-link.sh");
        unit.Should().Contain("ConditionPathExists=<%= $divoom_install_dir %>/bt-reset.sh");
        unit.Should().Contain("ConditionPathExists=<%= $divoom_install_dir %>/audio-link.sh");
        unit.Should().Contain("ExecStart=<%= $agent_binary_path %> --mode=Pi");
        unit.Should().NotContain("flowercore-divoom.service");
    }

    [Fact]
    public void DmRegistration_CarriesRenderProofAndSafetyPolicy()
    {
        var registration = ReadDm("puppet/templates/divoom-device-registration.json.epp");

        registration.Should().Contain("\"candidateChannels\": <%= $bt_channels_json %>");
        registration.Should().Contain("\"deviceInfoIsRenderProof\": false");
        registration.Should().Contain("\"visibleRenderProofRequired\": <%= $visible_render_proof_required %>");
        registration.Should().Contain("\"preserveExistingService\": \"flowercore-divoom.service\"");
        registration.Should().Contain("\"doNotEnableFmRadio\": true");
    }

    [Fact]
    public void TvService_UsesAvaloniaHdmiSafetyGates()
    {
        var unit = ReadTv("systemd/flowercore-divoom-tv.service");

        unit.Should().Contain("ConditionPathExists=/opt/flowercore/divoom-tv/FlowerCore.Divoom.Tv");
        unit.Should().Contain("Environment=XDG_RUNTIME_DIR=/run/fc-divoom-tv");
        unit.Should().Contain("RuntimeDirectoryMode=0700");
        unit.Should().Contain("ExecStartPre=/usr/local/bin/flowercore-divoom-tv-prelaunch.sh");
        unit.Should().Contain("ExecStart=/usr/local/bin/flowercore-divoom-tv-launch.sh");
        unit.Should().Contain("MemoryMax=2G");
        unit.Should().Contain("PrivateTmp=true");
        // Case-sensitive: "PrivateTmp" above does not itself contain "/tmp".
        unit.Should().NotContain("/tmp");
    }

    [Fact]
    public void TvLauncher_PrefersCageAndFallsBackToDirectLaunch()
    {
        var script = ReadTv("scripts/flowercore-divoom-tv-launch.sh");

        script.Should().Contain("command -v cage");
        script.Should().Contain("exec cage --");
        script.Should().Contain("launching FlowerCore.Divoom.Tv directly");
        script.Should().Contain("--target=hdmi");
        script.Should().Contain("--presentation-mode=${PRESENTATION_MODE}");
    }

    [Fact]
    public void TvHotplugRule_SettlesAndRestartsRenderer()
    {
        var rule = ReadTv("systemd/99-flowercore-divoom-tv-hdmi.rules");
        var responder = ReadTv("scripts/flowercore-divoom-tv-hdmi-respond.sh");

        rule.Should().Contain("KERNEL==\"card?-HDMI-A-?\"");
        rule.Should().Contain("start flowercore-divoom-tv-hdmi.service");
        responder.Should().Contain("sleep 2");
        responder.Should().Contain("systemctl restart flowercore-divoom-tv.service");
    }

    [Fact]
    public void TvPuppetProfile_InstallsCageAndStaticArtifacts()
    {
        var profile = ReadTv("puppet/profile/pi/service/divoom_tv.pp");

        profile.Should().Contain("package { ['cage', 'libgbm1', 'libdrm2', 'libxkbcommon0', 'fonts-dejavu-core']");
        profile.Should().Contain("'profile/pi/fc_divoom_tv/flowercore-divoom-tv.service'");
        profile.Should().Contain("'profile/pi/fc_divoom_tv/flowercore-divoom-tv-launch.sh'");
        profile.Should().Contain("profile/pi/fc_divoom_tv/99-flowercore-divoom-tv-hdmi.rules");
        profile.Should().Contain("Boolean $service_enabled = false");
    }

    /// <summary>
    /// Scans every file in both bundles for the forbidden Kubernetes kinds. The failing
    /// file's repo-relative path is passed as the assertion reason so a violation can be
    /// located without re-running the sweep by hand.
    /// </summary>
    [Fact]
    public void DivoomArtifacts_DoNotAddKubernetesWorkloads()
    {
        var bundleFiles = Directory.GetFiles(DmRoot, "*", SearchOption.AllDirectories)
            .Concat(Directory.GetFiles(TvRoot, "*", SearchOption.AllDirectories));

        foreach (var path in bundleFiles)
        {
            var text = File.ReadAllText(path);
            var displayPath = Path.GetRelativePath(Root, path);

            foreach (var forbiddenKind in ForbiddenKubernetesKinds)
            {
                // The path goes in as a format argument rather than being interpolated,
                // so braces in a file name cannot break the reason format string.
                text.Should().NotContain(
                    forbiddenKind,
                    "{0} belongs to a non-Kubernetes Pi artifact bundle",
                    displayPath);
            }
        }
    }

    /// <summary>Reads a file from the DM device bundle given its '/'-separated relative path.</summary>
    private static string ReadDm(string relativePath)
        => File.ReadAllText(ResolvePath(DmRoot, relativePath));

    /// <summary>Reads a file from the TV Pi bundle given its '/'-separated relative path.</summary>
    private static string ReadTv(string relativePath)
        => File.ReadAllText(ResolvePath(TvRoot, relativePath));

    /// <summary>
    /// Joins a bundle root with a '/'-separated relative path, converting the separators
    /// to the platform's directory separator first.
    /// </summary>
    private static string ResolvePath(string bundleRoot, string relativePath)
        => Path.Combine(bundleRoot, relativePath.Replace('/', Path.DirectorySeparatorChar));

    /// <summary>
    /// Walks up from the test binary's base directory and returns the first ancestor that
    /// contains both an <c>apps</c> directory and a <c>README.md</c> file.
    /// </summary>
    /// <exception cref="DirectoryNotFoundException">No ancestor matches.</exception>
    private static string FindRepoRoot()
    {
        var current = new DirectoryInfo(AppContext.BaseDirectory);
        while (current is not null)
        {
            if (Directory.Exists(Path.Combine(current.FullName, "apps"))
                && File.Exists(Path.Combine(current.FullName, "README.md")))
            {
                return current.FullName;
            }

            current = current.Parent;
        }

        throw new DirectoryNotFoundException("Could not find bluejay-infra root.");
    }
}

View File

@@ -13,21 +13,25 @@ public sealed class FleetManifestLintTests
private static readonly HashSet<string> PublicReadOnlyHosts = new(StringComparer.Ordinal)
{
"brochure.flowercore.io",
"dist.flowercore.io",
"dns.iamworkin.lan",
};
// Hosts that allow a tightly bounded write surface in addition to GET/HEAD.
// updatecenter.iamworkin.lan accepts POST /api/v1/checkin/{id}
// Public hosts that allow a tightly bounded write surface in addition to
// GET/HEAD. updatecenter.iamworkin.lan accepts POST /api/v1/checkin/{id}
// (bootstrap-JWT) so its allowlist is GET||HEAD||POST||OPTIONS — but
// PUT/PATCH/DELETE must still 404 at the route. Public
// update.flowercore.io remains a GET/HEAD download surface in the
// FlowerCore.Updater sibling manifest and is covered by the general
// public-method allowlist lint instead of this write-surface rule.
// PUT/PATCH/DELETE must still 404 at the route. Anything wider than this
// set should fail this lint.
//
// PUB-1 (2026-05-06): update.flowercore.io / updates.flowercore.io were
// added for the Cloudflare-proxied public Update Center edge. They use the
// same bounded read-write allowlist as the LAN pair.
private static readonly HashSet<string> PublicReadWriteAllowlistHosts = new(StringComparer.Ordinal)
{
"updatecenter.iamworkin.lan",
"updates.iamworkin.lan",
"update.flowercore.io",
"updates.flowercore.io",
};
private static readonly HashSet<string> ApiKeyProtectedDeployments = new(StringComparer.Ordinal)
@@ -50,10 +54,8 @@ public sealed class FleetManifestLintTests
"ttsreader-piper",
};
private static readonly IReadOnlyDictionary<string, string> LinuxRunnerRepos = new Dictionary<string, string>(StringComparer.Ordinal)
private static readonly IReadOnlyDictionary<string, string> TopLinuxRunnerRepos = new Dictionary<string, string>(StringComparer.Ordinal)
{
["github-runner"] = "https://github.com/astoltz/FlowerCore.Common",
["github-runner-sharedpos"] = "https://github.com/astoltz/FlowerCore.Shared.Pos",
["github-runner-puppet"] = "https://github.com/astoltz/FlowerCore.Puppet",
["github-runner-signage"] = "https://github.com/astoltz/FlowerCore.Signage",
["github-runner-dms"] = "https://github.com/astoltz/FlowerCore.DMS",
@@ -62,69 +64,6 @@ public sealed class FleetManifestLintTests
["github-runner-chat"] = "https://github.com/astoltz/FlowerCore.Chat",
["github-runner-mysql"] = "https://github.com/astoltz/FlowerCore.MySQL",
["github-runner-kiosk-linux"] = "https://github.com/astoltz/FlowerCore.Kiosk.Linux",
["github-runner-updater"] = "https://github.com/astoltz/FlowerCore.Updater",
};
private static readonly HashSet<string> RepoScopedLinuxRunnerDeployments = new(StringComparer.Ordinal)
{
"github-runner-sharedpos",
"github-runner-puppet",
"github-runner-signage",
"github-runner-dms",
"github-runner-telephony",
"github-runner-print-web",
"github-runner-chat",
"github-runner-mysql",
"github-runner-kiosk-linux",
"github-runner-updater",
};
private static readonly IReadOnlyDictionary<string, (string Deployment, string ProbePath)> BroaderHardeningDeployments =
new Dictionary<string, (string Deployment, string ProbePath)>(StringComparer.Ordinal)
{
["fc-aistation"] = ("aistation-web", "/healthz"),
["fc-chat"] = ("chat-web", "/healthz"),
["fc-devicemgmt"] = ("fc-devicemgmt-web", "/healthz"),
["fc-library"] = ("library-web", "/health"),
["fc-llm-bridge"] = ("fc-llm-bridge", "/healthz"),
["fc-messageboard"] = ("messageboard-web", "/health"),
["fc-retail"] = ("retail-web", "/healthz"),
["fc-ttsreader"] = ("ttsreader-web", "/health"),
["fc-updater"] = ("updatecenter-web", "/"),
["knowledge"] = ("knowledge-web", "/healthz"),
["telephony"] = ("telephony-web", "/health"),
["worldbuilder"] = ("worldbuilder-web", "/healthz"),
};
private static readonly HashSet<string> BroaderHardeningInternalPrestageApps = new(StringComparer.Ordinal)
{
"fc-aistation",
"fc-desktop",
"fc-dms",
"fc-library",
"fc-llm-bridge",
"fc-menuboard",
"fc-messageboard",
"fc-mysql",
"fc-php",
"fc-presentations",
"fc-retail",
"fc-scoreboard",
"fc-segmentdisplay",
"fc-signage",
"fc-ttsreader",
"knowledge",
"worldbuilder",
};
private static readonly IReadOnlyDictionary<string, string> WritableRunnerEnv = new Dictionary<string, string>(StringComparer.Ordinal)
{
["HOME"] = "/home/runner",
["DOTNET_INSTALL_DIR"] = "/home/runner/.dotnet",
["DOTNET_CLI_HOME"] = "/home/runner",
["NUGET_PACKAGES"] = "/home/runner/.nuget/packages",
["XDG_CACHE_HOME"] = "/home/runner/.cache",
["RUNNER_TOOL_CACHE"] = "/home/runner/_tool",
};
[Fact]
@@ -261,20 +200,21 @@ public sealed class FleetManifestLintTests
}
[Fact]
public void GitHubRunnerFleet_MustRegisterRequiredReposAsRepoScopedDeployments()
public void GitHubRunnerFleet_MustRegisterTopLinuxReposAsRepoScopedDeployments()
{
var deployments = GitHubRunnerDeployments();
var deployments = Inventory.Documents
.Where(document => document.Kind == "Deployment")
.Where(document => document.Namespace == "github-runner")
.ToDictionary(document => document.Name, StringComparer.Ordinal);
foreach (var expectedRunner in LinuxRunnerRepos)
foreach (var expectedRunner in TopLinuxRunnerRepos)
{
deployments.Should().ContainKey(expectedRunner.Key);
var container = deployments[expectedRunner.Key].MainContainerMappings().Should().ContainSingle().Subject;
var container = deployments[expectedRunner.Key].ContainerMappings().Should().ContainSingle().Subject;
EnvValue(container, "REPO_URL").Should().Be(expectedRunner.Value);
EnvValue(container, "EPHEMERAL").Should().Be("true");
EnvValue(container, "DISABLE_AUTO_UPDATE").Should().Be("true", $"{expectedRunner.Key} must not self-update inside immutable Kubernetes runner pods");
EnvValue(container, "LABELS").Should().Be("self-hosted,linux,fc-build-linux");
EnvValue(container, "RUN_AS_ROOT").Should().Be("false");
EnvValue(container, "ACCESS_TOKEN").Should().BeNull("ACCESS_TOKEN must come from github-runner-token Secret, not a literal");
EnvSecretName(container, "ACCESS_TOKEN").Should().Be("github-runner-token");
EnvSecretKey(container, "ACCESS_TOKEN").Should().Be("credential");
@@ -282,256 +222,51 @@ public sealed class FleetManifestLintTests
}
[Fact]
public void GitHubRunnerFleet_MustSetWritableNonRootDotnetAndCachePaths()
public void GitHubRunnerFleet_MustPreserveExistingCommonRunnerShape()
{
foreach (var deployment in GitHubRunnerDeployments().Values)
{
var container = deployment.MainContainerMappings().Should().ContainSingle().Subject;
var common = Inventory.Documents
.Single(document => document.Kind == "Deployment"
&& document.Namespace == "github-runner"
&& document.Name == "github-runner");
foreach (var expectedEnv in WritableRunnerEnv)
{
EnvValue(container, expectedEnv.Key).Should().Be(expectedEnv.Value, $"{deployment.Name} must keep .NET paths writable for uid 1001");
}
var container = common.ContainerMappings().Should().ContainSingle().Subject;
EnvValue(container, "REPO_URL").Should().Be("https://github.com/astoltz/FlowerCore.Common");
EnvValue(container, "RUNNER_NAME_PREFIX").Should().Be("rke2-linux");
EnvValue(container, "LABELS").Should().Be("self-hosted,linux,fc-build-linux");
var mounts = ManifestNodeExtensions.MappingSequence(container, "volumeMounts")
.ToDictionary(
mount => ManifestNodeExtensions.Scalar(mount, "name") ?? string.Empty,
mount => ManifestNodeExtensions.Scalar(mount, "mountPath") ?? string.Empty,
StringComparer.Ordinal);
mounts.Should().Contain("runner-home", "/home/runner");
mounts.Should().Contain("nuget-cache", "/home/runner/.nuget/packages");
mounts.Should().Contain("tmp", "/tmp");
}
}
[Fact]
public void GitHubRunnerFleet_MustAvoidRwoMultiAttachForRepoScopedDeployments()
{
var deployments = GitHubRunnerDeployments();
foreach (var deploymentName in RepoScopedLinuxRunnerDeployments)
{
var deployment = deployments[deploymentName];
// Sprint 34 ops trimmed runner load while the cluster was degraded
// to two healthy nodes. Repo-scoped runners can be tuned back above
// one replica, but they must stay RWO-safe before that happens.
ReplicaCount(deployment).Should().BeGreaterOrEqualTo(1, $"{deploymentName} must keep at least one repo-scoped runner online");
var volumes = deployment.MappingSequence("spec", "template", "spec", "volumes");
var claimNames = volumes
.Select(volume => ManifestNodeExtensions.Scalar(volume, "persistentVolumeClaim", "claimName"))
.Where(value => !string.IsNullOrWhiteSpace(value))
.ToList();
claimNames.Should().BeEmpty($"{deploymentName} must remain ready for safe multi-replica scaling without sharing a RWO PVC");
volumes.Should().Contain(volume =>
string.Equals(ManifestNodeExtensions.Scalar(volume, "name"), "nuget-cache", StringComparison.Ordinal)
&& ManifestNodeExtensions.Mapping(volume, "emptyDir") != null);
}
var common = deployments["github-runner"];
ReplicaCount(common).Should().Be(1);
common.MappingSequence("spec", "template", "spec", "volumes")
var claimNames = common.MappingSequence("spec", "template", "spec", "volumes")
.Select(volume => ManifestNodeExtensions.Scalar(volume, "persistentVolumeClaim", "claimName"))
.Where(value => !string.IsNullOrWhiteSpace(value))
.Should()
.ContainSingle()
.Which
.Should()
.Be("github-runner-nuget-cache");
}
[Fact]
public void Runners_MustNotPinToOperatorWorkstationHosts()
{
// CRITICAL SAFETY (operator directive 2026-05-26): BLUEJAY-WS is the
// operator's primary workstation — host of the 1Password Connect
// bearer token, fcadmin SSH keys to noc1, signing CA private keys,
// and source for every FC repo. A self-hosted GitHub Actions runner
// there would execute arbitrary PR code with that local access.
// Build-side analog of the Sprint 9 NEW safe-account exclusion gate
// (Puppet GPO/AppLocker/WDAC/audit-forwarder modules refuse to apply
// on BLUEJAY-WS). This lint asserts no GitHub-runner Deployment in
// apps/github-runner/ pins to a forbidden operator-workstation host
// via nodeName, nodeSelector, nodeAffinity, or tolerations.
// Existing legacy `bluejay-ws-sandbox-1` GitHub-registered runner is
// out of scope here (it's a runtime registration, not a K8s
// Deployment) — see CLAUDE.md "Common Mistakes" entry and
// feedback_bluejay_ws_never_public_runner.md.
var forbiddenHostPatterns = new[]
{
"bluejay-ws",
"BLUEJAY-WS",
"bluejay-ws.iamworkin.lan",
"iamworkin-ws",
};
bool ContainsForbidden(string? value) =>
!string.IsNullOrWhiteSpace(value)
&& forbiddenHostPatterns.Any(pattern => value!.Contains(pattern, StringComparison.OrdinalIgnoreCase));
var violations = GitHubRunnerDeployments().Values.SelectMany(deployment =>
{
var local = new List<string>();
var podSpec = ManifestNodeExtensions.Mapping(deployment.Root, "spec", "template", "spec");
if (podSpec is null)
{
return local;
}
// nodeName: pins the pod to a specific node by name.
var nodeName = ManifestNodeExtensions.Scalar(podSpec, "nodeName");
if (ContainsForbidden(nodeName))
{
local.Add($"{deployment.Name} sets nodeName='{nodeName}' which targets a forbidden operator-workstation host.");
}
// nodeSelector: dict of label → value pinning the pod to nodes
// carrying matching labels. Examples that would trip this:
// kubernetes.io/hostname: bluejay-ws
// flowercore.io/host: bluejay-ws.iamworkin.lan
var nodeSelector = ManifestNodeExtensions.Mapping(podSpec, "nodeSelector");
if (nodeSelector is not null)
{
foreach (var entry in nodeSelector.Children)
{
var key = entry.Key is YamlScalarNode keyScalar ? keyScalar.Value : null;
var value = entry.Value is YamlScalarNode valueScalar ? valueScalar.Value : null;
if (ContainsForbidden(value))
{
local.Add($"{deployment.Name} has nodeSelector entry '{key}: {value}' which targets a forbidden operator-workstation host.");
}
}
}
// nodeAffinity: matchExpressions over node labels.
foreach (var term in ManifestNodeExtensions.MappingSequence(podSpec, "affinity", "nodeAffinity", "requiredDuringSchedulingIgnoredDuringExecution", "nodeSelectorTerms"))
{
foreach (var expr in ManifestNodeExtensions.MappingSequence(term, "matchExpressions"))
{
var key = ManifestNodeExtensions.Scalar(expr, "key");
foreach (var valueNode in ManifestNodeExtensions.ScalarSequence(expr, "values"))
{
if (ContainsForbidden(valueNode))
{
local.Add($"{deployment.Name} has nodeAffinity matchExpression '{key}' value '{valueNode}' which targets a forbidden operator-workstation host.");
}
}
}
}
// tolerations: scheduling onto a tainted operator-workstation
// node would let the runner run there. Forbid any toleration
// value that names the workstation.
foreach (var toleration in ManifestNodeExtensions.MappingSequence(podSpec, "tolerations"))
{
var key = ManifestNodeExtensions.Scalar(toleration, "key");
var value = ManifestNodeExtensions.Scalar(toleration, "value");
if (ContainsForbidden(key))
{
local.Add($"{deployment.Name} has toleration key '{key}' which targets a forbidden operator-workstation host.");
}
if (ContainsForbidden(value))
{
local.Add($"{deployment.Name} has toleration value '{value}' which targets a forbidden operator-workstation host.");
}
}
return local;
}).ToList();
violations.Should().BeEmpty("BLUEJAY-WS / iamworkin-ws must never host a fleet GitHub Actions runner; see CLAUDE.md 'Registering BLUEJAY-WS as a fleet GitHub Actions runner' and feedback_bluejay_ws_never_public_runner.md");
}
// Reads apps/monitoring/noc-monitoring.yaml under Inventory.BluejayRoot and requires the
// runner-offline alert fragments listed below to be present in its text.
[Fact]
public void Monitoring_MustAlertWhenLinuxRunnerDeploymentIsUnavailable()
{
    var monitoringPath = Path.Combine(Inventory.BluejayRoot, "apps", "monitoring", "noc-monitoring.yaml");
    var monitoring = File.ReadAllText(monitoringPath);

    // Asserted one by one, in the order listed.
    var requiredFragments = new[]
    {
        "MacMiniRunnerOffline",
        "LinuxRunnerOffline",
        "kube_deployment_status_replicas_ready",
        "github-runner(|-(sharedpos|puppet|signage|dms|telephony|print-web|chat|mysql|kiosk-linux))",
        "folder: CI Alerts",
        "uid: linux-runner-offline",
        "alert_channel: irc",
    };

    foreach (var fragment in requiredFragments)
    {
        monitoring.Should().Contain(fragment);
    }
}
// Requires the monitoring manifest text to contain the PromQL fragments that filter out
// namespace "github-runner", plus the pointer to the dedicated runner-offline alerts.
[Fact]
public void Monitoring_GenericKubernetesAlerts_MustExcludeEphemeralGithubRunnerNamespace()
{
    var monitoringPath = Path.Combine(Inventory.BluejayRoot, "apps", "monitoring", "noc-monitoring.yaml");
    var monitoring = File.ReadAllText(monitoringPath);

    // Asserted one by one, in the order listed.
    var requiredFragments = new[]
    {
        "kube_pod_container_status_restarts_total{namespace!=\"github-runner\"}",
        "and on(namespace, pod) kube_pod_info",
        "kube_deployment_spec_replicas{namespace!=\"github-runner\"} != kube_deployment_status_replicas_available{namespace!=\"github-runner\"}",
        "dedicated LinuxRunnerOffline/MacMiniRunnerOffline alerts",
    };

    foreach (var fragment in requiredFragments)
    {
        monitoring.Should().Contain(fragment);
    }
}
// Requires the monitoring manifest text to list the health-route URLs below and to not
// contain the bare print.iamworkin.lan root URL followed by a closing quote.
[Fact]
public void Monitoring_BlackboxTargetsForOidcSensitiveServices_MustUseAnonymousHealthRoutesWhenAvailable()
{
    var monitoringPath = Path.Combine(Inventory.BluejayRoot, "apps", "monitoring", "noc-monitoring.yaml");
    var monitoring = File.ReadAllText(monitoringPath);

    // Note that library uses /health while every other host uses /healthz.
    var requiredHealthUrls = new[]
    {
        "https://chat.iamworkin.lan/healthz",
        "https://dist.iamworkin.lan/healthz",
        "https://dms.iamworkin.lan/healthz",
        "https://print.iamworkin.lan/healthz",
        "https://knowledge.iamworkin.lan/healthz",
        "https://library.iamworkin.lan/health",
        "https://aistation.iamworkin.lan/healthz",
    };

    foreach (var healthUrl in requiredHealthUrls)
    {
        monitoring.Should().Contain(healthUrl);
    }

    monitoring.Should().NotContain("https://print.iamworkin.lan/\"");
}
// Collects a violation for every main container of a Deployment that
//   - sets FlowerCore__Auth__Enabled and FlowerCore__Auth__Oidc__Enabled to "true"
//     (compared case-insensitively),
//   - has a readinessProbe or startupProbe whose httpGet path is exactly "/healthz", and
//   - whose pod template annotation flowercore.io/healthz-auth-policy is not exactly
//     "allow-anonymous" (ordinal, case-sensitive comparison).
// The test then asserts that no violations were collected.
[Fact]
public void OidcEnforcedDeployments_WithHttpHealthzProbes_MustDeclareAnonymousHealthzContract()
{
var violations = Inventory.Documents
.Where(document => document.Kind == "Deployment")
.SelectMany(document => document.MainContainerMappings()
.Where(container => string.Equals(EnvValue(container, "FlowerCore__Auth__Enabled"), "true", StringComparison.OrdinalIgnoreCase))
.Where(container => string.Equals(EnvValue(container, "FlowerCore__Auth__Oidc__Enabled"), "true", StringComparison.OrdinalIgnoreCase))
.Where(container => ProbeHttpGetPath(container, "readinessProbe") == "/healthz"
|| ProbeHttpGetPath(container, "startupProbe") == "/healthz")
// The annotation lives on the pod template, so this filter depends on the document only;
// the container argument is deliberately discarded.
.Where(_ => !string.Equals(
PodAnnotation(document, "flowercore.io/healthz-auth-policy"),
"allow-anonymous",
StringComparison.Ordinal))
.Select(container =>
{
// Containers without a name scalar are reported as "<unnamed>".
var containerName = ManifestNodeExtensions.Scalar(container, "name") ?? "<unnamed>";
return $"{document.Descriptor} container '{containerName}' enforces OIDC while probing /healthz but lacks flowercore.io/healthz-auth-policy: allow-anonymous.";
}))
.ToList();
violations.Should().BeEmpty();
// NOTE(review): `claimNames` is not declared anywhere in the visible body of this method.
// This line looks like it belongs to a different test (possibly the other side of a diff
// rendered inline) — confirm against the real file.
claimNames.Should().Contain("github-runner-nuget-cache");
}
// NOTE(review): two method signatures appear back to back below, and the body alternates
// between checks on the knowledge-web Deployment and a PVC-name check for the github-runner
// namespace. As written this does not read as one compilable method; it looks like both
// sides of a diff rendered together — confirm against the real file before editing.
[Fact]
public void Knowledge_OidcEnforcement_MustKeepHealthzAnonymousContractVisibleInManifest()
public void GitHubRunnerFleet_MustUseOneRwoCachePerRepoScopedDeployment()
{
// Looks up the single Deployment named knowledge-web in namespace knowledge and requires
// it to have exactly one main container.
var knowledge = Inventory.Documents
.Single(document => document.Kind == "Deployment" && document.Namespace == "knowledge" && document.Name == "knowledge-web");
var container = knowledge.MainContainerMappings().Should().ContainSingle().Subject;
// Names of every PersistentVolumeClaim document in namespace github-runner (ordinal set).
var pvcNames = Inventory.Documents
.Where(document => document.Kind == "PersistentVolumeClaim")
.Where(document => document.Namespace == "github-runner")
.Select(document => document.Name)
.ToHashSet(StringComparer.Ordinal);
// Auth and OIDC must both be "true", readiness must probe /healthz, and the pod template
// must carry flowercore.io/healthz-auth-policy: allow-anonymous.
EnvValue(container, "FlowerCore__Auth__Enabled").Should().Be("true");
EnvValue(container, "FlowerCore__Auth__Oidc__Enabled").Should().Be("true");
ProbeHttpGetPath(container, "readinessProbe").Should().Be("/healthz");
PodAnnotation(knowledge, "flowercore.io/healthz-auth-policy").Should().Be("allow-anonymous");
// For each key of TopLinuxRunnerRepos, drops the first "github-runner-".Length characters
// and expects a PVC named github-runner-<suffix>-nuget-cache.
// NOTE(review): presumably every key starts with "github-runner-" — verify where
// TopLinuxRunnerRepos is defined.
foreach (var deploymentName in TopLinuxRunnerRepos.Keys)
{
var suffix = deploymentName["github-runner-".Length..];
pvcNames.Should().Contain($"github-runner-{suffix}-nuget-cache");
}
}
// NOTE(review): two method signatures appear back to back below, and the body mixes checks
// on the fc-distribution Deployment with text checks on the monitoring manifest. As written
// this does not read as one compilable method; it looks like both sides of a diff rendered
// together — confirm against the real file before editing.
[Fact]
public void Distribution_OidcEnforcement_MustKeepHealthzAnonymousContractVisibleInManifest()
public void Monitoring_MustAlertWhenTopLinuxRunnerDeploymentIsUnavailable()
{
// Looks up the single Deployment named fc-distribution in namespace fc-distribution and
// requires it to have exactly one main container.
var distribution = Inventory.Documents
.Single(document => document.Kind == "Deployment" && document.Namespace == "fc-distribution" && document.Name == "fc-distribution");
var container = distribution.MainContainerMappings().Should().ContainSingle().Subject;
// Raw text of apps/monitoring/noc-monitoring.yaml under Inventory.BluejayRoot.
var monitoring = File.ReadAllText(Path.Combine(Inventory.BluejayRoot, "apps", "monitoring", "noc-monitoring.yaml"));
// OIDC and auth must both be "true", readiness must probe /healthz, and the pod template
// must carry flowercore.io/healthz-auth-policy: allow-anonymous.
EnvValue(container, "FlowerCore__Auth__Oidc__Enabled").Should().Be("true");
EnvValue(container, "FlowerCore__Auth__Enabled").Should().Be("true");
ProbeHttpGetPath(container, "readinessProbe").Should().Be("/healthz");
PodAnnotation(distribution, "flowercore.io/healthz-auth-policy").Should().Be("allow-anonymous");
// The monitoring text must contain the LinuxRunnerOffline alert name, the replicas_available
// selector for namespace github-runner, the deployment-name pattern, and the runbook URL.
monitoring.Should().Contain("LinuxRunnerOffline");
monitoring.Should().Contain("kube_deployment_status_replicas_available{namespace=\"github-runner\"");
monitoring.Should().Contain("github-runner(|-(puppet|signage|dms|telephony|print-web|chat|mysql|kiosk-linux))");
monitoring.Should().Contain("runbook_url: \"https://gitea.iamworkin.lan/bluejay/FlowerCore.Notes/src/branch/master/docs/infrastructure/self-hosted-runner-fleet.md\"");
}
[Fact]
@@ -638,370 +373,6 @@ public sealed class FleetManifestLintTests
violations.Should().BeEmpty();
}
// Requires apps/fc-devicemgmt to exist, its *.yaml file names to be equivalent to the
// expected manifest set, and FcDeviceManagementDocuments() to contain at least one document
// whose RelativePath is fc-devicemgmt/<file> for each expected file.
[Fact]
public void FcDeviceManagement_MustShipExpectedManifestSet()
{
    var appRoot = Path.Combine(Inventory.BluejayRoot, "apps", "fc-devicemgmt");
    Directory.Exists(appRoot).Should().BeTrue("Sprint 8 Cx-5 owns apps/fc-devicemgmt.");

    string[] expectedFiles =
    {
        "1password-item.yaml",
        "certificate-web.yaml",
        "clusterrole-operator.yaml",
        "clusterrolebinding-operator.yaml",
        "crds.yaml",
        "deployment-operator.yaml",
        "deployment-web.yaml",
        "ingressroute-web.yaml",
        "namespace.yaml",
        "network-policy.yaml",
        "service-web.yaml",
        "serviceaccount-operator.yaml",
    };

    // Directory listing check: no missing and no extra manifests.
    var actualFileNames = Directory.GetFiles(appRoot, "*.yaml").Select(Path.GetFileName);
    actualFileNames.Should().BeEquivalentTo(expectedFiles);

    // Inventory check: each file must have produced at least one document.
    foreach (var expectedFile in expectedFiles)
    {
        var expectedRelativePath = $"fc-devicemgmt/{expectedFile}";
        FcDeviceManagementDocuments()
            .Should()
            .Contain(document => document.RelativePath == expectedRelativePath);
    }
}
// Requires every fc-devicemgmt document to carry the listed labels in metadata.labels, and
// every Deployment to repeat them on its pod template together with a non-blank
// flowercore.io/audit-trace-id pod-template annotation.
[Fact]
public void FcDeviceManagement_ObjectsMustCarryStandardTraceabilityLabels()
{
    string[] requiredLabels =
    {
        "app.kubernetes.io/name",
        "app.kubernetes.io/part-of",
        "app.kubernetes.io/managed-by",
        "flowercore.io/tenant-id",
        "flowercore.io/created-by",
    };

    var documents = FcDeviceManagementDocuments();
    var deployments = documents.Where(document => document.Kind == "Deployment").ToList();
    var violations = new List<string>();

    // Pass 1: object-level labels on every document.
    foreach (var document in documents)
    {
        foreach (var label in requiredLabels)
        {
            if (string.IsNullOrWhiteSpace(document.Scalar("metadata", "labels", label)))
            {
                violations.Add($"{document.Descriptor} is missing metadata.labels['{label}'].");
            }
        }
    }

    // Pass 2: the same labels on each Deployment's pod template.
    foreach (var deployment in deployments)
    {
        foreach (var label in requiredLabels)
        {
            if (string.IsNullOrWhiteSpace(deployment.Scalar("spec", "template", "metadata", "labels", label)))
            {
                violations.Add($"{deployment.Descriptor} pod template is missing metadata.labels['{label}'].");
            }
        }
    }

    // Pass 3: the audit trace annotation on each Deployment's pod template.
    foreach (var deployment in deployments)
    {
        var auditTraceId = deployment.Scalar("spec", "template", "metadata", "annotations", "flowercore.io/audit-trace-id");
        if (string.IsNullOrWhiteSpace(auditTraceId))
        {
            violations.Add($"{deployment.Descriptor} pod template is missing flowercore.io/audit-trace-id.");
        }
    }

    violations.Should().BeEmpty();
}
// Verifies the fc-devicemgmt ingress and certificate shape:
//   - no manifest text mentions "certResolver";
//   - the public host update.flowercore.io and the marker "disabled-until-Q-OIDC-1" appear
//     somewhere in the manifest text, but no IngressRoute route actually matches that host;
//   - some IngressRoute route matches Host(`devices.iamworkin.lan`);
//   - Certificate fc-devicemgmt-web-tls is issued by ClusterIssuer step-ca-acme and lists
//     exactly one DNS name, devices.iamworkin.lan.
[Fact]
public void FcDeviceManagement_IngressMustUseCertManagerAndKeepPublicHostDisabled()
{
    // Concatenated raw text of every *.yaml under apps/fc-devicemgmt, so that comments are
    // searchable as well as parsed values.
    var appText = string.Join(
        Environment.NewLine,
        Directory.GetFiles(Path.Combine(Inventory.BluejayRoot, "apps", "fc-devicemgmt"), "*.yaml")
            .Select(File.ReadAllText));

    appText.Should().NotContain("certResolver");
    appText.Should().Contain("update.flowercore.io");
    appText.Should().Contain("disabled-until-Q-OIDC-1");

    // Only parsed routes are inspected here; a route with no match scalar counts as empty.
    FcDeviceManagementDocuments()
        .Where(document => document.Kind == "IngressRoute")
        .SelectMany(document => document.MappingSequence("spec", "routes"))
        .Select(route => ManifestNodeExtensions.Scalar(route, "match") ?? string.Empty)
        .Should()
        .Contain(match => match.Contains("Host(`devices.iamworkin.lan`)", StringComparison.Ordinal))
        .And.NotContain(match => match.Contains("Host(`update.flowercore.io`)", StringComparison.Ordinal));

    var certificate = FcDeviceManagementDocuments()
        .Single(document => document.Kind == "Certificate" && document.Name == "fc-devicemgmt-web-tls");

    certificate.Scalar("spec", "issuerRef", "name").Should().Be("step-ca-acme");
    certificate.Scalar("spec", "issuerRef", "kind").Should().Be("ClusterIssuer");

    // ContainSingle(string) treats its argument as the "because" reason, not as the expected
    // element, so the DNS name itself has to be asserted on the matched item.
    ManifestNodeExtensions.ScalarSequence(certificate.Root, "spec", "dnsNames")
        .Should()
        .ContainSingle()
        .Which.Should().Be("devices.iamworkin.lan");
}
// Checks scalar values found anywhere in the fc-devicemgmt-operator ClusterRole and
// Deployment: the role must mention "flowercore.io", "*", "deployments" and "get" but not
// "devices.flowercore.io"; the Deployment must mention the owner-deployment variable name
// and its own name.
[Fact]
public void FcDeviceManagement_OperatorRbacMustCoverDevicesAndOwnerLookup()
{
    var documents = FcDeviceManagementDocuments();

    var operatorRole = documents
        .Where(document => document.Kind == "ClusterRole")
        .Single(document => document.Name == "fc-devicemgmt-operator");
    var allScalars = operatorRole.AllScalars().ToList();

    allScalars.Should().Contain("flowercore.io");
    allScalars.Should().NotContain("devices.flowercore.io");
    foreach (var requiredScalar in new[] { "*", "deployments", "get" })
    {
        allScalars.Should().Contain(requiredScalar);
    }

    var operatorDeployment = documents
        .Where(document => document.Kind == "Deployment")
        .Single(document => document.Name == "fc-devicemgmt-operator");

    foreach (var requiredScalar in new[] { "FLOWERCORE_KUBERNETES_OWNER_DEPLOYMENT", "fc-devicemgmt-operator" })
    {
        operatorDeployment.AllScalars().Should().Contain(requiredScalar);
    }
}
// Requires the OnePasswordItem fc-devicemgmt-runtime to point at the expected vault item,
// forbids any Secret document in the app, and checks the raw manifest text for the
// secretKeyRef reference while rejecting inline-secret markers.
[Fact]
public void FcDeviceManagement_RuntimeSecretsMustUseOnePasswordItemPattern()
{
    var runtimeItem = FcDeviceManagementDocuments()
        .Where(document => document.Kind == "OnePasswordItem")
        .Single(document => document.Name == "fc-devicemgmt-runtime");

    var itemPath = runtimeItem.Scalar("spec", "itemPath");
    itemPath.Should().Be("vaults/IAmWorkin/items/FlowerCore DeviceManagement Runtime");

    // Raw text of every *.yaml under apps/fc-devicemgmt, joined with newlines.
    var manifestPaths = Directory.GetFiles(Path.Combine(Inventory.BluejayRoot, "apps", "fc-devicemgmt"), "*.yaml");
    var appText = string.Join(Environment.NewLine, manifestPaths.Select(path => File.ReadAllText(path)));

    FcDeviceManagementDocuments().Should().NotContain(document => document.Kind == "Secret");

    foreach (var requiredFragment in new[] { "secretKeyRef:", "name: fc-devicemgmt-runtime" })
    {
        appText.Should().Contain(requiredFragment);
    }

    foreach (var forbiddenFragment in new[] { "stringData:", "from-literal", "tls.key:" })
    {
        appText.Should().NotContain(forbiddenFragment);
    }
}
// Requires exactly two NetworkPolicy documents whose combined scalars include the listed
// CIDRs, whose combined egress ports include the listed ports, and exactly one of which
// mentions 10.0.56.200 while allowing egress on 80, 443, 8080 and 8443.
[Fact]
public void FcDeviceManagement_NetworkPoliciesMustAllowLanAgentsSynologyAndDnatPorts()
{
    var policies = (
        from document in FcDeviceManagementDocuments()
        where document.Kind == "NetworkPolicy"
        select document).ToList();
    policies.Should().HaveCount(2);

    var combinedScalars = policies.SelectMany(policy => policy.AllScalars()).ToList();
    var requiredCidrs = new[]
    {
        "10.0.56.0/24",
        "10.0.57.0/24",
        "10.0.58.0/24",
        "10.0.68.0/27",
        "10.0.58.3/32",
    };
    foreach (var cidr in requiredCidrs)
    {
        combinedScalars.Should().Contain(cidr);
    }

    var combinedEgressPorts = policies
        .SelectMany(policy => policy.EgressPorts())
        .ToHashSet(StringComparer.Ordinal);
    combinedEgressPorts.Should().Contain(new[] { "80", "443", "8080", "8443", "2049", "111" });

    // Policies that mention 10.0.56.200 in any scalar value.
    var traefikVipPolicies = (
        from policy in policies
        where policy.AllScalars().Any(value => value.Contains("10.0.56.200", StringComparison.Ordinal))
        select policy).ToList();

    var traefikVipPolicy = traefikVipPolicies.Should().ContainSingle().Subject;
    traefikVipPolicy.EgressPorts().Should().Contain(new[] { "80", "443", "8080", "8443" });
}
// Forbids an explicit Application document in fc-devicemgmt and requires the file that
// defines Namespace fc-devicemgmt to contain the ArgoCD discovery sentence.
[Fact]
public void FcDeviceManagement_MustRelyOnApplicationSetDiscovery()
{
    var documents = FcDeviceManagementDocuments();

    documents.Should().NotContain(document => document.Kind == "Application");

    var namespaceDocument = documents
        .Where(document => document.Kind == "Namespace")
        .Single(document => document.Name == "fc-devicemgmt");
    namespaceDocument.FileText
        .Should()
        .Contain("ArgoCD discovers this directory as Application `infra-fc-devicemgmt`.");
}
// For every entry in BroaderHardeningDeployments, requires the named Deployment's pod
// template to carry fc.flowercore.io/healthz-anon = "true" and fc.flowercore.io/probe-path
// equal to the entry's ProbePath.
[Fact]
public void BroaderHardeningDeployments_MustAnnotateAnonymousHealthProbeIntent()
{
    foreach (var expected in BroaderHardeningDeployments)
    {
        var appName = expected.Key;
        var spec = expected.Value;

        var deployment = AppDocuments(appName)
            .Where(document => document.Kind == "Deployment")
            .Single(document => document.Name == spec.Deployment);

        var healthzAnon = PodAnnotation(deployment, "fc.flowercore.io/healthz-anon");
        healthzAnon.Should().Be("true");

        var probePath = PodAnnotation(deployment, "fc.flowercore.io/probe-path");
        probePath.Should().Be(spec.ProbePath);
    }
}
// For every entry in BroaderHardeningDeployments, requires the file text of the named
// Deployment to contain the fc-safe-to-expose marker line referencing ADR-178.
[Fact]
public void BroaderHardeningDeployments_MustDocumentForwardedProtoAuthPosture()
{
    foreach (var expected in BroaderHardeningDeployments)
    {
        var appName = expected.Key;
        var deploymentName = expected.Value.Deployment;

        var deployment = AppDocuments(appName)
            .Where(document => document.Kind == "Deployment")
            .Single(document => document.Name == deploymentName);

        deployment.FileText
            .Should()
            .Contain("fc-safe-to-expose: X-Forwarded-Proto handled by AddFlowerCoreWebAuth (ADR-178)");
    }
}
// For every app in BroaderHardeningInternalPrestageApps, requires the joined file text to
// contain the pre-staging banner, a "# - match: Host(`" line and the GET/HEAD method
// expression, while no parsed IngressRoute route match may contain ".flowercore.io".
[Fact]
public void BroaderHardeningInternalApps_MustOnlyPrestageCommentedPublicMethodAllowlist()
{
    var requiredFragments = new[]
    {
        "PUBLIC HOST PRE-STAGING (DISABLED - Sprint 61+ exposure go-decision only)",
        "# - match: Host(`",
        "Method(`GET`) || Method(`HEAD`)",
    };

    foreach (var app in BroaderHardeningInternalPrestageApps)
    {
        var documents = AppDocuments(app);
        var text = string.Join(Environment.NewLine, documents.Select(document => document.FileText));

        foreach (var fragment in requiredFragments)
        {
            text.Should().Contain(fragment);
        }

        // Match expressions of every parsed route; a missing match scalar counts as empty.
        var liveRouteMatches =
            from document in documents
            where document.Kind == "IngressRoute"
            from route in document.MappingSequence("spec", "routes")
            select ManifestNodeExtensions.Scalar(route, "match") ?? string.Empty;

        liveRouteMatches
            .Should()
            .NotContain(
                match => match.Contains(".flowercore.io", StringComparison.Ordinal),
                "Sprint 61 broader hardening only pre-stages commented public hosts for internal-only apps");
    }
}
// For dns-web, fc-media-web and fc-distribution: requires a single main container with the
// expected auth flag, OIDC enabled, audience (or client id) equal to the service slug, an
// optional client-secret reference to the expected Secret, and /healthz on the readiness
// probe and on the startup/liveness probes when those declare an httpGet path.
[Fact]
public void OidcFlipServices_AreGitOpsManagedWithHealthzProbes()
{
    var deployments = new[]
    {
        (App: "fc-dns", Name: "dns-web", Slug: "dns", Secret: "dns-oidc-client", AuthEnabled: "false"),
        (App: "fc-media", Name: "fc-media-web", Slug: "media", Secret: "media-oidc-client", AuthEnabled: "true"),
        (App: "fc-distribution", Name: "fc-distribution", Slug: "distribution", Secret: "distribution-oidc-client", AuthEnabled: "true"),
    };

    foreach (var (app, name, slug, secret, authEnabled) in deployments)
    {
        var deployment = AppDocuments(app)
            .Where(document => document.Kind == "Deployment")
            .Single(document => document.Name == name);
        var container = deployment.MainContainerMappings().Should().ContainSingle().Subject;

        EnvValue(container, "FlowerCore__Auth__Enabled").Should().Be(authEnabled);
        EnvValue(container, "FlowerCore__Auth__Oidc__Enabled").Should().Be("true");

        // Audience wins when present; ClientId is consulted only when Audience is null.
        var audienceOrClientId = EnvValue(container, "FlowerCore__Auth__Oidc__Audience")
            ?? EnvValue(container, "FlowerCore__Auth__Oidc__ClientId");
        audienceOrClientId.Should().Be(slug);

        EnvSecretName(container, "FlowerCore__Auth__Oidc__ClientSecret").Should().Be(secret);
        EnvSecretOptional(container, "FlowerCore__Auth__Oidc__ClientSecret").Should().Be("true");

        ProbePath(container, "readinessProbe").Should().Be("/healthz");

        // Startup and liveness probes are only checked when they yield an httpGet path.
        foreach (var optionalProbe in new[] { "startupProbe", "livenessProbe" })
        {
            var optionalProbePath = ProbePath(container, optionalProbe);
            if (optionalProbePath is not null)
            {
                optionalProbePath.Should().Be("/healthz");
            }
        }
    }
}
// Requires fc-dns, fc-media and fc-distribution to each declare a OnePasswordItem with the
// expected name whose spec.itemPath points at the matching IAmWorkin vault item.
[Fact]
public void OidcFlipServices_UseOnePasswordItemClientSecrets()
{
    var expectedItems = new[]
    {
        (App: "fc-dns", Name: "dns-oidc-client", ItemPath: "vaults/IAmWorkin/items/dns-oidc-client"),
        (App: "fc-media", Name: "media-oidc-client", ItemPath: "vaults/IAmWorkin/items/media-oidc-client"),
        (App: "fc-distribution", Name: "distribution-oidc-client", ItemPath: "vaults/IAmWorkin/items/distribution-oidc-client"),
    };

    foreach (var (app, itemName, itemPath) in expectedItems)
    {
        var item = AppDocuments(app)
            .Where(document => document.Kind == "OnePasswordItem")
            .Single(document => document.Name == itemName);

        item.Scalar("spec", "itemPath").Should().Be(itemPath);
    }
}
// Pins the image tags of dns-web, fc-media-web and fc-distribution, the storage class and
// requested size of the dns-web-data and fc-media-data PVCs, and three path scalars that
// the fc-media-web Deployment must contain.
[Fact]
public void DnsAndMediaGitOpsAdoption_PreservesLiveStorageAndImageShape()
{
    // Looks up the one document of the given kind and name inside an app directory.
    static ManifestDocument SingleDocument(string app, string kind, string name)
    {
        return AppDocuments(app).Single(document => document.Kind == kind && document.Name == name);
    }

    // --- fc-dns ---
    var dnsDeployment = SingleDocument("fc-dns", "Deployment", "dns-web");
    var dnsContainer = dnsDeployment.MainContainerMappings().Should().ContainSingle().Subject;
    var dnsPvc = SingleDocument("fc-dns", "PersistentVolumeClaim", "dns-web-data");

    ManifestNodeExtensions.Scalar(dnsContainer, "image").Should().Be("localhost/fc-dns-web:v20260613-g5-quota-aa99bd1");
    dnsPvc.Scalar("spec", "storageClassName").Should().Be("longhorn");
    dnsPvc.Scalar("spec", "resources", "requests", "storage").Should().Be("1Gi");

    // --- fc-media ---
    var mediaDeployment = SingleDocument("fc-media", "Deployment", "fc-media-web");
    var mediaContainer = mediaDeployment.MainContainerMappings().Should().ContainSingle().Subject;
    var mediaPvc = SingleDocument("fc-media", "PersistentVolumeClaim", "fc-media-data");

    ManifestNodeExtensions.Scalar(mediaContainer, "image").Should().Be("localhost/fc-media-web:v20260604-oidc-proper");
    mediaPvc.Scalar("spec", "storageClassName").Should().Be("longhorn");
    mediaPvc.Scalar("spec", "resources", "requests", "storage").Should().Be("20Gi");

    var requiredMediaPaths = new[]
    {
        "/volume1/kubernetes/fc-media-transcodes",
        "/volume1/kubernetes/fc-media-inbox",
        "/volume1/video",
    };
    mediaDeployment.AllScalars().Should().Contain(requiredMediaPaths);

    // --- fc-distribution ---
    var distributionDeployment = SingleDocument("fc-distribution", "Deployment", "fc-distribution");
    var distributionContainer = distributionDeployment.MainContainerMappings().Should().ContainSingle().Subject;

    ManifestNodeExtensions.Scalar(distributionContainer, "image").Should().Be("localhost/fc-distribution:v20260604-oidc-root-anon");
}
// Requires the monitoring manifest text to contain the quoted /healthz URLs for dns, dist
// and media, and to not contain the quoted bare root URLs for the same hosts.
[Fact]
public void MonitoringProbes_UseHealthzForOidcGatedHosts()
{
    var monitoringPath = Path.Combine(Inventory.BluejayRoot, "apps", "monitoring", "noc-monitoring.yaml");
    var monitoring = File.ReadAllText(monitoringPath);

    // The surrounding quotes are part of each fragment.
    var requiredTargets = new[]
    {
        "\"https://dns.iamworkin.lan/healthz\"",
        "\"https://dist.iamworkin.lan/healthz\"",
        "\"https://media.iamworkin.lan/healthz\"",
    };
    var forbiddenTargets = new[]
    {
        "\"https://dns.iamworkin.lan/\"",
        "\"https://dist.iamworkin.lan/\"",
        "\"https://media.iamworkin.lan/\"",
    };

    foreach (var target in requiredTargets)
    {
        monitoring.Should().Contain(target);
    }

    foreach (var target in forbiddenTargets)
    {
        monitoring.Should().NotContain(target);
    }
}
// Requires IngressRoute fc-distribution-public to have exactly one route whose match
// expression names dist.flowercore.io with GET and HEAD, and does not mention POST.
[Fact]
public void DistributionPublicIngress_KeepsGetHeadMethodAllowlist()
{
    var publicIngress = AppDocuments("fc-distribution")
        .Where(document => document.Kind == "IngressRoute")
        .Single(document => document.Name == "fc-distribution-public");

    var routes = publicIngress.MappingSequence("spec", "routes");
    var route = routes.Should().ContainSingle().Subject;
    var match = ManifestNodeExtensions.Scalar(route, "match");

    var requiredFragments = new[]
    {
        "Host(`dist.flowercore.io`)",
        "Method(`GET`)",
        "Method(`HEAD`)",
    };
    foreach (var fragment in requiredFragments)
    {
        match.Should().Contain(fragment);
    }

    match.Should().NotContain("Method(`POST`)");
}
// Requires the dns-web and fc-media-web IngressRoutes to each have exactly one route, whose
// match expression is exactly the corresponding *.iamworkin.lan host rule.
[Fact]
public void DnsAndMediaIngressRoutes_MatchLiveInternalHosts()
{
    var dnsIngress = AppDocuments("fc-dns")
        .Where(document => document.Kind == "IngressRoute")
        .Single(document => document.Name == "dns-web");
    var dnsRoute = dnsIngress.MappingSequence("spec", "routes").Should().ContainSingle().Subject;

    var mediaIngress = AppDocuments("fc-media")
        .Where(document => document.Kind == "IngressRoute")
        .Single(document => document.Name == "fc-media-web");
    var mediaRoute = mediaIngress.MappingSequence("spec", "routes").Should().ContainSingle().Subject;

    var dnsMatch = ManifestNodeExtensions.Scalar(dnsRoute, "match");
    dnsMatch.Should().Be("Host(`dns.iamworkin.lan`)");

    var mediaMatch = ManifestNodeExtensions.Scalar(mediaRoute, "match");
    mediaMatch.Should().Be("Host(`media.iamworkin.lan`)");
}
private static IEnumerable<string> ProbeViolations(
ManifestDocument document,
YamlMappingNode container,
@@ -1026,19 +397,6 @@ public sealed class FleetManifestLintTests
};
}
// Returns every Deployment document in namespace "github-runner", keyed by document name
// with ordinal key comparison.
private static IReadOnlyDictionary<string, ManifestDocument> GitHubRunnerDeployments()
{
    var runnerDeployments =
        from document in Inventory.Documents
        where document.Kind == "Deployment"
        where document.Namespace == "github-runner"
        select document;

    return runnerDeployments.ToDictionary(deployment => deployment.Name, StringComparer.Ordinal);
}
// Returns spec.replicas parsed as an integer, or 1 when the scalar does not parse
// (including when it is null). The fallback of 1 presumably mirrors the Kubernetes default
// for an omitted replicas field — confirm if that matters to a caller.
private static int ReplicaCount(ManifestDocument document)
{
    var rawReplicas = document.Scalar("spec", "replicas");

    // Manifest values are machine-readable, so parse with the invariant culture instead of
    // whatever culture the test host happens to run under.
    return int.TryParse(
        rawReplicas,
        System.Globalization.NumberStyles.Integer,
        System.Globalization.CultureInfo.InvariantCulture,
        out var replicas)
        ? replicas
        : 1;
}
private static string? EnvValue(YamlMappingNode container, string name)
{
return EnvMapping(container, name) is { } env ? ManifestNodeExtensions.Scalar(env, "value") : null;
@@ -1058,50 +416,11 @@ public sealed class FleetManifestLintTests
: null;
}
// Returns the valueFrom.secretKeyRef.optional scalar of the named env entry, or null when
// the container has no env entry with that name.
private static string? EnvSecretOptional(YamlMappingNode container, string name)
{
    var env = EnvMapping(container, name);
    if (env is null)
    {
        return null;
    }

    return ManifestNodeExtensions.Scalar(env, "valueFrom", "secretKeyRef", "optional");
}
// Returns the <probeKey>.httpGet.path scalar of the container, as resolved by
// ManifestNodeExtensions.Scalar.
private static string? ProbePath(YamlMappingNode container, string probeKey)
    => ManifestNodeExtensions.Scalar(container, probeKey, "httpGet", "path");
// Returns the inventory documents whose RelativePath starts with "<app>/" (ordinal match).
private static IReadOnlyList<ManifestDocument> AppDocuments(string app)
{
    var appPrefix = $"{app}/";

    var appDocuments =
        from document in Inventory.Documents
        where document.RelativePath.StartsWith(appPrefix, StringComparison.Ordinal)
        select document;

    return appDocuments.ToList();
}
// Returns the env entry of the container whose name scalar equals the given name (ordinal),
// or null when there is none. More than one match makes SingleOrDefault throw.
private static YamlMappingNode? EnvMapping(YamlMappingNode container, string name)
{
    var envEntries = ManifestNodeExtensions.MappingSequence(container, "env");

    return envEntries
        .Where(entry => string.Equals(ManifestNodeExtensions.Scalar(entry, "name"), name, StringComparison.Ordinal))
        .SingleOrDefault();
}
// Returns the spec.template.metadata.annotations[name] scalar of the document.
private static string? PodAnnotation(ManifestDocument document, string name)
    => document.Scalar("spec", "template", "metadata", "annotations", name);
// Returns the httpGet.path scalar of the named probe, or null when the container has no
// mapping under probeKey or that probe has no httpGet mapping.
private static string? ProbeHttpGetPath(YamlMappingNode container, string probeKey)
{
    if (!ManifestNodeExtensions.TryGetMapping(container, probeKey, out var probe))
    {
        return null;
    }

    if (!ManifestNodeExtensions.TryGetMapping(probe, "httpGet", out var httpGet))
    {
        return null;
    }

    return ManifestNodeExtensions.Scalar(httpGet, "path");
}
// Returns the inventory documents whose RelativePath starts with "fc-devicemgmt/" (ordinal).
private static IReadOnlyList<ManifestDocument> FcDeviceManagementDocuments()
{
    var deviceManagementDocuments =
        from document in Inventory.Documents
        where document.RelativePath.StartsWith("fc-devicemgmt/", StringComparison.Ordinal)
        select document;

    return deviceManagementDocuments.ToList();
}
}
internal sealed class ManifestInventory
@@ -1325,22 +644,6 @@ internal sealed record ManifestDocument(
.ToList();
}
// Returns only the entries under the pod spec's "containers" key; initContainers are not
// read. Prefer this over a broader container listing when a test asserts on the primary
// container (env, image, volumeMounts) and an initContainer would be a false-positive
// match, e.g. the `setup-runner-home` initContainer on the GitHub runner deployments must
// not count toward their single-container assertions. Returns an empty list when the
// document has no pod spec.
public IReadOnlyList<YamlMappingNode> MainContainerMappings()
{
    if (PodSpec() is not { } podSpec)
    {
        return Array.Empty<YamlMappingNode>();
    }

    var mainContainers = ManifestNodeExtensions.MappingSequence(podSpec, "containers");
    return mainContainers.ToList();
}
public IReadOnlyList<ContainerSpec> ContainerSpecs()
{
return ContainerMappings()

View File

@@ -1,99 +0,0 @@
using FluentAssertions;
using Xunit;
namespace BluejayInfraLint.Tests;
// Text-level checks on the OpenVox server durability runbook and the recreate smoke script.
// Both files are resolved relative to the repository root found by FindRepoRoot.
[Trait("Category", "Unit")]
public sealed class OpenVoxServerDurabilityTests
{
    // Root must be initialized first: the two paths below are built from it.
    private static readonly string Root = FindRepoRoot();
    private static readonly string RunbookPath = Path.Combine(Root, "docs", "runbooks", "openvoxserver-quadlet-durability.md");
    private static readonly string SmokePath = Path.Combine(Root, "scripts", "monitoring", "openvox-recreate-smoke.sh");

    // The runbook must contain the host-artifact and non-ArgoCD phrases listed below.
    [Fact]
    public void Runbook_DocumentsHostArtifactAndNonArgoPath()
    {
        var runbook = File.ReadAllText(RunbookPath);

        var requiredPhrases = new[]
        {
            "noc1 host artifact",
            "not an ArgoCD application",
            "systemctl cat openvoxserver",
            "/etc/containers/systemd/openvoxserver.container",
        };

        foreach (var phrase in requiredPhrases)
        {
            runbook.Should().Contain(phrase);
        }
    }

    // The runbook must contain the Sprint 32 Cx-12 live-apply markers listed below.
    [Fact]
    public void Runbook_DocumentsCx12LiveApplyState()
    {
        var runbook = File.ReadAllText(RunbookPath);

        var requiredPhrases = new[]
        {
            "Sprint 32 Cx-12",
            "openvoxserver-safeconfig.service",
            "/opt/puppet/r10k-deploy.sh",
            "HEAD == origin/master",
        };

        foreach (var phrase in requiredPhrases)
        {
            runbook.Should().Contain(phrase);
        }
    }

    // The first OPENVOX_RECREATE_SMOKE occurrence must come before the first
    // "systemctl stop openvoxserver"; a missing stop command (index -1) also fails.
    [Fact]
    public void SmokeScript_IsExplicitlyOptIn()
    {
        var smoke = File.ReadAllText(SmokePath);

        smoke.Should().Contain("OPENVOX_RECREATE_SMOKE");
        smoke.Should().Contain("exit 64");

        var optInIndex = smoke.IndexOf("OPENVOX_RECREATE_SMOKE", StringComparison.Ordinal);
        var stopIndex = smoke.IndexOf("systemctl stop openvoxserver", StringComparison.Ordinal);
        optInIndex.Should().BeLessThan(stopIndex);
    }

    // The first "systemctl cat openvoxserver" must come before the first
    // "podman rm openvoxserver"; a missing remove command (index -1) also fails.
    [Fact]
    public void SmokeScript_RequiresGeneratedSystemdUnitBeforeRemovingContainer()
    {
        var smoke = File.ReadAllText(SmokePath);

        smoke.Should().Contain("systemctl cat openvoxserver");
        smoke.Should().Contain("refusing to remove a container without a verified systemd recreate path");

        var unitCheckIndex = smoke.IndexOf("systemctl cat openvoxserver", StringComparison.Ordinal);
        var removeIndex = smoke.IndexOf("podman rm openvoxserver", StringComparison.Ordinal);
        unitCheckIndex.Should().BeLessThan(removeIndex);
    }

    // Neither artifact may contain private-key headers or the listed hosted-runner labels
    // (case-insensitive search).
    [Fact]
    public void Artifacts_DoNotStoreSecretsOrPaidRunnerLabels()
    {
        string[] forbidden =
        {
            "BEGIN OPENSSH PRIVATE KEY",
            "BEGIN RSA PRIVATE KEY",
            "ubuntu-latest",
            "windows-latest",
            "macos-latest",
        };

        var violations = new List<string>();
        foreach (var path in new[] { RunbookPath, SmokePath })
        {
            var text = File.ReadAllText(path);
            foreach (var token in forbidden)
            {
                if (text.Contains(token, StringComparison.OrdinalIgnoreCase))
                {
                    violations.Add($"{Path.GetRelativePath(Root, path)} contains forbidden token {token}");
                }
            }
        }

        violations.Should().BeEmpty();
    }

    // Walks up from AppContext.BaseDirectory until a directory has apps/, scripts/ and
    // README.md; throws DirectoryNotFoundException when no ancestor qualifies.
    private static string FindRepoRoot()
    {
        for (var candidate = new DirectoryInfo(AppContext.BaseDirectory); candidate is not null; candidate = candidate.Parent)
        {
            var hasApps = Directory.Exists(Path.Combine(candidate.FullName, "apps"));
            var looksLikeRoot = hasApps
                && Directory.Exists(Path.Combine(candidate.FullName, "scripts"))
                && File.Exists(Path.Combine(candidate.FullName, "README.md"));

            if (looksLikeRoot)
            {
                return candidate.FullName;
            }
        }

        throw new DirectoryNotFoundException("Could not find bluejay-infra root.");
    }
}

View File

@@ -174,13 +174,10 @@ public sealed class PiSignagePlayerArtifactTests
// Reads the HDMI udev rule and the HDMI responder script via Read(...) and asserts on
// fragments of their text.
// NOTE(review): this method sits inside a diff hunk (13 old lines / 10 new lines per the hunk
// header), and the assertions on `responder` and the last two assertions on `rule` cover the
// same restart/detect commands. It looks like both sides of the diff are shown together —
// confirm which set of assertions belongs to the current file.
public void HdmiRule_RestartsPlayerAndRunsCapabilityDetection()
{
var rule = Read("systemd/99-flowercore-signage-hdmi.rules");
var responder = Read("scripts/flowercore-signage-hdmi-respond.sh");
// The rule must match HDMI connector kernel names of the form card?-HDMI-A-?.
rule.Should().Contain("KERNEL==\"card?-HDMI-A-?\"");
rule.Should().Contain("start flowercore-signage-player-pi-hdmi.service");
// Responder-script variant: a 2-second sleep, then display detection and a player restart.
responder.Should().Contain("sleep 2");
responder.Should().Contain("start flowercore-signage-detect-display.service");
responder.Should().Contain("restart flowercore-signage-player-pi.service");
// Rule-file variant: the same restart and detect commands expected directly in the rule.
rule.Should().Contain("restart flowercore-signage-player-pi.service");
rule.Should().Contain("start flowercore-signage-detect-display.service");
}
[Fact]

View File

@@ -1,6 +1,6 @@
package bluejayinfra.public_method_allowlist
public_hosts := {"brochure.flowercore.io", "dist.flowercore.io", "dns.iamworkin.lan"}
public_hosts := {"dist.flowercore.io", "dns.iamworkin.lan"}
deny[msg] {
input.kind == "IngressRoute"