Compare commits
24 Commits
f61901ccbd
...
claude/blu
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ca8d062826 | ||
|
|
1889462fc4 | ||
|
|
523ba61232 | ||
|
|
53f67c8713 | ||
|
|
6b9cf3d12c | ||
|
|
0b52093b36 | ||
|
|
7a9098d3bd | ||
|
|
57d7ba46a7 | ||
|
|
9ec2e2d52e | ||
|
|
b4d62a8a50 | ||
|
|
fbbc07023b | ||
|
|
4b0eef0fb0 | ||
|
|
bb09a3786f | ||
|
|
006dbcf671 | ||
|
|
1be71d6ba7 | ||
|
|
0c8026c912 | ||
|
|
621ae47e00 | ||
|
|
ae6b8c0142 | ||
|
|
da55220218 | ||
|
|
b1ad253dd6 | ||
|
|
ee935f6e07 | ||
|
|
2853ee2024 | ||
|
|
b4a34e16ca | ||
|
|
0d5a1fd530 |
7
.gitignore
vendored
Normal file
7
.gitignore
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
# .NET build outputs (lint test project)
|
||||
**/bin/
|
||||
**/obj/
|
||||
|
||||
# Editor / temp
|
||||
.DS_Store
|
||||
*.swp
|
||||
15
README.md
15
README.md
@@ -99,8 +99,23 @@ curl -sk -X DELETE https://dns.iamworkin.lan/api/v1/servers/<serverId>/zones/iam
|
||||
- **CoreDNS template + ndots:5 collision**: inside pods, `<svc>.<ns>.svc.cluster.local` with <5 dots gets search-expanded through `iamworkin.lan` FIRST and hits the wildcard template → resolves to Traefik VIP, not the real ClusterIP. Use short service names (`<svc>`) in K8s manifests. See memory `feedback_coredns_ndots_template_collision.md`.
|
||||
- **Image not on node**: pods stuck `ErrImageNeverPull` means the image wasn't imported to the node Kubernetes scheduled the pod onto. `ctr images import` on all of rke2-server, rke2-agent1, rke2-agent2.
|
||||
- **StatefulSet PVC drift**: `volumeClaimTemplates` needs explicit `volumeMode: Filesystem` or ArgoCD SSA self-heals forever. See memory `feedback_argocd_statefulset_pvc_drift.md`.
|
||||
- **IngressRoute namespace split**: this RKE2 Traefik install does not allow cross-namespace service refs. Keep the `IngressRoute`, backend `Service`, and TLS secret in the same namespace; if one host is shared across namespaces, duplicate the `Certificate` and move the route next to the destination service.
|
||||
- **Public read-only hosts**: if a public host fronts a service that also exposes admin writes internally, add a Traefik route match like `Host(...) && (Method(GET) || Method(HEAD))` on the public edge instead of trusting the app to reject unsafe methods.
|
||||
- **Public read-write allowlist hosts**: if a public host accepts a tightly bounded write surface (e.g. bootstrap-JWT POST), pin the allowlist as `(Method(GET) || Method(HEAD) || Method(POST) || Method(OPTIONS))`. PUT/PATCH/DELETE must still 404 at the route. Track A's `updatecenter.iamworkin.lan` / `updates.iamworkin.lan` are the canonical example. The lint test enforces this invariant.
|
||||
- **Traefik VIP netpols**: when a `NetworkPolicy` allows `10.0.56.200`, also allow the post-DNAT backend ports (`8443` for TLS plus `8080` or `8000` for HTTP) or Calico will drop the rewritten flow.
|
||||
- **Auth-safe probes**: services behind API-key or global auth middleware should prefer `tcpSocket` probes unless `/health` is explicitly exempted before the middleware runs.
|
||||
- **ArgoCD must use internal Gitea URL**: `http://gitea-clusterip.gitea.svc.cluster.local:3000/bluejay/bluejay-infra.git`, not the external HTTPS URL (step-ca cert isn't trusted by ArgoCD). The `ApplicationSet` and any hand-created `Application` must both use the internal URL.
|
||||
|
||||
## Local manifest lint
|
||||
|
||||
The repo now carries a local-first lint pass for the recurring K8s gotchas that have burned the fleet:
|
||||
|
||||
```bash
|
||||
dotnet test tests/bluejay-infra-lint/BluejayInfraLint.Tests.csproj -c Release
|
||||
```
|
||||
|
||||
That test project sweeps `bluejay-infra/apps/**` plus the canonical sibling `FlowerCore.*\\k8s` manifests that share the same workspace. Matching `conftest.dev` policy files live under `tests/bluejay-infra-lint/conftest.dev/` for environments that also have `conftest` or `opa`.
|
||||
|
||||
## References
|
||||
|
||||
- Cert-manager recovery playbook: `FlowerCore.Notes/memory/project_cert_manager_recovery_2026_04_22.md`
|
||||
|
||||
@@ -92,14 +92,17 @@ subjects:
|
||||
# =============================================================================
|
||||
# Agent Zero — AI Agent Web UI (NUC Edition, Blue Jay Profile)
|
||||
# =============================================================================
|
||||
# Connects to a local nginx proxy that routes to edge1 Pi 5 + AI HAT+ Ollama only
|
||||
# Blue Jay profile with 21 tools, 3 prompts, 4 extensions
|
||||
# Connects directly to fc-llm-bridge for chat + internal util/embed + browser.
|
||||
# Agent Zero's internal util/embed slots stay on the bridge's OpenAI-compatible
|
||||
# /v1 surface, while browser + corpus-search use the Ollama-compatible /api/*
|
||||
# surface through OLLAMA_HOST.
|
||||
# Blue Jay profile with 21 tools, 3 prompts, 4 extensions.
|
||||
|
||||
---
|
||||
# FC LLM Bridge API key for Agent Zero (ADR-088 chat_model routing).
|
||||
# FC LLM Bridge API key for Agent Zero (ADR-088 chat/util/embed/browser routing).
|
||||
# Syncs from 1Password item "FC LLM Bridge API Keys" (field: agent-zero-k8s).
|
||||
# Consumed by the chat_model only; util / embedding / browser stay on local
|
||||
# Ollama via the 127.0.0.1 sidecar proxy.
|
||||
# Consumed by chat, internal util/embed, browser, and corpus-search requests
|
||||
# that traverse fc-llm-bridge.
|
||||
apiVersion: onepassword.com/v1
|
||||
kind: OnePasswordItem
|
||||
metadata:
|
||||
@@ -124,6 +127,18 @@ metadata:
|
||||
spec:
|
||||
itemPath: "vaults/IAmWorkin/items/Print.Web API Keys"
|
||||
|
||||
---
|
||||
# Knowledge MCP bearer token for the direct Agent Zero -> Knowledge.Web path.
|
||||
# The 1Password item currently stores the raw token in its concealed PASSWORD
|
||||
# field, which the operator syncs to Secret key `password`.
|
||||
apiVersion: onepassword.com/v1
|
||||
kind: OnePasswordItem
|
||||
metadata:
|
||||
name: knowledge-mcp-tokens
|
||||
namespace: agent-zero
|
||||
spec:
|
||||
itemPath: "vaults/IAmWorkin/items/FlowerCore Knowledge MCP Tokens"
|
||||
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
@@ -135,7 +150,7 @@ metadata:
|
||||
annotations:
|
||||
agent-zero/deployment: "nuc"
|
||||
agent-zero/profile: "bluejay"
|
||||
agent-zero/ollama: "edge1 Pi 5 + AI HAT+ only (10.0.57.17:11434) — workstation Ollama is private dev hardware, not a cluster dependency"
|
||||
agent-zero/ollama: "fc-llm-bridge fronts edge1 Pi 5 + AI HAT+ Ollama for cluster browser/corpus-search traffic; internal chat/util/embed route through the bridge's authenticated OpenAI surface"
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
@@ -150,19 +165,18 @@ spec:
|
||||
spec:
|
||||
serviceAccountName: agent-zero
|
||||
initContainers:
|
||||
# Wait for edge1 Ollama to be reachable before starting Agent Zero.
|
||||
# (Workstation Ollama is intentionally NOT in the cluster path.)
|
||||
- name: wait-for-ollama
|
||||
# Wait for fc-llm-bridge to be reachable before starting Agent Zero.
|
||||
- name: wait-for-llm-bridge
|
||||
image: busybox:1.37
|
||||
command: ["sh", "-c"]
|
||||
args:
|
||||
- |
|
||||
echo "Waiting for edge1 Ollama (10.0.57.17:11434)..."
|
||||
until wget -qO- --timeout=2 http://10.0.57.17:11434/api/tags >/dev/null 2>&1; do
|
||||
echo "edge1 Ollama not ready yet, retrying in 5s..."
|
||||
echo "Waiting for fc-llm-bridge..."
|
||||
until wget -qO- --timeout=2 http://fc-llm-bridge.fc-llm-bridge.svc:8080/healthz >/dev/null 2>&1; do
|
||||
echo "fc-llm-bridge not ready yet, retrying in 5s..."
|
||||
sleep 5
|
||||
done
|
||||
echo "edge1 Ollama is reachable."
|
||||
echo "fc-llm-bridge is reachable."
|
||||
# Assemble the Blue Jay profile directory structure from ConfigMaps.
|
||||
# ConfigMaps can't create nested dirs, so we copy into the workspace PVC.
|
||||
- name: setup-bluejay
|
||||
@@ -209,73 +223,6 @@ spec:
|
||||
- name: bluejay-theme
|
||||
mountPath: /tmp/bluejay-theme
|
||||
containers:
|
||||
- name: ollama-proxy
|
||||
image: nginx:1.27-alpine
|
||||
command: ["/bin/sh", "-c"]
|
||||
args:
|
||||
- |
|
||||
cat > /etc/nginx/nginx.conf <<'NGINX'
|
||||
worker_processes 1;
|
||||
events { worker_connections 1024; }
|
||||
http {
|
||||
upstream ollama_upstream {
|
||||
# edge1 Pi 5 + AI HAT+ is the SOLE upstream.
|
||||
# Workstation Ollama (BLUEJAY-WS) is private dev hardware and
|
||||
# MUST NOT be added back here without explicit operator decision —
|
||||
# adding it would expose the workstation to cluster traffic.
|
||||
server 10.0.57.17:11434 max_fails=2 fail_timeout=10s;
|
||||
keepalive 16;
|
||||
}
|
||||
server {
|
||||
listen 11434;
|
||||
# Local healthcheck — proves nginx itself is alive.
|
||||
# Must NOT depend on upstream so liveness doesn't restart
|
||||
# the container when edge1 is slow/offline.
|
||||
location = /healthz {
|
||||
access_log off;
|
||||
return 200 'ok\n';
|
||||
default_type text/plain;
|
||||
}
|
||||
location / {
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Connection "";
|
||||
proxy_set_header Host $host;
|
||||
proxy_connect_timeout 5s;
|
||||
proxy_read_timeout 600s;
|
||||
proxy_send_timeout 600s;
|
||||
proxy_next_upstream error timeout invalid_header http_502 http_503 http_504;
|
||||
proxy_pass http://ollama_upstream;
|
||||
}
|
||||
}
|
||||
}
|
||||
NGINX
|
||||
exec nginx -g 'daemon off;'
|
||||
ports:
|
||||
- containerPort: 11434
|
||||
# Readiness probe DOES check upstream so K8s only routes traffic
|
||||
# when edge1 Ollama is reachable. timeoutSeconds=5 absorbs the Pi's
|
||||
# slower TCP handshake under load (was timeoutSeconds=1 default →
|
||||
# 172 historic restarts when the workstation primary path went down,
|
||||
# before the cluster was repointed to edge1-only on 2026-04-27).
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /api/tags
|
||||
port: 11434
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 15
|
||||
timeoutSeconds: 5
|
||||
failureThreshold: 3
|
||||
# Liveness probe hits ONLY local healthz — restarts the container
|
||||
# only when nginx itself is dead. Decoupling liveness from upstream
|
||||
# eliminates restart-loops caused by transient upstream outages.
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: 11434
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 30
|
||||
timeoutSeconds: 3
|
||||
failureThreshold: 3
|
||||
- name: agent-zero
|
||||
image: agent0ai/agent-zero:latest
|
||||
command: ["/bin/bash", "-c"]
|
||||
@@ -296,24 +243,41 @@ spec:
|
||||
# chat_model: FlowerCore LLM Bridge (ADR-088) — OpenAI-compat,
|
||||
# spend-tracked, tier-aliased (fc:balanced → Claude Sonnet).
|
||||
# api_key comes from A0_SET_chat_model_api_key env var (overrides
|
||||
# config.json). util + embedding go to local 127.0.0.1 nginx
|
||||
# proxy which routes to edge1 Pi 5 + AI HAT+ ONLY (workstation
|
||||
# is private dev hardware, intentionally not in the cluster path).
|
||||
# config.json). Utility + embedding stay on the authenticated
|
||||
# OpenAI-compatible /v1 surface; browser and direct tool traffic
|
||||
# use the bridge's Ollama-compatible root via OLLAMA_HOST.
|
||||
mkdir -p /a0/usr/plugins/_model_config
|
||||
cat > /a0/usr/plugins/_model_config/config.json << 'MODELCFG'
|
||||
{"allow_chat_override":true,"chat_model":{"provider":"openai","name":"fc:balanced","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_history":0.7,"vision":false,"kwargs":{"temperature":0,"num_ctx":8192}},"utility_model":{"provider":"ollama","name":"qwen2.5:1.5b","api_base":"http://127.0.0.1:11434","ctx_length":8192,"ctx_input":0.7,"kwargs":{"num_ctx":8192}},"embedding_model":{"provider":"ollama","name":"nomic-embed-text","api_base":"http://127.0.0.1:11434","kwargs":{}}}
|
||||
{"allow_chat_override":true,"chat_model":{"provider":"openai","name":"fc:balanced","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_history":0.7,"vision":false,"kwargs":{"temperature":0,"num_ctx":8192}},"utility_model":{"provider":"openai","name":"fc:cheap","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_input":0.7,"kwargs":{"num_ctx":8192}},"embedding_model":{"provider":"openai","name":"openai/fc:embedding","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","kwargs":{}}}
|
||||
MODELCFG
|
||||
# Strip heredoc indentation
|
||||
sed -i 's/^ //' /a0/usr/plugins/_model_config/config.json
|
||||
# Phase 0 Chat MCP pilot: Agent Zero does not interpolate env vars
|
||||
# inside A0_SET_mcp_servers JSON, so build the final JSON here from
|
||||
# the secret-backed CHAT_MCP_API_KEY env var before initialize.sh.
|
||||
# Use the in-cluster Chat service URL rather than the public
|
||||
# Traefik hostname so the pod stays off the private VIP lane that
|
||||
# the default egress rule blocks.
|
||||
if [ -n "${CHAT_MCP_API_KEY:-}" ]; then
|
||||
export A0_SET_mcp_servers="{\"mcpServers\":{\"fc-chat\":{\"type\":\"streamable-http\",\"url\":\"http://chat-web.fc-chat.svc/mcp\",\"headers\":{\"X-Api-Key\":\"${CHAT_MCP_API_KEY}\"}}}}"
|
||||
# the secret-backed env vars before initialize.sh. Keep the local
|
||||
# corpus_search.py tool mounted either way so outage fallback
|
||||
# remains available even when fc_knowledge is not advertised.
|
||||
export KNOWLEDGE_MCP_ENABLED=false
|
||||
if [ -n "${KNOWLEDGE_MCP_BEARER_TOKEN:-}" ]; then
|
||||
if curl -sf --connect-timeout 3 "${KNOWLEDGE_MCP_HEALTH_URL}" > /dev/null && \
|
||||
curl -sf --connect-timeout 5 \
|
||||
-H "Authorization: Bearer ${KNOWLEDGE_MCP_BEARER_TOKEN}" \
|
||||
-H "Accept: application/json, text/event-stream" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"jsonrpc":"2.0","id":"fc-knowledge-bootstrap","method":"initialize","params":{"protocolVersion":"2025-03-26","capabilities":{},"clientInfo":{"name":"agent-zero-bootstrap","version":"1.0"}}}' \
|
||||
"${KNOWLEDGE_MCP_URL}" > /dev/null; then
|
||||
export KNOWLEDGE_MCP_ENABLED=true
|
||||
echo "fc_knowledge enabled from ${KNOWLEDGE_MCP_URL}."
|
||||
else
|
||||
echo "fc_knowledge unavailable or unauthorized; keeping local corpus_search.py as the fallback path."
|
||||
fi
|
||||
else
|
||||
echo "fc_knowledge token missing; keeping local corpus_search.py as the fallback path."
|
||||
fi
|
||||
|
||||
export A0_SET_mcp_servers="$(
|
||||
python3 -c 'import json, os; servers = {}; chat_key = os.getenv("CHAT_MCP_API_KEY"); knowledge_enabled = os.getenv("KNOWLEDGE_MCP_ENABLED", "false").lower() == "true"; token = os.getenv("KNOWLEDGE_MCP_BEARER_TOKEN", "") if knowledge_enabled else ""; chat_key and servers.setdefault("fc_chat", {"type": "streamable-http", "url": "http://chat-web.fc-chat.svc/mcp", "headers": {"X-Api-Key": chat_key}}); token and servers.setdefault("fc_knowledge", {"type": "streamable-http", "url": os.getenv("KNOWLEDGE_MCP_URL", "http://knowledge-web.knowledge.svc/mcp"), "headers": {"Authorization": f"Bearer {token}"}}); print(json.dumps({"mcpServers": servers}, separators=(",", ":")))'
|
||||
)"
|
||||
# Run the original entrypoint
|
||||
exec /exe/initialize.sh $BRANCH
|
||||
ports:
|
||||
@@ -325,8 +289,9 @@ spec:
|
||||
# Chat model — routed through FlowerCore LLM Bridge (ADR-088)
|
||||
# so spend is tracked and tier aliases (fc:cheap/fc:balanced/fc:deep)
|
||||
# dispatch to Ollama or Anthropic via a single OpenAI-compat endpoint.
|
||||
# Util / embedding / browser stay on local Ollama via 127.0.0.1 proxy
|
||||
# for zero-latency, zero-cost small-model traffic.
|
||||
# Internal utility + embedding use the authenticated OpenAI surface,
|
||||
# while browser/corpus-search use the bridge's Ollama-compatible
|
||||
# endpoints so Agent Zero no longer needs a local proxy sidecar.
|
||||
- name: A0_SET_chat_model_provider
|
||||
value: "openai"
|
||||
- name: A0_SET_chat_model_name
|
||||
@@ -348,35 +313,51 @@ spec:
|
||||
secretKeyRef:
|
||||
name: fc-llm-bridge-api-keys
|
||||
key: agent-zero-k8s
|
||||
- name: FC_LLM_BRIDGE_API_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: fc-llm-bridge-api-keys
|
||||
key: agent-zero-k8s
|
||||
- name: A0_SET_chat_model_ctx_length
|
||||
value: "8192"
|
||||
- name: A0_SET_chat_model_kwargs
|
||||
value: '{"temperature": 0, "num_ctx": 8192}'
|
||||
# Utility model — fast small helper tier through the same proxy
|
||||
# Utility model — fast small helper tier through the OpenAI surface
|
||||
- name: A0_SET_util_model_provider
|
||||
value: "ollama"
|
||||
value: "openai"
|
||||
- name: A0_SET_util_model_name
|
||||
value: "qwen2.5:1.5b"
|
||||
value: "fc:cheap"
|
||||
- name: A0_SET_util_model_api_base
|
||||
value: "http://127.0.0.1:11434"
|
||||
value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1"
|
||||
- name: A0_SET_util_model_kwargs
|
||||
value: '{"num_ctx": 2048}'
|
||||
# Embedding model — nomic through the same proxy
|
||||
# Embedding model — authenticated bridge alias to nomic-embed-text.
|
||||
# LiteLLM's embedding() path needs an explicit provider prefix here
|
||||
# even though the chat slot can use bare fc:* aliases.
|
||||
- name: A0_SET_embed_model_provider
|
||||
value: "ollama"
|
||||
value: "openai"
|
||||
- name: A0_SET_embed_model_name
|
||||
value: "nomic-embed-text"
|
||||
value: "openai/fc:embedding"
|
||||
- name: A0_SET_embed_model_api_base
|
||||
value: "http://127.0.0.1:11434"
|
||||
value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1"
|
||||
# Browser model — small Gemma candidate through the same proxy
|
||||
- name: A0_SET_browser_model_provider
|
||||
value: "ollama"
|
||||
- name: A0_SET_browser_model_name
|
||||
value: "gemma3:4b"
|
||||
- name: A0_SET_browser_model_api_base
|
||||
value: "http://127.0.0.1:11434"
|
||||
value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080"
|
||||
- name: A0_SET_browser_model_api_key
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: fc-llm-bridge-api-keys
|
||||
key: agent-zero-k8s
|
||||
- name: A0_SET_browser_model_vision
|
||||
value: "true"
|
||||
- name: OLLAMA_HOST
|
||||
value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080"
|
||||
- name: FLOWERCORE_AGENTZERO_OLLAMA_URL
|
||||
value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080"
|
||||
# Agent profile — Blue Jay personality, tools, and system prompt
|
||||
- name: A0_SET_agent_profile
|
||||
value: "bluejay"
|
||||
@@ -399,6 +380,19 @@ spec:
|
||||
name: chat-mcp-api-key
|
||||
key: api-key
|
||||
optional: true
|
||||
# FlowerCore.Knowledge MCP Phase 1 — direct Agent Zero client path.
|
||||
# Probe /healthz first, then try an authenticated initialize call.
|
||||
# If either fails, Agent Zero boots without fc_knowledge and keeps
|
||||
# the local corpus_search.py tool as the outage-safe path.
|
||||
- name: KNOWLEDGE_MCP_URL
|
||||
value: "http://knowledge-web.knowledge.svc/mcp"
|
||||
- name: KNOWLEDGE_MCP_HEALTH_URL
|
||||
value: "http://knowledge-web.knowledge.svc/healthz"
|
||||
- name: KNOWLEDGE_MCP_BEARER_TOKEN
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: knowledge-mcp-tokens
|
||||
key: password
|
||||
# Print.Web — Thermal printer service on edge2.
|
||||
# PRINT_WEB_URL: internal HTTP (bypasses Traefik TLS — print_web.py
|
||||
# runs in-cluster and can reach edge2 directly on the PROD VLAN).
|
||||
@@ -452,7 +446,7 @@ spec:
|
||||
command:
|
||||
- /bin/bash
|
||||
- -c
|
||||
- "curl -sf http://localhost:80/ > /dev/null && curl -sf --connect-timeout 3 http://127.0.0.1:11434/api/tags > /dev/null"
|
||||
- "curl -sf http://localhost:80/ > /dev/null && curl -sf --connect-timeout 3 http://fc-llm-bridge.fc-llm-bridge.svc:8080/healthz > /dev/null"
|
||||
periodSeconds: 30
|
||||
failureThreshold: 2
|
||||
resources:
|
||||
@@ -590,13 +584,6 @@ spec:
|
||||
protocol: UDP
|
||||
- port: 53
|
||||
protocol: TCP
|
||||
# Ollama on edge1 Pi 5 + AI HAT+ (sole upstream — workstation
|
||||
# is private dev hardware and intentionally not allowlisted)
|
||||
- to:
|
||||
- ipBlock:
|
||||
cidr: 10.0.57.17/32
|
||||
ports:
|
||||
- port: 11434
|
||||
# Print.Web on edge2
|
||||
- to:
|
||||
- ipBlock:
|
||||
@@ -630,6 +617,17 @@ spec:
|
||||
protocol: TCP
|
||||
- port: 8080
|
||||
protocol: TCP
|
||||
# FlowerCore.Knowledge MCP (Phase 1) — in-cluster direct route with
|
||||
# anonymous /healthz probe plus authenticated /mcp initialize/tool calls.
|
||||
- to:
|
||||
- namespaceSelector:
|
||||
matchLabels:
|
||||
kubernetes.io/metadata.name: knowledge
|
||||
ports:
|
||||
- port: 80
|
||||
protocol: TCP
|
||||
- port: 8080
|
||||
protocol: TCP
|
||||
# Intranet search API — use in-cluster svc so traffic stays inside
|
||||
# the cluster and is not blocked by the private-range egress denylist.
|
||||
- to:
|
||||
|
||||
@@ -7209,6 +7209,9 @@ data:
|
||||
"keep_alive": keep_alive,
|
||||
"stream": False,
|
||||
})
|
||||
curl_headers = ["-H", "Content-Type: application/json"]
|
||||
if os.environ.get("FC_LLM_BRIDGE_API_KEY"):
|
||||
curl_headers.extend(["-H", f"X-Api-Key: {os.environ['FC_LLM_BRIDGE_API_KEY']}"])
|
||||
|
||||
try:
|
||||
result = subprocess.run(
|
||||
@@ -7216,7 +7219,7 @@ data:
|
||||
"curl", "-s", "--max-time", "120",
|
||||
"-X", "POST",
|
||||
f"{api_base}/api/generate",
|
||||
"-H", "Content-Type: application/json",
|
||||
*curl_headers,
|
||||
"-d", payload,
|
||||
],
|
||||
capture_output=True,
|
||||
@@ -13191,6 +13194,7 @@ data:
|
||||
"FLOWERCORE_AGENTZERO_OLLAMA_URL",
|
||||
"http://host.containers.internal:11434",
|
||||
)
|
||||
BRIDGE_API_KEY = os.environ.get("FC_LLM_BRIDGE_API_KEY", "").strip()
|
||||
EMBEDDING_MODEL = os.environ.get(
|
||||
"FLOWERCORE_FLEET_EMBEDDING_MODEL",
|
||||
"nomic-embed-text",
|
||||
@@ -13327,10 +13331,13 @@ data:
|
||||
def _embed(text: str) -> list:
|
||||
"""Embed a query via Ollama's /api/embeddings. Single-vector response."""
|
||||
body = json.dumps({"model": EMBEDDING_MODEL, "prompt": text}).encode("utf-8")
|
||||
headers = {"Content-Type": "application/json"}
|
||||
if BRIDGE_API_KEY:
|
||||
headers["X-Api-Key"] = BRIDGE_API_KEY
|
||||
req = urllib.request.Request(
|
||||
f"{OLLAMA_BASE_URL.rstrip('/')}/api/embeddings",
|
||||
data=body,
|
||||
headers={"Content-Type": "application/json"},
|
||||
headers=headers,
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=60) as resp:
|
||||
data = json.loads(resp.read().decode("utf-8"))
|
||||
|
||||
@@ -20,7 +20,19 @@ spec:
|
||||
nodeSelector:
|
||||
kubernetes.io/hostname: rke2-agent1
|
||||
hostNetwork: true
|
||||
dnsPolicy: ClusterFirstWithHostNet
|
||||
# Keep the search list free of iamworkin.lan so CoreDNS's wildcard
|
||||
# template cannot hijack public egress like downloads.asterisk.org.
|
||||
dnsPolicy: None
|
||||
dnsConfig:
|
||||
nameservers:
|
||||
- 10.43.0.10
|
||||
searches:
|
||||
- telephony.svc.cluster.local
|
||||
- svc.cluster.local
|
||||
- cluster.local
|
||||
options:
|
||||
- name: ndots
|
||||
value: "2"
|
||||
securityContext:
|
||||
fsGroup: 0
|
||||
# CoreDNS in this cluster has an iamworkin.lan wildcard that catches
|
||||
|
||||
@@ -87,6 +87,20 @@ spec:
|
||||
prometheus.io/port: "8080"
|
||||
prometheus.io/path: "/metrics"
|
||||
spec:
|
||||
# Use an explicit DNS policy so external FQDNs like api.anthropic.com are
|
||||
# resolved directly instead of being expanded through the cluster search
|
||||
# path that includes iamworkin.lan.
|
||||
dnsPolicy: None
|
||||
dnsConfig:
|
||||
nameservers:
|
||||
- 10.43.0.10
|
||||
searches:
|
||||
- fc-llm-bridge.svc.cluster.local
|
||||
- svc.cluster.local
|
||||
- cluster.local
|
||||
options:
|
||||
- name: ndots
|
||||
value: "2"
|
||||
securityContext:
|
||||
fsGroup: 1654
|
||||
fsGroupChangePolicy: OnRootMismatch
|
||||
@@ -97,7 +111,7 @@ spec:
|
||||
# dotnet.exe publish -c Release -o deploy/app \
|
||||
# src/FlowerCore.LlmBridge.Web/FlowerCore.LlmBridge.Web.csproj
|
||||
# podman build -t localhost/fc-llm-bridge:v<tag> -f deploy/Dockerfile.deploy deploy
|
||||
image: localhost/fc-llm-bridge:v202604231520
|
||||
image: localhost/fc-llm-bridge:v202604300022
|
||||
imagePullPolicy: Never
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
@@ -116,6 +130,10 @@ spec:
|
||||
value: "default"
|
||||
- name: FlowerCore__LlmBridge__DefaultAppName
|
||||
value: "agent-zero"
|
||||
- name: FlowerCore__LlmBridge__UtilModel
|
||||
value: "qwen2.5:1.5b"
|
||||
- name: FlowerCore__LlmBridge__EmbedModel
|
||||
value: "nomic-embed-text"
|
||||
# Per-consumer API keys — from OnePasswordItem fc-llm-bridge-api-keys.
|
||||
# Each field becomes a Secret key of the same name. The key-name
|
||||
# lands in the auth principal's `fc.app` claim for ledger scoping.
|
||||
@@ -207,17 +225,6 @@ spec:
|
||||
port: 8080
|
||||
initialDelaySeconds: 15
|
||||
periodSeconds: 30
|
||||
# Lower ndots so external FQDNs like api.anthropic.com are tried BEFORE
|
||||
# the ndots:5 default expands them through the cluster search path, which
|
||||
# includes iamworkin.lan. CoreDNS has a `template IN A iamworkin.lan`
|
||||
# wildcard that answers `api.anthropic.com.iamworkin.lan` with the
|
||||
# Traefik VIP, which then serves a TRAEFIK-DEFAULT-CERT TLS cert and
|
||||
# breaks egress to the real Anthropic API (memory:
|
||||
# feedback_coredns_ndots_template_collision, generalized to external DNS).
|
||||
dnsConfig:
|
||||
options:
|
||||
- name: ndots
|
||||
value: "2"
|
||||
volumes:
|
||||
- name: data
|
||||
persistentVolumeClaim:
|
||||
|
||||
@@ -69,16 +69,14 @@ spec:
|
||||
memory: "512Mi"
|
||||
cpu: "500m"
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /health
|
||||
tcpSocket:
|
||||
port: 8080
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 30
|
||||
timeoutSeconds: 5
|
||||
failureThreshold: 3
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /health
|
||||
tcpSocket:
|
||||
port: 8080
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
|
||||
@@ -76,15 +76,13 @@ spec:
|
||||
memory: "512Mi"
|
||||
cpu: "500m"
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /health
|
||||
tcpSocket:
|
||||
port: http
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 30
|
||||
timeoutSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /health
|
||||
tcpSocket:
|
||||
port: http
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
|
||||
@@ -37,6 +37,19 @@ spec:
|
||||
app.kubernetes.io/name: ttsreader-piper
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
spec:
|
||||
# Bypass CoreDNS's *.iamworkin.lan wildcard so the init container reaches
|
||||
# huggingface.co directly when it seeds voice models.
|
||||
dnsPolicy: None
|
||||
dnsConfig:
|
||||
nameservers:
|
||||
- 10.43.0.10
|
||||
searches:
|
||||
- fc-ttsreader.svc.cluster.local
|
||||
- svc.cluster.local
|
||||
- cluster.local
|
||||
options:
|
||||
- name: ndots
|
||||
value: "2"
|
||||
initContainers:
|
||||
- name: seed-voices
|
||||
image: rhasspy/wyoming-piper:latest
|
||||
@@ -519,7 +532,7 @@ spec:
|
||||
fsGroupChangePolicy: OnRootMismatch
|
||||
containers:
|
||||
- name: web
|
||||
image: localhost/fc-ttsreader-web:v202604291817
|
||||
image: localhost/fc-ttsreader-web:v202604301236-b6ca2d5
|
||||
imagePullPolicy: Never
|
||||
ports:
|
||||
- containerPort: 5217
|
||||
@@ -537,6 +550,8 @@ spec:
|
||||
value: "/usr/bin/ffmpeg"
|
||||
- name: TtsReader__Bible__CorpusRoot
|
||||
value: "/data/corpus-cache/world-english-bible/eng/usx"
|
||||
- name: TtsReader__ChapterContext__DatabasePath
|
||||
value: "/data/chapter-context.db"
|
||||
- name: TtsReader__Jobs__Root
|
||||
value: "/data/jobs"
|
||||
- name: TtsReader__Piper__Host
|
||||
@@ -609,7 +624,10 @@ spec:
|
||||
optional: true
|
||||
resources:
|
||||
requests:
|
||||
cpu: 100m
|
||||
# The cluster is currently saturated on requested CPU by
|
||||
# remotedesktop workloads even when real usage is low.
|
||||
# Keep the web frontend schedulable under that pressure.
|
||||
cpu: 10m
|
||||
memory: 256Mi
|
||||
limits:
|
||||
cpu: 500m
|
||||
|
||||
@@ -46,7 +46,7 @@ spec:
|
||||
spec:
|
||||
containers:
|
||||
- name: intranet-web
|
||||
image: localhost/fc-intranet-web:v20260429-1646
|
||||
image: localhost/fc-intranet-web:v20260505-1041
|
||||
imagePullPolicy: Never
|
||||
ports:
|
||||
- containerPort: 5300
|
||||
|
||||
@@ -5,7 +5,9 @@ Phase 2.4 closed. Pod running, certificate issued (step-ca-acme), PVC
|
||||
bound (Longhorn 20Gi RWO), ArgoCD `infra-knowledge` synced. `/healthz`
|
||||
returns 200, `/api/v1/editions` returns `[]` (initial-deploy state — no
|
||||
*.db files in the PVC yet; Phase 2.5+ admin UI handles bulk
|
||||
population).
|
||||
population). Phase 1 of the Agent Zero MCP rollout keeps `/healthz`
|
||||
anonymous and gates `/mcp` behind `Authorization: Bearer <token>` built
|
||||
from the 1Password item `FlowerCore Knowledge MCP Tokens`.
|
||||
|
||||
- Plan: [`../../../FlowerCore.Notes/docs/ai-agents/flowercore-knowledge-service-plan.md`](../../../FlowerCore.Notes/docs/ai-agents/flowercore-knowledge-service-plan.md)
|
||||
- Sprint: [`../../../FlowerCore.Notes/docs/ai-station/sprint-e-xxl-plan.md`](../../../FlowerCore.Notes/docs/ai-station/sprint-e-xxl-plan.md) (Track B)
|
||||
@@ -19,6 +21,12 @@ search to the rest of the FC ecosystem (Agent Zero, Chat.Web persona
|
||||
memory, AiStation embeddings explorer, TtsReader chapter context, BMO
|
||||
bot, Pi nodes via `fc-index sync`).
|
||||
|
||||
Phase 1 MCP routing is explicit:
|
||||
|
||||
- in-cluster Agent Zero → `http://knowledge-web.knowledge.svc/mcp`
|
||||
- workstation Agent Zero → `https://knowledge.iamworkin.lan/mcp`
|
||||
- probe URL for both lanes → `/healthz`
|
||||
|
||||
## Deployment order (do NOT skip / reorder)
|
||||
|
||||
### 1. FlowerCore.DNS public A record — knowledge.iamworkin.lan -> 10.0.56.200
|
||||
|
||||
@@ -40,16 +40,16 @@ metadata:
|
||||
labels:
|
||||
app.kubernetes.io/part-of: bluejay-infra
|
||||
---
|
||||
# MCP API key — synced from 1Password so /mcp stays gated without baking
|
||||
# secrets into Git. The PASSWORD category maps the concealed field to Secret
|
||||
# key `password`, which the Deployment reads into FlowerCore:Mcp:ApiKey:Key.
|
||||
# MCP bearer token for the read-only Agent Zero Phase 1 lane. The 1Password
|
||||
# item currently stores the raw token in its concealed PASSWORD field, which
|
||||
# the operator syncs into the namespaced Secret key `password`.
|
||||
apiVersion: onepassword.com/v1
|
||||
kind: OnePasswordItem
|
||||
metadata:
|
||||
name: knowledge-mcp-api-key
|
||||
name: knowledge-mcp-tokens
|
||||
namespace: knowledge
|
||||
spec:
|
||||
itemPath: "vaults/IAmWorkin/items/KnowledgeApiKey"
|
||||
itemPath: "vaults/IAmWorkin/items/FlowerCore Knowledge MCP Tokens"
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
@@ -102,8 +102,17 @@ spec:
|
||||
- name: web
|
||||
# Placeholder tag — bump to the image you built + imported to ALL
|
||||
# RKE2 nodes via scripts/deploy-knowledge.sh before applying.
|
||||
image: localhost/fc-knowledge-web:v202604272200
|
||||
image: localhost/fc-knowledge-web:v20260429232635
|
||||
imagePullPolicy: Never
|
||||
command:
|
||||
- /bin/sh
|
||||
- -c
|
||||
args:
|
||||
- |
|
||||
if [ -n "${KNOWLEDGE_MCP_BEARER_TOKEN:-}" ]; then
|
||||
export FlowerCore__Mcp__ApiKey__Key="Bearer ${KNOWLEDGE_MCP_BEARER_TOKEN}"
|
||||
fi
|
||||
exec dotnet FlowerCore.Knowledge.Web.dll
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
name: http
|
||||
@@ -115,7 +124,7 @@ spec:
|
||||
- name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
|
||||
value: "false"
|
||||
# Vector-store directory + embedding model + edition profile dir.
|
||||
# Profile JSON is baked into the image at /app/editions via the
|
||||
# Profile JSON is baked into the image at /home/app/editions via the
|
||||
# csproj Content-link from FlowerCore.Common/editions/.
|
||||
- name: Knowledge__VectorStoresDirectory
|
||||
value: "/data/vector-stores"
|
||||
@@ -126,7 +135,7 @@ spec:
|
||||
- name: Knowledge__MaxLimit
|
||||
value: "50"
|
||||
- name: FlowerCore__Editions__ProfileDirectory
|
||||
value: "/app/editions"
|
||||
value: "/home/app/editions"
|
||||
# Embed via edge1 Pi 5 + AI HAT+ (10.0.57.17:11434). Cluster
|
||||
# services do not depend on BLUEJAY-WS (private dev hardware) per
|
||||
# bluejay-infra@0f9d56e. Query-time embedding is fast enough on
|
||||
@@ -138,7 +147,14 @@ spec:
|
||||
- name: FlowerCore__Mcp__ApiKey__Key
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: knowledge-mcp-api-key
|
||||
name: knowledge-mcp-tokens
|
||||
key: password
|
||||
- name: FlowerCore__Mcp__ApiKey__HeaderName
|
||||
value: "Authorization"
|
||||
- name: KNOWLEDGE_MCP_BEARER_TOKEN
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: knowledge-mcp-tokens
|
||||
key: password
|
||||
resources:
|
||||
requests:
|
||||
@@ -185,7 +201,7 @@ spec:
|
||||
- name: tmp
|
||||
mountPath: /tmp
|
||||
- name: logs
|
||||
mountPath: /app/logs
|
||||
mountPath: /home/app/logs
|
||||
volumes:
|
||||
- name: vector-store
|
||||
persistentVolumeClaim:
|
||||
|
||||
762
apps/monitoring/fc-updatecenter-dashboard.json
Normal file
762
apps/monitoring/fc-updatecenter-dashboard.json
Normal file
@@ -0,0 +1,762 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": []
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 1,
|
||||
"id": null,
|
||||
"links": [
|
||||
{
|
||||
"icon": "external link",
|
||||
"includeVars": false,
|
||||
"keepTime": false,
|
||||
"targetBlank": true,
|
||||
"title": "Open Service",
|
||||
"type": "link",
|
||||
"url": "https://updatecenter.iamworkin.lan/"
|
||||
}
|
||||
],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "fffjikve8llhce"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [
|
||||
{
|
||||
"options": {
|
||||
"0": {
|
||||
"color": "#f87171",
|
||||
"index": 1,
|
||||
"text": "DOWN"
|
||||
},
|
||||
"1": {
|
||||
"color": "#4ade80",
|
||||
"index": 0,
|
||||
"text": "UP"
|
||||
}
|
||||
},
|
||||
"type": "value"
|
||||
}
|
||||
],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "#f87171",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "#4ade80",
|
||||
"value": 1
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "center",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "value_and_name"
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "fffjikve8llhce"
|
||||
},
|
||||
"expr": "probe_success{job=\"probe-traefik-services\",instance=\"updatecenter.iamworkin.lan\"}",
|
||||
"refId": "A",
|
||||
"legendFormat": "Availability"
|
||||
}
|
||||
],
|
||||
"title": "Service Availability",
|
||||
"transparent": true,
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "fffjikve8llhce"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"decimals": 2,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "#f87171",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "#fbbf24",
|
||||
"value": 95
|
||||
},
|
||||
{
|
||||
"color": "#FFB300",
|
||||
"value": 99
|
||||
},
|
||||
{
|
||||
"color": "#4ade80",
|
||||
"value": 99.9
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "percent"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 8,
|
||||
"x": 8,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"colorMode": "background_solid",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "center",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "value_and_name"
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "fffjikve8llhce"
|
||||
},
|
||||
"expr": "avg_over_time(probe_success{job=\"probe-traefik-services\",instance=\"updatecenter.iamworkin.lan\"}[24h]) * 100",
|
||||
"refId": "A",
|
||||
"legendFormat": "24h Uptime"
|
||||
}
|
||||
],
|
||||
"title": "24-Hour Uptime",
|
||||
"transparent": true,
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "fffjikve8llhce"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"max": 30,
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "#f87171",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "#fbbf24",
|
||||
"value": 2
|
||||
},
|
||||
{
|
||||
"color": "#4ade80",
|
||||
"value": 7
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "d"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 8,
|
||||
"x": 16,
|
||||
"y": 0
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"minVizHeight": 75,
|
||||
"minVizWidth": 75,
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"showThresholdLabels": false,
|
||||
"showThresholdMarkers": true
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "fffjikve8llhce"
|
||||
},
|
||||
"expr": "(probe_ssl_earliest_cert_expiry{job=\"probe-traefik-services\",instance=\"updatecenter.iamworkin.lan\"} - time()) / 86400",
|
||||
"refId": "A",
|
||||
"legendFormat": "Days Remaining"
|
||||
}
|
||||
],
|
||||
"title": "Cert Expiry (Days)",
|
||||
"transparent": true,
|
||||
"type": "gauge"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "fffjikve8llhce"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "Response Time (seconds)",
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 12,
|
||||
"gradientMode": "scheme",
|
||||
"lineInterpolation": "smooth",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 4,
|
||||
"showPoints": "never",
|
||||
"spanNulls": true,
|
||||
"thresholdsStyle": {
|
||||
"mode": "dashed"
|
||||
}
|
||||
},
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "#4ade80",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "#fbbf24",
|
||||
"value": 2
|
||||
},
|
||||
{
|
||||
"color": "#f87171",
|
||||
"value": 5
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 14,
|
||||
"x": 0,
|
||||
"y": 4
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [
|
||||
"lastNotNull",
|
||||
"mean",
|
||||
"max"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "right"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "fffjikve8llhce"
|
||||
},
|
||||
"expr": "probe_duration_seconds{job=\"probe-traefik-services\",instance=\"updatecenter.iamworkin.lan\"}",
|
||||
"refId": "A",
|
||||
"legendFormat": "Probe Duration"
|
||||
}
|
||||
],
|
||||
"timeFrom": "1h",
|
||||
"title": "Response Time (1h Trend)",
|
||||
"transparent": true,
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "fffjikve8llhce"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 10,
|
||||
"x": 14,
|
||||
"y": 4
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"alertInstanceLabelFilter": "{instance=\"updatecenter.iamworkin.lan\"}",
|
||||
"alertName": "",
|
||||
"dashboardAlerts": false,
|
||||
"groupBy": [],
|
||||
"groupMode": "default",
|
||||
"maxItems": 10,
|
||||
"sortOrder": 1,
|
||||
"stateFilter": {
|
||||
"error": true,
|
||||
"firing": true,
|
||||
"noData": true,
|
||||
"normal": false,
|
||||
"pending": true
|
||||
},
|
||||
"viewMode": "list"
|
||||
},
|
||||
"title": "Active Alerts",
|
||||
"type": "alertlist"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 12
|
||||
},
|
||||
"id": 20,
|
||||
"title": "OTEL Counters — Track 1D",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "fffjikve8llhce"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"lineWidth": 1,
|
||||
"fillOpacity": 10
|
||||
},
|
||||
"unit": "reqps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 13
|
||||
},
|
||||
"id": 21,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"calcs": ["mean", "lastNotNull"]
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "fffjikve8llhce"
|
||||
},
|
||||
"expr": "sum by (status) (rate(updatecenter_manifest_requests_total[5m]))",
|
||||
"refId": "A",
|
||||
"legendFormat": "status={{status}}"
|
||||
}
|
||||
],
|
||||
"title": "Manifest Requests rate by status (5m)",
|
||||
"transparent": true,
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "fffjikve8llhce"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"lineWidth": 1,
|
||||
"fillOpacity": 10
|
||||
},
|
||||
"unit": "Bps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 13
|
||||
},
|
||||
"id": 22,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"calcs": ["mean", "lastNotNull"]
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "fffjikve8llhce"
|
||||
},
|
||||
"expr": "sum by (slug) (rate(updatecenter_bundle_download_bytes_total[5m]))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{slug}}"
|
||||
}
|
||||
],
|
||||
"title": "Bundle Download Throughput by slug (5m)",
|
||||
"transparent": true,
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "fffjikve8llhce"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"lineWidth": 1,
|
||||
"fillOpacity": 10
|
||||
},
|
||||
"unit": "reqps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 21
|
||||
},
|
||||
"id": 23,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"calcs": ["mean", "lastNotNull"]
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "fffjikve8llhce"
|
||||
},
|
||||
"expr": "sum by (status) (rate(updatecenter_checkins_total[5m]))",
|
||||
"refId": "A",
|
||||
"legendFormat": "status={{status}}"
|
||||
}
|
||||
],
|
||||
"title": "Agent Check-in Rate by status (5m)",
|
||||
"transparent": true,
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "fffjikve8llhce"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "#4ade80", "value": null },
|
||||
{ "color": "#f87171", "value": 1 }
|
||||
]
|
||||
},
|
||||
"unit": "none",
|
||||
"decimals": 2
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 6,
|
||||
"x": 12,
|
||||
"y": 21
|
||||
},
|
||||
"id": 24,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "center",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": ["sum"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "value_and_name"
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "fffjikve8llhce"
|
||||
},
|
||||
"expr": "increase(updatecenter_signature_verify_failures_total[1h])",
|
||||
"refId": "A",
|
||||
"legendFormat": "Sig Verify Failures (1h)"
|
||||
}
|
||||
],
|
||||
"title": "Signature Verify Failures (1h)",
|
||||
"transparent": true,
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "fffjikve8llhce"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"lineWidth": 1,
|
||||
"fillOpacity": 10
|
||||
},
|
||||
"unit": "reqps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"y": 21
|
||||
},
|
||||
"id": 25,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"calcs": ["mean", "lastNotNull"]
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "fffjikve8llhce"
|
||||
},
|
||||
"expr": "sum by (slug, channel) (rate(updatecenter_release_publishes_total[5m]))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{slug}}/{{channel}}"
|
||||
}
|
||||
],
|
||||
"title": "Release Publishes rate by slug/channel (5m)",
|
||||
"transparent": true,
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "fffjikve8llhce"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"lineWidth": 1,
|
||||
"fillOpacity": 10
|
||||
},
|
||||
"unit": "reqps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 29
|
||||
},
|
||||
"id": 26,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"calcs": ["mean", "lastNotNull"]
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "fffjikve8llhce"
|
||||
},
|
||||
"expr": "sum by (kind, status) (rate(updatecenter_bundle_downloads_total[5m]))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{kind}} / {{status}}"
|
||||
}
|
||||
],
|
||||
"title": "Bundle Download Requests by kind/status (5m)",
|
||||
"transparent": true,
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "fffjikve8llhce"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"lineWidth": 2,
|
||||
"fillOpacity": 20
|
||||
},
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "#4ade80", "value": null },
|
||||
{ "color": "#f87171", "value": 0.01 }
|
||||
]
|
||||
},
|
||||
"unit": "reqps"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 29
|
||||
},
|
||||
"id": 27,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "table",
|
||||
"placement": "right",
|
||||
"calcs": ["mean", "lastNotNull"]
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "fffjikve8llhce"
|
||||
},
|
||||
"expr": "rate(updatecenter_signature_verify_failures_total[5m])",
|
||||
"refId": "A",
|
||||
"legendFormat": "Sig verify failures/s"
|
||||
}
|
||||
],
|
||||
"title": "Signature Verify Failure Rate (5m) — Critical if >0",
|
||||
"transparent": true,
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 39,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"blue-jay",
|
||||
"flowercore",
|
||||
"synthetic",
|
||||
"updatecenter",
|
||||
"otel"
|
||||
],
|
||||
"templating": {
|
||||
"list": []
|
||||
},
|
||||
"time": {
|
||||
"from": "now-24h",
|
||||
"to": "now"
|
||||
},
|
||||
"timezone": "browser",
|
||||
"title": "FlowerCore.UpdateCenter Dashboard",
|
||||
"uid": "fc-updatecenter",
|
||||
"version": 2
|
||||
}
|
||||
226
apps/monitoring/flowercore-remotedesktop-grafana-dashboard.json
Normal file
226
apps/monitoring/flowercore-remotedesktop-grafana-dashboard.json
Normal file
@@ -0,0 +1,226 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": []
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "table",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"editorMode": "code",
|
||||
"expr": "sum by (event) (increase(fc_desktop_session_events_total[$__rate_interval]))",
|
||||
"legendFormat": "{{event}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "RemoteDesktop Session Events",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"showUnfilled": true
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"editorMode": "code",
|
||||
"expr": "sum by (template, event) (increase(fc_desktop_session_events_total[24h]))",
|
||||
"legendFormat": "{{template}} {{event}}",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "24h Session Events By Template",
|
||||
"type": "bargauge"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "table",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"editorMode": "code",
|
||||
"expr": "fc_desktop_pool_ready",
|
||||
"legendFormat": "{{template}} ready",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"editorMode": "code",
|
||||
"expr": "fc_desktop_pool_desired",
|
||||
"legendFormat": "{{template}} desired",
|
||||
"range": true,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Warm Pool Ready vs Desired",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "${DS_PROMETHEUS}"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "orange",
|
||||
"value": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"editorMode": "code",
|
||||
"expr": "sum(increase(fc_desktop_session_events_total{event=\"connect\",browser_datasource=\"json\"}[24h])) - sum(increase(fc_desktop_session_events_total{event=\"disconnect\"}[24h]))",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "24h Connect Minus Disconnect",
|
||||
"type": "stat"
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 39,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"flowercore",
|
||||
"remotedesktop",
|
||||
"guacamole"
|
||||
],
|
||||
"templating": {
|
||||
"list": []
|
||||
},
|
||||
"time": {
|
||||
"from": "now-24h",
|
||||
"to": "now"
|
||||
},
|
||||
"timezone": "browser",
|
||||
"title": "FlowerCore RemoteDesktop",
|
||||
"uid": "flowercore-remotedesktop",
|
||||
"version": 1
|
||||
}
|
||||
24
tests/bluejay-infra-lint/BluejayInfraLint.Tests.csproj
Normal file
24
tests/bluejay-infra-lint/BluejayInfraLint.Tests.csproj
Normal file
@@ -0,0 +1,24 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<IsPackable>false</IsPackable>
|
||||
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="coverlet.collector" Version="6.0.2">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
|
||||
</PackageReference>
|
||||
<PackageReference Include="FluentAssertions" Version="6.12.1" />
|
||||
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.12.0" />
|
||||
<PackageReference Include="xunit" Version="2.9.2" />
|
||||
<PackageReference Include="xunit.runner.visualstudio" Version="2.8.2">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
|
||||
</PackageReference>
|
||||
<PackageReference Include="YamlDotNet" Version="16.2.0" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
633
tests/bluejay-infra-lint/FleetManifestLintTests.cs
Normal file
633
tests/bluejay-infra-lint/FleetManifestLintTests.cs
Normal file
@@ -0,0 +1,633 @@
|
||||
using FluentAssertions;
|
||||
using System.Text.RegularExpressions;
|
||||
using Xunit;
|
||||
using YamlDotNet.Core;
|
||||
using YamlDotNet.RepresentationModel;
|
||||
|
||||
namespace BluejayInfraLint.Tests;
|
||||
|
||||
[Trait("Category", "Unit")]
|
||||
public sealed class FleetManifestLintTests
|
||||
{
|
||||
private static readonly ManifestInventory Inventory = ManifestInventory.Load();
|
||||
|
||||
private static readonly HashSet<string> PublicReadOnlyHosts = new(StringComparer.Ordinal)
|
||||
{
|
||||
"dist.flowercore.io",
|
||||
"dns.iamworkin.lan",
|
||||
};
|
||||
|
||||
// Public hosts that allow a tightly bounded write surface in addition to
|
||||
// GET/HEAD. updatecenter.iamworkin.lan accepts POST /api/v1/checkin/{id}
|
||||
// (bootstrap-JWT) so its allowlist is GET||HEAD||POST||OPTIONS — but
|
||||
// PUT/PATCH/DELETE must still 404 at the route. Anything wider than this
|
||||
// set should fail this lint.
|
||||
private static readonly HashSet<string> PublicReadWriteAllowlistHosts = new(StringComparer.Ordinal)
|
||||
{
|
||||
"updatecenter.iamworkin.lan",
|
||||
"updates.iamworkin.lan",
|
||||
};
|
||||
|
||||
private static readonly HashSet<string> ApiKeyProtectedDeployments = new(StringComparer.Ordinal)
|
||||
{
|
||||
"messageboard-web",
|
||||
"scoreboard-web",
|
||||
"segmentdisplay-web",
|
||||
"signalcontrol-web",
|
||||
};
|
||||
|
||||
private static readonly HashSet<string> PublicEgressDeployments = new(StringComparer.Ordinal)
|
||||
{
|
||||
"asterisk",
|
||||
"fc-llm-bridge",
|
||||
"mysql-web",
|
||||
"php-web",
|
||||
"ttsreader-align",
|
||||
"ttsreader-kokoro",
|
||||
"ttsreader-modern",
|
||||
"ttsreader-piper",
|
||||
};
|
||||
|
||||
[Fact]
|
||||
public void IngressRoutes_MustKeepServiceReferencesInTheSameNamespace()
|
||||
{
|
||||
var violations = Inventory.Documents
|
||||
.Where(document => document.Kind == "IngressRoute")
|
||||
.SelectMany(document =>
|
||||
document.MappingSequence("spec", "routes")
|
||||
.SelectMany(route =>
|
||||
route.MappingSequence("services")
|
||||
.Select(service => new
|
||||
{
|
||||
Document = document,
|
||||
ServiceName = ManifestNodeExtensions.Scalar(service, "name"),
|
||||
ServiceNamespace = ManifestNodeExtensions.Scalar(service, "namespace"),
|
||||
})))
|
||||
.Where(entry => !string.IsNullOrWhiteSpace(entry.ServiceNamespace))
|
||||
.Where(entry => !string.Equals(entry.ServiceNamespace, entry.Document.Namespace, StringComparison.Ordinal))
|
||||
.Select(entry =>
|
||||
$"{entry.Document.Descriptor} references Service '{entry.ServiceName}' in namespace '{entry.ServiceNamespace}'.")
|
||||
.ToList();
|
||||
|
||||
violations.Should().BeEmpty();
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void PublicReadOnlyIngressRoutes_MustExplicitlyAllowOnlyGetAndHead()
|
||||
{
|
||||
var violations = Inventory.Documents
|
||||
.Where(document => document.Kind == "IngressRoute")
|
||||
.SelectMany(document =>
|
||||
document.MappingSequence("spec", "routes")
|
||||
.Select(route => new
|
||||
{
|
||||
Document = document,
|
||||
Match = ManifestNodeExtensions.Scalar(route, "match") ?? string.Empty,
|
||||
}))
|
||||
.Where(entry => PublicReadOnlyHosts.Any(host => entry.Match.Contains($"Host(`{host}`)", StringComparison.Ordinal)))
|
||||
.Where(entry => !entry.Match.Contains("Method(`GET`)", StringComparison.Ordinal)
|
||||
|| !entry.Match.Contains("Method(`HEAD`)", StringComparison.Ordinal))
|
||||
.Select(entry => $"{entry.Document.Descriptor} is missing an explicit GET/HEAD method allowlist.")
|
||||
.ToList();
|
||||
|
||||
violations.Should().BeEmpty();
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void PublicReadWriteIngressRoutes_MustPinGetHeadPostOptionsAllowlist()
|
||||
{
|
||||
// For hosts in PublicReadWriteAllowlistHosts, the route match MUST
|
||||
// contain Method(`GET`), Method(`HEAD`), Method(`POST`), and
|
||||
// Method(`OPTIONS`) AND MUST NOT contain Method(`PUT`),
|
||||
// Method(`PATCH`), or Method(`DELETE`). This keeps the public
|
||||
// allowlist invariant against regression — see Track A's
|
||||
// updatecenter-web ingressroute hardening.
|
||||
var violations = Inventory.Documents
|
||||
.Where(document => document.Kind == "IngressRoute")
|
||||
.SelectMany(document =>
|
||||
document.MappingSequence("spec", "routes")
|
||||
.Select(route => new
|
||||
{
|
||||
Document = document,
|
||||
Match = ManifestNodeExtensions.Scalar(route, "match") ?? string.Empty,
|
||||
}))
|
||||
.Where(entry => PublicReadWriteAllowlistHosts.Any(host => entry.Match.Contains($"Host(`{host}`)", StringComparison.Ordinal)))
|
||||
.SelectMany(entry =>
|
||||
{
|
||||
var localViolations = new List<string>();
|
||||
|
||||
foreach (var required in new[] { "GET", "HEAD", "POST", "OPTIONS" })
|
||||
{
|
||||
if (!entry.Match.Contains($"Method(`{required}`)", StringComparison.Ordinal))
|
||||
{
|
||||
localViolations.Add($"{entry.Document.Descriptor} is missing required Method(`{required}`).");
|
||||
}
|
||||
}
|
||||
|
||||
foreach (var forbidden in new[] { "PUT", "PATCH", "DELETE" })
|
||||
{
|
||||
if (entry.Match.Contains($"Method(`{forbidden}`)", StringComparison.Ordinal))
|
||||
{
|
||||
localViolations.Add($"{entry.Document.Descriptor} must not include Method(`{forbidden}`) on a public host.");
|
||||
}
|
||||
}
|
||||
|
||||
return localViolations;
|
||||
})
|
||||
.ToList();
|
||||
|
||||
violations.Should().BeEmpty();
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void TraefikVipNetworkPolicies_MustAllowPostDnatBackendPorts()
|
||||
{
|
||||
var violations = Inventory.Documents
|
||||
.Where(document => document.Kind == "NetworkPolicy")
|
||||
.Where(document => document.AllScalars().Any(value => value.Contains("10.0.56.200", StringComparison.Ordinal)))
|
||||
.SelectMany(document =>
|
||||
{
|
||||
var ports = document.EgressPorts().ToHashSet(StringComparer.Ordinal);
|
||||
var localViolations = new List<string>();
|
||||
|
||||
if (ports.Contains("443") && !ports.Contains("8443"))
|
||||
{
|
||||
localViolations.Add($"{document.Descriptor} allows Traefik VIP 443 without backend port 8443.");
|
||||
}
|
||||
|
||||
if (ports.Contains("80") && !ports.Contains("8000") && !ports.Contains("8080"))
|
||||
{
|
||||
localViolations.Add($"{document.Descriptor} allows Traefik VIP 80 without a backend HTTP port (8000/8080).");
|
||||
}
|
||||
|
||||
return localViolations;
|
||||
})
|
||||
.ToList();
|
||||
|
||||
violations.Should().BeEmpty();
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void ApiKeyProtectedDeployments_MustUseTcpSocketHealthProbes()
|
||||
{
|
||||
var violations = Inventory.Documents
|
||||
.Where(document => document.Kind == "Deployment")
|
||||
.Where(document => ApiKeyProtectedDeployments.Contains(document.Name))
|
||||
.SelectMany(document => document.ContainerMappings().SelectMany(container =>
|
||||
ProbeViolations(document, container, "readinessProbe")
|
||||
.Concat(ProbeViolations(document, container, "livenessProbe"))))
|
||||
.ToList();
|
||||
|
||||
violations.Should().BeEmpty();
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void StatefulSets_WithVolumeClaimTemplates_MustDeclareFilesystemDefaults()
|
||||
{
|
||||
var violations = Inventory.Documents
|
||||
.Where(document => document.Kind == "StatefulSet")
|
||||
.Where(document => document.MappingSequence("spec", "volumeClaimTemplates").Count > 0)
|
||||
.SelectMany(document =>
|
||||
{
|
||||
var localViolations = new List<string>();
|
||||
|
||||
if (string.IsNullOrWhiteSpace(document.Scalar("spec", "podManagementPolicy")))
|
||||
{
|
||||
localViolations.Add($"{document.Descriptor} is missing spec.podManagementPolicy.");
|
||||
}
|
||||
|
||||
if (string.IsNullOrWhiteSpace(document.Scalar("spec", "revisionHistoryLimit")))
|
||||
{
|
||||
localViolations.Add($"{document.Descriptor} is missing spec.revisionHistoryLimit.");
|
||||
}
|
||||
|
||||
foreach (var claimTemplate in document.MappingSequence("spec", "volumeClaimTemplates"))
|
||||
{
|
||||
if (!string.Equals(
|
||||
ManifestNodeExtensions.Scalar(claimTemplate, "spec", "volumeMode"),
|
||||
"Filesystem",
|
||||
StringComparison.Ordinal))
|
||||
{
|
||||
var claimName = ManifestNodeExtensions.Scalar(claimTemplate, "metadata", "name") ?? "<unnamed>";
|
||||
localViolations.Add($"{document.Descriptor} volumeClaimTemplate '{claimName}' is missing volumeMode: Filesystem.");
|
||||
}
|
||||
}
|
||||
|
||||
return localViolations;
|
||||
})
|
||||
.ToList();
|
||||
|
||||
violations.Should().BeEmpty();
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void LocallyImportedImages_MustUseLocalhostPrefixAndNeverPullPolicy()
|
||||
{
|
||||
var violations = Inventory.Documents
|
||||
.Where(document => document.PodSpec() is not null)
|
||||
.SelectMany(document => document.ContainerSpecs()
|
||||
.Where(container => !string.IsNullOrWhiteSpace(container.Image))
|
||||
.Select(container => new
|
||||
{
|
||||
Document = document,
|
||||
Container = container,
|
||||
}))
|
||||
.Where(entry =>
|
||||
(entry.Container.Image.StartsWith("localhost/", StringComparison.Ordinal)
|
||||
&& !string.Equals(entry.Container.ImagePullPolicy, "Never", StringComparison.Ordinal))
|
||||
|| (entry.Container.Image.StartsWith("fc-", StringComparison.Ordinal)
|
||||
&& !entry.Container.Image.Contains('/', StringComparison.Ordinal)))
|
||||
.Select(entry =>
|
||||
{
|
||||
if (entry.Container.Image.StartsWith("localhost/", StringComparison.Ordinal))
|
||||
{
|
||||
return $"{entry.Document.Descriptor} container '{entry.Container.Name}' uses {entry.Container.Image} without imagePullPolicy: Never.";
|
||||
}
|
||||
|
||||
return $"{entry.Document.Descriptor} container '{entry.Container.Name}' uses non-local image '{entry.Container.Image}' for a node-imported FlowerCore workload.";
|
||||
})
|
||||
.ToList();
|
||||
|
||||
violations.Should().BeEmpty();
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void PublicEgressDeployments_MustOptOutOfIamworkinLanSearchSuffixes()
|
||||
{
|
||||
var violations = Inventory.Documents
|
||||
.Where(document => document.PodSpec() is not null)
|
||||
.Where(document => PublicEgressDeployments.Contains(document.Name))
|
||||
.SelectMany(document =>
|
||||
{
|
||||
var localViolations = new List<string>();
|
||||
var podSpec = document.PodSpec()!;
|
||||
var dnsPolicy = ManifestNodeExtensions.Scalar(podSpec, "dnsPolicy");
|
||||
var searches = ManifestNodeExtensions.ScalarSequence(podSpec, "dnsConfig", "searches").ToList();
|
||||
|
||||
if (!string.Equals(dnsPolicy, "None", StringComparison.Ordinal))
|
||||
{
|
||||
localViolations.Add($"{document.Descriptor} is missing dnsPolicy: None.");
|
||||
}
|
||||
|
||||
if (searches.Count == 0)
|
||||
{
|
||||
localViolations.Add($"{document.Descriptor} is missing dnsConfig.searches.");
|
||||
}
|
||||
else if (searches.Any(search => search.Contains("iamworkin.lan", StringComparison.OrdinalIgnoreCase)))
|
||||
{
|
||||
localViolations.Add($"{document.Descriptor} still includes iamworkin.lan in dnsConfig.searches.");
|
||||
}
|
||||
|
||||
return localViolations;
|
||||
})
|
||||
.ToList();
|
||||
|
||||
violations.Should().BeEmpty();
|
||||
}
|
||||
|
||||
private static IEnumerable<string> ProbeViolations(
|
||||
ManifestDocument document,
|
||||
YamlMappingNode container,
|
||||
string probeKey)
|
||||
{
|
||||
if (!ManifestNodeExtensions.TryGetMapping(container, probeKey, out var probe)
|
||||
|| !ManifestNodeExtensions.TryGetMapping(probe, "httpGet", out var httpGet))
|
||||
{
|
||||
return Array.Empty<string>();
|
||||
}
|
||||
|
||||
var path = ManifestNodeExtensions.Scalar(httpGet, "path");
|
||||
if (!string.Equals(path, "/health", StringComparison.Ordinal))
|
||||
{
|
||||
return Array.Empty<string>();
|
||||
}
|
||||
|
||||
var containerName = ManifestNodeExtensions.Scalar(container, "name") ?? "<unnamed>";
|
||||
return new[]
|
||||
{
|
||||
$"{document.Descriptor} container '{containerName}' still uses {probeKey}.httpGet on /health.",
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
internal sealed class ManifestInventory
|
||||
{
|
||||
private ManifestInventory(string workspaceRoot, string bluejayRoot, IReadOnlyList<ManifestDocument> documents)
|
||||
{
|
||||
WorkspaceRoot = workspaceRoot;
|
||||
BluejayRoot = bluejayRoot;
|
||||
Documents = documents;
|
||||
}
|
||||
|
||||
public string WorkspaceRoot { get; }
|
||||
|
||||
public string BluejayRoot { get; }
|
||||
|
||||
public IReadOnlyList<ManifestDocument> Documents { get; }
|
||||
|
||||
public static ManifestInventory Load()
|
||||
{
|
||||
var bluejayRoot = FindBluejayInfraRoot();
|
||||
var workspaceRoot = Directory.GetParent(bluejayRoot)?.FullName
|
||||
?? throw new DirectoryNotFoundException($"Could not resolve workspace root from '{bluejayRoot}'.");
|
||||
|
||||
var documents = ManifestRoots(workspaceRoot, bluejayRoot)
|
||||
.SelectMany(LoadDocumentsFromRoot)
|
||||
.ToList();
|
||||
|
||||
return new ManifestInventory(workspaceRoot, bluejayRoot, documents);
|
||||
}
|
||||
|
||||
private static string FindBluejayInfraRoot()
|
||||
{
|
||||
var current = new DirectoryInfo(AppContext.BaseDirectory);
|
||||
while (current is not null)
|
||||
{
|
||||
if (Directory.Exists(Path.Combine(current.FullName, "apps"))
|
||||
&& File.Exists(Path.Combine(current.FullName, "README.md")))
|
||||
{
|
||||
return current.FullName;
|
||||
}
|
||||
|
||||
current = current.Parent;
|
||||
}
|
||||
|
||||
throw new DirectoryNotFoundException("Could not find the bluejay-infra repository root from the test output directory.");
|
||||
}
|
||||
|
||||
private static IEnumerable<string> ManifestRoots(string workspaceRoot, string bluejayRoot)
|
||||
{
|
||||
var roots = new[]
|
||||
{
|
||||
Path.Combine(bluejayRoot, "apps"),
|
||||
Path.Combine(workspaceRoot, "FlowerCore.Chat", "k8s"),
|
||||
Path.Combine(workspaceRoot, "FlowerCore.DMS", "k8s"),
|
||||
Path.Combine(workspaceRoot, "FlowerCore.DNS", "k8s"),
|
||||
Path.Combine(workspaceRoot, "FlowerCore.Intranet.Web", "k8s"),
|
||||
Path.Combine(workspaceRoot, "FlowerCore.Kiosk", "k8s"),
|
||||
Path.Combine(workspaceRoot, "FlowerCore.Media", "k8s"),
|
||||
Path.Combine(workspaceRoot, "FlowerCore.MenuBoard", "k8s"),
|
||||
Path.Combine(workspaceRoot, "FlowerCore.MessageBoard", "k8s"),
|
||||
// FlowerCore.Notes/k8s/selenium/ is the live Selenium Grid
|
||||
// manifest tree (consumed by deploy-selenium scripts).
|
||||
// FlowerCore.Notes/k8s/guacamole/ + FlowerCore.Notes/k8s/monitoring/
|
||||
// are historical scaffolds that have diverged from the live state
|
||||
// (bluejay-infra/apps/guacamole + bluejay-infra/apps/monitoring are
|
||||
// canonical). Operator review is required before bringing them in
|
||||
// line OR decommissioning them — keep them out of the lint scope
|
||||
// until that decision lands. See xxl-regroup-2026-05-03-followup.md
|
||||
// "Codex 7 §0 stop conditions" + the C7 close-session output.
|
||||
Path.Combine(workspaceRoot, "FlowerCore.Notes", "k8s", "selenium"),
|
||||
Path.Combine(workspaceRoot, "FlowerCore.MySQL", "k8s"),
|
||||
Path.Combine(workspaceRoot, "FlowerCore.PHP", "k8s"),
|
||||
Path.Combine(workspaceRoot, "FlowerCore.Presentations", "k8s"),
|
||||
Path.Combine(workspaceRoot, "FlowerCore.Print.Web", "k8s"),
|
||||
Path.Combine(workspaceRoot, "FlowerCore.RemoteDesktop", "k8s"),
|
||||
Path.Combine(workspaceRoot, "FlowerCore.Scoreboard", "k8s"),
|
||||
Path.Combine(workspaceRoot, "FlowerCore.SegmentDisplay", "k8s"),
|
||||
Path.Combine(workspaceRoot, "FlowerCore.SignalControl", "k8s"),
|
||||
Path.Combine(workspaceRoot, "FlowerCore.TtsReader", "k8s"),
|
||||
Path.Combine(workspaceRoot, "FlowerCore.Updater", "k8s"),
|
||||
};
|
||||
|
||||
return roots.Where(Directory.Exists);
|
||||
}
|
||||
|
||||
private static IEnumerable<ManifestDocument> LoadDocumentsFromRoot(string root)
|
||||
{
|
||||
foreach (var filePath in Directory.EnumerateFiles(root, "*.yaml", SearchOption.AllDirectories))
|
||||
{
|
||||
var fileText = File.ReadAllText(filePath);
|
||||
var segments = SplitManifestDocuments(fileText);
|
||||
|
||||
for (var index = 0; index < segments.Count; index++)
|
||||
{
|
||||
var yaml = new YamlStream();
|
||||
try
|
||||
{
|
||||
using var reader = new StringReader(segments[index]);
|
||||
yaml.Load(reader);
|
||||
}
|
||||
catch (YamlException exception)
|
||||
{
|
||||
_ = exception;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (yaml.Documents.Count == 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (yaml.Documents[0].RootNode is YamlMappingNode mapping
|
||||
&& ManifestNodeExtensions.Scalar(mapping, "kind") is not null)
|
||||
{
|
||||
yield return new ManifestDocument(root, filePath, index, fileText, mapping);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static IReadOnlyList<string> SplitManifestDocuments(string fileText)
|
||||
{
|
||||
var documents = new List<string>();
|
||||
var currentLines = new List<string>();
|
||||
var seenApiVersion = false;
|
||||
|
||||
foreach (var line in Regex.Split(fileText, @"\r?\n"))
|
||||
{
|
||||
if (Regex.IsMatch(line, @"^\s*---\s*$"))
|
||||
{
|
||||
FlushCurrentDocument();
|
||||
continue;
|
||||
}
|
||||
|
||||
if (Regex.IsMatch(line, @"^\s*apiVersion:\s*")
|
||||
&& seenApiVersion
|
||||
&& currentLines.Any(existing => !string.IsNullOrWhiteSpace(existing)))
|
||||
{
|
||||
FlushCurrentDocument();
|
||||
}
|
||||
|
||||
currentLines.Add(line);
|
||||
if (Regex.IsMatch(line, @"^\s*apiVersion:\s*"))
|
||||
{
|
||||
seenApiVersion = true;
|
||||
}
|
||||
}
|
||||
|
||||
FlushCurrentDocument();
|
||||
return documents;
|
||||
|
||||
void FlushCurrentDocument()
|
||||
{
|
||||
var text = string.Join(Environment.NewLine, currentLines).Trim();
|
||||
if (!string.IsNullOrWhiteSpace(text))
|
||||
{
|
||||
documents.Add(text);
|
||||
}
|
||||
|
||||
currentLines.Clear();
|
||||
seenApiVersion = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
internal sealed record ManifestDocument(
|
||||
string RootPath,
|
||||
string FilePath,
|
||||
int DocumentIndex,
|
||||
string FileText,
|
||||
YamlMappingNode Root)
|
||||
{
|
||||
public string Kind => Scalar("kind") ?? string.Empty;
|
||||
|
||||
public string Name => Scalar("metadata", "name") ?? $"document-{DocumentIndex}";
|
||||
|
||||
public string Namespace => Scalar("metadata", "namespace") ?? string.Empty;
|
||||
|
||||
public string RelativePath => Path.GetRelativePath(RootPath, FilePath).Replace('\\', '/');
|
||||
|
||||
public string Descriptor => $"{Kind} {Namespace}/{Name} [{RelativePath}#{DocumentIndex + 1}]";
|
||||
|
||||
public string? Scalar(params string[] path) => ManifestNodeExtensions.Scalar(Root, path);
|
||||
|
||||
public IReadOnlyList<YamlMappingNode> MappingSequence(params string[] path) => ManifestNodeExtensions.MappingSequence(Root, path);
|
||||
|
||||
public IEnumerable<string> AllScalars() => ManifestNodeExtensions.AllScalars(Root);
|
||||
|
||||
public IReadOnlyList<string> EgressPorts()
|
||||
{
|
||||
return MappingSequence("spec", "egress")
|
||||
.SelectMany(egressRule => ManifestNodeExtensions.MappingSequence(egressRule, "ports"))
|
||||
.Select(portMapping => ManifestNodeExtensions.Scalar(portMapping, "port"))
|
||||
.Where(value => !string.IsNullOrWhiteSpace(value))
|
||||
.Cast<string>()
|
||||
.ToList();
|
||||
}
|
||||
|
||||
public YamlMappingNode? PodSpec()
|
||||
{
|
||||
return Kind switch
|
||||
{
|
||||
"Deployment" or "StatefulSet" or "DaemonSet" or "Job" =>
|
||||
ManifestNodeExtensions.Mapping(Root, "spec", "template", "spec"),
|
||||
"CronJob" =>
|
||||
ManifestNodeExtensions.Mapping(Root, "spec", "jobTemplate", "spec", "template", "spec"),
|
||||
_ => null,
|
||||
};
|
||||
}
|
||||
|
||||
public IReadOnlyList<YamlMappingNode> ContainerMappings()
|
||||
{
|
||||
var podSpec = PodSpec();
|
||||
if (podSpec is null)
|
||||
{
|
||||
return Array.Empty<YamlMappingNode>();
|
||||
}
|
||||
|
||||
return ManifestNodeExtensions.MappingSequence(podSpec, "containers")
|
||||
.Concat(ManifestNodeExtensions.MappingSequence(podSpec, "initContainers"))
|
||||
.ToList();
|
||||
}
|
||||
|
||||
public IReadOnlyList<ContainerSpec> ContainerSpecs()
|
||||
{
|
||||
return ContainerMappings()
|
||||
.Select(container => new ContainerSpec(
|
||||
ManifestNodeExtensions.Scalar(container, "name") ?? "<unnamed>",
|
||||
ManifestNodeExtensions.Scalar(container, "image") ?? string.Empty,
|
||||
ManifestNodeExtensions.Scalar(container, "imagePullPolicy") ?? string.Empty))
|
||||
.ToList();
|
||||
}
|
||||
}
|
||||
|
||||
internal sealed record ContainerSpec(string Name, string Image, string ImagePullPolicy);
|
||||
|
||||
internal static class ManifestNodeExtensions
|
||||
{
|
||||
public static string? Scalar(this YamlMappingNode mapping, params string[] path)
|
||||
{
|
||||
return TryGetNode(mapping, path, out var node) && node is YamlScalarNode scalar
|
||||
? scalar.Value
|
||||
: null;
|
||||
}
|
||||
|
||||
public static YamlMappingNode? Mapping(this YamlMappingNode mapping, params string[] path)
|
||||
{
|
||||
return TryGetNode(mapping, path, out var node) ? node as YamlMappingNode : null;
|
||||
}
|
||||
|
||||
public static bool TryGetMapping(this YamlMappingNode mapping, string key, out YamlMappingNode result)
|
||||
{
|
||||
if (TryGetChild(mapping, key, out var child) && child is YamlMappingNode childMapping)
|
||||
{
|
||||
result = childMapping;
|
||||
return true;
|
||||
}
|
||||
|
||||
result = null!;
|
||||
return false;
|
||||
}
|
||||
|
||||
public static IReadOnlyList<YamlMappingNode> MappingSequence(this YamlMappingNode mapping, params string[] path)
|
||||
{
|
||||
return TryGetNode(mapping, path, out var node) && node is YamlSequenceNode sequence
|
||||
? sequence.Children.OfType<YamlMappingNode>().ToList()
|
||||
: Array.Empty<YamlMappingNode>();
|
||||
}
|
||||
|
||||
public static IReadOnlyList<string> ScalarSequence(this YamlMappingNode mapping, params string[] path)
|
||||
{
|
||||
return TryGetNode(mapping, path, out var node) && node is YamlSequenceNode sequence
|
||||
? sequence.Children.OfType<YamlScalarNode>()
|
||||
.Select(child => child.Value)
|
||||
.Where(value => !string.IsNullOrWhiteSpace(value))
|
||||
.Cast<string>()
|
||||
.ToList()
|
||||
: Array.Empty<string>();
|
||||
}
|
||||
|
||||
public static IEnumerable<string> AllScalars(YamlNode node)
|
||||
{
|
||||
return node switch
|
||||
{
|
||||
YamlScalarNode scalar when !string.IsNullOrWhiteSpace(scalar.Value) => new[] { scalar.Value! },
|
||||
YamlSequenceNode sequence => sequence.Children.SelectMany(AllScalars),
|
||||
YamlMappingNode mapping => mapping.Children.SelectMany(entry => AllScalars(entry.Key).Concat(AllScalars(entry.Value))),
|
||||
_ => Array.Empty<string>(),
|
||||
};
|
||||
}
|
||||
|
||||
private static bool TryGetNode(YamlMappingNode mapping, IReadOnlyList<string> path, out YamlNode node)
|
||||
{
|
||||
YamlNode current = mapping;
|
||||
foreach (var segment in path)
|
||||
{
|
||||
if (current is not YamlMappingNode currentMapping || !TryGetChild(currentMapping, segment, out current))
|
||||
{
|
||||
node = null!;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
node = current;
|
||||
return true;
|
||||
}
|
||||
|
||||
private static bool TryGetChild(YamlMappingNode mapping, string key, out YamlNode value)
|
||||
{
|
||||
foreach (var entry in mapping.Children)
|
||||
{
|
||||
if (entry.Key is YamlScalarNode scalar
|
||||
&& string.Equals(scalar.Value, key, StringComparison.Ordinal))
|
||||
{
|
||||
value = entry.Value;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
value = null!;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
package bluejayinfra.cross_namespace_ingressroute
|
||||
|
||||
deny[msg] {
|
||||
input.kind == "IngressRoute"
|
||||
ns := object.get(input.metadata, "namespace", "")
|
||||
route := input.spec.routes[_]
|
||||
service := route.services[_]
|
||||
svc_ns := object.get(service, "namespace", "")
|
||||
svc_ns != ""
|
||||
svc_ns != ns
|
||||
msg := sprintf("IngressRoute %s/%s references Service %s in namespace %s", [ns, input.metadata.name, service.name, svc_ns])
|
||||
}
|
||||
@@ -0,0 +1,23 @@
|
||||
package bluejayinfra.public_method_allowlist
|
||||
|
||||
public_hosts := {"dist.flowercore.io", "dns.iamworkin.lan"}
|
||||
|
||||
deny[msg] {
|
||||
input.kind == "IngressRoute"
|
||||
route := input.spec.routes[_]
|
||||
match := object.get(route, "match", "")
|
||||
host := public_hosts[_]
|
||||
contains(match, sprintf("Host(`%s`)", [host]))
|
||||
not contains(match, "Method(`GET`)")
|
||||
msg := sprintf("IngressRoute %s/%s is missing Method(GET) for public read-only host %s", [input.metadata.namespace, input.metadata.name, host])
|
||||
}
|
||||
|
||||
deny[msg] {
|
||||
input.kind == "IngressRoute"
|
||||
route := input.spec.routes[_]
|
||||
match := object.get(route, "match", "")
|
||||
host := public_hosts[_]
|
||||
contains(match, sprintf("Host(`%s`)", [host]))
|
||||
not contains(match, "Method(`HEAD`)")
|
||||
msg := sprintf("IngressRoute %s/%s is missing Method(HEAD) for public read-only host %s", [input.metadata.namespace, input.metadata.name, host])
|
||||
}
|
||||
@@ -0,0 +1,30 @@
|
||||
package bluejayinfra.traefik_vip_backend_ports
|
||||
|
||||
has_vip {
|
||||
some i
|
||||
some j
|
||||
input.spec.egress[i].to[j].ipBlock.cidr == "10.0.56.200/32"
|
||||
}
|
||||
|
||||
has_port(port) {
|
||||
some i
|
||||
some j
|
||||
input.spec.egress[i].ports[j].port == port
|
||||
}
|
||||
|
||||
deny[msg] {
|
||||
input.kind == "NetworkPolicy"
|
||||
has_vip
|
||||
has_port(443)
|
||||
not has_port(8443)
|
||||
msg := sprintf("NetworkPolicy %s/%s allows 10.0.56.200:443 without backend port 8443", [input.metadata.namespace, input.metadata.name])
|
||||
}
|
||||
|
||||
deny[msg] {
|
||||
input.kind == "NetworkPolicy"
|
||||
has_vip
|
||||
has_port(80)
|
||||
not has_port(8080)
|
||||
not has_port(8000)
|
||||
msg := sprintf("NetworkPolicy %s/%s allows 10.0.56.200:80 without backend HTTP port 8080 or 8000", [input.metadata.namespace, input.metadata.name])
|
||||
}
|
||||
@@ -0,0 +1,28 @@
|
||||
package bluejayinfra.auth_probe_path
|
||||
|
||||
protected_deployments := {
|
||||
"messageboard-web",
|
||||
"scoreboard-web",
|
||||
"segmentdisplay-web",
|
||||
"signalcontrol-web",
|
||||
}
|
||||
|
||||
deny[msg] {
|
||||
input.kind == "Deployment"
|
||||
protected_deployments[input.metadata.name]
|
||||
container := input.spec.template.spec.containers[_]
|
||||
probe := object.get(container, "readinessProbe", {})
|
||||
http_get := object.get(probe, "httpGet", {})
|
||||
object.get(http_get, "path", "") == "/health"
|
||||
msg := sprintf("Deployment %s/%s must not use readinessProbe.httpGet /health behind API key middleware", [input.metadata.namespace, input.metadata.name])
|
||||
}
|
||||
|
||||
deny[msg] {
|
||||
input.kind == "Deployment"
|
||||
protected_deployments[input.metadata.name]
|
||||
container := input.spec.template.spec.containers[_]
|
||||
probe := object.get(container, "livenessProbe", {})
|
||||
http_get := object.get(probe, "httpGet", {})
|
||||
object.get(http_get, "path", "") == "/health"
|
||||
msg := sprintf("Deployment %s/%s must not use livenessProbe.httpGet /health behind API key middleware", [input.metadata.namespace, input.metadata.name])
|
||||
}
|
||||
@@ -0,0 +1,23 @@
|
||||
package bluejayinfra.statefulset_volumeclaim_defaults
|
||||
|
||||
deny[msg] {
|
||||
input.kind == "StatefulSet"
|
||||
count(object.get(input.spec, "volumeClaimTemplates", [])) > 0
|
||||
object.get(input.spec, "podManagementPolicy", "") == ""
|
||||
msg := sprintf("StatefulSet %s/%s is missing spec.podManagementPolicy", [input.metadata.namespace, input.metadata.name])
|
||||
}
|
||||
|
||||
deny[msg] {
|
||||
input.kind == "StatefulSet"
|
||||
count(object.get(input.spec, "volumeClaimTemplates", [])) > 0
|
||||
object.get(input.spec, "revisionHistoryLimit", 0) == 0
|
||||
msg := sprintf("StatefulSet %s/%s is missing spec.revisionHistoryLimit", [input.metadata.namespace, input.metadata.name])
|
||||
}
|
||||
|
||||
deny[msg] {
|
||||
input.kind == "StatefulSet"
|
||||
claim := input.spec.volumeClaimTemplates[_]
|
||||
object.get(claim.spec, "volumeMode", "") != "Filesystem"
|
||||
claim_name := object.get(claim.metadata, "name", "<unnamed>")
|
||||
msg := sprintf("StatefulSet %s/%s volumeClaimTemplate %s is missing volumeMode: Filesystem", [input.metadata.namespace, input.metadata.name, claim_name])
|
||||
}
|
||||
@@ -0,0 +1,40 @@
|
||||
package bluejayinfra.localhost_image_pull_policy
|
||||
|
||||
pod_spec(spec) = pod {
|
||||
input.kind == "Deployment"
|
||||
pod := spec.template.spec
|
||||
}
|
||||
|
||||
pod_spec(spec) = pod {
|
||||
input.kind == "StatefulSet"
|
||||
pod := spec.template.spec
|
||||
}
|
||||
|
||||
pod_spec(spec) = pod {
|
||||
input.kind == "DaemonSet"
|
||||
pod := spec.template.spec
|
||||
}
|
||||
|
||||
deny[msg] {
|
||||
pod := pod_spec(input.spec)
|
||||
container := pod.containers[_]
|
||||
startswith(object.get(container, "image", ""), "localhost/")
|
||||
object.get(container, "imagePullPolicy", "") != "Never"
|
||||
msg := sprintf("%s/%s container %s uses a localhost image without imagePullPolicy: Never", [input.metadata.namespace, input.metadata.name, container.name])
|
||||
}
|
||||
|
||||
deny[msg] {
|
||||
pod := pod_spec(input.spec)
|
||||
container := pod.initContainers[_]
|
||||
startswith(object.get(container, "image", ""), "localhost/")
|
||||
object.get(container, "imagePullPolicy", "") != "Never"
|
||||
msg := sprintf("%s/%s initContainer %s uses a localhost image without imagePullPolicy: Never", [input.metadata.namespace, input.metadata.name, container.name])
|
||||
}
|
||||
|
||||
deny[msg] {
|
||||
pod := pod_spec(input.spec)
|
||||
container := pod.containers[_]
|
||||
startswith(object.get(container, "image", ""), "fc-")
|
||||
not contains(object.get(container, "image", ""), "/")
|
||||
msg := sprintf("%s/%s container %s uses a non-localhost FlowerCore image reference %s", [input.metadata.namespace, input.metadata.name, container.name, container.image])
|
||||
}
|
||||
@@ -0,0 +1,27 @@
|
||||
package bluejayinfra.public_egress_dns_none
|
||||
|
||||
public_egress_workloads := {
|
||||
"asterisk",
|
||||
"fc-llm-bridge",
|
||||
"mysql-web",
|
||||
"php-web",
|
||||
"ttsreader-align",
|
||||
"ttsreader-kokoro",
|
||||
"ttsreader-modern",
|
||||
"ttsreader-piper",
|
||||
}
|
||||
|
||||
deny[msg] {
|
||||
input.kind == "Deployment"
|
||||
public_egress_workloads[input.metadata.name]
|
||||
object.get(input.spec.template.spec, "dnsPolicy", "") != "None"
|
||||
msg := sprintf("Deployment %s/%s must set dnsPolicy: None for public-internet egress", [input.metadata.namespace, input.metadata.name])
|
||||
}
|
||||
|
||||
deny[msg] {
|
||||
input.kind == "Deployment"
|
||||
public_egress_workloads[input.metadata.name]
|
||||
search := object.get(object.get(input.spec.template.spec, "dnsConfig", {}), "searches", [])[_]
|
||||
contains(lower(search), "iamworkin.lan")
|
||||
msg := sprintf("Deployment %s/%s must not include iamworkin.lan in dnsConfig.searches", [input.metadata.namespace, input.metadata.name])
|
||||
}
|
||||
@@ -0,0 +1,35 @@
|
||||
package bluejayinfra.public_readwrite_allowlist
|
||||
|
||||
# Public hosts that allow a tightly bounded write surface in addition to
|
||||
# GET/HEAD. updatecenter.iamworkin.lan accepts POST /api/v1/checkin/{id}
|
||||
# (bootstrap-JWT) so its allowlist is GET||HEAD||POST||OPTIONS — but
|
||||
# PUT/PATCH/DELETE must still 404 at the route. Any host in this set MUST
|
||||
# include all four required methods AND MUST NOT include any forbidden
|
||||
# method.
|
||||
public_readwrite_hosts := {"updatecenter.iamworkin.lan", "updates.iamworkin.lan"}
|
||||
|
||||
required_methods := {"GET", "HEAD", "POST", "OPTIONS"}
|
||||
|
||||
forbidden_methods := {"PUT", "PATCH", "DELETE"}
|
||||
|
||||
deny[msg] {
|
||||
input.kind == "IngressRoute"
|
||||
route := input.spec.routes[_]
|
||||
match := object.get(route, "match", "")
|
||||
host := public_readwrite_hosts[_]
|
||||
contains(match, sprintf("Host(`%s`)", [host]))
|
||||
required := required_methods[_]
|
||||
not contains(match, sprintf("Method(`%s`)", [required]))
|
||||
msg := sprintf("IngressRoute %s/%s is missing required Method(%s) for public read-write host %s", [input.metadata.namespace, input.metadata.name, required, host])
|
||||
}
|
||||
|
||||
deny[msg] {
|
||||
input.kind == "IngressRoute"
|
||||
route := input.spec.routes[_]
|
||||
match := object.get(route, "match", "")
|
||||
host := public_readwrite_hosts[_]
|
||||
contains(match, sprintf("Host(`%s`)", [host]))
|
||||
forbidden := forbidden_methods[_]
|
||||
contains(match, sprintf("Method(`%s`)", [forbidden]))
|
||||
msg := sprintf("IngressRoute %s/%s must not include Method(%s) on public read-write host %s", [input.metadata.namespace, input.metadata.name, forbidden, host])
|
||||
}
|
||||
Reference in New Issue
Block a user