fix(agent-zero): prefix bridge embedding alias for litellm

fix(agent-zero): keep internal util/embed on bridge v1
chore(bridge): bump fc-llm-bridge image tag v202604292028
2026-04-29 21:14:12 -05:00 · 2026-04-29 21:09:04 -05:00 · 2026-04-29 20:50:55 -05:00 · 2026-04-29 20:50:55 -05:00 · 2026-04-29 19:14:01 -05:00 · 2026-04-29 18:04:43 -05:00
16 changed files with 1931 additions and 163 deletions
--- a/apps/agent-zero/agent-zero.yaml
+++ b/apps/agent-zero/agent-zero.yaml
@@ -2,14 +2,15 @@
 # Agent Zero AI Stack — NUC Deployment (RKE2 Bare-Metal)
 # =============================================================================
 # Deploys: AgentZero (agent UI) on RKE2 cluster with Blue Jay profile
-# Ollama: workstation-first via BLUEJAY-WS (10.0.56.20:11434) with edge1 Pi 5
-# fallback (10.0.57.17:11434)
+# Ollama: edge1 Pi 5 + AI HAT+ ONLY (10.0.57.17:11434).
+# Workstation Ollama (BLUEJAY-WS) is intentionally NOT in the upstream —
+# the workstation is private dev hardware, not a cluster dependency.
 # Target: RKE2 bare-metal cluster, namespace: agent-zero
 # Profile: Blue Jay (21 tools, 3 prompts, 4 extensions, theme)
 #
 # Differences from LOCAL (WSL K3s):
 #   - Uses Longhorn StorageClass (not local-path)
-#   - Prefers workstation Ollama on the R9700, falls back to edge1 Pi 5
+#   - Cluster-only Ollama path (edge1) — keeps workstation private
 #   - NO Anthropic API key (free/local models only)
 #   - NO Piper TTS or Kiwix (edge1 handles TTS, no Wikipedia needed)
 #   - NO hostPath volumes — profile/tools/extensions loaded via ConfigMaps
@@ -91,14 +92,17 @@ subjects:
 # =============================================================================
 # Agent Zero — AI Agent Web UI (NUC Edition, Blue Jay Profile)
 # =============================================================================
-# Connects to a local proxy that routes to workstation Ollama first and edge1 second
-# Blue Jay profile with 21 tools, 3 prompts, 4 extensions
+# Connects directly to fc-llm-bridge for chat + internal util/embed + browser.
+# Agent Zero's internal util/embed slots stay on the bridge's OpenAI-compatible
+# /v1 surface, while browser + corpus-search use the Ollama-compatible /api/*
+# surface through OLLAMA_HOST.
+# Blue Jay profile with 21 tools, 3 prompts, 4 extensions.

 ---
-# FC LLM Bridge API key for Agent Zero (ADR-088 chat_model routing).
+# FC LLM Bridge API key for Agent Zero (ADR-088 chat/util/embed/browser routing).
 # Syncs from 1Password item "FC LLM Bridge API Keys" (field: agent-zero-k8s).
-# Consumed by the chat_model only; util / embedding / browser stay on local
-# Ollama via the 127.0.0.1 sidecar proxy.
+# Consumed by chat, internal util/embed, browser, and corpus-search requests
+# that traverse fc-llm-bridge.
 apiVersion: onepassword.com/v1
 kind: OnePasswordItem
 metadata:
@@ -107,6 +111,22 @@ metadata:
 spec:
  itemPath: "vaults/IAmWorkin/items/FC LLM Bridge API Keys"

+---
+# Print.Web API key for Agent Zero's print_web.py Python tool.
+# Syncs from 1Password item "Print.Web API Keys" (password field = API key).
+# The print_web.py tool reads PRINT_WEB_API_KEY env var for all HTTP requests
+# to the thermal print service (GET /api/mcp/tools, POST /api/print/*, etc.).
+# Note: Print.Web uses the legacy REST MCP shape (/api/mcp/tools/*), not the
+# streamable-http MCP protocol. The print_web Python tool bridges this gap
+# and is already present in bluejay-tools ConfigMaps.
+apiVersion: onepassword.com/v1
+kind: OnePasswordItem
+metadata:
+  name: print-web-api-keys
+  namespace: agent-zero
+spec:
+  itemPath: "vaults/IAmWorkin/items/Print.Web API Keys"
+
 ---
 apiVersion: apps/v1
 kind: Deployment
@@ -118,7 +138,7 @@ metadata:
  annotations:
    agent-zero/deployment: "nuc"
    agent-zero/profile: "bluejay"
-    agent-zero/ollama: "BLUEJAY-WS primary (10.0.56.20:11434), edge1 fallback (10.0.57.17:11434)"
+    agent-zero/ollama: "fc-llm-bridge fronts edge1 Pi 5 + AI HAT+ Ollama for cluster browser/corpus-search traffic; internal chat/util/embed route through the bridge's authenticated OpenAI surface"
 spec:
  replicas: 1
  selector:
@@ -133,19 +153,18 @@ spec:
    spec:
      serviceAccountName: agent-zero
      initContainers:
-        # Wait for either workstation or edge1 Ollama to be reachable before starting Agent Zero.
-        - name: wait-for-ollama
+        # Wait for fc-llm-bridge to be reachable before starting Agent Zero.
+        - name: wait-for-llm-bridge
          image: busybox:1.37
          command: ["sh", "-c"]
          args:
            - |
-              echo "Waiting for Ollama at BLUEJAY-WS or edge1..."
-              until wget -qO- --timeout=2 http://10.0.56.20:11434/api/tags >/dev/null 2>&1 || \
-                    wget -qO- --timeout=2 http://10.0.57.17:11434/api/tags >/dev/null 2>&1; do
-                echo "No Ollama endpoint ready yet, retrying in 5s..."
+              echo "Waiting for fc-llm-bridge..."
+              until wget -qO- --timeout=2 http://fc-llm-bridge.fc-llm-bridge.svc:8080/healthz >/dev/null 2>&1; do
+                echo "fc-llm-bridge not ready yet, retrying in 5s..."
                sleep 5
              done
-              echo "At least one Ollama endpoint is reachable."
+              echo "fc-llm-bridge is reachable."
        # Assemble the Blue Jay profile directory structure from ConfigMaps.
        # ConfigMaps can't create nested dirs, so we copy into the workspace PVC.
        - name: setup-bluejay
@@ -192,50 +211,6 @@ spec:
            - name: bluejay-theme
              mountPath: /tmp/bluejay-theme
      containers:
-        - name: ollama-proxy
-          image: nginx:1.27-alpine
-          command: ["/bin/sh", "-c"]
-          args:
-            - |
-              cat > /etc/nginx/nginx.conf <<'NGINX'
-              worker_processes  1;
-              events { worker_connections 1024; }
-              http {
-                upstream ollama_upstream {
-                  server 10.0.56.20:11434 max_fails=2 fail_timeout=10s;
-                  server 10.0.57.17:11434 backup;
-                  keepalive 16;
-                }
-                server {
-                  listen 11434;
-                  location / {
-                    proxy_http_version 1.1;
-                    proxy_set_header Connection "";
-                    proxy_set_header Host $host;
-                    proxy_connect_timeout 5s;
-                    proxy_read_timeout 600s;
-                    proxy_send_timeout 600s;
-                    proxy_next_upstream error timeout invalid_header http_502 http_503 http_504;
-                    proxy_pass http://ollama_upstream;
-                  }
-                }
-              }
-              NGINX
-              exec nginx -g 'daemon off;'
-          ports:
-            - containerPort: 11434
-          readinessProbe:
-            httpGet:
-              path: /api/tags
-              port: 11434
-            initialDelaySeconds: 5
-            periodSeconds: 15
-          livenessProbe:
-            httpGet:
-              path: /api/tags
-              port: 11434
-            initialDelaySeconds: 10
-            periodSeconds: 30
        - name: agent-zero
          image: agent0ai/agent-zero:latest
          command: ["/bin/bash", "-c"]
@@ -256,11 +231,12 @@ spec:
              # chat_model: FlowerCore LLM Bridge (ADR-088) — OpenAI-compat,
              # spend-tracked, tier-aliased (fc:balanced → Claude Sonnet).
              # api_key comes from A0_SET_chat_model_api_key env var (overrides
-              # config.json). util + embedding stay on local 127.0.0.1 Ollama
-              # proxy (workstation primary, edge1 fallback).
+              # config.json). Utility + embedding stay on the authenticated
+              # OpenAI-compatible /v1 surface; browser and direct tool traffic
+              # use the bridge's Ollama-compatible root via OLLAMA_HOST.
              mkdir -p /a0/usr/plugins/_model_config
              cat > /a0/usr/plugins/_model_config/config.json << 'MODELCFG'
-              {"allow_chat_override":true,"chat_model":{"provider":"openai","name":"fc:balanced","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_history":0.7,"vision":false,"kwargs":{"temperature":0,"num_ctx":8192}},"utility_model":{"provider":"ollama","name":"qwen2.5:1.5b","api_base":"http://127.0.0.1:11434","ctx_length":8192,"ctx_input":0.7,"kwargs":{"num_ctx":8192}},"embedding_model":{"provider":"ollama","name":"nomic-embed-text","api_base":"http://127.0.0.1:11434","kwargs":{}}}
+              {"allow_chat_override":true,"chat_model":{"provider":"openai","name":"fc:balanced","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_history":0.7,"vision":false,"kwargs":{"temperature":0,"num_ctx":8192}},"utility_model":{"provider":"openai","name":"fc:cheap","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_input":0.7,"kwargs":{"num_ctx":8192}},"embedding_model":{"provider":"openai","name":"openai/fc:embedding","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","kwargs":{}}}
              MODELCFG
              # Strip heredoc indentation
              sed -i 's/^              //' /a0/usr/plugins/_model_config/config.json
@@ -284,8 +260,9 @@ spec:
            # Chat model — routed through FlowerCore LLM Bridge (ADR-088)
            # so spend is tracked and tier aliases (fc:cheap/fc:balanced/fc:deep)
            # dispatch to Ollama or Anthropic via a single OpenAI-compat endpoint.
-            # Util / embedding / browser stay on local Ollama via 127.0.0.1 proxy
-            # for zero-latency, zero-cost small-model traffic.
+            # Internal utility + embedding use the authenticated OpenAI surface,
+            # while browser/corpus-search use the bridge's Ollama-compatible
+            # endpoints so Agent Zero no longer needs a local proxy sidecar.
            - name: A0_SET_chat_model_provider
              value: "openai"
            - name: A0_SET_chat_model_name
@@ -307,35 +284,51 @@ spec:
                secretKeyRef:
                  name: fc-llm-bridge-api-keys
                  key: agent-zero-k8s
+            - name: FC_LLM_BRIDGE_API_KEY
+              valueFrom:
+                secretKeyRef:
+                  name: fc-llm-bridge-api-keys
+                  key: agent-zero-k8s
            - name: A0_SET_chat_model_ctx_length
              value: "8192"
            - name: A0_SET_chat_model_kwargs
              value: '{"temperature": 0, "num_ctx": 8192}'
-            # Utility model — fast small helper tier through the same proxy
+            # Utility model — fast small helper tier through the OpenAI surface
            - name: A0_SET_util_model_provider
-              value: "ollama"
+              value: "openai"
            - name: A0_SET_util_model_name
-              value: "qwen2.5:1.5b"
+              value: "fc:cheap"
            - name: A0_SET_util_model_api_base
-              value: "http://127.0.0.1:11434"
+              value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1"
            - name: A0_SET_util_model_kwargs
              value: '{"num_ctx": 2048}'
-            # Embedding model — nomic through the same proxy
+            # Embedding model — authenticated bridge alias to nomic-embed-text.
+            # LiteLLM's embedding() path needs an explicit provider prefix here
+            # even though the chat slot can use bare fc:* aliases.
            - name: A0_SET_embed_model_provider
-              value: "ollama"
+              value: "openai"
            - name: A0_SET_embed_model_name
-              value: "nomic-embed-text"
+              value: "openai/fc:embedding"
            - name: A0_SET_embed_model_api_base
-              value: "http://127.0.0.1:11434"
+              value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1"
            # Browser model — small Gemma candidate through the same proxy
            - name: A0_SET_browser_model_provider
              value: "ollama"
            - name: A0_SET_browser_model_name
              value: "gemma3:4b"
            - name: A0_SET_browser_model_api_base
-              value: "http://127.0.0.1:11434"
+              value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080"
+            - name: A0_SET_browser_model_api_key
+              valueFrom:
+                secretKeyRef:
+                  name: fc-llm-bridge-api-keys
+                  key: agent-zero-k8s
            - name: A0_SET_browser_model_vision
              value: "true"
+            - name: OLLAMA_HOST
+              value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080"
+            - name: FLOWERCORE_AGENTZERO_OLLAMA_URL
+              value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080"
            # Agent profile — Blue Jay personality, tools, and system prompt
            - name: A0_SET_agent_profile
              value: "bluejay"
@@ -358,9 +351,25 @@ spec:
                  name: chat-mcp-api-key
                  key: api-key
                  optional: true
-            # Print.Web — Thermal printer service on edge2
+            # Print.Web — Thermal printer service on edge2.
+            # PRINT_WEB_URL: internal HTTP (bypasses Traefik TLS — print_web.py
+            # runs in-cluster and can reach edge2 directly on the PROD VLAN).
+            # PRINT_WEB_API_KEY: from 1Password "Print.Web API Keys" password field,
+            # synced by the print-web-api-keys OnePasswordItem CRD above.
+            # The print_web.py Python tool reads both env vars for all HTTP calls.
            - name: PRINT_WEB_URL
              value: "http://10.0.57.16:5200"
+            - name: PRINT_WEB_API_KEY
+              valueFrom:
+                secretKeyRef:
+                  name: print-web-api-keys
+                  key: password
+            # Intranet search — intranet_search.py uses in-cluster HTTP (no step-ca TLS needed).
+            # Separately: corpus_search.py reads FLOWERCORE_FLEET_VECTOR_DIR, but that mount
+            # is not on the cluster yet (BLUEJAY-WS only), so the tool returns a
+            # "no DB found" message with rebuild instructions rather than crashing.
+            - name: FLOWERCORE_INTRANET_URL
+              value: "http://intranet-web.intranet.svc:5300"
            # Kubernetes
            - name: KUBERNETES_SERVICE_HOST
              value: "kubernetes.default.svc"
@@ -395,7 +404,7 @@ spec:
              command:
                - /bin/bash
                - -c
-                - "curl -sf http://localhost:80/ > /dev/null && curl -sf --connect-timeout 3 http://127.0.0.1:11434/api/tags > /dev/null"
+                - "curl -sf http://localhost:80/ > /dev/null && curl -sf --connect-timeout 3 http://fc-llm-bridge.fc-llm-bridge.svc:8080/healthz > /dev/null"
            periodSeconds: 30
            failureThreshold: 2
          resources:
@@ -533,18 +542,6 @@ spec:
          protocol: UDP
        - port: 53
          protocol: TCP
-    # Ollama on BLUEJAY-WS
-    - to:
-        - ipBlock:
-            cidr: 10.0.56.20/32
-      ports:
-        - port: 11434
-    # Ollama on edge1 fallback
-    - to:
-        - ipBlock:
-            cidr: 10.0.57.17/32
-      ports:
-        - port: 11434
    # Print.Web on edge2
    - to:
        - ipBlock:
@@ -578,6 +575,15 @@ spec:
          protocol: TCP
        - port: 8080
          protocol: TCP
+    # Intranet search API — use in-cluster svc so traffic stays inside
+    # the cluster and is not blocked by the private-range egress denylist.
+    - to:
+        - namespaceSelector:
+            matchLabels:
+              kubernetes.io/metadata.name: intranet
+      ports:
+        - port: 5300
+          protocol: TCP
    # Allow internet (for kubectl image pull, etc)
    - to:
        - ipBlock:
--- a/apps/agent-zero/configmaps-bluejay.yaml
+++ b/apps/agent-zero/configmaps-bluejay.yaml
@@ -7209,6 +7209,9 @@ data:
            "keep_alive": keep_alive,
            "stream": False,
        })
+        curl_headers = ["-H", "Content-Type: application/json"]
+        if os.environ.get("FC_LLM_BRIDGE_API_KEY"):
+            curl_headers.extend(["-H", f"X-Api-Key: {os.environ['FC_LLM_BRIDGE_API_KEY']}"])

        try:
            result = subprocess.run(
@@ -7216,7 +7219,7 @@ data:
                    "curl", "-s", "--max-time", "120",
                    "-X", "POST",
                    f"{api_base}/api/generate",
-                    "-H", "Content-Type: application/json",
+                    *curl_headers,
                    "-d", payload,
                ],
                capture_output=True,
@@ -13150,6 +13153,451 @@ data:
    - PowerShell 5.1 compatibility is assumed (no PowerShell 7+ features).
    - All commands run with `-NoProfile -NonInteractive` flags for clean execution.
    """
+  corpus_search.py: |
+    # FlowerCore Fleet Corpus Vector Search Tool
+    #
+    # Queries an AiStation-built SqliteVecVectorStore DB (fleet-*.db) under /a0/usr/vectors,
+    # bind-mounted read-only from /var/lib/flowercore/vector-stores/ on the host.
+    # Embeds the query through Ollama's nomic-embed-text model, computes cosine
+    # similarity against every stored chunk in pure Python (no numpy — not present
+    # in the container), and returns the top-K nearest neighbors with source metadata.
+    #
+    # This is the offline-friendly counterpart to `intranet_search` (which hits the
+    # Intranet's live REST API). Use it for Bible/Greek/Hebrew/Strong's lookups and
+    # anywhere the workstation has a newer DB than the Intranet one. The store is
+    # refreshed by `aistation-indexer build <edition>` — see the FlowerCore.Knowledge
+    # ADR at docs/ai-agents/flowercore-knowledge-service-plan.md.
+
+    import json
+    import math
+    import os
+    import sqlite3
+    import urllib.request
+    from pathlib import Path
+
+    from python.helpers.tool import Tool, Response
+
+
+    # Directory scanned for fleet vector DBs; override with FLOWERCORE_FLEET_VECTOR_DIR.
+    DEFAULT_VECTORS_DIR = os.environ.get(
+        "FLOWERCORE_FLEET_VECTOR_DIR",
+        "/a0/usr/vectors",
+    )
+    # Candidate DB names tried in order when the caller doesn't pick an explicit
+    # DB: the FLOWERCORE_FLEET_VECTOR_DB override first (an empty value is
+    # skipped by _resolve_db), then the fleet tiers workstation → pi-edge → bmo-bot.
+    PREFERRED_DB_ORDER = [
+        os.environ.get("FLOWERCORE_FLEET_VECTOR_DB", ""),
+        "fleet-workstation-full.db",
+        "fleet-pi-edge.db",
+        "fleet-bmo-bot.db",
+    ]
+    # Base URL that _embed() posts to (it appends /api/embeddings).
+    OLLAMA_BASE_URL = os.environ.get(
+        "FLOWERCORE_AGENTZERO_OLLAMA_URL",
+        "http://host.containers.internal:11434",
+    )
+    # Sent as the X-Api-Key header by _embed() when non-empty; surrounding
+    # whitespace is stripped.
+    BRIDGE_API_KEY = os.environ.get("FC_LLM_BRIDGE_API_KEY", "").strip()
+    # Model name sent in the embedding request body.
+    # NOTE(review): presumably must match the model used to build the DB — confirm.
+    EMBEDDING_MODEL = os.environ.get(
+        "FLOWERCORE_FLEET_EMBEDDING_MODEL",
+        "nomic-embed-text",
+    )
+
+
+    class CorpusSearch(Tool):
+        """Agent Zero tool: semantic search over the local fleet vector DBs."""
+
+        async def execute(self, **kwargs) -> Response:
+            """
+            Semantic search over the FlowerCore fleet corpus (Bible texts, lexicons,
+            dictionaries, morphology) pre-indexed by aistation-indexer.
+
+            Args (via self.args):
+                query (str): Search query text. Required unless action=stats.
+                limit (int): Max results. Default 8. A non-numeric or non-positive
+                             value falls back to the default instead of raising.
+                index (str): Optional index name filter ("bible-texts", "lexicons",
+                             "dictionaries", "morphology"). Default: all indexes.
+                repo (str): Optional repo filter (e.g. "world-english-bible").
+                db (str): Override DB path OR file name inside FLOWERCORE_FLEET_VECTOR_DIR
+                          (defaults to /a0/usr/vectors). If omitted, the largest
+                          fleet tier present on disk is picked automatically.
+                action (str): Optional. "stats" returns an inventory of all fleet DBs
+                             visible to the tool (names, sizes, index counts, chunk
+                             counts, last-built timestamps). No embedding call.
+
+            Returns:
+                Response with ranked chunks (score, source, text preview) OR
+                (when action=stats) a markdown inventory of available fleet DBs.
+                Every failure path also returns a Response (never raises) with
+                break_loop=False so the agent can keep going.
+            """
+            # str() guards against the agent passing a non-string (e.g. a number).
+            query = str(self.args.get("query") or "").strip()
+            index_filter = str(self.args.get("index") or "").strip()
+            repo_filter = str(self.args.get("repo") or "").strip()
+            db_override = str(self.args.get("db") or "").strip()
+            action = str(self.args.get("action") or "").strip().lower()
+
+            # LLM-supplied args are untrusted: "ten" or -1 must not crash the tool
+            # or turn the top-K slice into a "drop the last K" slice.
+            try:
+                limit = int(self.args.get("limit") or 8)
+            except (TypeError, ValueError):
+                limit = 8
+            if limit < 1:
+                limit = 8
+
+            if action == "stats":
+                return Response(message=_render_stats(), break_loop=False)
+
+            if not query:
+                return Response(
+                    message=(
+                        "Error: 'query' is required unless action=stats.\n"
+                        "Example: query=\"what does Genesis 1:1 say\" limit=5\n"
+                        "Inventory: action=stats"
+                    ),
+                    break_loop=False,
+                )
+
+            db = _resolve_db(db_override)
+            if db is None:
+                return Response(
+                    message=(
+                        f"Error: no fleet vector DB found under {DEFAULT_VECTORS_DIR}.\n"
+                        "Host side: run `aistation-indexer build fleet-workstation-full`\n"
+                        "(or `fleet-pi-edge`/`fleet-bmo-bot`) to produce\n"
+                        "`/var/lib/flowercore/vector-stores/<slug>.db`, then confirm the\n"
+                        "Podman unit mounts that directory into `/a0/usr/vectors:ro`."
+                    ),
+                    break_loop=False,
+                )
+
+            try:
+                query_vec = _embed(query)
+            except Exception as e:
+                return Response(
+                    message=f"Error: failed to embed query via Ollama at {OLLAMA_BASE_URL}: {e}",
+                    break_loop=False,
+                )
+
+            try:
+                hits = _search(db, query_vec, index_filter, repo_filter, limit)
+            except Exception as e:
+                return Response(
+                    message=f"Error: corpus search failed: {e}",
+                    break_loop=False,
+                )
+
+            if not hits:
+                return Response(
+                    message=(
+                        f"No matches for '{query}' in {db.name}.\n"
+                        f"Indexes available: " + _list_indexes_summary(db)
+                    ),
+                    break_loop=False,
+                )
+
+            lines = [f"**Corpus search: `{query}`**  (top {len(hits)} of {limit} requested, DB={db.name})", ""]
+            for rank, h in enumerate(hits, 1):
+                passage = h.get("passage") or ""
+                lang = h.get("language") or ""
+                meta_bits = [x for x in (h["index"], h["repo"], passage, lang) if x]
+                meta = "  ·  ".join(meta_bits)
+                # A NULL TextContent column arrives as None; treat it as empty
+                # so len() below cannot raise.
+                preview = h.get("text") or ""
+                if len(preview) > 320:
+                    preview = preview[:320].rstrip() + "…"
+                lines.append(f"{rank}. **{h['score']:.3f}**  {meta}")
+                lines.append(f"   `{h['source']}`")
+                lines.append(f"   {preview}")
+                lines.append("")
+
+            return Response(message="\n".join(lines).rstrip() + "\n", break_loop=False)
+
+
+    def _resolve_db(override: str) -> "Path | None":
+        """Pick a fleet DB by explicit path, explicit filename, or preferred order.
+
+        Args:
+            override: Caller-supplied DB selector; empty string means "auto".
+                An absolute path is used as-is; anything else is looked up as
+                a name relative to DEFAULT_VECTORS_DIR.
+
+        Returns:
+            Path to an existing regular file, or None when nothing matches.
+            An explicit override that does not resolve returns None rather
+            than silently falling back to another DB.
+        """
+        vectors_dir = Path(DEFAULT_VECTORS_DIR)
+        if override:
+            # Only an ABSOLUTE path that points at a real file wins outright.
+            # is_file() (not exists()) so a directory is never handed to sqlite.
+            p = Path(override)
+            if p.is_absolute() and p.is_file():
+                return p
+            # Otherwise treat it as a filename within the vectors dir.
+            candidate = vectors_dir / override
+            if candidate.is_file():
+                return candidate
+            return None
+
+        for name in PREFERRED_DB_ORDER:
+            if not name:
+                # Unset FLOWERCORE_FLEET_VECTOR_DB yields "" — skip it.
+                continue
+            p = Path(name) if Path(name).is_absolute() else vectors_dir / name
+            if p.is_file():
+                return p
+
+        # Fallback: any *.db file in the dir, largest first.
+        if vectors_dir.is_dir():
+            candidates = sorted(
+                (c for c in vectors_dir.glob("*.db") if c.is_file()),
+                key=lambda c: c.stat().st_size,
+                reverse=True,
+            )
+            if candidates:
+                return candidates[0]
+        return None
+
+
+    def _embed(text: str) -> list:
+        """Return the embedding vector for ``text`` from Ollama's /api/embeddings.
+
+        Posts ``{"model": EMBEDDING_MODEL, "prompt": text}`` as JSON to
+        OLLAMA_BASE_URL, adding an X-Api-Key header when BRIDGE_API_KEY is set,
+        and reads the single vector from the response's "embedding" field.
+
+        Raises:
+            RuntimeError: the response carries no non-empty "embedding" list.
+            Any urllib / JSON error from the request itself propagates.
+        """
+        endpoint = f"{OLLAMA_BASE_URL.rstrip('/')}/api/embeddings"
+        request_headers = {"Content-Type": "application/json"}
+        if BRIDGE_API_KEY:
+            request_headers["X-Api-Key"] = BRIDGE_API_KEY
+        payload = json.dumps({"model": EMBEDDING_MODEL, "prompt": text}).encode("utf-8")
+
+        request = urllib.request.Request(endpoint, data=payload, headers=request_headers)
+        with urllib.request.urlopen(request, timeout=60) as reply:
+            parsed = json.loads(reply.read().decode("utf-8"))
+
+        embedding = parsed.get("embedding")
+        if isinstance(embedding, list) and embedding:
+            return [float(component) for component in embedding]
+        raise RuntimeError(f"Ollama returned no embedding: {parsed}")
+
+
+    def _cosine(a: list, b: list) -> float:
+        """Cosine similarity in pure Python — no numpy in the A0 container.
+
+        Args:
+            a, b: Numeric vectors of the same length.
+
+        Returns:
+            The cosine of the angle between ``a`` and ``b``, or 0.0 when the
+            lengths differ or either vector has zero magnitude.
+        """
+        # The DB keeps one dimension per index, but the QUERY vector comes from
+        # whatever embedding model is configured. If the two disagree, zip()
+        # would silently truncate to the shorter vector and return a
+        # plausible-looking but meaningless score — rank such rows last instead.
+        if len(a) != len(b):
+            return 0.0
+        dot = 0.0
+        na = 0.0
+        nb = 0.0
+        for x, y in zip(a, b):
+            dot += x * y
+            na += x * x
+            nb += y * y
+        if na == 0.0 or nb == 0.0:
+            return 0.0
+        return dot / (math.sqrt(na) * math.sqrt(nb))
+
+
+    def _search(db_path: Path, query_vec: list, index_filter: str, repo_filter: str, limit: int) -> list:
+        """Load entries, compute cosine, return top-K.
+
+        SqliteVecVectorStore schema:
+          VectorIndexes(IndexName, Dimensions, UpdatedAtUtc)
+          VectorEntries(IndexName, ChunkId, TextContent, SourceRepo, SourceFile,
+                        Book, Chapter, VerseRange, Language, ContentType, License,
+                        EstimatedTokens, EmbeddingJson)
+
+        Embeddings are stored as JSON arrays in EmbeddingJson; similarity is computed
+        in Python. For ~100k chunks × 768 dims this takes a couple seconds on a
+        workstation — acceptable for interactive A0 use.
+
+        Args:
+            db_path: SQLite file to open read-only.
+            query_vec: Embedded query vector.
+            index_filter: Exact IndexName to restrict to; empty for all.
+            repo_filter: Substring matched against SourceRepo; empty for all.
+            limit: Maximum number of hits; values <= 0 yield an empty list.
+
+        Returns:
+            Up to ``limit`` dicts sorted by descending score, each with keys
+            score, index, chunk_id, text, repo, source, passage, language.
+            Rows whose embedding is missing or malformed are skipped.
+        """
+        # A negative limit would make the final slice drop the tail instead of
+        # limiting, so treat every non-positive limit as "no results".
+        if limit <= 0:
+            return []
+
+        conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True)
+        try:
+            sql = [
+                "SELECT IndexName, ChunkId, TextContent, SourceRepo, SourceFile, ",
+                "       Book, Chapter, VerseRange, Language, EmbeddingJson ",
+                "FROM VectorEntries",
+            ]
+            # Filters are bound as parameters, never interpolated into the SQL.
+            where = []
+            params = []
+            if index_filter:
+                where.append("IndexName = ?")
+                params.append(index_filter)
+            if repo_filter:
+                where.append("SourceRepo LIKE ?")
+                params.append(f"%{repo_filter}%")
+            if where:
+                sql.append(" WHERE " + " AND ".join(where))
+            sql.append(";")
+
+            cursor = conn.execute("".join(sql), params)
+
+            # Min-heap by (score, ...) would be faster but for interactive use we
+            # just sort at the end — simpler and readable.
+            scored = []
+            for row in cursor:
+                idx, chunk_id, text, repo, source_file, book, chapter, verses, lang, emb_json = row
+                try:
+                    vec = json.loads(emb_json)
+                except (json.JSONDecodeError, TypeError):
+                    continue
+                # Valid JSON that is not an array (null, number, object) is just
+                # as unusable as unparseable JSON — skip the row, don't abort.
+                if not isinstance(vec, list):
+                    continue
+                try:
+                    score = _cosine(query_vec, vec)
+                except TypeError:
+                    # Non-numeric array elements; one bad row must not fail the search.
+                    continue
+                passage = None
+                if book and chapter:
+                    passage = f"{book} {chapter}"
+                    if verses:
+                        passage += f":{verses}"
+                scored.append((score, {
+                    "index": idx,
+                    "chunk_id": chunk_id,
+                    # NULL TextContent becomes "" so callers can len()/slice it.
+                    "text": text or "",
+                    "repo": repo or "",
+                    "source": source_file or "",
+                    "passage": passage or "",
+                    "language": lang or "",
+                }))
+            scored.sort(key=lambda t: t[0], reverse=True)
+            return [{"score": s, **meta} for s, meta in scored[:limit]]
+        finally:
+            conn.close()
+
+
+    def _render_stats() -> str:
+        """Build a markdown inventory of every *.db file in DEFAULT_VECTORS_DIR.
+
+        For each DB (sorted by path) the report lists file size plus, per
+        registered index, chunk count, dimensions and build timestamp. A DB that
+        cannot be inspected gets an "inspect failed" bullet instead of aborting
+        the whole report. Returns a short hint message when the directory is
+        missing or holds no DBs.
+        """
+        root = Path(DEFAULT_VECTORS_DIR)
+        if not root.is_dir():
+            return f"No fleet vector dir mounted at {root}. Ask the host operator to build an index with scripts/agent-zero/build-fleet-index.sh."
+
+        db_files = sorted(root.glob("*.db"))
+        if not db_files:
+            return f"No fleet DBs present under {root}. Run `scripts/agent-zero/build-fleet-index.sh fleet-workstation-full` on the host."
+
+        report = [f"**Fleet vector DB inventory** ({root})", ""]
+        for db_file in db_files:
+            megabytes = db_file.stat().st_size / (1024 * 1024)
+            report.append(f"### `{db_file.name}` ({megabytes:.1f} MB)")
+            try:
+                handle = sqlite3.connect(f"file:{db_file}?mode=ro", uri=True)
+                try:
+                    registered = handle.execute(
+                        "SELECT IndexName, Dimensions, UpdatedAtUtc FROM VectorIndexes ORDER BY IndexName;"
+                    ).fetchall()
+                    if registered:
+                        # One grouped query gives the chunk total for every index.
+                        per_index = dict(handle.execute(
+                            "SELECT IndexName, COUNT(*) FROM VectorEntries GROUP BY IndexName;"
+                        ).fetchall())
+                        for index_name, dimensions, built_at in registered:
+                            chunk_total = per_index.get(index_name, 0)
+                            report.append(f"- **{index_name}** — {chunk_total:,} chunks × {dimensions}d  (built {built_at})")
+                    else:
+                        report.append("- (no indexes registered)")
+                finally:
+                    handle.close()
+            except Exception as err:
+                report.append(f"- (inspect failed: {err})")
+            report.append("")
+
+        report.append(f"**Tool defaults:** embedding model `{EMBEDDING_MODEL}`, Ollama at `{OLLAMA_BASE_URL}`. Pick a DB with `db=<filename>`; filter by `index=<name>`/`repo=<substring>`.")
+        return "\n".join(report).rstrip() + "\n"
+
+
+    def _list_indexes_summary(db_path: Path) -> str:
+        """One-line summary of the indexes in ``db_path``: ``name(chunks×dims d), ...``.
+
+        Returns "(no indexes)" when VectorIndexes is empty, and
+        "(couldn't list: <error>)" when the DB cannot be opened or queried.
+        """
+        summary_sql = (
+            "SELECT IndexName, Dimensions, "
+            "  (SELECT COUNT(*) FROM VectorEntries WHERE VectorEntries.IndexName = VectorIndexes.IndexName) "
+            "FROM VectorIndexes ORDER BY IndexName;"
+        )
+        try:
+            handle = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True)
+            try:
+                summary_rows = handle.execute(summary_sql).fetchall()
+            finally:
+                handle.close()
+            if not summary_rows:
+                return "(no indexes)"
+            parts = []
+            for index_name, dimensions, chunk_total in summary_rows:
+                parts.append(f"{index_name}({chunk_total}×{dimensions}d)")
+            return ", ".join(parts)
+        except Exception as err:
+            return f"(couldn't list: {err})"
+
+  intranet_search.py: |
+    # Intranet Vector Search Tool
+    # Queries the Blue Jay Lab Intranet's Shared.Indexing RAG corpus over its
+    # live REST API (GET <FLOWERCORE_INTRANET_URL>/api/search; the base URL defaults
+    # to https://intranet.iamworkin.lan). Returns ranked chunks with source file
+    # paths and scores.
+
+    import json
+    import os
+    import ssl
+    import urllib.parse
+    import urllib.request
+
+    from python.helpers.tool import Tool, Response
+
+
+    # Base URL of the intranet search service; override with
+    # FLOWERCORE_INTRANET_URL. The search tool appends "/api/search" to it.
+    INTRANET_BASE_URL = os.environ.get(
+        "FLOWERCORE_INTRANET_URL",
+        "https://intranet.iamworkin.lan",
+    )
+    # Extra CA certificate; _ssl_ctx() loads it only when this file exists.
+    STEPCA_ROOT_CRT = "/a0/usr/ca/stepca-root.crt"
+
+
+    def _ssl_ctx() -> ssl.SSLContext:
+        """Default TLS context, additionally trusting STEPCA_ROOT_CRT when that file exists."""
+        context = ssl.create_default_context()
+        if not os.path.exists(STEPCA_ROOT_CRT):
+            return context
+        context.load_verify_locations(cafile=STEPCA_ROOT_CRT)
+        return context
+
+
+    class IntranetSearch(Tool):
+        """Agent Zero tool: query the intranet's live search REST API."""
+
+        async def execute(self, **kwargs) -> Response:
+            """
+            Search the Blue Jay Lab intranet corpus (docs, project notes, dashboards).
+
+            Args (via self.args):
+                query (str): Search query. Required.
+                limit (int): Max chunks to return. Default 8. A non-numeric or
+                             non-positive value falls back to the default.
+                corpus (str): Optional corpus filter (e.g. "notes", "docs").
+
+            Returns:
+                Response with ranked chunk text, source path, and score. Request,
+                decode and empty-result failures are reported in the Response
+                message (break_loop=False) rather than raised.
+            """
+            # `or ""` / try-except: the agent may send None or a non-numeric
+            # value; that must produce a normal tool reply, not a traceback.
+            query = str(self.args.get("query") or "").strip()
+            corpus = str(self.args.get("corpus") or "").strip()
+            try:
+                limit = int(self.args.get("limit") or 8)
+            except (TypeError, ValueError):
+                limit = 8
+            if limit < 1:
+                limit = 8
+
+            if not query:
+                return Response(
+                    message="Error: 'query' is required.",
+                    break_loop=False,
+                )
+
+            params = {"q": query, "topK": str(limit)}
+            if corpus:
+                params["indexName"] = corpus
+            # rstrip so a base URL configured with a trailing slash does not
+            # produce "//api/search".
+            url = f"{INTRANET_BASE_URL.rstrip('/')}/api/search?{urllib.parse.urlencode(params)}"
+
+            try:
+                req = urllib.request.Request(url, headers={"Accept": "application/json"})
+                with urllib.request.urlopen(req, timeout=20, context=_ssl_ctx()) as resp:
+                    raw = resp.read().decode("utf-8", errors="replace")
+            except Exception as exc:
+                return Response(
+                    message=f"Intranet search failed: {exc}\nURL: {url}",
+                    break_loop=False,
+                )
+
+            try:
+                data = json.loads(raw)
+            except json.JSONDecodeError:
+                return Response(
+                    message=f"Intranet returned non-JSON response:\n{raw[:500]}",
+                    break_loop=False,
+                )
+
+            # Accept either a bare list or an envelope under one of several
+            # keys; any other JSON shape counts as "no results".
+            if isinstance(data, list):
+                hits = data
+            elif isinstance(data, dict):
+                hits = data.get("results") or data.get("hits") or data.get("chunks") or []
+            else:
+                hits = []
+            if not isinstance(hits, list):
+                hits = []
+            # Only object-shaped hits can be rendered below.
+            hits = [hit for hit in hits if isinstance(hit, dict)]
+            if not hits:
+                return Response(
+                    message=f"No intranet results for query: {query!r}",
+                    break_loop=False,
+                )
+
+            lines = [f"# Intranet search: {query} ({len(hits)} hits)\n"]
+            for i, hit in enumerate(hits[:limit], 1):
+                src = (
+                    hit.get("sourceFile")
+                    or hit.get("source")
+                    or hit.get("path")
+                    or hit.get("file")
+                    or "?"
+                )
+                repo = hit.get("sourceRepo") or ""
+                idx = hit.get("indexName") or ""
+                # Compare against None, not truthiness: 0.0 is a legitimate score.
+                score = hit.get("score")
+                if score is None:
+                    score = hit.get("similarity")
+                text = str(
+                    hit.get("snippet")
+                    or hit.get("text")
+                    or hit.get("content")
+                    or hit.get("chunk")
+                    or ""
+                ).strip()
+                if len(text) > 600:
+                    text = text[:600] + "..."
+                header = f"## [{i}] {repo}/{src}" if repo else f"## [{i}] {src}"
+                if idx:
+                    header += f"  ({idx})"
+                if score is not None and score != "":
+                    header += f"  score={score:.3f}" if isinstance(score, float) else f"  score={score}"
+                lines.append(header)
+                lines.append(text)
+                lines.append("")
+
+            return Response(message="\n".join(lines), break_loop=False)
+
 kind: ConfigMap
 metadata:
  name: bluejay-tools-c
--- a/apps/edge2-services/edge2-services.yaml
+++ b/apps/edge2-services/edge2-services.yaml
@@ -0,0 +1,106 @@
+# edge2 Services — Traefik IngressRoutes for FlowerCore Print.Web on edge2
+# Proxies print.iamworkin.lan to edge2 (10.0.57.16:5200) via headless Service
+# + manual Endpoints (same K8s external-proxy pattern as noc-services).
+#
+# Print.Web has its own X-Api-Key authentication and exposes anonymous
+# endpoints for the bookmarklet / Python CLI / cups-notifier flow, so no
+# Traefik basicAuth middleware is wired here.
+#
+# ArgoCD managed - BlueJay Lab
+---
+# Namespace holding the proxy objects below (Service, Endpoints, Certificate,
+# IngressRoute, NetworkPolicy). This file defines no workloads in it.
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: edge2-proxy
+  labels:
+    app.kubernetes.io/part-of: bluejay-infra
+---
+# ============================================================
+# Print.Web - edge2:5200 (FlowerCore.Print.Web on Pi 4)
+# ============================================================
+# Selector-less headless Service: it has no pod selector, so its backends
+# come from the manually maintained Endpoints object of the same name.
+apiVersion: v1
+kind: Service
+metadata:
+  name: print-web-external
+  namespace: edge2-proxy
+spec:
+  ports:
+    - port: 5200
+      targetPort: 5200
+      name: http
+  # Headless: no cluster IP is allocated for this Service.
+  clusterIP: None
+---
+# Manual Endpoints for the selector-less Service above. The name and
+# namespace must match the Service, and the port name must match its `http`
+# port, for the two to be associated.
+apiVersion: v1
+kind: Endpoints
+metadata:
+  name: print-web-external
+  namespace: edge2-proxy
+subsets:
+  - addresses:
+      # edge2 host running Print.Web (see the file header).
+      - ip: 10.0.57.16
+    ports:
+      - port: 5200
+        name: http
+---
+# TLS certificate for print.iamworkin.lan, issued by the step-ca-acme
+# ClusterIssuer and stored in Secret print-web-tls (consumed by the
+# IngressRoute's tls.secretName).
+apiVersion: cert-manager.io/v1
+kind: Certificate
+metadata:
+  name: print-web-tls
+  namespace: edge2-proxy
+spec:
+  secretName: print-web-tls
+  issuerRef:
+    name: step-ca-acme
+    kind: ClusterIssuer
+  dnsNames:
+    - print.iamworkin.lan
+---
+# Routes HTTPS requests for print.iamworkin.lan on the websecure entry point
+# to the print-web-external Service. No middlewares are attached (see the
+# file header: Print.Web handles its own X-Api-Key authentication).
+apiVersion: traefik.io/v1alpha1
+kind: IngressRoute
+metadata:
+  name: print-web
+  namespace: edge2-proxy
+spec:
+  entryPoints:
+    - websecure
+  routes:
+    - kind: Rule
+      match: Host(`print.iamworkin.lan`)
+      services:
+        - name: print-web-external
+          port: 5200
+  tls:
+    # Secret written by the print-web-tls Certificate.
+    secretName: print-web-tls
+---
+# NetworkPolicy: allow Traefik ingress, allow egress to edge2 + DNS
+# NOTE(review): podSelector {} applies to pods running in edge2-proxy, and
+# this file defines none — the policy appears to be a guard for anything
+# scheduled here later rather than a filter on the Traefik -> edge2 path.
+# Confirm that is the intent.
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  name: edge2-proxy-netpol
+  namespace: edge2-proxy
+spec:
+  podSelector: {}
+  policyTypes:
+    - Ingress
+    - Egress
+  ingress:
+    # Only namespaces labelled as traefik-system may connect in.
+    - from:
+        - namespaceSelector:
+            matchLabels:
+              kubernetes.io/metadata.name: traefik-system
+  egress:
+    # Print.Web on edge2.
+    - to:
+        - ipBlock:
+            cidr: 10.0.57.16/32
+      ports:
+        - port: 5200
+          protocol: TCP
+    # DNS lookups against kube-system (UDP and TCP 53).
+    - to:
+        - namespaceSelector:
+            matchLabels:
+              kubernetes.io/metadata.name: kube-system
+      ports:
+        - port: 53
+          protocol: UDP
+        - port: 53
+          protocol: TCP
--- a/apps/fc-llm-bridge/fc-llm-bridge.yaml
+++ b/apps/fc-llm-bridge/fc-llm-bridge.yaml
@@ -97,7 +97,7 @@ spec:
          #   dotnet.exe publish -c Release -o deploy/app \
          #     src/FlowerCore.LlmBridge.Web/FlowerCore.LlmBridge.Web.csproj
          #   podman build -t localhost/fc-llm-bridge:v<tag> -f deploy/Dockerfile.deploy deploy
-          image: localhost/fc-llm-bridge:v202604231520
+          image: localhost/fc-llm-bridge:v202604292028
          imagePullPolicy: Never
          ports:
            - containerPort: 8080
@@ -116,6 +116,10 @@ spec:
              value: "default"
            - name: FlowerCore__LlmBridge__DefaultAppName
              value: "agent-zero"
+            # Model names the bridge uses for its utility and embedding roles.
+            # NOTE(review): presumably model tags served by the upstream the
+            # bridge fronts — confirm both are actually pulled there.
+            - name: FlowerCore__LlmBridge__UtilModel
+              value: "qwen2.5:1.5b"
+            - name: FlowerCore__LlmBridge__EmbedModel
+              value: "nomic-embed-text"
            # Per-consumer API keys — from OnePasswordItem fc-llm-bridge-api-keys.
            # Each field becomes a Secret key of the same name. The key-name
            # lands in the auth principal's `fc.app` claim for ledger scoping.
--- a/apps/fc-ttsreader/fc-ttsreader.yaml
+++ b/apps/fc-ttsreader/fc-ttsreader.yaml
@@ -296,14 +296,23 @@ spec:
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 18
+          # Sprint E Phase 1a (kokoro stability) — 4 restarts in 2d6h with
+          # exit 143 traced to liveness probe `context deadline exceeded` while
+          # kokoro was busy synthesizing. /v1/audio/voices shares the FastAPI
+          # worker pool with /v1/audio/speech, so a long synth can starve the
+          # probe past the prior 5s timeout 3 times in a row. Bump timeoutSeconds
+          # 5 → 15 and failureThreshold 3 → 5: at periodSeconds: 30 that is 5
+          # consecutive failed probes (~2+ min) before kubelet restarts the
+          # container. The TtsCircuitBreaker on the synthesizer side (Phase 1b)
+          # backs this up so the FC backend stops slamming kokoro during recovery.
          livenessProbe:
            httpGet:
              path: /v1/audio/voices
              port: 8880
            initialDelaySeconds: 180
            periodSeconds: 30
-            timeoutSeconds: 5
-            failureThreshold: 3
+            timeoutSeconds: 15
+            failureThreshold: 5
 ---
 # fc-biblical-tts — eSpeak-NG-backed Ancient Greek + Hebrew TTS with
 # word-level timing for read-along playback. Companion to ttsreader-kokoro
@@ -379,6 +388,94 @@ spec:
      targetPort: 10402
      name: http
 ---
+# fc-modern-tts — Microsoft Edge Read Aloud bridge for Modern Hebrew
+# (he-IL-AvriNeural et al) and Modern Greek (el-GR-NestorasNeural et al).
+# Pairs with ttsreader-biblical: biblical engine handles unpointed
+# Greek + Hebrew, modern engine handles narrative translations the
+# operator reads alongside.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: ttsreader-modern
+  namespace: fc-ttsreader
+  labels:
+    app.kubernetes.io/name: ttsreader-modern
+    app.kubernetes.io/part-of: flowercore
+spec:
+  replicas: 1
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: ttsreader-modern
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: ttsreader-modern
+        app.kubernetes.io/part-of: flowercore
+    spec:
+      # edge-tts needs outbound access to Microsoft's Edge Read Aloud
+      # endpoint. NOTE(review): upstream edge-tts targets
+      # speech.platform.bing.com rather than *.tts.speech.microsoft.com —
+      # confirm the host for the pinned version before writing egress rules.
+      # dnsPolicy: None with an explicit resolver + search list bypasses the
+      # iamworkin.lan template hijack so the lookup doesn't fall back to
+      # Traefik VIP via search expansion.
+      dnsPolicy: None
+      dnsConfig:
+        nameservers:
+          - 10.43.0.10
+        searches:
+          - fc-ttsreader.svc.cluster.local
+          - svc.cluster.local
+          - cluster.local
+        options:
+          # Names with two or more dots are tried as absolute names first.
+          - name: ndots
+            value: "2"
+      securityContext:
+        # UID 1654 is the `tts` user created in modern-tts/Dockerfile.
+        fsGroup: 1654
+        runAsNonRoot: true
+        runAsUser: 1654
+      containers:
+        - name: modern-tts
+          image: localhost/fc-modern-tts:v1
+          # Never pull: the image must already be present on the node.
+          imagePullPolicy: Never
+          ports:
+            - containerPort: 10403
+              name: http
+          resources:
+            requests:
+              cpu: 100m
+              memory: 128Mi
+            limits:
+              cpu: 1000m
+              memory: 512Mi
+          # Both probes hit the app's GET /health route, which returns a
+          # static payload.
+          readinessProbe:
+            httpGet:
+              path: /health
+              port: 10403
+            initialDelaySeconds: 5
+            periodSeconds: 10
+            timeoutSeconds: 5
+            failureThreshold: 6
+          livenessProbe:
+            httpGet:
+              path: /health
+              port: 10403
+            initialDelaySeconds: 30
+            periodSeconds: 30
+            timeoutSeconds: 5
+            failureThreshold: 3
+---
+# ClusterIP Service in front of the ttsreader-modern Deployment; selects its
+# pods by app.kubernetes.io/name and forwards port 10403 unchanged.
+apiVersion: v1
+kind: Service
+metadata:
+  name: ttsreader-modern
+  namespace: fc-ttsreader
+spec:
+  selector:
+    app.kubernetes.io/name: ttsreader-modern
+  ports:
+    - port: 10403
+      targetPort: 10403
+      name: http
+---
 apiVersion: v1
 kind: Service
 metadata:
@@ -422,7 +519,7 @@ spec:
        fsGroupChangePolicy: OnRootMismatch
      containers:
        - name: web
-          image: localhost/fc-ttsreader-web:v202604251714
+          image: localhost/fc-ttsreader-web:v202604291817
          imagePullPolicy: Never
          ports:
            - containerPort: 5217
@@ -485,6 +582,19 @@ spec:
              value: "/data/logs"
            - name: TtsReader__Runtime__SmokeStatePath
              value: "/data/ops/smoke-status.json"
+            # Sprint E Day 8 voice-preview disk cache — writes WAVs under
+            # this directory. Default "data/voice-previews" resolves to
+            # the read-only $HOME path under runAsNonRoot=true. Pin to
+            # the writable PVC mount.
+            - name: TtsReader__Preview__CacheDirectory
+              value: "/data/voice-previews"
+            # Sprint E XXL Phase 4γ — content-addressed CDN bundle dir for
+            # POST /api/v1/render. Default "wwwroot/cdn" resolves under the
+            # read-only app filesystem, so pin to the writable PVC mount
+            # alongside other TtsReader runtime data. Manifests + cue audio
+            # land at /data/cdn/sha256/<hash>/manifest.json + cues/.
+            - name: TtsReader__Render__CdnDirectory
+              value: "/data/cdn"
            - name: Auth__ApiKey
              valueFrom:
                secretKeyRef:
--- a/apps/fc-ttsreader/modern-tts/Dockerfile
+++ b/apps/fc-ttsreader/modern-tts/Dockerfile
@@ -0,0 +1,36 @@
+# FlowerCore modern-tts — wraps Microsoft Edge's Read Aloud TTS service
+# (via the edge-tts Python package) to give the cluster studio-quality
+# Modern Hebrew (he-IL-*) and Modern Greek (el-GR-*) voices alongside the
+# eSpeak biblical engine. Same shape as fc-biblical-tts so the .NET client
+# lives in the same Shared.Speech package.
+#
+# Note: edge-tts depends on Microsoft's public Edge Read Aloud endpoint, so
+# the cluster pod needs outbound egress to it. NOTE(review): upstream
+# edge-tts targets speech.platform.bing.com rather than
+# *.tts.speech.microsoft.com — confirm the host for the pinned version
+# before writing firewall rules. dnsPolicy: None on the Deployment makes
+# sure the iamworkin.lan template hijack doesn't rewrite the lookup back to
+# Traefik VIP.
+FROM python:3.12-slim
+
+# No .pyc files, unbuffered stdout/stderr for container logs, and a quiet,
+# cache-free pip.
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    PIP_DISABLE_PIP_VERSION_CHECK=1 \
+    PIP_NO_CACHE_DIR=1
+
+# CA bundle for outbound TLS; apt lists are removed to keep the layer small.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
+        ca-certificates \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+# Requirements are copied and installed before app.py so code-only changes
+# reuse the cached dependency layer. (--no-cache-dir repeats what
+# PIP_NO_CACHE_DIR=1 already sets; harmless.)
+COPY requirements.txt /app/
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY app.py /app/
+
+# Non-root user; UID 1654 matches runAsUser/fsGroup in the ttsreader-modern
+# Deployment.
+RUN useradd --create-home --shell /usr/sbin/nologin --uid 1654 tts
+USER 1654
+
+EXPOSE 10403
+# Container-level health check against the app's /health route; urlopen
+# raises (non-zero exit) on connection failure or an HTTP error status.
+HEALTHCHECK --interval=30s --timeout=5s --start-period=20s --retries=3 \
+    CMD python -c "import urllib.request,sys; urllib.request.urlopen('http://127.0.0.1:10403/health',timeout=3); sys.exit(0)" || exit 1
+
+# Single uvicorn worker serving the FastAPI `app` object from app.py.
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "10403", "--workers", "1"]
--- a/apps/fc-ttsreader/modern-tts/app.py
+++ b/apps/fc-ttsreader/modern-tts/app.py
@@ -0,0 +1,238 @@
+"""FlowerCore modern-tts — Microsoft Edge Read Aloud bridge for Modern
+Hebrew and Modern Greek (and other Edge-supported languages).
+
+Endpoints:
+
+* POST /tts          — body: {"text", "voice"?, "language"?, "rate"?,
+                       "volume"?, "pitch"?}; returns audio/mpeg (Edge
+                       returns MP3) which the upstream
+                       FasterWhisperAlignmentClient + the WPF MediaPlayer
+                       both handle natively.
+* POST /timings      — same body shape but returns
+                       {"text", "voice", "words": [{"text","startMs","endMs"}],
+                        "durationMs": ..., "audioBytes": ...} sourced from
+                       Edge's WordBoundary events — much more accurate than
+                       eSpeak's proportional-distribution approach because
+                       Edge emits real per-word offsets during synthesis.
+                       When Edge sends no such events, timings fall back to
+                       a proportional estimate over the input text.
+* GET  /voices       — voice catalog Edge knows about. Filtered to
+                       Hebrew + Greek by default; ?language=all returns
+                       everything Edge supports.
+* GET  /health       — fast readiness check.
+
+Pairs with fc-biblical-tts (eSpeak Ancient Greek + Hebrew). The biblical
+engine handles unpointed Hebrew + Erasmian Greek; this engine handles
+narrative Modern Hebrew + Modern Greek for translations the operator
+might be reading alongside the original.
+"""
+from __future__ import annotations
+
+import io
+import logging
+from typing import Optional
+
+import edge_tts
+from fastapi import FastAPI, HTTPException
+from fastapi.responses import JSONResponse, Response
+from pydantic import BaseModel
+
+# Module logger; basicConfig below configures the root logger's level and
+# line format at import time.
+LOG = logging.getLogger("modern_tts")
+logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
+
+# ASGI application object; the route decorators in this module register on it.
+app = FastAPI(title="FlowerCore modern-tts", version="1.0.0")
+
+# Default voices by short code so AiStation can pick a sensible default
+# when the operator hasn't explicitly asked for one. Edge has multiple
+# voices per locale — these are the calmest male+female narrators.
+# Keys are compared against TtsRequest.language by _resolve_voice; the "he"
+# entry doubles as the final fallback there.
+DEFAULT_VOICES = {
+    "he":    "he-IL-AvriNeural",
+    "he-IL": "he-IL-AvriNeural",
+    "el":    "el-GR-NestorasNeural",
+    "el-GR": "el-GR-NestorasNeural",
+    "en":    "en-US-AriaNeural",
+}
+
+
+class TtsRequest(BaseModel):
+    # Request body shared by POST /tts and POST /timings.
+    # Text to synthesize; the handlers reject it when blank.
+    text: str
+    # Explicit Edge voice short name (e.g. "he-IL-AvriNeural"); takes
+    # precedence over `language` when set.
+    voice: Optional[str] = None
+    # Key into DEFAULT_VOICES (e.g. "he", "el-GR"); consulted only when no
+    # voice is given.
+    language: Optional[str] = None
+    # Prosody settings passed straight through to edge_tts.Communicate.
+    rate: str = "+0%"     # Edge accepts +20%, -10%, etc.
+    volume: str = "+0%"
+    pitch: str = "+0Hz"
+
+
+def _resolve_voice(req: TtsRequest) -> str:
+    """Pick the Edge voice short name for a request.
+
+    Precedence:
+      1. ``req.voice``, trimmed — ignored when it is empty or whitespace-only
+         (previously a blank voice was returned as ``""``).
+      2. The DEFAULT_VOICES entry for ``req.language``, matched
+         case-insensitively on the full tag and then on its primary subtag,
+         so "he-il", "EL" and "el-CY" resolve instead of silently falling
+         through to the Hebrew default.
+      3. ``DEFAULT_VOICES["he"]``.
+    """
+    voice = (req.voice or "").strip()
+    if voice:
+        return voice
+
+    # Accept "he_IL" as well as "he-IL"; compare in lower case.
+    tag = (req.language or "").strip().replace("_", "-").lower()
+    if tag:
+        by_lower_key = {key.lower(): name for key, name in DEFAULT_VOICES.items()}
+        for candidate in (tag, tag.split("-", 1)[0]):
+            if candidate in by_lower_key:
+                return by_lower_key[candidate]
+
+    return DEFAULT_VOICES["he"]
+
+
+@app.get("/health")
+def health():
+    # Static readiness/liveness payload; performs no upstream calls.
+    status_payload = {"status": "ok"}
+    return status_payload
+
+
+@app.get("/voices")
+async def voices(language: str = "default"):
+    # Returns {"voices": [...]} from Edge's voice catalog. `language=all`
+    # returns the full catalog; any other value returns the Hebrew + Greek
+    # subset.
+    try:
+        catalog = await edge_tts.list_voices()
+    except Exception as ex:
+        # Upstream catalog fetch failed: report it as a gateway error, the
+        # same way /tts and /timings report synthesis failures.
+        LOG.warning("edge-tts voice list failed: %s", ex)
+        raise HTTPException(status_code=502, detail=f"edge-tts voice list failure: {ex}") from ex
+
+    if language == "all":
+        return {"voices": catalog}
+
+    # Default response: filter to languages relevant to the FlowerCore
+    # biblical workflow (Hebrew + Greek) so the AiStation voice picker
+    # isn't overwhelmed by 400+ Edge voices.
+    keep = ("he-", "el-")
+    filtered = [v for v in catalog if v.get("ShortName", "").startswith(keep)]
+    return {"voices": filtered}
+
+
+async def _synth_with_subtitles(req: TtsRequest):
+    """Run one edge-tts synthesis and collect the audio plus word events.
+
+    Returns a ``(voice, audio_bytes, word_events)`` tuple where ``voice`` is
+    the resolved voice name, ``audio_bytes`` the concatenated audio chunks and
+    ``word_events`` a list of ``{"text", "offset", "duration"}`` dicts with
+    offset/duration in 100-ns ticks. Exceptions raised by edge-tts propagate
+    to the caller.
+    """
+    voice = _resolve_voice(req)
+    LOG.info("edge-tts synth voice=%s len=%d", voice, len(req.text))
+    communicate = edge_tts.Communicate(
+        req.text,
+        voice=voice,
+        rate=req.rate,
+        volume=req.volume,
+        pitch=req.pitch,
+        # The edge-tts 7.2.x line pinned in requirements.txt defaults
+        # `boundary` to "SentenceBoundary"; without asking for word events
+        # explicitly the WordBoundary branch below never fires and /timings
+        # always degrades to estimated timings.
+        boundary="WordBoundary",
+    )
+    audio_buf = io.BytesIO()
+    word_events: list[dict] = []
+    async for chunk in communicate.stream():
+        kind = chunk["type"]
+        if kind == "audio":
+            audio_buf.write(chunk["data"])
+        elif kind == "WordBoundary":
+            word_events.append({
+                "text": chunk.get("text") or "",
+                "offset": chunk.get("offset", 0),       # 100-ns ticks
+                "duration": chunk.get("duration", 0),   # 100-ns ticks
+            })
+    return voice, audio_buf.getvalue(), word_events
+
+
+def _to_ms(ticks_100ns: int) -> int:
+    """Convert a count of 100-nanosecond ticks into whole milliseconds.
+
+    Edge reports offsets in .NET TimeSpan-style ticks; 10,000 of them make
+    one millisecond. The quotient is rounded with the built-in round().
+    """
+    ticks_per_ms = 10_000
+    milliseconds = round(ticks_100ns / ticks_per_ms)
+    return int(milliseconds)
+
+
+@app.post("/tts")
+async def tts(req: TtsRequest):
+    # Synthesizes req.text and returns the audio as audio/mpeg, with the
+    # resolved voice echoed in the X-FlowerCore-Voice header.
+    # Status codes: 400 for blank text or rejected parameters, 502 when the
+    # upstream synthesis fails or yields no audio.
+    if not req.text.strip():
+        raise HTTPException(status_code=400, detail="text is required")
+    try:
+        voice, audio_bytes, _ = await _synth_with_subtitles(req)
+    except edge_tts.exceptions.NoAudioReceived as ex:
+        raise HTTPException(
+            status_code=502,
+            detail="edge-tts returned no audio for the supplied voice/text.",
+        ) from ex
+    except ValueError as ex:
+        # NOTE(review): edge-tts is expected to raise ValueError when it
+        # rejects a malformed voice/rate/volume/pitch string. That is a
+        # caller error, not an upstream outage, so answer 400 rather than 502.
+        raise HTTPException(status_code=400, detail=f"invalid TTS parameter: {ex}") from ex
+    except Exception as ex:
+        LOG.warning("edge-tts synth failed: %s", ex)
+        raise HTTPException(status_code=502, detail=f"edge-tts failure: {ex}") from ex
+    if not audio_bytes:
+        raise HTTPException(status_code=502, detail="edge-tts returned an empty audio stream.")
+    return Response(content=audio_bytes, media_type="audio/mpeg",
+                    headers={"X-FlowerCore-Voice": voice})
+
+
+def _estimate_duration_ms_from_mp3(audio_bytes: bytes) -> int:
+    """Best-effort duration estimate, in milliseconds, from raw MP3 bytes.
+
+    Walks MPEG Layer III frame headers and adds up the samples each accepted
+    frame carries (1152 for MPEG1, 576 for MPEG2 / MPEG2.5), then divides by
+    the sample rate of those frames. Bytes that do not form a plausible
+    header are skipped one at a time.
+
+    Only headers that pass every check contribute. A rejected candidate
+    (false sync, reserved sample-rate or bitrate index) no longer overwrites
+    the sample rate used for the final division, so trailing garbage cannot
+    zero or skew the estimate.
+
+    Returns 0 when the input is empty or no valid frame is found, so the
+    caller can fall back to a per-word heuristic.
+    """
+    if not audio_bytes:
+        return 0
+
+    # Lookup tables indexed by the header's 2-bit sample-rate index and
+    # 4-bit bitrate index; 0 marks reserved/free values we cannot size.
+    sample_rates_by_version = {
+        0x03: (44100, 48000, 32000, 0),   # MPEG1
+        0x02: (22050, 24000, 16000, 0),   # MPEG2
+        0x00: (11025, 12000, 8000, 0),    # MPEG2.5
+    }
+    bitrates_v1_l3 = (0, 32000, 40000, 48000, 56000, 64000, 80000, 96000,
+                      112000, 128000, 160000, 192000, 224000, 256000, 320000, 0)
+    bitrates_v2_l3 = (0, 8000, 16000, 24000, 32000, 40000, 48000, 56000,
+                      64000, 80000, 96000, 112000, 128000, 144000, 160000, 0)
+
+    size = len(audio_bytes)
+    pos = 0
+    # Samples accumulated per sample rate, so a stream whose accepted frames
+    # disagree on rate is still summed correctly.
+    samples_by_rate: dict[int, int] = {}
+    while pos + 4 <= size:
+        b0, b1, b2 = audio_bytes[pos], audio_bytes[pos + 1], audio_bytes[pos + 2]
+        # 11-bit frame sync: 0xFF followed by the top three bits set.
+        if b0 != 0xFF or (b1 & 0xE0) != 0xE0:
+            pos += 1
+            continue
+        version_bits = (b1 >> 3) & 0x03
+        layer_bits = (b1 >> 1) & 0x03
+        rates = sample_rates_by_version.get(version_bits)   # None for reserved 0x01
+        if layer_bits != 0x01 or rates is None:             # layer 3 only
+            pos += 1
+            continue
+
+        if version_bits == 0x03:
+            bitrates, samples_per_frame = bitrates_v1_l3, 1152
+        else:
+            bitrates, samples_per_frame = bitrates_v2_l3, 576
+        frame_rate = rates[(b2 >> 2) & 0x03]
+        bitrate = bitrates[(b2 >> 4) & 0x0F]
+        padding = (b2 >> 1) & 0x01
+        if not (frame_rate and bitrate):
+            pos += 1
+            continue
+
+        # Layer III frame size in bytes: 144 (MPEG1) or 72 (MPEG2/2.5) times
+        # bitrate / sample rate, plus the optional padding byte.
+        frame_length = int((samples_per_frame * bitrate / 8) / frame_rate) + padding
+        if frame_length <= 0:
+            pos += 1
+            continue
+
+        samples_by_rate[frame_rate] = samples_by_rate.get(frame_rate, 0) + samples_per_frame
+        pos += frame_length
+
+    if not samples_by_rate:
+        return 0
+    total_ms = sum(count * 1000 / rate for rate, count in samples_by_rate.items())
+    return int(round(total_ms))
+
+
+@app.post("/timings")
+async def timings(req: TtsRequest):
+    # Synthesizes req.text and returns per-word timings as JSON:
+    # {"text", "voice", "words": [{"text","startMs","endMs"}], "durationMs",
+    #  "audioBytes"}.
+    # Status codes: 400 for blank text or rejected parameters, 502 when the
+    # upstream synthesis fails.
+    if not req.text.strip():
+        raise HTTPException(status_code=400, detail="text is required")
+    try:
+        voice, audio_bytes, events = await _synth_with_subtitles(req)
+    except ValueError as ex:
+        # NOTE(review): edge-tts is expected to raise ValueError when it
+        # rejects a malformed voice/rate/volume/pitch string. That is a
+        # caller error, not an upstream outage, so answer 400 rather than 502.
+        raise HTTPException(status_code=400, detail=f"invalid TTS parameter: {ex}") from ex
+    except Exception as ex:
+        LOG.warning("edge-tts synth failed: %s", ex)
+        raise HTTPException(status_code=502, detail=f"edge-tts failure: {ex}") from ex
+
+    words: list[dict] = []
+    for event in events:
+        start = _to_ms(event["offset"])
+        end = start + _to_ms(event["duration"])
+        words.append({"text": event.get("text", ""), "startMs": start, "endMs": end})
+
+    # Edge sometimes omits WordBoundary events for non-English voices
+    # (notably he-IL-* and el-GR-*). Fall back to proportional distribution
+    # over the input text — same approach the eSpeak biblical-tts uses.
+    if not words:
+        # Never empty here: the text was checked for non-whitespace content
+        # above, and split() yields only non-empty tokens.
+        tokens = req.text.split()
+        total_ms = _estimate_duration_ms_from_mp3(audio_bytes)
+        if total_ms <= 0:
+            # Last-resort fallback: ~600ms per word at average speaking rate.
+            total_ms = len(tokens) * 600
+        char_total = sum(len(token) for token in tokens)
+        # Each word ends at the rounded share of total_ms covered by the
+        # characters consumed so far. Working from the running total keeps
+        # the boundaries monotonic and lands the last word exactly on
+        # total_ms; summing independently rounded shares could drift past it
+        # and leave the final word with start > end.
+        consumed = 0
+        start = 0
+        for token in tokens:
+            consumed += len(token)
+            end = int(round(total_ms * consumed / char_total))
+            words.append({"text": token, "startMs": start, "endMs": end})
+            start = end
+
+    duration_ms = words[-1]["endMs"] if words else 0
+    return JSONResponse({
+        "text": req.text,
+        "voice": voice,
+        "words": words,
+        "durationMs": duration_ms,
+        "audioBytes": len(audio_bytes),
+    })
--- a/apps/fc-ttsreader/modern-tts/requirements.txt
+++ b/apps/fc-ttsreader/modern-tts/requirements.txt
@@ -0,0 +1,3 @@
+fastapi==0.115.6
+uvicorn==0.34.0
+edge-tts==7.2.8
--- a/apps/guacamole/guacamole.yaml
+++ b/apps/guacamole/guacamole.yaml
@@ -465,6 +465,22 @@ metadata:
 spec:
  itemPath: vaults/IAmWorkin/items/Guacamole JSON Auth
 ---
+# 1Password-backed credentials for Mac mini VNC access (Phase 1 — 2026-04-28)
+# The operator mints Secret 'macmini-vnc-creds' from the item's fields:
+# username, password, VNC Password.
+# NOTE(review): Kubernetes Secret keys may only contain [-._a-zA-Z0-9], so
+# the 'VNC Password' label cannot be kept verbatim — the operator is expected
+# to sanitize it (likely to 'VNC-Password'). Confirm the real key with
+# `kubectl -n guacamole get secret macmini-vnc-creds -o yaml` before use.
+# Guacamole VNC connection password is sourced from the 'VNC Password' field.
+# Actual IP is 10.0.56.115 (INFRA VLAN) — the 1P item 'IP' field is kept as backup reference.
+apiVersion: onepassword.com/v1
+kind: OnePasswordItem
+metadata:
+  name: macmini-vnc-creds
+  namespace: guacamole
+  labels:
+    app.kubernetes.io/component: credentials
+    app.kubernetes.io/part-of: flowercore
+spec:
+  itemPath: vaults/IAmWorkin/items/Mac Mini
+---
 # Blue Jay Branding Extension (CSS + translations)
 apiVersion: v1
 kind: ConfigMap
--- a/apps/intranet/intranet.yaml
+++ b/apps/intranet/intranet.yaml
@@ -16,6 +16,15 @@ spec:
    requests:
      storage: 1Gi
 ---
+# Values injected into intranet-web as KnowledgeFleetSearch__ApiKey and
+# TrustedHeaderAuthentication__SharedSecret (both configMapKeyRef,
+# optional: true).
+# NOTE(review): an API key and a shared secret are secret material, yet they
+# sit in a ConfigMap and are committed as empty placeholders. Real values
+# would land in git in plain text, and if this app is GitOps-synced any
+# in-cluster edit would be reverted. Consider a Secret / OnePasswordItem with
+# secretKeyRef instead.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: intranet-config
+  namespace: intranet
+data:
+  KnowledgeApiKey: ""
+  TrustedHeaderSharedSecret: ""
+---
 apiVersion: apps/v1
 kind: Deployment
 metadata:
@@ -37,7 +46,7 @@ spec:
    spec:
      containers:
        - name: intranet-web
-          image: localhost/fc-intranet-web:v202604242354overridefix
+          image: localhost/fc-intranet-web:v20260429-1646
          imagePullPolicy: Never
          ports:
            - containerPort: 5300
@@ -52,6 +61,27 @@ spec:
            # in minutes. Memory: feedback_pi5_nomic_embed_slow.
            - name: IntranetSearch__OllamaBaseUrl
              value: "http://10.0.56.20:11434"
+            # Sprint E Phase 2α — JSON-file-backed PageReadingOverride persistence
+            # on the writable PVC at /data. Without this env var the
+            # intranet falls back to the in-memory store (loses state on
+            # pod restart). Master's PageReadingOverrideOptions binds
+            # PageReadingOverrides:FilePath.
+            - name: PageReadingOverrides__FilePath
+              value: "/data/page-reading-overrides.json"
+            # Knowledge service endpoint plus its API key. The key comes from
+            # the intranet-config ConfigMap; optional: true lets the pod
+            # start even when the key is missing.
+            - name: KnowledgeFleetSearch__BaseUrl
+              value: "https://knowledge.iamworkin.lan"
+            - name: KnowledgeFleetSearch__ApiKey
+              valueFrom:
+                configMapKeyRef:
+                  name: intranet-config
+                  key: KnowledgeApiKey
+                  optional: true
+            # Shared secret for trusted-header authentication, also read from
+            # intranet-config. NOTE(review): the committed value is an empty
+            # string — confirm how the app treats an empty secret.
+            - name: TrustedHeaderAuthentication__SharedSecret
+              valueFrom:
+                configMapKeyRef:
+                  name: intranet-config
+                  key: TrustedHeaderSharedSecret
+                  optional: true
          resources:
            requests:
              memory: "256Mi"
--- a/apps/knowledge/README.md
+++ b/apps/knowledge/README.md
@@ -0,0 +1,157 @@
+# knowledge — FlowerCore.Knowledge.Web (Phase 2.4 K8s deploy)
+
+**Status:** **LIVE 2026-04-27** at `https://knowledge.iamworkin.lan` —
+Phase 2.4 closed. Pod running, certificate issued (step-ca-acme), PVC
+bound (Longhorn 20Gi RWO), ArgoCD `infra-knowledge` synced. `/healthz`
+returns 200, `/api/v1/editions` returns `[]` (initial-deploy state — no
+*.db files in the PVC yet; Phase 2.5+ admin UI handles bulk
+population).
+
+- Plan: [`../../../FlowerCore.Notes/docs/ai-agents/flowercore-knowledge-service-plan.md`](../../../FlowerCore.Notes/docs/ai-agents/flowercore-knowledge-service-plan.md)
+- Sprint: [`../../../FlowerCore.Notes/docs/ai-station/sprint-e-xxl-plan.md`](../../../FlowerCore.Notes/docs/ai-station/sprint-e-xxl-plan.md) (Track B)
+- Repo: `D:\git\FlowerCore\FlowerCore.Knowledge\` (private GitHub repo,
+  bootstrapped Sprint D batch 35)
+
+`FlowerCore.Knowledge.Web` is the fleet-wide vector-indexing & RAG hub —
+a REST + MCP service that scans `*.db` files under
+`/data/vector-stores` and exposes per-edition reachability + corpus
+search to the rest of the FC ecosystem (Agent Zero, Chat.Web persona
+memory, AiStation embeddings explorer, TtsReader chapter context, BMO
+bot, Pi nodes via `fc-index sync`).
+
+## Deployment order (do NOT skip / reorder)
+
+### 1. FlowerCore.DNS public A record — knowledge.iamworkin.lan -> 10.0.56.200
+
+Required BEFORE the Certificate resource is created, or cert-manager
+HTTP-01 silently backs off ~2h. Memory: `feedback_pfsense_dns_required_for_acme`.
+
+The canonical path is FlowerCore.DNS:
+
+```bash
+curl -sk https://dns.iamworkin.lan/api/v1/servers
+# Find the pfSense serverId, then create the record using the host label only.
+
+curl -sk -X POST https://dns.iamworkin.lan/api/v1/servers/<serverId>/zones/iamworkin.lan/records \
+  -H "Content-Type: application/json" \
+  -d '{"name":"knowledge","type":"A","data":"10.0.56.200","ttl":300}'
+```
+
+If FlowerCore.DNS provider writes fail with HTTP 502 and the message
+"pfSense diag_command.php response did not contain a `<pre>` block" (the
+state as of Sprint E Track B authoring, 2026-04-27), add the override
+manually via the pfSense web UI:
+
+1. Log in to `https://10.0.56.1` as admin
+2. Services → DNS Resolver → General Settings → Host Overrides
+3. Add: Host=`knowledge`, Domain=`iamworkin.lan`, IP Address=`10.0.56.200`
+4. Save + Apply Changes
+
+Verify resolution from anywhere on LAN:
+
+```bash
+nslookup knowledge.iamworkin.lan 10.0.56.1
+# Expect: 10.0.56.200
+```
+
+Or against FlowerCore.DNS once the provider is fixed:
+
+```bash
+curl -sk "https://dns.iamworkin.lan/api/v1/zones/iamworkin.lan/resolve-preflight?hostname=knowledge.iamworkin.lan"
+# Expect: "resolvable": true
+```
+
+### 2. Build + import the image to ALL RKE2 nodes
+
+Pods may schedule on any RKE2 node (server, agent1, agent2): the
+Longhorn PVC can be mounted from any of them, so nothing pins the pod to
+one host and the image must be imported to all three. Memory:
+`feedback_rke2_image_import_targets_all_nodes` +
+`feedback_rke2_localhost_imagepullpolicy`.
+
+```bash
+# From BLUEJAY-WS, in D:\git\FlowerCore\FlowerCore.Knowledge
+TAG="v$(date +%Y%m%d%H%M)"
+dotnet.exe publish -c Release -o deploy/app \
+  src/FlowerCore.Knowledge.Web/FlowerCore.Knowledge.Web.csproj
+podman build -t localhost/fc-knowledge-web:$TAG -f deploy/Dockerfile.deploy deploy
+podman save localhost/fc-knowledge-web:$TAG -o /tmp/fc-knowledge-web.tar
+
+# Import to all three RKE2 nodes
+for node in rke2-server rke2-agent1 rke2-agent2; do
+  scp /tmp/fc-knowledge-web.tar $node:/tmp/
+  ssh $node "sudo /var/lib/rancher/rke2/bin/ctr -a /run/k3s/containerd/containerd.sock -n k8s.io images import /tmp/fc-knowledge-web.tar"
+done
+```
+
+The repo's `scripts/deploy-knowledge.sh` automates this loop.
+
+### 3. Bump the image tag + push
+
+Edit `knowledge.yaml`, replace `localhost/fc-knowledge-web:v202604272200`
+with the tag from step 2, then:
+
+```bash
+cd D:/git/FlowerCore/bluejay-infra
+python scripts/check-pfsense-dns.py     # confirms the DNS preflight
+git add apps/knowledge/
+git commit -m "feat(knowledge): deploy Phase 2.4 K8s manifest"
+git push
+```
+
+ArgoCD picks up within ~3 minutes and creates `infra-knowledge`.
+
+### 4. Verify
+
+```bash
+fcadmin_ssh noc1 '
+  kubectl -n argocd get application infra-knowledge
+  kubectl -n knowledge get certificate,pod,pvc
+  curl -sk -m 8 -o /dev/null -w "HTTP %{http_code}\n" https://knowledge.iamworkin.lan/healthz
+  curl -sk -m 8 https://knowledge.iamworkin.lan/api/v1/editions | jq
+'
+```
+
+Expect: Certificate `Ready: True` within ~60s, `/healthz` HTTP 200,
+`/api/v1/editions` returns an empty array (no DBs in the PVC yet) on
+first deploy.
+
+## Initial-deploy state and Phase 2.5 follow-up
+
+The Longhorn PVC is empty on first deploy. Knowledge.Web's filesystem
+catalog will report zero editions until vector-store `*.db` files are
+pushed into `/data/vector-stores`. Initial population is a follow-up
+step (Phase 2.5+, Blazor admin UI's "Rebuild" button); for the first
+deploy the goal is just to prove the pod boots, `/healthz` returns 200,
+and the Traefik IngressRoute serves the Scalar UI.
+
+To copy an existing local DB into the PVC (one-time, manual until
+Phase 2.5 admin UI lands):
+
+```bash
+fcadmin_ssh noc1 '
+  POD=$(kubectl -n knowledge get pod -l app=knowledge-web -o jsonpath="{.items[0].metadata.name}")
+  kubectl -n knowledge cp /var/lib/flowercore/vector-stores/bluejay-ai.db $POD:/data/vector-stores/bluejay-ai.db
+'
+```
+
+## Probes + middleware notes
+
+- `/healthz` is mapped by `Controllers/HealthController.cs` (controller-based
+  attribute route). Cheap — no DB, no dependencies.
+- Liveness uses `tcpSocket` as a defensive fallback in case future
+  middleware accidentally gates `/healthz` behind auth (memory:
+  `feedback_k8s_probes_behind_auth_middleware`).
+- `/openapi/v1.json` and `/scalar/v1` are wired by `UseFlowerCoreApi`.
+  Per memory `feedback_k8s_probes_must_not_hit_openapi`, probes must NOT
+  point at OpenAPI documents — the `MapOpenApi` call can be slow during
+  cold startup.
+
+## Resource sizing
+
+- 256Mi memory request / 1Gi limit.
+- 100m CPU request / 1000m limit.
+- 20Gi Longhorn PVC initial — sufficient for the bluejay-ai 1.94Gi DB +
+  fleet-pi-edge 352Mi + fleet-bmo-bot 141Mi + headroom. Resize via
+  `kubectl -n knowledge edit pvc knowledge-vector-store` if growing
+  past 15Gi.
--- a/apps/knowledge/knowledge.yaml
+++ b/apps/knowledge/knowledge.yaml
@@ -0,0 +1,246 @@
+# FlowerCore.Knowledge.Web — fleet vector indexing & RAG hub.
+#
+# Phase 2.4 of the Knowledge service plan. REST + MCP service that scans
+# *.db files under /data/vector-stores and exposes:
+#   - REST: /api/v1/editions, /api/v1/corpus/search, /healthz
+#   - MCP:  list_editions, describe_edition, corpus_search
+#   - Static OpenAPI/Scalar via UseFlowerCoreApi
+#
+# Architecture:
+#   Plan:    FlowerCore.Notes/docs/ai-agents/flowercore-knowledge-service-plan.md
+#   Sprint:  FlowerCore.Notes/docs/ai-station/sprint-e-xxl-plan.md (Track B)
+#   Repo:    D:\git\FlowerCore\FlowerCore.Knowledge\
+#   Shared:  FlowerCore.Common -> FlowerCore.Shared.Indexing (chunkers, vector
+#            stores, edition profiles, ICorpusSearchService facade)
+#
+# Deployment order (see apps/knowledge/README.md and the bluejay-infra/README.md
+# top-level checklist):
+#   1. FlowerCore.DNS public A record knowledge.iamworkin.lan -> 10.0.56.200
+#      MUST exist BEFORE the Certificate is created, or cert-manager HTTP-01
+#      backs off ~2h. Memory: feedback_pfsense_dns_required_for_acme.
+#   2. Build + import the image to ALL RKE2 nodes (server + both agents) since
+#      the Pod uses a Longhorn PVC and may schedule anywhere.
+#      Memory: feedback_rke2_localhost_imagepullpolicy.
+#   3. Bump the image tag in this file, git push.
+#   4. ArgoCD ApplicationSet picks up within ~3 minutes and creates
+#      infra-knowledge.
+#
+# Initial-deploy state:
+#   The Longhorn PVC is empty on first deploy. Knowledge.Web's filesystem
+#   catalog will report zero editions until vector-store *.db files are
+#   pushed into /data/vector-stores. Initial population is a follow-up step
+#   (Phase 2.5+, Blazor admin UI's "Rebuild" button); for the first deploy
+#   the goal is just to prove the pod boots, /healthz returns 200, and the
+#   Traefik IngressRoute serves the Scalar UI.
+---
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: knowledge
+  labels:
+    app.kubernetes.io/part-of: bluejay-infra
+---
+# MCP API key — synced from 1Password so /mcp stays gated without baking
+# secrets into Git. The PASSWORD category maps the concealed field to Secret
+# key `password`, which the Deployment reads into FlowerCore:Mcp:ApiKey:Key.
+apiVersion: onepassword.com/v1
+kind: OnePasswordItem
+metadata:
+  name: knowledge-mcp-api-key
+  namespace: knowledge
+spec:
+  itemPath: "vaults/IAmWorkin/items/KnowledgeApiKey"
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: knowledge-vector-store
+  namespace: knowledge
+spec:
+  accessModes:
+    - ReadWriteOnce
+  storageClassName: longhorn
+  resources:
+    requests:
+      storage: 20Gi
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: knowledge-web
+  namespace: knowledge
+  labels:
+    app: knowledge-web
+    app.kubernetes.io/name: knowledge-web
+    app.kubernetes.io/part-of: bluejay-infra
+spec:
+  replicas: 1
+  revisionHistoryLimit: 3
+  # RWO Longhorn PVC blocks rolling updates (multi-attach error). Recreate
+  # is the canonical pattern (memory: feedback_rwo_pvc_blocks_rolling).
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      app: knowledge-web
+  template:
+    metadata:
+      labels:
+        app: knowledge-web
+        app.kubernetes.io/name: knowledge-web
+        app.kubernetes.io/part-of: bluejay-infra
+      annotations:
+        prometheus.io/scrape: "true"
+        prometheus.io/port: "8080"
+        prometheus.io/path: "/metrics"
+    spec:
+      securityContext:
+        runAsNonRoot: true
+        fsGroup: 1654
+        fsGroupChangePolicy: OnRootMismatch
+      containers:
+        - name: web
+          # Placeholder tag — bump to the image you built + imported to ALL
+          # RKE2 nodes via scripts/deploy-knowledge.sh before applying.
+          image: localhost/fc-knowledge-web:v202604272200
+          imagePullPolicy: Never
+          ports:
+            - containerPort: 8080
+              name: http
+          env:
+            - name: ASPNETCORE_URLS
+              value: "http://+:8080"
+            - name: ASPNETCORE_ENVIRONMENT
+              value: "Production"
+            - name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
+              value: "false"
+            # Vector-store directory + embedding model + edition profile dir.
+            # Profile JSON is baked into the image at /app/editions via the
+            # csproj Content-link from FlowerCore.Common/editions/.
+            - name: Knowledge__VectorStoresDirectory
+              value: "/data/vector-stores"
+            - name: Knowledge__EmbeddingModel
+              value: "nomic-embed-text"
+            - name: Knowledge__DefaultLimit
+              value: "5"
+            - name: Knowledge__MaxLimit
+              value: "50"
+            - name: FlowerCore__Editions__ProfileDirectory
+              value: "/app/editions"
+            # Embed via edge1 Pi 5 + AI HAT+ (10.0.57.17:11434). Cluster
+            # services do not depend on BLUEJAY-WS (private dev hardware) per
+            # bluejay-infra@0f9d56e. Query-time embedding is fast enough on
+            # edge1 (~ms per query); bulk index rebuilds (Phase 2.5+) will
+            # need a separate ingestion lane that can opt into the
+            # workstation GPU when present.
+            - name: FlowerCore__Ollama__BaseUrl
+              value: "http://10.0.57.17:11434"
+            - name: FlowerCore__Mcp__ApiKey__Key
+              valueFrom:
+                secretKeyRef:
+                  name: knowledge-mcp-api-key
+                  key: password
+          resources:
+            requests:
+              cpu: 100m
+              memory: 256Mi
+            limits:
+              cpu: 1000m
+              memory: 1Gi
+          # /healthz is mapped by HealthController (controller-based route).
+          # tcpSocket liveness is the defensive fallback in case middleware
+          # later gates /healthz behind auth (memory:
+          # feedback_k8s_probes_behind_auth_middleware).
+          startupProbe:
+            httpGet:
+              path: /healthz
+              port: 8080
+            initialDelaySeconds: 5
+            periodSeconds: 5
+            failureThreshold: 30
+          readinessProbe:
+            httpGet:
+              path: /healthz
+              port: 8080
+            periodSeconds: 10
+            failureThreshold: 3
+          livenessProbe:
+            tcpSocket:
+              port: 8080
+            initialDelaySeconds: 30
+            periodSeconds: 30
+            failureThreshold: 3
+          securityContext:
+            runAsNonRoot: true
+            runAsUser: 1654
+            runAsGroup: 1654
+            allowPrivilegeEscalation: false
+            readOnlyRootFilesystem: true
+            capabilities:
+              drop:
+                - ALL
+          volumeMounts:
+            - name: vector-store
+              mountPath: /data/vector-stores
+            - name: tmp
+              mountPath: /tmp
+            - name: logs
+              mountPath: /app/logs
+      volumes:
+        - name: vector-store
+          persistentVolumeClaim:
+            claimName: knowledge-vector-store
+        - name: tmp
+          emptyDir: {}
+        - name: logs
+          emptyDir: {}
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: knowledge-web
+  namespace: knowledge
+  labels:
+    app: knowledge-web
+    app.kubernetes.io/name: knowledge-web
+    app.kubernetes.io/part-of: bluejay-infra
+spec:
+  type: ClusterIP
+  selector:
+    app: knowledge-web
+  ports:
+    - name: http
+      port: 80
+      targetPort: 8080
+---
+apiVersion: cert-manager.io/v1
+kind: Certificate
+metadata:
+  name: knowledge-tls
+  namespace: knowledge
+spec:
+  secretName: knowledge-tls
+  issuerRef:
+    name: step-ca-acme
+    kind: ClusterIssuer
+  dnsNames:
+    - knowledge.iamworkin.lan
+  duration: 2160h    # 90d
+  renewBefore: 720h  # 30d
+---
+apiVersion: traefik.io/v1alpha1
+kind: IngressRoute
+metadata:
+  name: knowledge
+  namespace: knowledge
+spec:
+  entryPoints:
+    - websecure
+  routes:
+    - match: Host(`knowledge.iamworkin.lan`)
+      kind: Rule
+      services:
+        - name: knowledge-web
+          port: 80
+  tls:
+    secretName: knowledge-tls
--- a/apps/knowledge/kustomization.yaml
+++ b/apps/knowledge/kustomization.yaml
@@ -0,0 +1,7 @@
+# ArgoCD's bluejay-infra ApplicationSet uses a directory generator and does
+# not require kustomization.yaml. Mirrors the fc-distribution shape so
+# `kubectl kustomize` previews work from a working copy.
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+resources:
+  - knowledge.yaml
--- a/apps/monitoring/noc-monitoring.yaml
+++ b/apps/monitoring/noc-monitoring.yaml
@@ -104,21 +104,27 @@ data:
          - target_label: __address__
            replacement: snmp-exporter.monitoring.svc:9116

-      # UniFi Cloud Key SNMP
-      - job_name: "snmp-cloudkey"
-        static_configs:
-          - targets: ["10.0.56.3"]
-        metrics_path: /snmp
-        params:
-          module: [if_mib]
-          auth: [bluejay_v2]
-        relabel_configs:
-          - source_labels: [__address__]
-            target_label: __param_target
-          - source_labels: [__param_target]
-            target_label: instance
-          - target_label: __address__
-            replacement: snmp-exporter.monitoring.svc:9116
+      # UniFi Cloud Key SNMP — DISABLED 2026-04-26
+      # The Cloud Key Gen2+ runs unifi-core (controller) only — not a network
+      # device — and does NOT run an SNMP agent on UDP/161. Scrapes were
+      # silently failing with "connection refused" from 10.42.x.x:161 every
+      # 30s, polluting up{} = 0 and lastError on the Targets page. Hardware
+      # health (CPU/mem/disk) for the Cloud Key host should come from
+      # node_exporter via SSH — not SNMP.
+      # - job_name: "snmp-cloudkey"
+      #   static_configs:
+      #     - targets: ["10.0.56.3"]
+      #   metrics_path: /snmp
+      #   params:
+      #     module: [if_mib]
+      #     auth: [bluejay_v2]
+      #   relabel_configs:
+      #     - source_labels: [__address__]
+      #       target_label: __param_target
+      #     - source_labels: [__param_target]
+      #       target_label: instance
+      #     - target_label: __address__
+      #       replacement: snmp-exporter.monitoring.svc:9116

      # UniFi Switch SNMP
      - job_name: "snmp-switch"
@@ -279,10 +285,13 @@ data:
            replacement: blackbox-exporter.monitoring.svc:9115

      # FlowerCore.RemoteDesktop web health (public cluster VIP)
+      # Module is https_internal — desktop.iamworkin.lan uses a step-ca leaf
+      # cert; blackbox does NOT trust step-ca root, so http_2xx fails with
+      # x509 unknown authority and probe_success=0 even when /health 200s.
      - job_name: "probe-remotedesktop"
        metrics_path: /probe
        params:
-          module: [http_2xx]
+          module: [https_internal]
        scrape_interval: 30s
        static_configs:
          - targets: ["https://desktop.iamworkin.lan/health"]
@@ -330,26 +339,12 @@ data:
      # AI Stack Health Probes (Blackbox Exporter)
      # =============================================================================

-      # Ollama API — workstation (LOCAL Agent Zero)
-      - job_name: "probe-ollama-local"
-        metrics_path: /probe
-        params:
-          module: [http_ollama]
-        scrape_interval: 30s
-        static_configs:
-          - targets: ["http://10.0.58.100:11434/api/tags"]
-            labels:
-              instance: "ollama-local"
-              service: "ollama"
-              deployment: "local"
-              gpu: "r9700"
-        relabel_configs:
-          - source_labels: [__address__]
-            target_label: __param_target
-          - source_labels: [__param_target]
-            target_label: instance
-          - target_label: __address__
-            replacement: blackbox-exporter.monitoring.svc:9115
+      # NOTE: probe-ollama-local and probe-agentzero-local were REMOVED
+      # 2026-04-26. They pointed at 10.0.58.100 (HOME VLAN) which is not
+      # reachable from cluster pods (firewalled). They had been firing as
+      # OllamaDown / AgentZeroDown since 2026-04-24. Workstation/AI-laptop
+      # Ollama and Agent Zero should be monitored via host-side Puppet
+      # (node_exporter on the box) once the AI laptop is running 24/7.

      # Ollama API — edge1 Pi 5 (NUC Agent Zero)
      - job_name: "probe-ollama-edge1"
@@ -372,34 +367,18 @@ data:
          - target_label: __address__
            replacement: blackbox-exporter.monitoring.svc:9115

-      # Agent Zero Web UI — local (K3s)
-      - job_name: "probe-agentzero-local"
-        metrics_path: /probe
-        params:
-          module: [http_2xx]
-        scrape_interval: 30s
-        static_configs:
-          - targets: ["http://10.0.58.100:30050/"]
-            labels:
-              instance: "agent-zero-local"
-              service: "agent-zero"
-              deployment: "local"
-        relabel_configs:
-          - source_labels: [__address__]
-            target_label: __param_target
-          - source_labels: [__param_target]
-            target_label: instance
-          - target_label: __address__
-            replacement: blackbox-exporter.monitoring.svc:9115
-
-      # Agent Zero Web UI — NUC (RKE2 via Traefik)
+      # Agent Zero Web UI — in-cluster (RKE2)
+      # Target uses short svc form (agent-zero.agent-zero.svc) NOT
+      # cluster.local FQDN — the *.cluster.local form gets rewritten to
+      # 10.0.56.200 (Traefik VIP) by the CoreDNS iamworkin.lan template +
+      # ndots:5 search-suffix expansion. Memory: feedback_coredns_ndots_template_collision.
      - job_name: "probe-agentzero-nuc"
        metrics_path: /probe
        params:
          module: [http_2xx]
        scrape_interval: 30s
        static_configs:
-          - targets: ["http://agent-zero.agent-zero.svc.cluster.local/"]
+          - targets: ["http://agent-zero.agent-zero.svc:80/"]
            labels:
              instance: "agent-zero-nuc"
              service: "agent-zero"
@@ -412,6 +391,119 @@ data:
          - target_label: __address__
            replacement: blackbox-exporter.monitoring.svc:9115

+      # =============================================================================
+      # K8s Cluster State (kube-state-metrics, cert-manager, traefik)
+      # =============================================================================
+      # Use in-cluster ClusterIP service DNS — NOT NodePorts — so a same-node
+      # NodePort hairpin doesn't break the scrape (hit on rke2-agent1 hosting
+      # both prometheus and traefik on 2026-04-26: 10.0.56.12:30900 timed out
+      # from prometheus while .11/.13 worked). NodePorts at 30900-30902 are
+      # still useful for noc1-Podman-style external scrapers, but in-cluster
+      # we should always use the svc DNS form.
+
+      # kube-state-metrics — exposes K8s object state (pods, deployments, nodes)
+      # Required for KubeContainerRestartingFrequently / KubePodNotReady alerts.
+      - job_name: "kube-state-metrics"
+        scrape_interval: 30s
+        static_configs:
+          - targets: ["kube-state-metrics.kube-system.svc:8080"]
+            labels:
+              cluster: "rke2"
+
+      # cert-manager — exposes certmanager_certificate_ready_status,
+      # certmanager_certificate_expiration_timestamp_seconds, etc. Drives the
+      # CertManagerCertificateNotReady / CertManagerCertificateRenewalFailed
+      # alerts. Memory: project_cert_manager_prometheus_scrape.
+      - job_name: "cert-manager"
+        scrape_interval: 30s
+        static_configs:
+          - targets: ["cert-manager-metrics.cert-manager.svc:9402"]
+            labels:
+              cluster: "rke2"
+
+      # Traefik — request rates, latency, TLS cert metadata, router state.
+      # ClusterIP svc routes to one of the traefik pods; per-pod scrape via
+      # the headless `traefik-metrics` selector would be nicer for failover
+      # visibility but the single-replica scrape is enough for steady-state.
+      - job_name: "traefik"
+        scrape_interval: 15s
+        static_configs:
+          - targets: ["traefik-metrics.traefik-system.svc:9100"]
+            labels:
+              service: "traefik"
+              cluster: "rke2"
+
+      # Longhorn — exposes longhorn_volume_robustness, longhorn_backup_*,
+      # longhorn_node_status_*. Enables LonghornVolumeUnhealthy +
+      # LonghornBackupFailed alerts (no real visibility into Longhorn
+      # health before this — was relying on K8s events which are noisy
+      # transient lifecycle messages, not actionable signals).
+      - job_name: "longhorn"
+        scrape_interval: 30s
+        static_configs:
+          - targets: ["longhorn-backend.longhorn-system.svc:9500"]
+            labels:
+              service: "longhorn"
+              cluster: "rke2"
+
+      # FC web services through Traefik — single probe surface to spot any
+      # iamworkin.lan host returning non-200. Uses https_internal because all
+      # certs are step-ca leaves; blackbox would x509-fail with http_2xx.
+      # Some services need explicit healthcheck paths because root returns
+      # 404 (acme, guac) or 401 (grafana, prometheus). Drop them or point at
+      # the right endpoint — don't widen valid_status_codes globally because
+      # a 401 from a healthy pod and a 401 from an outage look identical.
+      - job_name: "probe-traefik-services"
+        metrics_path: /probe
+        params:
+          module: [https_internal]
+        scrape_interval: 60s
+        static_configs:
+          - targets:
+              # Root-reachable services (200 or 3xx)
+              - "https://gitea.iamworkin.lan/"
+              - "https://argocd.iamworkin.lan/"
+              - "https://intranet.iamworkin.lan/"
+              - "https://signage.iamworkin.lan/"
+              - "https://kiosk.iamworkin.lan/"
+              - "https://media.iamworkin.lan/"
+              - "https://mysql.iamworkin.lan/"
+              - "https://php.iamworkin.lan/"
+              - "https://zabbix.iamworkin.lan/"
+              - "https://desktop.iamworkin.lan/"
+              - "https://print.iamworkin.lan/"
+              - "https://dns.iamworkin.lan/"
+              - "https://chat.iamworkin.lan/"
+              - "https://dist.iamworkin.lan/"
+              - "https://dms.iamworkin.lan/"
+              - "https://menuboard.iamworkin.lan/"
+              - "https://messageboard.iamworkin.lan/"
+              - "https://presentations.iamworkin.lan/"
+              - "https://retail.iamworkin.lan/"
+              - "https://ttsreader.iamworkin.lan/"
+              # Explicit healthcheck paths
+              - "https://fc-llm-bridge.iamworkin.lan/healthz"
+              - "https://acme.iamworkin.lan/health"
+              # NOTE: services intentionally NOT in this probe surface
+              #   - grafana.iamworkin.lan: every endpoint (incl. /api/health
+              #     and /login) returns 401 behind Traefik basic-auth.
+              #     Health covered by in-cluster monitoring-grafana scrape.
+              #   - prometheus.iamworkin.lan: same auth pattern. Health covered
+              #     by the prometheus self-scrape job.
+              #   - guac.iamworkin.lan: deprecated — Guacamole moved to
+              #     desktop.iamworkin.lan/guacamole/ (memory:
+              #     feedback_traefik_cross_namespace_refs_disabled).
+            labels:
+              probe_type: "traefik-service"
+        relabel_configs:
+          - source_labels: [__address__]
+            target_label: __param_target
+          - source_labels: [__param_target]
+            regex: "https?://([^/:]+).*"
+            target_label: instance
+          - target_label: __address__
+            replacement: blackbox-exporter.monitoring.svc:9115
+
      # =============================================================================
      # Self-monitoring (K8s monitoring namespace)
      # =============================================================================
@@ -550,6 +642,42 @@ data:
              summary: "Print queue backlog on edge2 ({{ $value }} active jobs)"
              description: "CUPS has {{ $value }} active jobs queued. Possible printer jam, USB disconnect, or paper out."

+          # Paper roll lifecycle alerts (XL Track I, 2026-04-26).
+          # Source-of-truth gauge: print_paper_remaining_percent (Print.Web OTEL,
+          # hydrated on startup from the active PaperRoll row).
+          # alert_channel=thermal_print routes through irc-notify -> Print.Web
+          # /api/print/alert so the printer announces its own paper-out warning
+          # on its remaining paper. Self-referential humor + operator nudge.
+          - alert: PrintPaperRollLow
+            expr: print_paper_remaining_percent{job="printweb-otel"} < 10 and print_paper_remaining_percent{job="printweb-otel"} > 5
+            for: 5m
+            labels:
+              severity: warning
+              alert_channel: thermal_print
+            annotations:
+              summary: "Print roll low on edge2 ({{ $value | printf \"%.1f\" }}% remaining)"
+              description: "NuPrint 210 paper roll has {{ $value | printf \"%.1f\" }}% remaining. Operator should load a fresh roll soon. Run /api/paper/status for the precise mm + estimated jobs left."
+
+          - alert: PrintPaperRollCritical
+            expr: print_paper_remaining_percent{job="printweb-otel"} <= 5
+            for: 2m
+            labels:
+              severity: critical
+              alert_channel: thermal_print
+            annotations:
+              summary: "Print roll critical on edge2 ({{ $value | printf \"%.1f\" }}% remaining)"
+              description: "NuPrint 210 paper roll at {{ $value | printf \"%.1f\" }}% — load a new roll NOW. The 50ft roll has a ~12% red-stripe zone; once paper passes that, the printer can run dry mid-job."
+
+          - alert: PrintJobDeadLetter
+            expr: increase(print_jobs_dead_letter_total[15m]) > 0
+            for: 1m
+            labels:
+              severity: warning
+              alert_channel: thermal_print
+            annotations:
+              summary: "Print job(s) entered dead-letter on edge2 ({{ $value | printf \"%.0f\" }} in last 15m)"
+              description: "{{ $value | printf \"%.0f\" }} print job(s) exhausted MaxRetries and need operator action. Open /print-log, filter Status=DeadLetter, click 'Retry From Start' after fixing the underlying cause (paper jam, USB disconnect, printer power-cycle)."
+
          - alert: CUPSHighJobRate
            expr: rate(cups_job_total[5m]) * 60 > 30
            for: 5m
@@ -589,23 +717,39 @@ data:
              summary: "RemoteDesktop /metrics scrape returning no data"
              description: "No fc_desktop_session_events_total series for 10 minutes. Either the Prometheus scrape target is misconfigured or the web deployment stopped exporting metrics. Zabbix template carries the same 10m no-data trigger for cross-monitor parity."

+          # PUBLISHER QUIRK: fc_desktop_pool_depleted / _deficit emit one
+          # series per template per status (Ready/Warming/BelowDesiredSize/
+          # Disabled), and the historical series for non-current statuses
+          # stay at their last value. So just `_depleted > 0` fires forever
+          # on any template that ever entered a bad state.
+          #
+          # SAFE PATTERN: alert only when the canonical "Ready" status
+          # gauge does NOT report ready=1 for the enabled template. This
+          # is the publisher's own canary — _ready{status="Ready"}==1 is
+          # always the current "everything is fine" signal.
          - alert: RemoteDesktopPoolDepleted
-            expr: fc_desktop_pool_depleted > 0
+            expr: |
+              group by(template) (fc_desktop_pool_ready{enabled="true"})
+              unless on(template) (fc_desktop_pool_ready{enabled="true",status="Ready"} == 1)
            for: 5m
            labels:
              severity: warning
            annotations:
-              summary: "RemoteDesktop pool {{ $labels.pool }} depleted ({{ $labels.template }})"
-              description: "Pool {{ $labels.pool }} has been depleted for 5 minutes. New launches will cold-start. Operator should check for pod-scheduling failures, image pull issues, or exhausted node capacity before warm-pool parity is expected back."
+              summary: "RemoteDesktop pool depleted ({{ $labels.template }})"
+              description: "Pool for template {{ $labels.template }} has no Ready warm pod for 5 minutes. New launches will cold-start. Check pod-scheduling failures, image pull issues, or exhausted node capacity."

+          # Same pattern, but only fires when template explicitly reports
+          # a sustained Warning-level alert state (current-status series).
          - alert: RemoteDesktopPoolDeficitSustained
-            expr: fc_desktop_pool_deficit > 0
+            expr: |
+              fc_desktop_pool_deficit{enabled="true",alert_level="Warning"} > 0
+              unless on(template) (fc_desktop_pool_ready{enabled="true",status="Ready"} == 1)
            for: 10m
            labels:
              severity: info
            annotations:
-              summary: "RemoteDesktop pool {{ $labels.pool }} below desired for 10m"
-              description: "Pool {{ $labels.pool }} has a persistent deficit of {{ $value }} warm pods. The operator is reconciling but can't reach desired size — likely an image pull, NFS affinity, or claim-init issue."
+              summary: "RemoteDesktop pool {{ $labels.template }} below desired for 10m"
+              description: "Pool {{ $labels.template }} has a persistent deficit of {{ $value }} warm pods AND no Ready series. Likely image pull, NFS affinity, or claim-init issue."

          - alert: RemoteDesktopSessionChurnSpike
            expr: sum(rate(fc_desktop_session_events_total{event="launch"}[5m])) * 60 > 20
@@ -625,8 +769,10 @@ data:
              summary: "RemoteDesktop recording events silent for 30m despite active launches"
              description: "No recording events in 30 minutes while launches are happening. Recording may be silently disabled on all templates (SessionRecordingEnabled=false), the guacd NFS mount may be unhealthy, or the retention sweep isn't emitting events. Not an error by itself — worth checking."

+          # Match by job — instance label carries full URL incl. /health,
+          # not just hostname, so a hostname-only match never fires.
          - alert: RemoteDesktopTlsExpiry
-            expr: probe_ssl_earliest_cert_expiry{instance="https://desktop.iamworkin.lan"} - time() < 2 * 86400
+            expr: probe_ssl_earliest_cert_expiry{job="probe-remotedesktop"} - time() < 2 * 86400
            for: 6h
            labels:
              severity: critical
@@ -713,13 +859,16 @@ data:
            annotations:
              summary: "Epson ink CRITICAL: {{ $labels.prtMarkerSuppliesDescription }} at {{ $value }}%"

+          # `for: 30m` absorbs sleep cycles. The EcoTank sleeps after ~5 min
+          # idle and SNMP then times out, so a 5m `for:` would page nightly.
+          # A genuine printer outage (jam, disconnected) lasts well over 30m.
          - alert: EpsonPrinterDown
            expr: up{job="snmp-printer"} == 0
-            for: 5m
+            for: 30m
            labels:
              severity: warning
            annotations:
-              summary: "Epson ET-3750 SNMP unreachable"
+              summary: "Epson ET-3750 SNMP unreachable for >30m (likely actual fault, not sleep)"

          - alert: SynologyDiskLow
            expr: hrStorageUsed{job="snmp-nas"} / hrStorageSize{job="snmp-nas"} * 100 > 85
@@ -773,6 +922,108 @@ data:
            annotations:
              summary: "Disk usage high on {{ $labels.instance }} ({{ $value | printf \"%.1f\" }}%)"

+      # K8s pod-state alerts. Require kube-state-metrics scrape (added
+      # 2026-04-26 — see scrape_configs above). Would have surfaced the
+      # agent-zero ollama-proxy 172x crash-loop instead of letting it
+      # silently churn for ~3 days.
+      - name: kubernetes-state
+        rules:
+          - alert: KubeContainerRestartingFrequently
+            expr: increase(kube_pod_container_status_restarts_total[1h]) > 5
+            for: 15m
+            labels:
+              severity: warning
+            annotations:
+              summary: "{{ $labels.namespace }}/{{ $labels.pod }} container {{ $labels.container }} restarting >5x/hr"
+              description: "Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} has restarted {{ $value | printf \"%.0f\" }} times in the last hour. Check 'kubectl describe pod' + last-state termination reason."
+
+          - alert: KubeContainerCrashLooping
+            expr: increase(kube_pod_container_status_restarts_total[15m]) > 3
+            for: 5m
+            labels:
+              severity: critical
+              alert_channel: thermal_print
+            annotations:
+              summary: "{{ $labels.namespace }}/{{ $labels.pod }} crashlooping ({{ $value | printf \"%.0f\" }} restarts/15m)"
+              description: "Container {{ $labels.container }} restarted {{ $value | printf \"%.0f\" }} times in 15 minutes — actively crashlooping."
+
+          - alert: KubePodNotReady
+            expr: sum by(namespace, pod) (kube_pod_status_phase{phase=~"Pending|Failed|Unknown"}) > 0
+            for: 15m
+            labels:
+              severity: warning
+            annotations:
+              summary: "{{ $labels.namespace }}/{{ $labels.pod }} not Ready for >15m"
+              description: "Pod is in a non-Running, non-Succeeded phase for over 15 minutes. Common causes: ImagePullBackOff (registry/Nexus down, wrong image tag), pending PVC, scheduling failure (taint/resources)."
+
+          - alert: KubePodImagePullBackOff
+            expr: sum by(namespace, pod) (kube_pod_container_status_waiting_reason{reason=~"ImagePullBackOff|ErrImagePull"}) > 0
+            for: 10m
+            labels:
+              severity: warning
+            annotations:
+              summary: "{{ $labels.namespace }}/{{ $labels.pod }} ImagePullBackOff for >10m"
+              description: "Pod can't pull image. Check the image ref (often a stale tag or unreachable registry) and clean up if it's an orphan."
+
+          # Fires when the desired replica count and the available replica
+          # count disagree for 15m. A filtering comparison keeps the
+          # LEFT-hand sample, so $value is the desired (spec) replica count;
+          # the result carries only the metric's own labels (namespace,
+          # deployment, ...) — there is no spec_replicas label to template.
+          - alert: KubeDeploymentReplicasMismatch
+            expr: kube_deployment_spec_replicas != kube_deployment_status_replicas_available
+            for: 15m
+            labels:
+              severity: warning
+            annotations:
+              summary: "Deployment {{ $labels.namespace }}/{{ $labels.deployment }} replica mismatch"
+              description: "Spec wants {{ $value }} replica(s) but the available count has differed for 15+ minutes. Likely a rollout stuck on probe failure, scheduling, or PVC."
+
+      # Longhorn storage health alerts. Required: longhorn scrape job
+      # (added 2026-04-26 — see scrape_configs above). The K8s events
+      # for "snapshot becomes not ready to use" are transient lifecycle
+      # noise, not actionable — these alerts use the actual Longhorn
+      # gauges that reflect persistent state.
+      - name: longhorn-storage
+        rules:
+          # longhorn_volume_robustness is one gauge per volume whose VALUE
+          # encodes the state: 0=unknown, 1=healthy, 2=degraded, 3=faulted.
+          # The state is not exposed as a `robustness` label, so the rule
+          # compares the value. Detached volumes report 0 — that's normal
+          # for unattached PVCs — and therefore never match.
+          - alert: LonghornVolumeDegraded
+            expr: longhorn_volume_robustness == 2
+            for: 15m
+            labels:
+              severity: warning
+            annotations:
+              summary: "Longhorn volume {{ $labels.volume }} degraded for >15m"
+              description: "Volume {{ $labels.volume }} on node {{ $labels.node }} has been degraded (one or more replicas unhealthy) for 15+ minutes. Auto-rebuild may need help — check 'kubectl describe volume.longhorn.io {{ $labels.volume }} -n longhorn-system'."
+
+          # Gauge value 3 = faulted (longhorn_volume_robustness encodes the
+          # state in its value: 0=unknown, 1=healthy, 2=degraded, 3=faulted).
+          - alert: LonghornVolumeFaulted
+            expr: longhorn_volume_robustness == 3
+            for: 5m
+            labels:
+              severity: critical
+              alert_channel: thermal_print
+            annotations:
+              summary: "Longhorn volume {{ $labels.volume }} FAULTED"
+              description: "Volume {{ $labels.volume }} on node {{ $labels.node }} is faulted — all replicas unavailable. Data inaccessible. Manual intervention required."
+
+          # No new completed backup in 36h indicates the daily-backup
+          # recurringJob is silently failing. Allows for one missed run +
+          # slack.
+          #
+          # longhorn_backup_state is a per-backup gauge whose VALUE is the
+          # state (3 = Completed), and the documented backup metrics carry no
+          # timestamp (actual_size_bytes is a size, not a time). Freshness is
+          # therefore detected structurally: a "new" backup is a completed
+          # (volume, backup) series that did not exist 36h ago. A volume that
+          # has completed backups but no new one is stale.
+          # NOTE(review): needs >=36h of Prometheus retention, and volumes
+          # that have never been backed up do not match — confirm both are
+          # acceptable.
+          - alert: LonghornBackupStale
+            expr: |
+              count by(volume) (longhorn_backup_state == 3)
+              unless
+              count by(volume) (
+                (longhorn_backup_state == 3)
+                unless on(volume, backup) (longhorn_backup_state offset 36h)
+              )
+            for: 1h
+            labels:
+              severity: warning
+            annotations:
+              summary: "Longhorn volume {{ $labels.volume }} has no completed backup in >36h"
+              description: "Daily backup recurringJob (cron 0 2 * * *) appears to have skipped this volume. Check 'kubectl get backups.longhorn.io -n longhorn-system' and the daily-backup CronJob logs."
+
+          - alert: LonghornNodeUnhealthy
+            expr: longhorn_node_status{condition="ready",condition_reason!=""} == 0
+            for: 5m
+            labels:
+              severity: warning
+            annotations:
+              summary: "Longhorn node {{ $labels.node }} not Ready"
+              description: "Node {{ $labels.node }} reports ready=false (reason: {{ $labels.condition_reason }}). Volumes scheduled to this node will be unavailable until it recovers."
+
 # =============================================================================
 # ConfigMap: Blackbox Exporter Configuration
 # =============================================================================
@@ -804,6 +1055,22 @@ data:
          fail_if_body_not_matches_regexp:
            - '"models"'
          preferred_ip_protocol: ip4
+      # https_internal — HTTP(S) probe for Traefik-fronted services with step-ca
+      # leaf certs. blackbox does not trust the step-ca root CA, so http_2xx
+      # against any *.iamworkin.lan host fails with x509 unknown authority; this
+      # module skips TLS verification instead, so it proves reachability only.
+      # Redirects are followed and 3xx codes accepted: some hosts 302 to /login or /scalar.
+      https_internal:
+        prober: http
+        timeout: 10s
+        http:
+          valid_http_versions: ["HTTP/1.1", "HTTP/2.0"]
+          valid_status_codes: [200, 301, 302, 303, 307, 308]
+          method: GET
+          follow_redirects: true
+          preferred_ip_protocol: ip4
+          tls_config:
+            insecure_skip_verify: true  # certificate chain and expiry are NOT validated by this probe

 # =============================================================================
 # ConfigMap: IRC Notify Script
@@ -4026,6 +4293,39 @@ spec:
          protocol: TCP
        - port: 8443
          protocol: TCP
+    # Traefik /metrics endpoint (port 9100) — separate from the data-path
+    # ports above. Required for the in-cluster `traefik` scrape job.
+    - to:
+        - namespaceSelector:
+            matchLabels:
+              kubernetes.io/metadata.name: traefik-system
+      ports:
+        - port: 9100
+          protocol: TCP
+    # kube-state-metrics — required for kubernetes-state alert group.
+    - to:
+        - namespaceSelector:
+            matchLabels:
+              kubernetes.io/metadata.name: kube-system
+      ports:
+        - port: 8080
+          protocol: TCP
+    # cert-manager metrics — required for CertManagerCertificate* alerts.
+    - to:
+        - namespaceSelector:
+            matchLabels:
+              kubernetes.io/metadata.name: cert-manager
+      ports:
+        - port: 9402
+          protocol: TCP
+    # Longhorn manager metrics — required for Longhorn* alerts.
+    - to:
+        - namespaceSelector:
+            matchLabels:
+              kubernetes.io/metadata.name: longhorn-system
+      ports:
+        - port: 9500
+          protocol: TCP
    # IRC (irc-notify → UnrealIRCd in irc namespace via K8s DNS)
    - to:
        - namespaceSelector:
--- a/apps/noc-services/noc-services.yaml
+++ b/apps/noc-services/noc-services.yaml
@@ -219,6 +219,65 @@ spec:
  tls:
    secretName: cockpit-tls
 ---
+# ============================================================
+# PuppetDB Dashboard - noc1:8080 (HTTP, web UI only)
+# Agent-to-PuppetDB mTLS still uses port 8081 directly via Puppet CA
+# (NOT via this proxy). See docs/infrastructure/cert-recovery-2026-04-28.md
+# ============================================================
+apiVersion: v1
+kind: Service
+metadata:
+  name: puppetdb-external
+  namespace: noc-proxy
+spec:
+  ports:
+    - port: 8080
+      targetPort: 8080
+      name: http  # same port name as in the puppetdb-external Endpoints object
+  clusterIP: None  # headless and selector-less: backends come from the same-named Endpoints object
+---
+apiVersion: v1
+kind: Endpoints
+metadata:
+  name: puppetdb-external  # same name/namespace as the Service, which is how the two are paired
+  namespace: noc-proxy
+subsets:
+  - addresses:
+      - ip: 10.0.56.10  # presumably noc1 (per the section header) — TODO confirm
+    ports:
+      - port: 8080
+        name: http
+---
+apiVersion: cert-manager.io/v1
+kind: Certificate
+metadata:
+  name: puppetdb-tls
+  namespace: noc-proxy
+spec:
+  secretName: puppetdb-tls  # referenced by the puppetdb IngressRoute's tls.secretName
+  issuerRef:
+    name: step-ca-acme
+    kind: ClusterIssuer
+  dnsNames:
+    - puppetdb.iamworkin.lan  # same host as the IngressRoute match rule
+---
+apiVersion: traefik.io/v1alpha1
+kind: IngressRoute
+metadata:
+  name: puppetdb
+  namespace: noc-proxy
+spec:
+  entryPoints:
+    - websecure
+  routes:
+    - kind: Rule  # NOTE(review): no middlewares on this route — confirm unauthenticated access to the noc1:8080 backend is intended
+      match: Host(`puppetdb.iamworkin.lan`)
+      services:
+        - name: puppetdb-external  # headless Service backed by the manual Endpoints object
+          port: 8080
+  tls:
+    secretName: puppetdb-tls
+---
 # NetworkPolicy: allow Traefik ingress, allow egress to noc1
 apiVersion: networking.k8s.io/v1
 kind: NetworkPolicy
@@ -242,6 +301,8 @@ spec:
      ports:
        - port: 3000
          protocol: TCP
+        - port: 8080
+          protocol: TCP
        - port: 9090
          protocol: TCP
        - port: 9091
--- a/apps/telephony/telephony.yaml
+++ b/apps/telephony/telephony.yaml
@@ -147,8 +147,8 @@ spec:
                  app: asterisk
              topologyKey: kubernetes.io/hostname
      containers:
-        - name: telephony-web
-          image: localhost/fc-telephony-web:v202604170153
+        - name: telephony-web
+          image: localhost/fc-telephony-web:v202604252156
          imagePullPolicy: Never
          securityContext:
            readOnlyRootFilesystem: true