fix(agent-zero): prefix bridge embedding alias for litellm

fix(agent-zero): keep internal util/embed on bridge v1
chore(bridge): bump fc-llm-bridge image tag v202604292028
2026-04-29 21:14:12 -05:00 · 2026-04-29 21:09:04 -05:00 · 2026-04-29 20:50:55 -05:00 · 2026-04-29 20:50:55 -05:00
3 changed files with 59 additions and 108 deletions
--- a/apps/agent-zero/agent-zero.yaml
+++ b/apps/agent-zero/agent-zero.yaml
@@ -92,16 +92,17 @@ subjects:
 # =============================================================================
 # Agent Zero — AI Agent Web UI (NUC Edition, Blue Jay Profile)
 # =============================================================================
-# Chat / utility / embedding lanes route through fc-llm-bridge. Browser keeps
+# Connects directly to fc-llm-bridge for chat + internal util/embed + browser.
-# a local nginx proxy to edge1 Pi 5 + AI HAT+ until the bridge grows a live
+# Agent Zero's internal util/embed slots stay on the bridge's OpenAI-compatible
-# Vision route and the in-pod tools stop calling Ollama directly.
+# /v1 surface, while browser + corpus-search use the Ollama-compatible /api/*
-# Blue Jay profile with 21 tools, 3 prompts, 4 extensions
+# surface through OLLAMA_HOST.
 # Blue Jay profile with 21 tools, 3 prompts, 4 extensions.
 ---
-# FC LLM Bridge API key for Agent Zero (ADR-088 chat / util / embed routing).
+# FC LLM Bridge API key for Agent Zero (ADR-088 chat/util/embed/browser routing).
 # Syncs from 1Password item "FC LLM Bridge API Keys" (field: agent-zero-k8s).
-# Consumed by the OpenAI-compatible chat / util / embedding lanes. Browser
+# Consumed by chat, internal util/embed, browser, and corpus-search requests
-# stays on the local Ollama sidecar until fc:vision is configured on the bridge.
+# that traverse fc-llm-bridge.
 apiVersion: onepassword.com/v1
 kind: OnePasswordItem
 metadata:
@@ -137,7 +138,7 @@ metadata:
  annotations:
    agent-zero/deployment: "nuc"
    agent-zero/profile: "bluejay"
-    agent-zero/ollama: "edge1 Pi 5 + AI HAT+ only (10.0.57.17:11434) — workstation Ollama is private dev hardware, not a cluster dependency"
+    agent-zero/ollama: "fc-llm-bridge fronts edge1 Pi 5 + AI HAT+ Ollama for cluster browser/corpus-search traffic; internal chat/util/embed route through the bridge's authenticated OpenAI surface"
 spec:
  replicas: 1
  selector:
@@ -152,19 +153,18 @@ spec:
    spec:
      serviceAccountName: agent-zero
      initContainers:
-        # Wait for edge1 Ollama to be reachable before starting Agent Zero.
+        # Wait for fc-llm-bridge to be reachable before starting Agent Zero.
-        # (Workstation Ollama is intentionally NOT in the cluster path.)
+        - name: wait-for-llm-bridge
-        - name: wait-for-ollama
          image: busybox:1.37
          command: ["sh", "-c"]
          args:
            - |
-              echo "Waiting for edge1 Ollama (10.0.57.17:11434)..."
+              echo "Waiting for fc-llm-bridge..."
-              until wget -qO- --timeout=2 http://10.0.57.17:11434/api/tags >/dev/null 2>&1; do
+              until wget -qO- --timeout=2 http://fc-llm-bridge.fc-llm-bridge.svc:8080/healthz >/dev/null 2>&1; do
-                echo "edge1 Ollama not ready yet, retrying in 5s..."
+                echo "fc-llm-bridge not ready yet, retrying in 5s..."
                sleep 5
              done
-              echo "edge1 Ollama is reachable."
+              echo "fc-llm-bridge is reachable."
        # Assemble the Blue Jay profile directory structure from ConfigMaps.
        # ConfigMaps can't create nested dirs, so we copy into the workspace PVC.
        - name: setup-bluejay
@@ -211,73 +211,6 @@ spec:
            - name: bluejay-theme
              mountPath: /tmp/bluejay-theme
      containers:
-        - name: ollama-proxy
-          image: nginx:1.27-alpine
-          command: ["/bin/sh", "-c"]
-          args:
-            - |
-              cat > /etc/nginx/nginx.conf <<'NGINX'
-              worker_processes  1;
-              events { worker_connections 1024; }
-              http {
-                upstream ollama_upstream {
-                  # edge1 Pi 5 + AI HAT+ is the SOLE upstream.
-                  # Workstation Ollama (BLUEJAY-WS) is private dev hardware and
-                  # MUST NOT be added back here without explicit operator decision —
-                  # adding it would expose the workstation to cluster traffic.
-                  server 10.0.57.17:11434 max_fails=2 fail_timeout=10s;
-                  keepalive 16;
-                }
-                server {
-                  listen 11434;
-                  # Local healthcheck — proves nginx itself is alive.
-                  # Must NOT depend on upstream so liveness doesn't restart
-                  # the container when edge1 is slow/offline.
-                  location = /healthz {
-                    access_log off;
-                    return 200 'ok\n';
-                    default_type text/plain;
-                  }
-                  location / {
-                    proxy_http_version 1.1;
-                    proxy_set_header Connection "";
-                    proxy_set_header Host $host;
-                    proxy_connect_timeout 5s;
-                    proxy_read_timeout 600s;
-                    proxy_send_timeout 600s;
-                    proxy_next_upstream error timeout invalid_header http_502 http_503 http_504;
-                    proxy_pass http://ollama_upstream;
-                  }
-                }
-              }
-              NGINX
-              exec nginx -g 'daemon off;'
-          ports:
-            - containerPort: 11434
-          # Readiness probe DOES check upstream so K8s only routes traffic
-          # when edge1 Ollama is reachable. timeoutSeconds=5 absorbs the Pi's
-          # slower TCP handshake under load (was timeoutSeconds=1 default →
-          # 172 historic restarts when the workstation primary path went down,
-          # before the cluster was repointed to edge1-only on 2026-04-27).
-          readinessProbe:
-            httpGet:
-              path: /api/tags
-              port: 11434
-            initialDelaySeconds: 5
-            periodSeconds: 15
-            timeoutSeconds: 5
-            failureThreshold: 3
-          # Liveness probe hits ONLY local healthz — restarts the container
-          # only when nginx itself is dead. Decoupling liveness from upstream
-          # eliminates restart-loops caused by transient upstream outages.
-          livenessProbe:
-            httpGet:
-              path: /healthz
-              port: 11434
-            initialDelaySeconds: 10
-            periodSeconds: 30
-            timeoutSeconds: 3
-            failureThreshold: 3
        - name: agent-zero
          image: agent0ai/agent-zero:latest
          command: ["/bin/bash", "-c"]
@@ -297,14 +230,13 @@ spec:
              # The _model_config plugin reads config.json (NOT config.yaml).
              # chat_model: FlowerCore LLM Bridge (ADR-088) — OpenAI-compat,
              # spend-tracked, tier-aliased (fc:balanced → Claude Sonnet).
-              # api_key comes from OPENAI_API_KEY / A0_SET_chat_model_api_key.
+              # api_key comes from A0_SET_chat_model_api_key env var (overrides
-              # Utility + embedding now share the same bridge surface so Agent
+              # config.json). Utility + embedding stay on the authenticated
-              # Zero stops talking to Ollama directly for those model lanes.
+              # OpenAI-compatible /v1 surface; browser and direct tool traffic
-              # Browser stays on the local 127.0.0.1 proxy until the bridge has
+              # use the bridge's Ollama-compatible root via OLLAMA_HOST.
-              # a live Vision route and the in-pod tools stop calling Ollama.
              mkdir -p /a0/usr/plugins/_model_config
              cat > /a0/usr/plugins/_model_config/config.json << 'MODELCFG'
-              {"allow_chat_override":true,"chat_model":{"provider":"openai","name":"fc:balanced","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_history":0.7,"vision":false,"kwargs":{"temperature":0,"num_ctx":8192}},"utility_model":{"provider":"openai","name":"fc:cheap","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_input":0.7,"kwargs":{"num_ctx":8192}},"embedding_model":{"provider":"openai","name":"fc:embedding","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","kwargs":{}}}
+              {"allow_chat_override":true,"chat_model":{"provider":"openai","name":"fc:balanced","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_history":0.7,"vision":false,"kwargs":{"temperature":0,"num_ctx":8192}},"utility_model":{"provider":"openai","name":"fc:cheap","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_input":0.7,"kwargs":{"num_ctx":8192}},"embedding_model":{"provider":"openai","name":"openai/fc:embedding","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","kwargs":{}}}
              MODELCFG
              # Strip heredoc indentation
              sed -i 's/^              //' /a0/usr/plugins/_model_config/config.json
@@ -328,9 +260,9 @@ spec:
            # Chat model — routed through FlowerCore LLM Bridge (ADR-088)
            # so spend is tracked and tier aliases (fc:cheap/fc:balanced/fc:deep)
            # dispatch to Ollama or Anthropic via a single OpenAI-compat endpoint.
-            # Utility + embedding now share the bridge/auth surface too.
+            # Internal utility + embedding use the authenticated OpenAI surface,
-            # Browser stays on local Ollama until the bridge has a live
+            # while browser/corpus-search use the bridge's Ollama-compatible
-            # Vision route and the in-pod tools stop calling Ollama directly.
+            # endpoints so Agent Zero no longer needs a local proxy sidecar.
            - name: A0_SET_chat_model_provider
              value: "openai"
            - name: A0_SET_chat_model_name
@@ -352,11 +284,16 @@ spec:
                secretKeyRef:
                  name: fc-llm-bridge-api-keys
                  key: agent-zero-k8s
+            - name: FC_LLM_BRIDGE_API_KEY
+              valueFrom:
+                secretKeyRef:
+                  name: fc-llm-bridge-api-keys
+                  key: agent-zero-k8s
            - name: A0_SET_chat_model_ctx_length
              value: "8192"
            - name: A0_SET_chat_model_kwargs
              value: '{"temperature": 0, "num_ctx": 8192}'
-            # Utility model — fast small helper tier through the same proxy
+            # Utility model — fast small helper tier through the OpenAI surface
            - name: A0_SET_util_model_provider
              value: "openai"
            - name: A0_SET_util_model_name
@@ -365,23 +302,33 @@ spec:
              value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1"
            - name: A0_SET_util_model_kwargs
              value: '{"num_ctx": 2048}'
-            # Embedding model — bridge alias to nomic-embed-text on edge1
+            # Embedding model — authenticated bridge alias to nomic-embed-text.
            # LiteLLM's embedding() path needs an explicit provider prefix here
            # even though the chat slot can use bare fc:* aliases.
            - name: A0_SET_embed_model_provider
              value: "openai"
            - name: A0_SET_embed_model_name
-              value: "fc:embedding"
+              value: "openai/fc:embedding"
            - name: A0_SET_embed_model_api_base
              value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1"
-            # Browser model — small Gemma candidate stays on the local proxy
+            # Browser model — small Gemma candidate via the bridge's Ollama-compatible surface
            # until fc:vision is configured on the bridge.
            - name: A0_SET_browser_model_provider
              value: "ollama"
            - name: A0_SET_browser_model_name
              value: "gemma3:4b"
            - name: A0_SET_browser_model_api_base
-              value: "http://127.0.0.1:11434"
+              value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080"
            - name: A0_SET_browser_model_api_key
              valueFrom:
                secretKeyRef:
                  name: fc-llm-bridge-api-keys
                  key: agent-zero-k8s
            - name: A0_SET_browser_model_vision
              value: "true"
            - name: OLLAMA_HOST
              value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080"
            - name: FLOWERCORE_AGENTZERO_OLLAMA_URL
              value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080"
            # Agent profile — Blue Jay personality, tools, and system prompt
            - name: A0_SET_agent_profile
              value: "bluejay"
@@ -457,7 +404,7 @@ spec:
              command:
                - /bin/bash
                - -c
-                - "curl -sf http://localhost:80/ > /dev/null && curl -sf --connect-timeout 3 http://127.0.0.1:11434/api/tags > /dev/null"
+                - "curl -sf http://localhost:80/ > /dev/null && curl -sf --connect-timeout 3 http://fc-llm-bridge.fc-llm-bridge.svc:8080/healthz > /dev/null"
            periodSeconds: 30
            failureThreshold: 2
          resources:
@@ -595,13 +542,6 @@ spec:
          protocol: UDP
        - port: 53
          protocol: TCP
-    # Ollama on edge1 Pi 5 + AI HAT+ (sole upstream — workstation
-    # is private dev hardware and intentionally not allowlisted)
-    - to:
-        - ipBlock:
-            cidr: 10.0.57.17/32
-      ports:
-        - port: 11434
    # Print.Web on edge2
    - to:
        - ipBlock:
--- a/apps/agent-zero/configmaps-bluejay.yaml
+++ b/apps/agent-zero/configmaps-bluejay.yaml
@@ -7209,6 +7209,9 @@ data:
            "keep_alive": keep_alive,
            "stream": False,
        })
+        curl_headers = ["-H", "Content-Type: application/json"]
+        if os.environ.get("FC_LLM_BRIDGE_API_KEY"):
+            curl_headers.extend(["-H", f"X-Api-Key: {os.environ['FC_LLM_BRIDGE_API_KEY']}"])
        try:
            result = subprocess.run(
@@ -7216,7 +7219,7 @@ data:
                    "curl", "-s", "--max-time", "120",
                    "-X", "POST",
                    f"{api_base}/api/generate",
-                    "-H", "Content-Type: application/json",
+                    *curl_headers,
                    "-d", payload,
                ],
                capture_output=True,
@@ -13191,6 +13194,7 @@ data:
        "FLOWERCORE_AGENTZERO_OLLAMA_URL",
        "http://host.containers.internal:11434",
    )
+    BRIDGE_API_KEY = os.environ.get("FC_LLM_BRIDGE_API_KEY", "").strip()
    EMBEDDING_MODEL = os.environ.get(
        "FLOWERCORE_FLEET_EMBEDDING_MODEL",
        "nomic-embed-text",
@@ -13327,10 +13331,13 @@ data:
    def _embed(text: str) -> list:
        """Embed a query via Ollama's /api/embeddings. Single-vector response."""
        body = json.dumps({"model": EMBEDDING_MODEL, "prompt": text}).encode("utf-8")
+        headers = {"Content-Type": "application/json"}
+        if BRIDGE_API_KEY:
+            headers["X-Api-Key"] = BRIDGE_API_KEY
        req = urllib.request.Request(
            f"{OLLAMA_BASE_URL.rstrip('/')}/api/embeddings",
            data=body,
-            headers={"Content-Type": "application/json"},
+            headers=headers,
        )
        with urllib.request.urlopen(req, timeout=60) as resp:
            data = json.loads(resp.read().decode("utf-8"))
--- a/apps/fc-llm-bridge/fc-llm-bridge.yaml
+++ b/apps/fc-llm-bridge/fc-llm-bridge.yaml
@@ -97,7 +97,7 @@ spec:
          #   dotnet.exe publish -c Release -o deploy/app \
          #     src/FlowerCore.LlmBridge.Web/FlowerCore.LlmBridge.Web.csproj
          #   podman build -t localhost/fc-llm-bridge:v<tag> -f deploy/Dockerfile.deploy deploy
-          image: localhost/fc-llm-bridge:v202604231520
+          image: localhost/fc-llm-bridge:v202604292028
          imagePullPolicy: Never
          ports:
            - containerPort: 8080
@@ -116,6 +116,10 @@ spec:
              value: "default"
            - name: FlowerCore__LlmBridge__DefaultAppName
              value: "agent-zero"
+            - name: FlowerCore__LlmBridge__UtilModel
+              value: "qwen2.5:1.5b"
+            - name: FlowerCore__LlmBridge__EmbedModel
+              value: "nomic-embed-text"
            # Per-consumer API keys — from OnePasswordItem fc-llm-bridge-api-keys.
            # Each field becomes a Secret key of the same name. The key-name
            # lands in the auth principal's `fc.app` claim for ledger scoping.
Author	SHA1	Message	Date
Andrew Stoltz	b1ad253dd6	fix(agent-zero): prefix bridge embedding alias for litellm	2026-04-29 21:14:12 -05:00
Andrew Stoltz	ee935f6e07	fix(agent-zero): keep internal util/embed on bridge v1	2026-04-29 21:09:04 -05:00
Andrew Stoltz	2853ee2024	chore(bridge): bump fc-llm-bridge image tag v202604292028	2026-04-29 20:50:55 -05:00
Andrew Stoltz	b4a34e16ca	refactor(agent-zero): drop ollama-proxy sidecar (Phase 3)	2026-04-29 20:50:55 -05:00