diff --git a/apps/agent-zero/agent-zero.yaml b/apps/agent-zero/agent-zero.yaml index 04ca1f9..2b92c9d 100644 --- a/apps/agent-zero/agent-zero.yaml +++ b/apps/agent-zero/agent-zero.yaml @@ -92,14 +92,14 @@ subjects: # ============================================================================= # Agent Zero — AI Agent Web UI (NUC Edition, Blue Jay Profile) # ============================================================================= -# Connects to a local nginx proxy that routes to edge1 Pi 5 + AI HAT+ Ollama only -# Blue Jay profile with 21 tools, 3 prompts, 4 extensions +# Connects directly to fc-llm-bridge for chat + util + embeddings + browser. +# Blue Jay profile with 21 tools, 3 prompts, 4 extensions. --- -# FC LLM Bridge API key for Agent Zero (ADR-088 chat_model routing). +# FC LLM Bridge API key for Agent Zero (ADR-088 chat/util/embed/browser routing). # Syncs from 1Password item "FC LLM Bridge API Keys" (field: agent-zero-k8s). -# Consumed by the chat_model only; util / embedding / browser stay on local -# Ollama via the 127.0.0.1 sidecar proxy. +# Consumed by chat, util, embeddings, browser, and corpus-search requests +# that traverse fc-llm-bridge. apiVersion: onepassword.com/v1 kind: OnePasswordItem metadata: @@ -150,19 +150,18 @@ spec: spec: serviceAccountName: agent-zero initContainers: - # Wait for edge1 Ollama to be reachable before starting Agent Zero. - # (Workstation Ollama is intentionally NOT in the cluster path.) - - name: wait-for-ollama + # Wait for fc-llm-bridge to be reachable before starting Agent Zero. + - name: wait-for-llm-bridge image: busybox:1.37 command: ["sh", "-c"] args: - | - echo "Waiting for edge1 Ollama (10.0.57.17:11434)..." - until wget -qO- --timeout=2 http://10.0.57.17:11434/api/tags >/dev/null 2>&1; do - echo "edge1 Ollama not ready yet, retrying in 5s..." + echo "Waiting for fc-llm-bridge..." + until wget -qO- --timeout=2 http://fc-llm-bridge.fc-llm-bridge.svc:8080/healthz >/dev/null 2>&1; do + echo "fc-llm-bridge not ready yet, retrying in 5s..." sleep 5 done - echo "edge1 Ollama is reachable." + echo "fc-llm-bridge is reachable." # Assemble the Blue Jay profile directory structure from ConfigMaps. # ConfigMaps can't create nested dirs, so we copy into the workspace PVC. - name: setup-bluejay @@ -209,73 +208,6 @@ spec: - name: bluejay-theme mountPath: /tmp/bluejay-theme containers: - - name: ollama-proxy - image: nginx:1.27-alpine - command: ["/bin/sh", "-c"] - args: - - | - cat > /etc/nginx/nginx.conf <<'NGINX' - worker_processes 1; - events { worker_connections 1024; } - http { - upstream ollama_upstream { - # edge1 Pi 5 + AI HAT+ is the SOLE upstream. - # Workstation Ollama (BLUEJAY-WS) is private dev hardware and - # MUST NOT be added back here without explicit operator decision — - # adding it would expose the workstation to cluster traffic. - server 10.0.57.17:11434 max_fails=2 fail_timeout=10s; - keepalive 16; - } - server { - listen 11434; - # Local healthcheck — proves nginx itself is alive. - # Must NOT depend on upstream so liveness doesn't restart - # the container when edge1 is slow/offline. - location = /healthz { - access_log off; - return 200 'ok\n'; - default_type text/plain; - } - location / { - proxy_http_version 1.1; - proxy_set_header Connection ""; - proxy_set_header Host $host; - proxy_connect_timeout 5s; - proxy_read_timeout 600s; - proxy_send_timeout 600s; - proxy_next_upstream error timeout invalid_header http_502 http_503 http_504; - proxy_pass http://ollama_upstream; - } - } - } - NGINX - exec nginx -g 'daemon off;' - ports: - - containerPort: 11434 - # Readiness probe DOES check upstream so K8s only routes traffic - # when edge1 Ollama is reachable. timeoutSeconds=5 absorbs the Pi's - # slower TCP handshake under load (was timeoutSeconds=1 default → - # 172 historic restarts when the workstation primary path went down, - # before the cluster was repointed to edge1-only on 2026-04-27). - readinessProbe: - httpGet: - path: /api/tags - port: 11434 - initialDelaySeconds: 5 - periodSeconds: 15 - timeoutSeconds: 5 - failureThreshold: 3 - # Liveness probe hits ONLY local healthz — restarts the container - # only when nginx itself is dead. Decoupling liveness from upstream - # eliminates restart-loops caused by transient upstream outages. - livenessProbe: - httpGet: - path: /healthz - port: 11434 - initialDelaySeconds: 10 - periodSeconds: 30 - timeoutSeconds: 3 - failureThreshold: 3 - name: agent-zero image: agent0ai/agent-zero:latest command: ["/bin/bash", "-c"] @@ -296,12 +228,11 @@ spec: # chat_model: FlowerCore LLM Bridge (ADR-088) — OpenAI-compat, # spend-tracked, tier-aliased (fc:balanced → Claude Sonnet). # api_key comes from A0_SET_chat_model_api_key env var (overrides - # config.json). util + embedding go to local 127.0.0.1 nginx - # proxy which routes to edge1 Pi 5 + AI HAT+ ONLY (workstation - # is private dev hardware, intentionally not in the cluster path). + # config.json). Utility / embedding / browser all point at the + # same bridge root and use Ollama-compatible endpoints there. mkdir -p /a0/usr/plugins/_model_config cat > /a0/usr/plugins/_model_config/config.json << 'MODELCFG' - {"allow_chat_override":true,"chat_model":{"provider":"openai","name":"fc:balanced","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_history":0.7,"vision":false,"kwargs":{"temperature":0,"num_ctx":8192}},"utility_model":{"provider":"ollama","name":"qwen2.5:1.5b","api_base":"http://127.0.0.1:11434","ctx_length":8192,"ctx_input":0.7,"kwargs":{"num_ctx":8192}},"embedding_model":{"provider":"ollama","name":"nomic-embed-text","api_base":"http://127.0.0.1:11434","kwargs":{}}} + {"allow_chat_override":true,"chat_model":{"provider":"openai","name":"fc:balanced","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_history":0.7,"vision":false,"kwargs":{"temperature":0,"num_ctx":8192}},"utility_model":{"provider":"ollama","name":"qwen2.5:1.5b","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080","ctx_length":8192,"ctx_input":0.7,"kwargs":{"num_ctx":8192}},"embedding_model":{"provider":"ollama","name":"nomic-embed-text","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080","kwargs":{}}} MODELCFG # Strip heredoc indentation sed -i 's/^ //' /a0/usr/plugins/_model_config/config.json @@ -325,8 +256,8 @@ spec: # Chat model — routed through FlowerCore LLM Bridge (ADR-088) # so spend is tracked and tier aliases (fc:cheap/fc:balanced/fc:deep) # dispatch to Ollama or Anthropic via a single OpenAI-compat endpoint. - # Util / embedding / browser stay on local Ollama via 127.0.0.1 proxy - # for zero-latency, zero-cost small-model traffic. + # Utility / embedding / browser now traverse fc-llm-bridge too so + # Agent Zero no longer needs a local Ollama proxy sidecar. - name: A0_SET_chat_model_provider value: "openai" - name: A0_SET_chat_model_name @@ -348,6 +279,11 @@ spec: secretKeyRef: name: fc-llm-bridge-api-keys key: agent-zero-k8s + - name: FC_LLM_BRIDGE_API_KEY + valueFrom: + secretKeyRef: + name: fc-llm-bridge-api-keys + key: agent-zero-k8s - name: A0_SET_chat_model_ctx_length value: "8192" - name: A0_SET_chat_model_kwargs @@ -358,7 +294,12 @@ spec: - name: A0_SET_util_model_name value: "qwen2.5:1.5b" - name: A0_SET_util_model_api_base - value: "http://127.0.0.1:11434" + value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080" + - name: A0_SET_util_model_api_key + valueFrom: + secretKeyRef: + name: fc-llm-bridge-api-keys + key: agent-zero-k8s - name: A0_SET_util_model_kwargs value: '{"num_ctx": 2048}' # Embedding model — nomic through the same proxy @@ -367,16 +308,30 @@ spec: - name: A0_SET_embed_model_name value: "nomic-embed-text" - name: A0_SET_embed_model_api_base - value: "http://127.0.0.1:11434" + value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080" + - name: A0_SET_embed_model_api_key + valueFrom: + secretKeyRef: + name: fc-llm-bridge-api-keys + key: agent-zero-k8s # Browser model — small Gemma candidate through the same proxy - name: A0_SET_browser_model_provider value: "ollama" - name: A0_SET_browser_model_name value: "gemma3:4b" - name: A0_SET_browser_model_api_base - value: "http://127.0.0.1:11434" + value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080" + - name: A0_SET_browser_model_api_key + valueFrom: + secretKeyRef: + name: fc-llm-bridge-api-keys + key: agent-zero-k8s - name: A0_SET_browser_model_vision value: "true" + - name: OLLAMA_HOST + value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080" + - name: FLOWERCORE_AGENTZERO_OLLAMA_URL + value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080" # Agent profile — Blue Jay personality, tools, and system prompt - name: A0_SET_agent_profile value: "bluejay" @@ -452,7 +407,7 @@ spec: command: - /bin/bash - -c - - "curl -sf http://localhost:80/ > /dev/null && curl -sf --connect-timeout 3 http://127.0.0.1:11434/api/tags > /dev/null" + - "curl -sf http://localhost:80/ > /dev/null && curl -sf --connect-timeout 3 http://fc-llm-bridge.fc-llm-bridge.svc:8080/healthz > /dev/null" periodSeconds: 30 failureThreshold: 2 resources: @@ -590,13 +545,6 @@ spec: protocol: UDP - port: 53 protocol: TCP - # Ollama on edge1 Pi 5 + AI HAT+ (sole upstream — workstation - # is private dev hardware and intentionally not allowlisted) - - to: - - ipBlock: - cidr: 10.0.57.17/32 - ports: - - port: 11434 # Print.Web on edge2 - to: - ipBlock: diff --git a/apps/agent-zero/configmaps-bluejay.yaml b/apps/agent-zero/configmaps-bluejay.yaml index 75d80bc..d5c0974 100644 --- a/apps/agent-zero/configmaps-bluejay.yaml +++ b/apps/agent-zero/configmaps-bluejay.yaml @@ -7209,6 +7209,9 @@ data: "keep_alive": keep_alive, "stream": False, }) + curl_headers = ["-H", "Content-Type: application/json"] + if os.environ.get("FC_LLM_BRIDGE_API_KEY"): + curl_headers.extend(["-H", f"X-Api-Key: {os.environ['FC_LLM_BRIDGE_API_KEY']}"]) try: result = subprocess.run( @@ -7216,7 +7219,7 @@ data: "curl", "-s", "--max-time", "120", "-X", "POST", f"{api_base}/api/generate", - "-H", "Content-Type: application/json", + *curl_headers, "-d", payload, ], capture_output=True, @@ -13191,6 +13194,7 @@ data: "FLOWERCORE_AGENTZERO_OLLAMA_URL", "http://host.containers.internal:11434", ) + BRIDGE_API_KEY = os.environ.get("FC_LLM_BRIDGE_API_KEY", "").strip() EMBEDDING_MODEL = os.environ.get( "FLOWERCORE_FLEET_EMBEDDING_MODEL", "nomic-embed-text", @@ -13327,10 +13331,13 @@ data: def _embed(text: str) -> list: """Embed a query via Ollama's /api/embeddings. Single-vector response.""" body = json.dumps({"model": EMBEDDING_MODEL, "prompt": text}).encode("utf-8") + headers = {"Content-Type": "application/json"} + if BRIDGE_API_KEY: + headers["X-Api-Key"] = BRIDGE_API_KEY req = urllib.request.Request( f"{OLLAMA_BASE_URL.rstrip('/')}/api/embeddings", data=body, - headers={"Content-Type": "application/json"}, + headers=headers, ) with urllib.request.urlopen(req, timeout=60) as resp: data = json.loads(resp.read().decode("utf-8")) diff --git a/apps/fc-llm-bridge/fc-llm-bridge.yaml b/apps/fc-llm-bridge/fc-llm-bridge.yaml index 7ac9ea5..2c81901 100644 --- a/apps/fc-llm-bridge/fc-llm-bridge.yaml +++ b/apps/fc-llm-bridge/fc-llm-bridge.yaml @@ -116,6 +116,10 @@ spec: value: "default" - name: FlowerCore__LlmBridge__DefaultAppName value: "agent-zero" + - name: FlowerCore__LlmBridge__UtilModel + value: "qwen2.5:1.5b" + - name: FlowerCore__LlmBridge__EmbedModel + value: "nomic-embed-text" # Per-consumer API keys — from OnePasswordItem fc-llm-bridge-api-keys. # Each field becomes a Secret key of the same name. The key-name # lands in the auth principal's `fc.app` claim for ledger scoping.