diff --git a/apps/agent-zero/agent-zero.yaml b/apps/agent-zero/agent-zero.yaml index 79fb95f..824ccec 100644 --- a/apps/agent-zero/agent-zero.yaml +++ b/apps/agent-zero/agent-zero.yaml @@ -2,14 +2,15 @@ # Agent Zero AI Stack — NUC Deployment (RKE2 Bare-Metal) # ============================================================================= # Deploys: AgentZero (agent UI) on RKE2 cluster with Blue Jay profile -# Ollama: workstation-first via BLUEJAY-WS (10.0.56.20:11434) with edge1 Pi 5 -# fallback (10.0.57.17:11434) +# Ollama: edge1 Pi 5 + AI HAT+ ONLY (10.0.57.17:11434). +# Workstation Ollama (BLUEJAY-WS) is intentionally NOT in the upstream — +# the workstation is private dev hardware, not a cluster dependency. # Target: RKE2 bare-metal cluster, namespace: agent-zero # Profile: Blue Jay (21 tools, 3 prompts, 4 extensions, theme) # # Differences from LOCAL (WSL K3s): # - Uses Longhorn StorageClass (not local-path) -# - Prefers workstation Ollama on the R9700, falls back to edge1 Pi 5 +# - Cluster-only Ollama path (edge1) — keeps workstation private # - NO Anthropic API key (free/local models only) # - NO Piper TTS or Kiwix (edge1 handles TTS, no Wikipedia needed) # - NO hostPath volumes — profile/tools/extensions loaded via ConfigMaps @@ -91,7 +92,7 @@ subjects: # ============================================================================= # Agent Zero — AI Agent Web UI (NUC Edition, Blue Jay Profile) # ============================================================================= -# Connects to a local proxy that routes to workstation Ollama first and edge1 second +# Connects to a local nginx proxy that routes to edge1 Pi 5 + AI HAT+ Ollama only # Blue Jay profile with 21 tools, 3 prompts, 4 extensions --- @@ -118,7 +119,7 @@ metadata: annotations: agent-zero/deployment: "nuc" agent-zero/profile: "bluejay" - agent-zero/ollama: "BLUEJAY-WS primary (10.0.56.20:11434), edge1 fallback (10.0.57.17:11434)" + agent-zero/ollama: "edge1 Pi 5 + AI HAT+ only 
(10.0.57.17:11434) — workstation Ollama is private dev hardware, not a cluster dependency" spec: replicas: 1 selector: @@ -133,19 +134,19 @@ spec: spec: serviceAccountName: agent-zero initContainers: - # Wait for either workstation or edge1 Ollama to be reachable before starting Agent Zero. + # Wait for edge1 Ollama to be reachable before starting Agent Zero. + # (Workstation Ollama is intentionally NOT in the cluster path.) - name: wait-for-ollama image: busybox:1.37 command: ["sh", "-c"] args: - | - echo "Waiting for Ollama at BLUEJAY-WS or edge1..." - until wget -qO- --timeout=2 http://10.0.56.20:11434/api/tags >/dev/null 2>&1 || \ - wget -qO- --timeout=2 http://10.0.57.17:11434/api/tags >/dev/null 2>&1; do - echo "No Ollama endpoint ready yet, retrying in 5s..." + echo "Waiting for edge1 Ollama (10.0.57.17:11434)..." + until wget -qO- --timeout=2 http://10.0.57.17:11434/api/tags >/dev/null 2>&1; do + echo "edge1 Ollama not ready yet, retrying in 5s..." sleep 5 done - echo "At least one Ollama endpoint is reachable." + echo "edge1 Ollama is reachable." # Assemble the Blue Jay profile directory structure from ConfigMaps. # ConfigMaps can't create nested dirs, so we copy into the workspace PVC. - name: setup-bluejay @@ -202,16 +203,18 @@ spec: events { worker_connections 1024; } http { upstream ollama_upstream { - server 10.0.56.20:11434 max_fails=2 fail_timeout=10s; - server 10.0.57.17:11434 backup; + # edge1 Pi 5 + AI HAT+ is the SOLE upstream (nginx ignores max_fails/ + # fail_timeout for a single-server group). Workstation Ollama (BLUEJAY-WS) + # is private dev hardware and MUST NOT be added back without an explicit + # operator decision — it would expose the workstation to cluster traffic. + server 10.0.57.17:11434 max_fails=2 fail_timeout=10s; keepalive 16; } server { listen 11434; # Local healthcheck — proves nginx itself is alive. # Must NOT depend on upstream so liveness doesn't restart - # the container when BLUEJAY-WS Ollama is slow/offline - # and nginx is mid-failover to the edge1 backup. 
+ # the container when edge1 is slow/offline. location = /healthz { access_log off; return 200 'ok\n'; @@ -234,10 +237,10 @@ spec: ports: - containerPort: 11434 # Readiness probe DOES check upstream so K8s only routes traffic - # when at least one Ollama backend is reachable. timeoutSeconds=5 - # allows nginx to fail over from BLUEJAY-WS primary to edge1 - # backup before the probe fails (was timeoutSeconds=1 default → - # 172 historic restarts when workstation Ollama was down). + # when edge1 Ollama is reachable. timeoutSeconds=5 absorbs the Pi's + # slower TCP handshake under load (was timeoutSeconds=1 default → + # 172 historic restarts when the workstation primary path went down, + # before the cluster was repointed to edge1-only on 2026-04-27). readinessProbe: httpGet: path: /api/tags @@ -277,8 +280,9 @@ spec: # chat_model: FlowerCore LLM Bridge (ADR-088) — OpenAI-compat, # spend-tracked, tier-aliased (fc:balanced → Claude Sonnet). # api_key comes from A0_SET_chat_model_api_key env var (overrides - # config.json). util + embedding stay on local 127.0.0.1 Ollama - # proxy (workstation primary, edge1 fallback). + # config.json). util + embedding go to local 127.0.0.1 nginx + # proxy which routes to edge1 Pi 5 + AI HAT+ ONLY (workstation + # is private dev hardware, intentionally not in the cluster path). 
mkdir -p /a0/usr/plugins/_model_config cat > /a0/usr/plugins/_model_config/config.json << 'MODELCFG' {"allow_chat_override":true,"chat_model":{"provider":"openai","name":"fc:balanced","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_history":0.7,"vision":false,"kwargs":{"temperature":0,"num_ctx":8192}},"utility_model":{"provider":"ollama","name":"qwen2.5:1.5b","api_base":"http://127.0.0.1:11434","ctx_length":8192,"ctx_input":0.7,"kwargs":{"num_ctx":8192}},"embedding_model":{"provider":"ollama","name":"nomic-embed-text","api_base":"http://127.0.0.1:11434","kwargs":{}}} @@ -554,13 +558,8 @@ spec: protocol: UDP - port: 53 protocol: TCP - # Ollama on BLUEJAY-WS - - to: - - ipBlock: - cidr: 10.0.56.20/32 - ports: - - port: 11434 - # Ollama on edge1 fallback + # Ollama on edge1 Pi 5 + AI HAT+ (sole upstream — workstation + # is private dev hardware and intentionally not allowlisted) - to: - ipBlock: cidr: 10.0.57.17/32