agent-zero: drop BLUEJAY-WS upstream, edge1 Pi is sole Ollama backend

The workstation (BLUEJAY-WS) is private dev hardware and should not be in the
cluster path. This commit repoints the nginx ollama-proxy sidecar so that the
cluster Agent Zero talks ONLY to the edge1 Pi 5 + AI HAT+ (10.0.57.17:11434):

- nginx upstream: edge1 sole server, no workstation entry
- wait-for-ollama init container: only checks edge1
- NetworkPolicy egress: drop 10.0.56.20/32, keep 10.0.57.17/32
- Comments updated throughout to flag workstation as off-limits to cluster
- Annotation rewritten to document the architectural intent

qwen2.5:1.5b was pulled onto edge1 before the cutover so that Agent Zero's
utility_model keeps working (models already on edge1: qwen3:4b, gemma3:4b,
qwen2.5-coder:7b, nomic-embed-text). Model count on edge1: 4 → 5.

This lets BLUEJAY-WS lock down its Ollama port to localhost without breaking
the cluster Agent Zero.
This commit is contained in:
Andrew Stoltz
2026-04-27 16:30:44 -05:00
parent 3bf6511d5d
commit 0f9d56ee16

View File

@@ -2,14 +2,15 @@
# Agent Zero AI Stack — NUC Deployment (RKE2 Bare-Metal) # Agent Zero AI Stack — NUC Deployment (RKE2 Bare-Metal)
# ============================================================================= # =============================================================================
# Deploys: AgentZero (agent UI) on RKE2 cluster with Blue Jay profile # Deploys: AgentZero (agent UI) on RKE2 cluster with Blue Jay profile
# Ollama: workstation-first via BLUEJAY-WS (10.0.56.20:11434) with edge1 Pi 5 # Ollama: edge1 Pi 5 + AI HAT+ ONLY (10.0.57.17:11434).
# fallback (10.0.57.17:11434) # Workstation Ollama (BLUEJAY-WS) is intentionally NOT in the upstream —
# the workstation is private dev hardware, not a cluster dependency.
# Target: RKE2 bare-metal cluster, namespace: agent-zero # Target: RKE2 bare-metal cluster, namespace: agent-zero
# Profile: Blue Jay (21 tools, 3 prompts, 4 extensions, theme) # Profile: Blue Jay (21 tools, 3 prompts, 4 extensions, theme)
# #
# Differences from LOCAL (WSL K3s): # Differences from LOCAL (WSL K3s):
# - Uses Longhorn StorageClass (not local-path) # - Uses Longhorn StorageClass (not local-path)
# - Prefers workstation Ollama on the R9700, falls back to edge1 Pi 5 # - Cluster-only Ollama path (edge1) — keeps workstation private
# - NO Anthropic API key (free/local models only) # - NO Anthropic API key (free/local models only)
# - NO Piper TTS or Kiwix (edge1 handles TTS, no Wikipedia needed) # - NO Piper TTS or Kiwix (edge1 handles TTS, no Wikipedia needed)
# - NO hostPath volumes — profile/tools/extensions loaded via ConfigMaps # - NO hostPath volumes — profile/tools/extensions loaded via ConfigMaps
@@ -91,7 +92,7 @@ subjects:
# ============================================================================= # =============================================================================
# Agent Zero — AI Agent Web UI (NUC Edition, Blue Jay Profile) # Agent Zero — AI Agent Web UI (NUC Edition, Blue Jay Profile)
# ============================================================================= # =============================================================================
# Connects to a local proxy that routes to workstation Ollama first and edge1 second # Connects to a local nginx proxy that routes to edge1 Pi 5 + AI HAT+ Ollama only
# Blue Jay profile with 21 tools, 3 prompts, 4 extensions # Blue Jay profile with 21 tools, 3 prompts, 4 extensions
--- ---
@@ -118,7 +119,7 @@ metadata:
annotations: annotations:
agent-zero/deployment: "nuc" agent-zero/deployment: "nuc"
agent-zero/profile: "bluejay" agent-zero/profile: "bluejay"
agent-zero/ollama: "BLUEJAY-WS primary (10.0.56.20:11434), edge1 fallback (10.0.57.17:11434)" agent-zero/ollama: "edge1 Pi 5 + AI HAT+ only (10.0.57.17:11434) — workstation Ollama is private dev hardware, not a cluster dependency"
spec: spec:
replicas: 1 replicas: 1
selector: selector:
@@ -133,19 +134,19 @@ spec:
spec: spec:
serviceAccountName: agent-zero serviceAccountName: agent-zero
initContainers: initContainers:
# Wait for either workstation or edge1 Ollama to be reachable before starting Agent Zero. # Wait for edge1 Ollama to be reachable before starting Agent Zero.
# (Workstation Ollama is intentionally NOT in the cluster path.)
- name: wait-for-ollama - name: wait-for-ollama
image: busybox:1.37 image: busybox:1.37
command: ["sh", "-c"] command: ["sh", "-c"]
args: args:
- | - |
echo "Waiting for Ollama at BLUEJAY-WS or edge1..." echo "Waiting for edge1 Ollama (10.0.57.17:11434)..."
until wget -qO- --timeout=2 http://10.0.56.20:11434/api/tags >/dev/null 2>&1 || \ until wget -qO- --timeout=2 http://10.0.57.17:11434/api/tags >/dev/null 2>&1; do
wget -qO- --timeout=2 http://10.0.57.17:11434/api/tags >/dev/null 2>&1; do echo "edge1 Ollama not ready yet, retrying in 5s..."
echo "No Ollama endpoint ready yet, retrying in 5s..."
sleep 5 sleep 5
done done
echo "At least one Ollama endpoint is reachable." echo "edge1 Ollama is reachable."
# Assemble the Blue Jay profile directory structure from ConfigMaps. # Assemble the Blue Jay profile directory structure from ConfigMaps.
# ConfigMaps can't create nested dirs, so we copy into the workspace PVC. # ConfigMaps can't create nested dirs, so we copy into the workspace PVC.
- name: setup-bluejay - name: setup-bluejay
@@ -202,16 +203,18 @@ spec:
events { worker_connections 1024; } events { worker_connections 1024; }
http { http {
upstream ollama_upstream { upstream ollama_upstream {
server 10.0.56.20:11434 max_fails=2 fail_timeout=10s; # edge1 Pi 5 + AI HAT+ is the SOLE upstream.
server 10.0.57.17:11434 backup; # Workstation Ollama (BLUEJAY-WS) is private dev hardware and
# MUST NOT be added back here without explicit operator decision —
# adding it would expose the workstation to cluster traffic.
server 10.0.57.17:11434 max_fails=2 fail_timeout=10s;
keepalive 16; keepalive 16;
} }
server { server {
listen 11434; listen 11434;
# Local healthcheck — proves nginx itself is alive. # Local healthcheck — proves nginx itself is alive.
# Must NOT depend on upstream so liveness doesn't restart # Must NOT depend on upstream so liveness doesn't restart
# the container when BLUEJAY-WS Ollama is slow/offline # the container when edge1 is slow/offline.
# and nginx is mid-failover to the edge1 backup.
location = /healthz { location = /healthz {
access_log off; access_log off;
return 200 'ok\n'; return 200 'ok\n';
@@ -234,10 +237,10 @@ spec:
ports: ports:
- containerPort: 11434 - containerPort: 11434
# Readiness probe DOES check upstream so K8s only routes traffic # Readiness probe DOES check upstream so K8s only routes traffic
# when at least one Ollama backend is reachable. timeoutSeconds=5 # when edge1 Ollama is reachable. timeoutSeconds=5 absorbs the Pi's
# allows nginx to fail over from BLUEJAY-WS primary to edge1 # slower TCP handshake under load (was timeoutSeconds=1 default →
# backup before the probe fails (was timeoutSeconds=1 default → # 172 historic restarts when the workstation primary path went down,
# 172 historic restarts when workstation Ollama was down). # before the cluster was repointed to edge1-only on 2026-04-27).
readinessProbe: readinessProbe:
httpGet: httpGet:
path: /api/tags path: /api/tags
@@ -277,8 +280,9 @@ spec:
# chat_model: FlowerCore LLM Bridge (ADR-088) — OpenAI-compat, # chat_model: FlowerCore LLM Bridge (ADR-088) — OpenAI-compat,
# spend-tracked, tier-aliased (fc:balanced → Claude Sonnet). # spend-tracked, tier-aliased (fc:balanced → Claude Sonnet).
# api_key comes from A0_SET_chat_model_api_key env var (overrides # api_key comes from A0_SET_chat_model_api_key env var (overrides
# config.json). util + embedding stay on local 127.0.0.1 Ollama # config.json). util + embedding go to local 127.0.0.1 nginx
# proxy (workstation primary, edge1 fallback). # proxy which routes to edge1 Pi 5 + AI HAT+ ONLY (workstation
# is private dev hardware, intentionally not in the cluster path).
mkdir -p /a0/usr/plugins/_model_config mkdir -p /a0/usr/plugins/_model_config
cat > /a0/usr/plugins/_model_config/config.json << 'MODELCFG' cat > /a0/usr/plugins/_model_config/config.json << 'MODELCFG'
{"allow_chat_override":true,"chat_model":{"provider":"openai","name":"fc:balanced","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_history":0.7,"vision":false,"kwargs":{"temperature":0,"num_ctx":8192}},"utility_model":{"provider":"ollama","name":"qwen2.5:1.5b","api_base":"http://127.0.0.1:11434","ctx_length":8192,"ctx_input":0.7,"kwargs":{"num_ctx":8192}},"embedding_model":{"provider":"ollama","name":"nomic-embed-text","api_base":"http://127.0.0.1:11434","kwargs":{}}} {"allow_chat_override":true,"chat_model":{"provider":"openai","name":"fc:balanced","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_history":0.7,"vision":false,"kwargs":{"temperature":0,"num_ctx":8192}},"utility_model":{"provider":"ollama","name":"qwen2.5:1.5b","api_base":"http://127.0.0.1:11434","ctx_length":8192,"ctx_input":0.7,"kwargs":{"num_ctx":8192}},"embedding_model":{"provider":"ollama","name":"nomic-embed-text","api_base":"http://127.0.0.1:11434","kwargs":{}}}
@@ -554,13 +558,8 @@ spec:
protocol: UDP protocol: UDP
- port: 53 - port: 53
protocol: TCP protocol: TCP
# Ollama on BLUEJAY-WS # Ollama on edge1 Pi 5 + AI HAT+ (sole upstream — workstation
- to: # is private dev hardware and intentionally not allowlisted)
- ipBlock:
cidr: 10.0.56.20/32
ports:
- port: 11434
# Ollama on edge1 fallback
- to: - to:
- ipBlock: - ipBlock:
cidr: 10.0.57.17/32 cidr: 10.0.57.17/32