agent-zero: drop BLUEJAY-WS upstream, edge1 Pi is sole Ollama backend
Workstation (BLUEJAY-WS) is private dev hardware and should not be in the cluster path. Repointing the nginx ollama-proxy sidecar so cluster Agent Zero talks ONLY to edge1 Pi 5 + AI HAT+ (10.0.57.17:11434):

- nginx upstream: edge1 sole server, no workstation entry
- wait-for-ollama init container: only checks edge1
- NetworkPolicy egress: drop 10.0.56.20/32, keep 10.0.57.17/32
- Comments updated throughout to flag workstation as off-limits to cluster
- Annotation rewritten to document the architectural intent

Pulled qwen2.5:1.5b on edge1 first so Agent Zero's utility_model survives the cutover (existing models on edge1: qwen3:4b, gemma3:4b, qwen2.5-coder:7b, nomic-embed-text). Model count on edge1: 4 → 5.

This lets BLUEJAY-WS lock down its Ollama port to localhost without breaking the cluster Agent Zero.
This commit is contained in:
@@ -2,14 +2,15 @@
|
|||||||
# Agent Zero AI Stack — NUC Deployment (RKE2 Bare-Metal)
|
# Agent Zero AI Stack — NUC Deployment (RKE2 Bare-Metal)
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Deploys: AgentZero (agent UI) on RKE2 cluster with Blue Jay profile
|
# Deploys: AgentZero (agent UI) on RKE2 cluster with Blue Jay profile
|
||||||
# Ollama: workstation-first via BLUEJAY-WS (10.0.56.20:11434) with edge1 Pi 5
|
# Ollama: edge1 Pi 5 + AI HAT+ ONLY (10.0.57.17:11434).
|
||||||
# fallback (10.0.57.17:11434)
|
# Workstation Ollama (BLUEJAY-WS) is intentionally NOT in the upstream —
|
||||||
|
# the workstation is private dev hardware, not a cluster dependency.
|
||||||
# Target: RKE2 bare-metal cluster, namespace: agent-zero
|
# Target: RKE2 bare-metal cluster, namespace: agent-zero
|
||||||
# Profile: Blue Jay (21 tools, 3 prompts, 4 extensions, theme)
|
# Profile: Blue Jay (21 tools, 3 prompts, 4 extensions, theme)
|
||||||
#
|
#
|
||||||
# Differences from LOCAL (WSL K3s):
|
# Differences from LOCAL (WSL K3s):
|
||||||
# - Uses Longhorn StorageClass (not local-path)
|
# - Uses Longhorn StorageClass (not local-path)
|
||||||
# - Prefers workstation Ollama on the R9700, falls back to edge1 Pi 5
|
# - Cluster-only Ollama path (edge1) — keeps workstation private
|
||||||
# - NO Anthropic API key (free/local models only)
|
# - NO Anthropic API key (free/local models only)
|
||||||
# - NO Piper TTS or Kiwix (edge1 handles TTS, no Wikipedia needed)
|
# - NO Piper TTS or Kiwix (edge1 handles TTS, no Wikipedia needed)
|
||||||
# - NO hostPath volumes — profile/tools/extensions loaded via ConfigMaps
|
# - NO hostPath volumes — profile/tools/extensions loaded via ConfigMaps
|
||||||
@@ -91,7 +92,7 @@ subjects:
|
|||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Agent Zero — AI Agent Web UI (NUC Edition, Blue Jay Profile)
|
# Agent Zero — AI Agent Web UI (NUC Edition, Blue Jay Profile)
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Connects to a local proxy that routes to workstation Ollama first and edge1 second
|
# Connects to a local nginx proxy that routes to edge1 Pi 5 + AI HAT+ Ollama only
|
||||||
# Blue Jay profile with 21 tools, 3 prompts, 4 extensions
|
# Blue Jay profile with 21 tools, 3 prompts, 4 extensions
|
||||||
|
|
||||||
---
|
---
|
||||||
@@ -118,7 +119,7 @@ metadata:
|
|||||||
annotations:
|
annotations:
|
||||||
agent-zero/deployment: "nuc"
|
agent-zero/deployment: "nuc"
|
||||||
agent-zero/profile: "bluejay"
|
agent-zero/profile: "bluejay"
|
||||||
agent-zero/ollama: "BLUEJAY-WS primary (10.0.56.20:11434), edge1 fallback (10.0.57.17:11434)"
|
agent-zero/ollama: "edge1 Pi 5 + AI HAT+ only (10.0.57.17:11434) — workstation Ollama is private dev hardware, not a cluster dependency"
|
||||||
spec:
|
spec:
|
||||||
replicas: 1
|
replicas: 1
|
||||||
selector:
|
selector:
|
||||||
@@ -133,19 +134,19 @@ spec:
|
|||||||
spec:
|
spec:
|
||||||
serviceAccountName: agent-zero
|
serviceAccountName: agent-zero
|
||||||
initContainers:
|
initContainers:
|
||||||
# Wait for either workstation or edge1 Ollama to be reachable before starting Agent Zero.
|
# Wait for edge1 Ollama to be reachable before starting Agent Zero.
|
||||||
|
# (Workstation Ollama is intentionally NOT in the cluster path.)
|
||||||
- name: wait-for-ollama
|
- name: wait-for-ollama
|
||||||
image: busybox:1.37
|
image: busybox:1.37
|
||||||
command: ["sh", "-c"]
|
command: ["sh", "-c"]
|
||||||
args:
|
args:
|
||||||
- |
|
- |
|
||||||
echo "Waiting for Ollama at BLUEJAY-WS or edge1..."
|
echo "Waiting for edge1 Ollama (10.0.57.17:11434)..."
|
||||||
until wget -qO- --timeout=2 http://10.0.56.20:11434/api/tags >/dev/null 2>&1 || \
|
until wget -qO- --timeout=2 http://10.0.57.17:11434/api/tags >/dev/null 2>&1; do
|
||||||
wget -qO- --timeout=2 http://10.0.57.17:11434/api/tags >/dev/null 2>&1; do
|
echo "edge1 Ollama not ready yet, retrying in 5s..."
|
||||||
echo "No Ollama endpoint ready yet, retrying in 5s..."
|
|
||||||
sleep 5
|
sleep 5
|
||||||
done
|
done
|
||||||
echo "At least one Ollama endpoint is reachable."
|
echo "edge1 Ollama is reachable."
|
||||||
# Assemble the Blue Jay profile directory structure from ConfigMaps.
|
# Assemble the Blue Jay profile directory structure from ConfigMaps.
|
||||||
# ConfigMaps can't create nested dirs, so we copy into the workspace PVC.
|
# ConfigMaps can't create nested dirs, so we copy into the workspace PVC.
|
||||||
- name: setup-bluejay
|
- name: setup-bluejay
|
||||||
@@ -202,16 +203,18 @@ spec:
|
|||||||
events { worker_connections 1024; }
|
events { worker_connections 1024; }
|
||||||
http {
|
http {
|
||||||
upstream ollama_upstream {
|
upstream ollama_upstream {
|
||||||
server 10.0.56.20:11434 max_fails=2 fail_timeout=10s;
|
# edge1 Pi 5 + AI HAT+ is the SOLE upstream.
|
||||||
server 10.0.57.17:11434 backup;
|
# Workstation Ollama (BLUEJAY-WS) is private dev hardware and
|
||||||
|
# MUST NOT be added back here without an explicit operator decision —
|
||||||
|
# adding it would expose the workstation to cluster traffic.
|
||||||
|
server 10.0.57.17:11434 max_fails=2 fail_timeout=10s;
|
||||||
keepalive 16;
|
keepalive 16;
|
||||||
}
|
}
|
||||||
server {
|
server {
|
||||||
listen 11434;
|
listen 11434;
|
||||||
# Local healthcheck — proves nginx itself is alive.
|
# Local healthcheck — proves nginx itself is alive.
|
||||||
# Must NOT depend on upstream so liveness doesn't restart
|
# Must NOT depend on upstream so liveness doesn't restart
|
||||||
# the container when BLUEJAY-WS Ollama is slow/offline
|
# the container when edge1 is slow/offline.
|
||||||
# and nginx is mid-failover to the edge1 backup.
|
|
||||||
location = /healthz {
|
location = /healthz {
|
||||||
access_log off;
|
access_log off;
|
||||||
return 200 'ok\n';
|
return 200 'ok\n';
|
||||||
@@ -234,10 +237,10 @@ spec:
|
|||||||
ports:
|
ports:
|
||||||
- containerPort: 11434
|
- containerPort: 11434
|
||||||
# Readiness probe DOES check upstream so K8s only routes traffic
|
# Readiness probe DOES check upstream so K8s only routes traffic
|
||||||
# when at least one Ollama backend is reachable. timeoutSeconds=5
|
# when edge1 Ollama is reachable. timeoutSeconds=5 absorbs the Pi's
|
||||||
# allows nginx to fail over from BLUEJAY-WS primary to edge1
|
# slower TCP handshake under load (was timeoutSeconds=1 default →
|
||||||
# backup before the probe fails (was timeoutSeconds=1 default →
|
# 172 historic restarts when the workstation primary path went down,
|
||||||
# 172 historic restarts when workstation Ollama was down).
|
# before the cluster was repointed to edge1-only on 2026-04-27).
|
||||||
readinessProbe:
|
readinessProbe:
|
||||||
httpGet:
|
httpGet:
|
||||||
path: /api/tags
|
path: /api/tags
|
||||||
@@ -277,8 +280,9 @@ spec:
|
|||||||
# chat_model: FlowerCore LLM Bridge (ADR-088) — OpenAI-compat,
|
# chat_model: FlowerCore LLM Bridge (ADR-088) — OpenAI-compat,
|
||||||
# spend-tracked, tier-aliased (fc:balanced → Claude Sonnet).
|
# spend-tracked, tier-aliased (fc:balanced → Claude Sonnet).
|
||||||
# api_key comes from A0_SET_chat_model_api_key env var (overrides
|
# api_key comes from A0_SET_chat_model_api_key env var (overrides
|
||||||
# config.json). util + embedding stay on local 127.0.0.1 Ollama
|
# config.json). util + embedding go to local 127.0.0.1 nginx
|
||||||
# proxy (workstation primary, edge1 fallback).
|
# proxy which routes to edge1 Pi 5 + AI HAT+ ONLY (workstation
|
||||||
|
# is private dev hardware, intentionally not in the cluster path).
|
||||||
mkdir -p /a0/usr/plugins/_model_config
|
mkdir -p /a0/usr/plugins/_model_config
|
||||||
cat > /a0/usr/plugins/_model_config/config.json << 'MODELCFG'
|
cat > /a0/usr/plugins/_model_config/config.json << 'MODELCFG'
|
||||||
{"allow_chat_override":true,"chat_model":{"provider":"openai","name":"fc:balanced","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_history":0.7,"vision":false,"kwargs":{"temperature":0,"num_ctx":8192}},"utility_model":{"provider":"ollama","name":"qwen2.5:1.5b","api_base":"http://127.0.0.1:11434","ctx_length":8192,"ctx_input":0.7,"kwargs":{"num_ctx":8192}},"embedding_model":{"provider":"ollama","name":"nomic-embed-text","api_base":"http://127.0.0.1:11434","kwargs":{}}}
|
{"allow_chat_override":true,"chat_model":{"provider":"openai","name":"fc:balanced","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_history":0.7,"vision":false,"kwargs":{"temperature":0,"num_ctx":8192}},"utility_model":{"provider":"ollama","name":"qwen2.5:1.5b","api_base":"http://127.0.0.1:11434","ctx_length":8192,"ctx_input":0.7,"kwargs":{"num_ctx":8192}},"embedding_model":{"provider":"ollama","name":"nomic-embed-text","api_base":"http://127.0.0.1:11434","kwargs":{}}}
|
||||||
@@ -554,13 +558,8 @@ spec:
|
|||||||
protocol: UDP
|
protocol: UDP
|
||||||
- port: 53
|
- port: 53
|
||||||
protocol: TCP
|
protocol: TCP
|
||||||
# Ollama on BLUEJAY-WS
|
# Ollama on edge1 Pi 5 + AI HAT+ (sole upstream — workstation
|
||||||
- to:
|
# is private dev hardware and intentionally not allowlisted)
|
||||||
- ipBlock:
|
|
||||||
cidr: 10.0.56.20/32
|
|
||||||
ports:
|
|
||||||
- port: 11434
|
|
||||||
# Ollama on edge1 fallback
|
|
||||||
- to:
|
- to:
|
||||||
- ipBlock:
|
- ipBlock:
|
||||||
cidr: 10.0.57.17/32
|
cidr: 10.0.57.17/32
|
||||||
|
|||||||
Reference in New Issue
Block a user