Files
bluejay-infra/apps/agent-zero/agent-zero.yaml
Andrew Stoltz 62db15c69c feat(agent-zero): route chat_model through fc-llm-bridge (ADR-088)
Flips Agent Zero's chat_model from direct local Ollama (gemma3:12b via
the 127.0.0.1:11434 sidecar proxy) to the FlowerCore LLM Bridge
(fc:balanced tier, OpenAI-compatible, Anthropic Claude Sonnet under the
hood) so chat turns are spend-tracked and can dispatch to any provider
via a single tier alias.

Scope is intentionally minimal and reversible:
  - chat_model: ollama/gemma3:12b/127.0.0.1:11434
              → openai/fc:balanced/fc-llm-bridge internal service URL
  - utility_model, embedding_model, browser_model: UNCHANGED
    (stay on local 127.0.0.1 Ollama sidecar — no spend, low latency,
    not worth routing through the bridge for small-model traffic).

Auth: new A0_SET_chat_model_api_key env var wired to the
fc-llm-bridge-api-keys Secret (field: agent-zero-k8s). The Secret is
synced by a new OnePasswordItem pointing at "FC LLM Bridge API Keys"
in the IAmWorkin vault. Bearer-token auth is now accepted by the
bridge (FlowerCore.LlmBridge@3225f1f).

Rollback: revert this commit; old image v202604231449 is still present
on all RKE2 nodes, and Agent Zero's strategy: Recreate makes the flip
atomic.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 09:54:27 -05:00

542 lines
19 KiB
YAML

# =============================================================================
# Agent Zero AI Stack — NUC Deployment (RKE2 Bare-Metal)
# =============================================================================
# Deploys: AgentZero (agent UI) on RKE2 cluster with Blue Jay profile
# Ollama: workstation-first via BLUEJAY-WS (10.0.56.20:11434) with edge1 Pi 5
# fallback (10.0.57.17:11434)
# Target: RKE2 bare-metal cluster, namespace: agent-zero
# Profile: Blue Jay (21 tools, 3 prompts, 4 extensions, theme)
#
# Differences from LOCAL (WSL K3s):
# - Uses Longhorn StorageClass (not local-path)
# - Prefers workstation Ollama on the R9700, falls back to edge1 Pi 5
# - NO direct Anthropic API key; chat_model goes through the FC LLM Bridge
#   (ADR-088), all other models stay on free/local Ollama
# - NO Piper TTS or Kiwix (edge1 handles TTS, no Wikipedia needed)
# - NO hostPath volumes — profile/tools/extensions loaded via ConfigMaps
# - Traefik IngressRoute for LAN access at agent-zero.iamworkin.lan
#
# ConfigMaps (defined in configmaps-bluejay.yaml):
# bluejay-tools-a/-b/-c 21 Python tool modules (~520K total, split across 3 ConfigMaps)
# bluejay-profile agent.json, agent.yaml, system_prompt.md (~20K)
# bluejay-prompts 3 prompt templates (~11K)
# flowercore-extensions 5 Python extension modules (~76K)
# bluejay-theme CSS theme (~7K)
#
# Apply: KUBECONFIG=~/.kube/rke2.yaml kubectl apply -f agent-zero.yaml
# =============================================================================
---
# Namespace for the Agent Zero stack.
apiVersion: v1
kind: Namespace
metadata:
  labels:
    app.kubernetes.io/part-of: agent-zero-stack
  name: agent-zero
# =============================================================================
# Persistent Volume Claims (Longhorn)
# =============================================================================
---
# Longhorn-backed volume mounted by the Deployment as the /a0/work workspace.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: agent-zero-data
  namespace: agent-zero
spec:
  storageClassName: longhorn
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 5Gi
---
# Longhorn-backed volume mounted by the Deployment at /a0/knowledge/custom/main.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: agent-zero-knowledge
  namespace: agent-zero
spec:
  storageClassName: longhorn
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
# =============================================================================
# RBAC — Give Agent Zero kubectl access to the cluster
# =============================================================================
---
# Identity the Agent Zero pod runs as (referenced by serviceAccountName in the
# Deployment and bound to a ClusterRole by the ClusterRoleBinding).
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: agent-zero
  name: agent-zero
---
# Binds the agent-zero ServiceAccount to the built-in cluster-admin ClusterRole
# so the kubectl binary inside the pod can act on the whole cluster.
# NOTE(review): cluster-admin is unrestricted; anything the agent (or a prompt
# injected into it) runs has full control of the cluster. Confirm this scope is
# intended, or replace with a narrower ClusterRole.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: agent-zero-cluster-admin
subjects:
  - kind: ServiceAccount
    namespace: agent-zero
    name: agent-zero
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cluster-admin
# =============================================================================
# Agent Zero — AI Agent Web UI (NUC Edition, Blue Jay Profile)
# =============================================================================
# Utility, embedding and browser models go through a local proxy that routes to
# workstation Ollama first and edge1 second; chat_model goes through the FC LLM Bridge
# Blue Jay profile with 21 tools, 3 prompts, 4 extensions
---
# 1Password-synced credentials for the FC LLM Bridge (ADR-088 chat_model routing).
# Source item: "FC LLM Bridge API Keys" in the IAmWorkin vault; the Deployment
# reads the agent-zero-k8s field from the resulting fc-llm-bridge-api-keys Secret.
# Only chat_model uses this key; util / embedding / browser stay on local
# Ollama via the 127.0.0.1 sidecar proxy.
apiVersion: onepassword.com/v1
kind: OnePasswordItem
metadata:
  namespace: agent-zero
  name: fc-llm-bridge-api-keys
spec:
  itemPath: "vaults/IAmWorkin/items/FC LLM Bridge API Keys"
---
# Agent Zero Deployment (single replica, Recreate strategy).
#   initContainers:
#     wait-for-ollama  blocks until one of the two Ollama endpoints answers
#     setup-bluejay    copies the Blue Jay ConfigMaps into the workspace PVC
#   containers:
#     ollama-proxy     nginx on :11434, workstation Ollama primary, edge1 backup
#     agent-zero       the agent UI; chat_model -> FC LLM Bridge, other models
#                      -> the local ollama-proxy
apiVersion: apps/v1
kind: Deployment
metadata:
  name: agent-zero
  namespace: agent-zero
  labels:
    app: agent-zero
  annotations:
    agent-zero/deployment: "nuc"
    agent-zero/profile: "bluejay"
    agent-zero/ollama: "BLUEJAY-WS primary (10.0.56.20:11434), edge1 fallback (10.0.57.17:11434)"
spec:
  replicas: 1
  selector:
    matchLabels:
      app: agent-zero
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: agent-zero
    spec:
      serviceAccountName: agent-zero
      initContainers:
        # Wait for either workstation or edge1 Ollama to be reachable before starting Agent Zero.
        - name: wait-for-ollama
          image: busybox:1.37
          command: ["sh", "-c"]
          args:
            - |
              echo "Waiting for Ollama at BLUEJAY-WS or edge1..."
              until wget -qO- --timeout=2 http://10.0.56.20:11434/api/tags >/dev/null 2>&1 || \
                    wget -qO- --timeout=2 http://10.0.57.17:11434/api/tags >/dev/null 2>&1; do
                echo "No Ollama endpoint ready yet, retrying in 5s..."
                sleep 5
              done
              echo "At least one Ollama endpoint is reachable."
        # Assemble the Blue Jay profile directory structure from ConfigMaps.
        # ConfigMaps can't create nested dirs, so we copy into the workspace PVC.
        - name: setup-bluejay
          image: busybox:1.37
          command: ["sh", "-c"]
          args:
            - |
              echo "Setting up Blue Jay profile..."
              # Profile root files
              mkdir -p /a0/work/.bluejay/agents/bluejay/tools
              mkdir -p /a0/work/.bluejay/agents/bluejay/prompts
              cp /tmp/bluejay-profile/* /a0/work/.bluejay/agents/bluejay/
              # Tools (split across 3 ConfigMaps to stay under K8s 262K annotation limit)
              cp /tmp/bluejay-tools-a/* /a0/work/.bluejay/agents/bluejay/tools/
              cp /tmp/bluejay-tools-b/* /a0/work/.bluejay/agents/bluejay/tools/
              cp /tmp/bluejay-tools-c/* /a0/work/.bluejay/agents/bluejay/tools/
              # Prompts
              cp /tmp/bluejay-prompts/* /a0/work/.bluejay/agents/bluejay/prompts/
              # Extensions
              mkdir -p /a0/work/.bluejay/extensions/flowercore
              cp /tmp/flowercore-extensions/* /a0/work/.bluejay/extensions/flowercore/
              # Theme
              mkdir -p /a0/work/.bluejay/theme
              cp /tmp/bluejay-theme/* /a0/work/.bluejay/theme/
              echo "Blue Jay profile ready:"
              echo " Tools: $(ls /a0/work/.bluejay/agents/bluejay/tools/*.py | wc -l)"
              echo " Prompts: $(ls /a0/work/.bluejay/agents/bluejay/prompts/*.md | wc -l)"
              echo " Extensions: $(ls /a0/work/.bluejay/extensions/flowercore/*.py | wc -l)"
          volumeMounts:
            - name: workspace
              mountPath: /a0/work
            - name: bluejay-tools-a
              mountPath: /tmp/bluejay-tools-a
            - name: bluejay-tools-b
              mountPath: /tmp/bluejay-tools-b
            - name: bluejay-tools-c
              mountPath: /tmp/bluejay-tools-c
            - name: bluejay-profile
              mountPath: /tmp/bluejay-profile
            - name: bluejay-prompts
              mountPath: /tmp/bluejay-prompts
            - name: flowercore-extensions
              mountPath: /tmp/flowercore-extensions
            - name: bluejay-theme
              mountPath: /tmp/bluejay-theme
      containers:
        # Sidecar: nginx listening on 127.0.0.1:11434 inside the pod, forwarding
        # to workstation Ollama and failing over to edge1 (marked "backup").
        - name: ollama-proxy
          image: nginx:1.27-alpine
          command: ["/bin/sh", "-c"]
          args:
            - |
              cat > /etc/nginx/nginx.conf <<'NGINX'
              worker_processes 1;
              events { worker_connections 1024; }
              http {
                upstream ollama_upstream {
                  server 10.0.56.20:11434 max_fails=2 fail_timeout=10s;
                  server 10.0.57.17:11434 backup;
                  keepalive 16;
                }
                server {
                  listen 11434;
                  location / {
                    proxy_http_version 1.1;
                    proxy_set_header Connection "";
                    proxy_set_header Host $host;
                    proxy_connect_timeout 5s;
                    proxy_read_timeout 600s;
                    proxy_send_timeout 600s;
                    proxy_next_upstream error timeout invalid_header http_502 http_503 http_504;
                    proxy_pass http://ollama_upstream;
                  }
                }
              }
              NGINX
              exec nginx -g 'daemon off;'
          ports:
            - containerPort: 11434
          # Readiness goes through the proxy to an upstream, so the sidecar is
          # only "ready" while at least one Ollama endpoint answers.
          readinessProbe:
            httpGet:
              path: /api/tags
              port: 11434
            initialDelaySeconds: 5
            periodSeconds: 15
          # Liveness checks nginx itself (TCP accept on the listen port), not the
          # upstream: restarting nginx cannot repair an Ollama outage, and an
          # upstream-dependent liveness probe would restart-loop a healthy proxy.
          livenessProbe:
            tcpSocket:
              port: 11434
            initialDelaySeconds: 10
            periodSeconds: 30
        - name: agent-zero
          # NOTE(review): ":latest" is unpinned, so every Recreate rollout may
          # pull a different image. Consider pinning to a tested tag.
          image: agent0ai/agent-zero:latest
          command: ["/bin/bash", "-c"]
          args:
            - |
              # Install kubectl if not cached.
              # curl runs with --fail so an HTTP error page is never saved,
              # marked executable and cached on the PVC as "kubectl".
              if [ -f /a0/work/kubectl ]; then
                cp /a0/work/kubectl /usr/local/bin/kubectl
              else
                curl -fsSLO "https://dl.k8s.io/release/v1.32.0/bin/linux/amd64/kubectl" && \
                  chmod +x kubectl && mv kubectl /usr/local/bin/kubectl && \
                  cp /usr/local/bin/kubectl /a0/work/kubectl
              fi
              # Link Blue Jay profile from workspace into Agent Zero's expected path
              ln -sfn /a0/work/.bluejay/agents/bluejay /a0/agents/bluejay
              # Write model config BEFORE initialize.sh loads it
              # The _model_config plugin reads config.json (NOT config.yaml).
              # chat_model: FlowerCore LLM Bridge (ADR-088) — OpenAI-compat,
              # spend-tracked, tier-aliased (fc:balanced → Claude Sonnet).
              # api_key comes from A0_SET_chat_model_api_key env var (overrides
              # config.json). util + embedding stay on local 127.0.0.1 Ollama
              # proxy (workstation primary, edge1 fallback).
              mkdir -p /a0/usr/plugins/_model_config
              cat > /a0/usr/plugins/_model_config/config.json << 'MODELCFG'
              {"allow_chat_override":true,"chat_model":{"provider":"openai","name":"fc:balanced","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc.cluster.local:8080/v1","ctx_length":8192,"ctx_history":0.7,"vision":false,"kwargs":{"temperature":0,"num_ctx":8192}},"utility_model":{"provider":"ollama","name":"qwen2.5:1.5b","api_base":"http://127.0.0.1:11434","ctx_length":8192,"ctx_input":0.7,"kwargs":{"num_ctx":8192}},"embedding_model":{"provider":"ollama","name":"nomic-embed-text","api_base":"http://127.0.0.1:11434","kwargs":{}}}
              MODELCFG
              # Strip heredoc indentation
              sed -i 's/^ //' /a0/usr/plugins/_model_config/config.json
              # Run the original entrypoint
              exec /exe/initialize.sh $BRANCH
          ports:
            - containerPort: 80
          env:
            # Agent identity
            - name: AGENT_NAME
              value: "Blue Jay (NUC)"
            # Chat model — routed through FlowerCore LLM Bridge (ADR-088)
            # so spend is tracked and tier aliases (fc:cheap/fc:balanced/fc:deep)
            # dispatch to Ollama or Anthropic via a single OpenAI-compat endpoint.
            # Util / embedding / browser stay on local Ollama via 127.0.0.1 proxy
            # for zero-latency, zero-cost small-model traffic.
            - name: A0_SET_chat_model_provider
              value: "openai"
            - name: A0_SET_chat_model_name
              value: "fc:balanced"
            - name: A0_SET_chat_model_api_base
              value: "http://fc-llm-bridge.fc-llm-bridge.svc.cluster.local:8080/v1"
            # Bearer token for the bridge, synced from 1Password into the
            # fc-llm-bridge-api-keys Secret.
            - name: A0_SET_chat_model_api_key
              valueFrom:
                secretKeyRef:
                  name: fc-llm-bridge-api-keys
                  key: agent-zero-k8s
            - name: A0_SET_chat_model_ctx_length
              value: "8192"
            - name: A0_SET_chat_model_kwargs
              value: '{"temperature": 0, "num_ctx": 8192}'
            # Utility model — fast small helper tier through the local Ollama proxy
            - name: A0_SET_util_model_provider
              value: "ollama"
            - name: A0_SET_util_model_name
              value: "qwen2.5:1.5b"
            - name: A0_SET_util_model_api_base
              value: "http://127.0.0.1:11434"
            # NOTE(review): config.json above sets num_ctx 8192 for utility_model
            # while this env var sets 2048 — confirm which one is intended.
            - name: A0_SET_util_model_kwargs
              value: '{"num_ctx": 2048}'
            # Embedding model — nomic through the local Ollama proxy
            - name: A0_SET_embed_model_provider
              value: "ollama"
            - name: A0_SET_embed_model_name
              value: "nomic-embed-text"
            - name: A0_SET_embed_model_api_base
              value: "http://127.0.0.1:11434"
            # Browser model — small Gemma candidate through the local Ollama proxy
            - name: A0_SET_browser_model_provider
              value: "ollama"
            - name: A0_SET_browser_model_name
              value: "gemma3:4b"
            - name: A0_SET_browser_model_api_base
              value: "http://127.0.0.1:11434"
            - name: A0_SET_browser_model_vision
              value: "true"
            # Agent profile — Blue Jay personality, tools, and system prompt
            - name: A0_SET_agent_profile
              value: "bluejay"
            # Memory settings
            - name: A0_SET_memory_memorize_enabled
              value: "true"
            - name: A0_SET_memory_memorize_consolidation
              value: "true"
            - name: A0_SET_memory_memorize_replace_threshold
              value: "0.85"
            - name: A0_SET_memory_recall_enabled
              value: "true"
            # Speech-to-text — smallest model size (no GPU for Whisper)
            - name: A0_SET_stt_model_size
              value: "tiny"
            # Print.Web — Thermal printer service on edge2
            - name: PRINT_WEB_URL
              value: "http://10.0.57.16:5200"
            # Kubernetes
            - name: KUBERNETES_SERVICE_HOST
              value: "kubernetes.default.svc"
            - name: KUBERNETES_SERVICE_PORT
              value: "443"
          volumeMounts:
            - name: workspace
              mountPath: /a0/work
            - name: knowledge
              mountPath: /a0/knowledge/custom/main
            - name: flowercore-extensions
              mountPath: /a0/extensions/flowercore
              readOnly: true
            - name: bluejay-theme
              mountPath: /a0/webui/static/css/custom
              readOnly: true
          # Up to 15s + 18 x 10s for the web UI to come up before liveness applies.
          startupProbe:
            httpGet:
              path: /
              port: 80
            initialDelaySeconds: 15
            periodSeconds: 10
            failureThreshold: 18
          livenessProbe:
            httpGet:
              path: /
              port: 80
            periodSeconds: 30
            failureThreshold: 3
          # Ready only when both the web UI and the local Ollama proxy answer.
          readinessProbe:
            exec:
              command:
                - /bin/bash
                - -c
                - "curl -sf http://localhost:80/ > /dev/null && curl -sf --connect-timeout 3 http://127.0.0.1:11434/api/tags > /dev/null"
            periodSeconds: 30
            failureThreshold: 2
          resources:
            requests:
              memory: "2Gi"
              cpu: "1000m"
            limits:
              memory: "3Gi"
              cpu: "2000m"
      volumes:
        - name: workspace
          persistentVolumeClaim:
            claimName: agent-zero-data
        - name: knowledge
          persistentVolumeClaim:
            claimName: agent-zero-knowledge
        - name: bluejay-tools-a
          configMap:
            name: bluejay-tools-a
        - name: bluejay-tools-b
          configMap:
            name: bluejay-tools-b
        - name: bluejay-tools-c
          configMap:
            name: bluejay-tools-c
        - name: bluejay-profile
          configMap:
            name: bluejay-profile
        - name: bluejay-prompts
          configMap:
            name: bluejay-prompts
        - name: flowercore-extensions
          configMap:
            name: flowercore-extensions
        - name: bluejay-theme
          configMap:
            name: bluejay-theme
---
# In-cluster Service exposing the Agent Zero web UI (pod port 80) on port 80.
apiVersion: v1
kind: Service
metadata:
  namespace: agent-zero
  name: agent-zero
spec:
  type: ClusterIP
  ports:
    - port: 80
      targetPort: 80
  selector:
    app: agent-zero
# =============================================================================
# Traefik IngressRoute — LAN access at agent-zero.iamworkin.lan
# =============================================================================
---
# Traefik route: HTTPS (websecure) requests for agent-zero.iamworkin.lan are
# forwarded to the agent-zero Service, terminated with the agent-zero-tls Secret.
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
  namespace: agent-zero
  name: agent-zero
spec:
  entryPoints:
    - websecure
  tls:
    secretName: agent-zero-tls
  routes:
    - kind: Rule
      match: Host(`agent-zero.iamworkin.lan`)
      services:
        - name: agent-zero
          port: 80
# =============================================================================
# TLS Certificate via cert-manager (step-ca ACME)
# =============================================================================
---
# cert-manager Certificate for agent-zero.iamworkin.lan, issued by the
# step-ca-acme ClusterIssuer and stored in the agent-zero-tls Secret.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  namespace: agent-zero
  name: agent-zero-tls
spec:
  dnsNames:
    - agent-zero.iamworkin.lan
  secretName: agent-zero-tls
  issuerRef:
    kind: ClusterIssuer
    name: step-ca-acme
  # 720h = 30-day lifetime; renewal starts 240h (10 days) before expiry.
  duration: 720h
  renewBefore: 240h
# =============================================================================
# NetworkPolicy — Restrict traffic
# =============================================================================
---
# Default-deny style policy for the agent-zero pod: only the ingress sources
# and egress destinations listed below are allowed.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: agent-zero-netpol
  namespace: agent-zero
spec:
  podSelector:
    matchLabels:
      app: agent-zero
  policyTypes:
    - Ingress
    - Egress
  ingress:
    # Allow from Traefik
    - from:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: traefik-system
      ports:
        - port: 80
    # Allow from monitoring (blackbox probe)
    - from:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: monitoring
      ports:
        - port: 80
  egress:
    # DNS
    - to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: kube-system
      ports:
        - port: 53
          protocol: UDP
        - port: 53
          protocol: TCP
    # FC LLM Bridge (ADR-088 chat_model endpoint,
    # fc-llm-bridge.fc-llm-bridge.svc.cluster.local:8080). Without this rule
    # the chat model is unreachable whenever cluster pod/service addresses fall
    # in the private ranges excluded from the internet rule at the bottom.
    # NOTE(review): NetworkPolicy is evaluated against the destination pod's
    # port; 8080 is the Service port — confirm the bridge's targetPort matches.
    - to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: fc-llm-bridge
      ports:
        - port: 8080
    # Ollama on BLUEJAY-WS
    - to:
        - ipBlock:
            cidr: 10.0.56.20/32
      ports:
        - port: 11434
    # Ollama on edge1 fallback
    - to:
        - ipBlock:
            cidr: 10.0.57.17/32
      ports:
        - port: 11434
    # Print.Web on edge2
    - to:
        - ipBlock:
            cidr: 10.0.57.16/32
      ports:
        - port: 5200
    # K8s API
    - to:
        - ipBlock:
            cidr: 10.0.56.11/32
      ports:
        - port: 6443
    # Allow internet (kubectl binary download, etc.); private ranges excluded
    - to:
        - ipBlock:
            cidr: 0.0.0.0/0
            except:
              - 10.0.0.0/8
              - 172.16.0.0/12
              - 192.168.0.0/16