# =============================================================================
# Agent Zero AI Stack — NUC Deployment (RKE2 Bare-Metal)
# =============================================================================
# Deploys: AgentZero (agent UI) on RKE2 cluster with Blue Jay profile
# Ollama:  edge1 Pi 5 + AI HAT+ ONLY (10.0.57.17:11434).
#          Workstation Ollama (BLUEJAY-WS) is intentionally NOT in the
#          upstream — the workstation is private dev hardware, not a cluster
#          dependency.
# Target:  RKE2 bare-metal cluster, namespace: agent-zero
# Profile: Blue Jay (21 tools, 3 prompts, 4 extensions, theme)
#
# Differences from LOCAL (WSL K3s):
#   - Uses Longhorn StorageClass (not local-path)
#   - Cluster-only Ollama path (edge1) — keeps workstation private
#   - NO Anthropic API key in the pod — chat / utility / embedding lanes
#     authenticate to fc-llm-bridge with its own API key instead
#   - NO Piper TTS or Kiwix (edge1 handles TTS, no Wikipedia needed)
#   - NO hostPath volumes — profile/tools/extensions loaded via ConfigMaps
#   - Traefik IngressRoute for LAN access at agent-zero.iamworkin.lan
#
# ConfigMaps (defined in configmaps-bluejay.yaml):
#   bluejay-tools-a/-b/-c   21 Python tool modules, split across three
#                           ConfigMaps (~520K total)
#   bluejay-profile         agent.json, agent.yaml, system_prompt.md (~20K)
#   bluejay-prompts         3 prompt templates (~11K)
#   flowercore-extensions   5 Python extension modules (~76K)
#   bluejay-theme           CSS theme (~7K)
#
# NOTE(review): the extension count is quoted as 4 in the profile summary and
# as 5 in the ConfigMap list — confirm against configmaps-bluejay.yaml.
#
# Apply: KUBECONFIG=~/.kube/rke2.yaml kubectl apply -f agent-zero-nuc.yaml
# =============================================================================
---
apiVersion: v1
kind: Namespace
metadata:
  name: agent-zero
  labels:
    app.kubernetes.io/part-of: agent-zero-stack

# =============================================================================
# Persistent Volume Claims (Longhorn)
# =============================================================================
---
# Workspace volume: mounted at /a0/work; holds the assembled Blue Jay profile
# and the cached kubectl binary.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: agent-zero-data
  namespace: agent-zero
spec:
  accessModes: [ReadWriteOnce]
  storageClassName: longhorn
  resources:
    requests:
      storage: 5Gi
---
# Knowledge volume: mounted at /a0/knowledge/custom/main.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: agent-zero-knowledge
  namespace: agent-zero
spec:
  accessModes: [ReadWriteOnce]
  storageClassName: longhorn
  resources:
    requests:
      storage: 1Gi

# =============================================================================
# RBAC — Give Agent Zero kubectl access to the cluster
# =============================================================================
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: agent-zero
  namespace: agent-zero
---
# NOTE(review): this binds the agent's ServiceAccount to the built-in
# cluster-admin ClusterRole, i.e. unrestricted access to every resource in
# the cluster. Confirm this is intended; a narrower ClusterRole would limit
# the blast radius of a misbehaving tool call.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: agent-zero-cluster-admin
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cluster-admin
subjects:
  - kind: ServiceAccount
    name: agent-zero
    namespace: agent-zero

# =============================================================================
# Agent Zero — AI Agent Web UI (NUC Edition, Blue Jay Profile)
# =============================================================================
# Chat / utility / embedding lanes route through fc-llm-bridge. Browser keeps
# a local nginx proxy to edge1 Pi 5 + AI HAT+ until the bridge grows a live
# Vision route and the in-pod tools stop calling Ollama directly.
# Blue Jay profile with 21 tools, 3 prompts, 4 extensions
---
# FC LLM Bridge API key for Agent Zero (ADR-088 chat / util / embed routing).
# Syncs from 1Password item "FC LLM Bridge API Keys" (field: agent-zero-k8s).
# Consumed by the OpenAI-compatible chat / util / embedding lanes. Browser
# stays on the local Ollama sidecar until fc:vision is configured on the bridge.
apiVersion: onepassword.com/v1
kind: OnePasswordItem
metadata:
  name: fc-llm-bridge-api-keys
  namespace: agent-zero
spec:
  itemPath: "vaults/IAmWorkin/items/FC LLM Bridge API Keys"
---
# Print.Web API key for Agent Zero's print_web.py Python tool.
# Syncs from 1Password item "Print.Web API Keys" (password field = API key).
# The print_web.py tool reads PRINT_WEB_API_KEY env var for all HTTP requests
# to the thermal print service (GET /api/mcp/tools, POST /api/print/*, etc.).
# Note: Print.Web uses the legacy REST MCP shape (/api/mcp/tools/*), not the
# streamable-http MCP protocol. The print_web Python tool bridges this gap
# and is already present in the bluejay-tools ConfigMaps.
apiVersion: onepassword.com/v1
kind: OnePasswordItem
metadata:
  name: print-web-api-keys
  namespace: agent-zero
spec:
  itemPath: "vaults/IAmWorkin/items/Print.Web API Keys"
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: agent-zero
  namespace: agent-zero
  labels:
    app: agent-zero
  annotations:
    agent-zero/deployment: "nuc"
    agent-zero/profile: "bluejay"
    agent-zero/ollama: "edge1 Pi 5 + AI HAT+ only (10.0.57.17:11434) — workstation Ollama is private dev hardware, not a cluster dependency"
spec:
  replicas: 1
  selector:
    matchLabels:
      app: agent-zero
  # Recreate (not RollingUpdate): the old pod is stopped before the new one
  # starts, so the ReadWriteOnce PVCs are never claimed by two pods at once.
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: agent-zero
    spec:
      serviceAccountName: agent-zero
      initContainers:
        # Wait for edge1 Ollama to be reachable before starting Agent Zero.
        # (Workstation Ollama is intentionally NOT in the cluster path.)
        - name: wait-for-ollama
          image: busybox:1.37
          command: ["sh", "-c"]
          args:
            - |
              echo "Waiting for edge1 Ollama (10.0.57.17:11434)..."
              until wget -qO- --timeout=2 http://10.0.57.17:11434/api/tags >/dev/null 2>&1; do
                echo "edge1 Ollama not ready yet, retrying in 5s..."
                sleep 5
              done
              echo "edge1 Ollama is reachable."
        # Assemble the Blue Jay profile directory structure from ConfigMaps.
        # ConfigMaps can't create nested dirs, so we copy into the workspace PVC.
        - name: setup-bluejay
          image: busybox:1.37
          command: ["sh", "-c"]
          args:
            - |
              echo "Setting up Blue Jay profile..."
              # Profile root files
              mkdir -p /a0/work/.bluejay/agents/bluejay/tools
              mkdir -p /a0/work/.bluejay/agents/bluejay/prompts
              cp /tmp/bluejay-profile/* /a0/work/.bluejay/agents/bluejay/
              # Tools (split across 3 ConfigMaps to stay under K8s 262K annotation limit)
              cp /tmp/bluejay-tools-a/* /a0/work/.bluejay/agents/bluejay/tools/
              cp /tmp/bluejay-tools-b/* /a0/work/.bluejay/agents/bluejay/tools/
              cp /tmp/bluejay-tools-c/* /a0/work/.bluejay/agents/bluejay/tools/
              # Prompts
              cp /tmp/bluejay-prompts/* /a0/work/.bluejay/agents/bluejay/prompts/
              # Extensions
              mkdir -p /a0/work/.bluejay/extensions/flowercore
              cp /tmp/flowercore-extensions/* /a0/work/.bluejay/extensions/flowercore/
              # Theme
              mkdir -p /a0/work/.bluejay/theme
              cp /tmp/bluejay-theme/* /a0/work/.bluejay/theme/
              echo "Blue Jay profile ready:"
              echo " Tools: $(ls /a0/work/.bluejay/agents/bluejay/tools/*.py | wc -l)"
              echo " Prompts: $(ls /a0/work/.bluejay/agents/bluejay/prompts/*.md | wc -l)"
              echo " Extensions: $(ls /a0/work/.bluejay/extensions/flowercore/*.py | wc -l)"
          volumeMounts:
            - name: workspace
              mountPath: /a0/work
            - name: bluejay-tools-a
              mountPath: /tmp/bluejay-tools-a
            - name: bluejay-tools-b
              mountPath: /tmp/bluejay-tools-b
            - name: bluejay-tools-c
              mountPath: /tmp/bluejay-tools-c
            - name: bluejay-profile
              mountPath: /tmp/bluejay-profile
            - name: bluejay-prompts
              mountPath: /tmp/bluejay-prompts
            - name: flowercore-extensions
              mountPath: /tmp/flowercore-extensions
            - name: bluejay-theme
              mountPath: /tmp/bluejay-theme
      containers:
        # Sidecar: nginx listening on 11434 inside the pod and proxying to
        # edge1 Ollama. The config is written at container start, then nginx
        # is exec'd in the foreground.
        - name: ollama-proxy
          image: nginx:1.27-alpine
          command: ["/bin/sh", "-c"]
          args:
            - |
              cat > /etc/nginx/nginx.conf <<'NGINX'
              worker_processes 1;
              events { worker_connections 1024; }
              http {
                upstream ollama_upstream {
                  # edge1 Pi 5 + AI HAT+ is the SOLE upstream.
                  # Workstation Ollama (BLUEJAY-WS) is private dev hardware and
                  # MUST NOT be added back here without explicit operator decision —
                  # adding it would expose the workstation to cluster traffic.
                  server 10.0.57.17:11434 max_fails=2 fail_timeout=10s;
                  keepalive 16;
                }
                server {
                  listen 11434;
                  # Local healthcheck — proves nginx itself is alive.
                  # Must NOT depend on upstream so liveness doesn't restart
                  # the container when edge1 is slow/offline.
                  location = /healthz {
                    access_log off;
                    return 200 'ok\n';
                    default_type text/plain;
                  }
                  location / {
                    proxy_http_version 1.1;
                    proxy_set_header Connection "";
                    proxy_set_header Host $host;
                    proxy_connect_timeout 5s;
                    proxy_read_timeout 600s;
                    proxy_send_timeout 600s;
                    proxy_next_upstream error timeout invalid_header http_502 http_503 http_504;
                    proxy_pass http://ollama_upstream;
                  }
                }
              }
              NGINX
              exec nginx -g 'daemon off;'
          ports:
            - containerPort: 11434
          # Readiness probe DOES check upstream so K8s only routes traffic
          # when edge1 Ollama is reachable. timeoutSeconds=5 absorbs the Pi's
          # slower TCP handshake under load (was timeoutSeconds=1 default →
          # 172 historic restarts when the workstation primary path went down,
          # before the cluster was repointed to edge1-only on 2026-04-27).
          readinessProbe:
            httpGet:
              path: /api/tags
              port: 11434
            initialDelaySeconds: 5
            periodSeconds: 15
            timeoutSeconds: 5
            failureThreshold: 3
          # Liveness probe hits ONLY local healthz — restarts the container
          # only when nginx itself is dead. Decoupling liveness from upstream
          # eliminates restart-loops caused by transient upstream outages.
          livenessProbe:
            httpGet:
              path: /healthz
              port: 11434
            initialDelaySeconds: 10
            periodSeconds: 30
            timeoutSeconds: 3
            failureThreshold: 3
        - name: agent-zero
          # NOTE(review): ":latest" is a floating tag, so a pod restart can
          # pull a different image — consider pinning a version or digest.
          image: agent0ai/agent-zero:latest
          command: ["/bin/bash", "-c"]
          args:
            - |
              # Install kubectl if not cached
              if [ -f /a0/work/kubectl ]; then
                cp /a0/work/kubectl /usr/local/bin/kubectl
              else
                # -f makes curl fail on HTTP errors, so an error page is never
                # saved, marked executable, and cached on the PVC as "kubectl".
                # -S keeps the error message visible despite -s.
                curl -fsSLO "https://dl.k8s.io/release/v1.32.0/bin/linux/amd64/kubectl" && \
                  chmod +x kubectl && mv kubectl /usr/local/bin/kubectl && \
                  cp /usr/local/bin/kubectl /a0/work/kubectl
              fi
              # Link Blue Jay profile from workspace into Agent Zero's expected path
              ln -sfn /a0/work/.bluejay/agents/bluejay /a0/agents/bluejay
              # Write model config BEFORE initialize.sh loads it
              # The _model_config plugin reads config.json (NOT config.yaml).
              # chat_model: FlowerCore LLM Bridge (ADR-088) — OpenAI-compat,
              # spend-tracked, tier-aliased (fc:balanced → Claude Sonnet).
              # api_key comes from OPENAI_API_KEY / A0_SET_chat_model_api_key.
              # Utility + embedding now share the same bridge surface so Agent
              # Zero stops talking to Ollama directly for those model lanes.
              # Browser stays on the local 127.0.0.1 proxy until the bridge has
              # a live Vision route and the in-pod tools stop calling Ollama.
              mkdir -p /a0/usr/plugins/_model_config
              cat > /a0/usr/plugins/_model_config/config.json << 'MODELCFG'
              {"allow_chat_override":true,"chat_model":{"provider":"openai","name":"fc:balanced","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_history":0.7,"vision":false,"kwargs":{"temperature":0,"num_ctx":8192}},"utility_model":{"provider":"openai","name":"fc:cheap","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_input":0.7,"kwargs":{"num_ctx":8192}},"embedding_model":{"provider":"openai","name":"fc:embedding","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","kwargs":{}}}
              MODELCFG
              # Strip heredoc indentation
              sed -i 's/^ //' /a0/usr/plugins/_model_config/config.json
              # Phase 0 Chat MCP pilot: Agent Zero does not interpolate env vars
              # inside A0_SET_mcp_servers JSON, so build the final JSON here from
              # the secret-backed CHAT_MCP_API_KEY env var before initialize.sh.
              # Use the in-cluster Chat service URL rather than the public
              # Traefik hostname so the pod stays off the private VIP lane that
              # the default egress rule blocks.
              if [ -n "${CHAT_MCP_API_KEY:-}" ]; then
                export A0_SET_mcp_servers="{\"mcpServers\":{\"fc-chat\":{\"type\":\"streamable-http\",\"url\":\"http://chat-web.fc-chat.svc/mcp\",\"headers\":{\"X-Api-Key\":\"${CHAT_MCP_API_KEY}\"}}}}"
              fi
              # Run the original entrypoint
              exec /exe/initialize.sh $BRANCH
          ports:
            - containerPort: 80
          env:
            # Agent identity
            - name: AGENT_NAME
              value: "Blue Jay (NUC)"
            # Chat model — routed through FlowerCore LLM Bridge (ADR-088)
            # so spend is tracked and tier aliases (fc:cheap/fc:balanced/fc:deep)
            # dispatch to Ollama or Anthropic via a single OpenAI-compat endpoint.
            # Utility + embedding now share the bridge/auth surface too.
            # Browser stays on local Ollama until the bridge has a live
            # Vision route and the in-pod tools stop calling Ollama directly.
            - name: A0_SET_chat_model_provider
              value: "openai"
            - name: A0_SET_chat_model_name
              value: "fc:balanced"
            - name: A0_SET_chat_model_api_base
              value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1"
            - name: A0_SET_chat_model_api_key
              valueFrom:
                secretKeyRef:
                  name: fc-llm-bridge-api-keys
                  key: agent-zero-k8s
            # Agent Zero's runtime still resolves provider keys from the
            # provider-level env names (models.get_api_key -> OPENAI_API_KEY /
            # API_KEY_OPENAI), not the slot-scoped A0_SET_* value alone.
            # Mirror the same secret here so real public chat runs can reach
            # the fc-llm-bridge chat_model path instead of failing before MCP.
            - name: OPENAI_API_KEY
              valueFrom:
                secretKeyRef:
                  name: fc-llm-bridge-api-keys
                  key: agent-zero-k8s
            - name: A0_SET_chat_model_ctx_length
              value: "8192"
            - name: A0_SET_chat_model_kwargs
              value: '{"temperature": 0, "num_ctx": 8192}'
            # Utility model — fast small helper tier through the same proxy
            - name: A0_SET_util_model_provider
              value: "openai"
            - name: A0_SET_util_model_name
              value: "fc:cheap"
            - name: A0_SET_util_model_api_base
              value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1"
            # NOTE(review): num_ctx is 2048 here but 8192 for utility_model in
            # the config.json written by the startup script — confirm which
            # value takes effect and align the two.
            - name: A0_SET_util_model_kwargs
              value: '{"num_ctx": 2048}'
            # Embedding model — bridge alias to nomic-embed-text on edge1
            - name: A0_SET_embed_model_provider
              value: "openai"
            - name: A0_SET_embed_model_name
              value: "fc:embedding"
            - name: A0_SET_embed_model_api_base
              value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1"
            # Browser model — small Gemma candidate stays on the local proxy
            # until fc:vision is configured on the bridge.
            - name: A0_SET_browser_model_provider
              value: "ollama"
            - name: A0_SET_browser_model_name
              value: "gemma3:4b"
            - name: A0_SET_browser_model_api_base
              value: "http://127.0.0.1:11434"
            - name: A0_SET_browser_model_vision
              value: "true"
            # Agent profile — Blue Jay personality, tools, and system prompt
            - name: A0_SET_agent_profile
              value: "bluejay"
            # Memory settings
            - name: A0_SET_memory_memorize_enabled
              value: "true"
            - name: A0_SET_memory_memorize_consolidation
              value: "true"
            - name: A0_SET_memory_memorize_replace_threshold
              value: "0.85"
            - name: A0_SET_memory_recall_enabled
              value: "true"
            # Speech-to-text — smallest model size (no GPU for Whisper).
            # NOTE(review): this selects the "tiny" size; it does not appear
            # to disable STT outright — confirm the intended behaviour.
            - name: A0_SET_stt_model_size
              value: "tiny"
            # FlowerCore.Chat MCP pilot (Phase 0). The secret is optional: when
            # it is absent the startup script skips the MCP server config.
            - name: CHAT_MCP_API_KEY
              valueFrom:
                secretKeyRef:
                  name: chat-mcp-api-key
                  key: api-key
                  optional: true
            # Print.Web — Thermal printer service on edge2.
            # PRINT_WEB_URL: internal HTTP (bypasses Traefik TLS — print_web.py
            # runs in-cluster and can reach edge2 directly on the PROD VLAN).
            # PRINT_WEB_API_KEY: from 1Password "Print.Web API Keys" password field,
            # synced by the print-web-api-keys OnePasswordItem CRD above.
            # The print_web.py Python tool reads both env vars for all HTTP calls.
            - name: PRINT_WEB_URL
              value: "http://10.0.57.16:5200"
            - name: PRINT_WEB_API_KEY
              valueFrom:
                secretKeyRef:
                  name: print-web-api-keys
                  key: password
            # Intranet search — use in-cluster HTTP (no step-ca TLS needed)
            # corpus_search.py reads FLOWERCORE_FLEET_VECTOR_DIR but that mount is not
            # on the cluster yet (BLUEJAY-WS only). The tool gracefully returns a
            # "no DB found" message with rebuild instructions rather than crashing.
            - name: FLOWERCORE_INTRANET_URL
              value: "http://intranet-web.intranet.svc:5300"
            # Kubernetes
            - name: KUBERNETES_SERVICE_HOST
              value: "kubernetes.default.svc"
            - name: KUBERNETES_SERVICE_PORT
              value: "443"
          volumeMounts:
            - name: workspace
              mountPath: /a0/work
            - name: knowledge
              mountPath: /a0/knowledge/custom/main
            - name: flowercore-extensions
              mountPath: /a0/extensions/flowercore
              readOnly: true
            - name: bluejay-theme
              mountPath: /a0/webui/static/css/custom
              readOnly: true
          # Startup budget: 15s initial delay + 18 failures x 10s period.
          startupProbe:
            httpGet:
              path: /
              port: 80
            initialDelaySeconds: 15
            periodSeconds: 10
            failureThreshold: 18
          # timeoutSeconds is set explicitly: the 1s default is the same
          # setting that caused restart loops on the ollama-proxy sidecar.
          livenessProbe:
            httpGet:
              path: /
              port: 80
            periodSeconds: 30
            timeoutSeconds: 5
            failureThreshold: 3
          # Ready only when both the web UI and the local Ollama proxy answer.
          # timeoutSeconds must exceed curl's own 3s connect timeout; with the
          # 1s default the probe is killed before curl can report a result.
          readinessProbe:
            exec:
              command:
                - /bin/bash
                - -c
                - "curl -sf http://localhost:80/ > /dev/null && curl -sf --connect-timeout 3 http://127.0.0.1:11434/api/tags > /dev/null"
            periodSeconds: 30
            timeoutSeconds: 10
            failureThreshold: 2
          resources:
            requests:
              memory: "2Gi"
              cpu: "1000m"
            limits:
              memory: "3Gi"
              cpu: "2000m"
      volumes:
        - name: workspace
          persistentVolumeClaim:
            claimName: agent-zero-data
        - name: knowledge
          persistentVolumeClaim:
            claimName: agent-zero-knowledge
        - name: bluejay-tools-a
          configMap:
            name: bluejay-tools-a
        - name: bluejay-tools-b
          configMap:
            name: bluejay-tools-b
        - name: bluejay-tools-c
          configMap:
            name: bluejay-tools-c
        - name: bluejay-profile
          configMap:
            name: bluejay-profile
        - name: bluejay-prompts
          configMap:
            name: bluejay-prompts
        - name: flowercore-extensions
          configMap:
            name: flowercore-extensions
        - name: bluejay-theme
          configMap:
            name: bluejay-theme
---
# ClusterIP Service in front of the Agent Zero web UI (container port 80).
apiVersion: v1
kind: Service
metadata:
  name: agent-zero
  namespace: agent-zero
spec:
  type: ClusterIP
  selector:
    app: agent-zero
  ports:
    - port: 80
      targetPort: 80

# =============================================================================
# Traefik IngressRoute — LAN access at agent-zero.iamworkin.lan
# =============================================================================
---
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
  name: agent-zero
  namespace: agent-zero
spec:
  entryPoints:
    - websecure
  routes:
    - match: Host(`agent-zero.iamworkin.lan`)
      kind: Rule
      services:
        - name: agent-zero
          port: 80
  tls:
    secretName: agent-zero-tls

# =============================================================================
# TLS Certificate via cert-manager (step-ca ACME)
# =============================================================================
---
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: agent-zero-tls
  namespace: agent-zero
spec:
  secretName: agent-zero-tls
  issuerRef:
    name: step-ca-acme
    kind: ClusterIssuer
  dnsNames:
    - agent-zero.iamworkin.lan
  # 30-day certificate, renewed 10 days before expiry.
  duration: 720h
  renewBefore: 240h

# =============================================================================
# NetworkPolicy — Restrict traffic
# =============================================================================
---
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: agent-zero-netpol
  namespace: agent-zero
spec:
  podSelector:
    matchLabels:
      app: agent-zero
  policyTypes:
    - Ingress
    - Egress
  ingress:
    # Allow from Traefik
    - from:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: traefik-system
      ports:
        - port: 80
    # Allow from monitoring (blackbox probe)
    - from:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: monitoring
      ports:
        - port: 80
  egress:
    # DNS
    - to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: kube-system
      ports:
        - port: 53
          protocol: UDP
        - port: 53
          protocol: TCP
    # Ollama on edge1 Pi 5 + AI HAT+ (sole upstream — workstation
    # is private dev hardware and intentionally not allowlisted)
    - to:
        - ipBlock:
            cidr: 10.0.57.17/32
      ports:
        - port: 11434
    # Print.Web on edge2
    - to:
        - ipBlock:
            cidr: 10.0.57.16/32
      ports:
        - port: 5200
    # K8s API
    - to:
        - ipBlock:
            cidr: 10.0.56.11/32
      ports:
        - port: 6443
    # FlowerCore LLM Bridge (ADR-088 chat_model routing) — ClusterIP service
    # in the fc-llm-bridge namespace on port 8080.
    - to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: fc-llm-bridge
      ports:
        - port: 8080
          protocol: TCP
    # FlowerCore.Chat MCP (Phase 0 pilot) — use the in-cluster chat-web
    # service instead of the public Traefik VIP so MCP traffic stays inside
    # the cluster and survives the private-range egress denylist.
    - to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: fc-chat
      ports:
        - port: 80
          protocol: TCP
        - port: 8080
          protocol: TCP
    # Intranet search API — use in-cluster svc so traffic stays inside
    # the cluster and is not blocked by the private-range egress denylist.
    - to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: intranet
      ports:
        - port: 5300
          protocol: TCP
    # Allow internet egress (kubectl binary download, etc.). The RFC 1918
    # private ranges are excluded, so LAN hosts are reachable only through
    # the explicit allow rules in this policy.
    - to:
        - ipBlock:
            cidr: 0.0.0.0/0
            except:
              - 10.0.0.0/8
              - 172.16.0.0/12
              - 192.168.0.0/16