# =============================================================================
# Agent Zero AI Stack — NUC Deployment (RKE2 Bare-Metal)
# =============================================================================
# Deploys: AgentZero (agent UI) on RKE2 cluster with Blue Jay profile
# Ollama: edge1 Pi 5 + AI HAT+ ONLY (10.0.57.17:11434).
# Workstation Ollama (BLUEJAY-WS) is intentionally NOT in the upstream —
# the workstation is private dev hardware, not a cluster dependency.
# Target: RKE2 bare-metal cluster, namespace: agent-zero
# Profile: Blue Jay (21 tools, 3 prompts, 4 extensions, theme)
#
# Differences from LOCAL (WSL K3s):
#   - Uses Longhorn StorageClass (not local-path)
#   - Cluster-only Ollama path (edge1) — keeps workstation private
#   - NO Anthropic API key in this pod (chat/util/embed lanes go through fc-llm-bridge; browser lane uses edge1 Ollama)
#   - NO Piper TTS or Kiwix (edge1 handles TTS, no Wikipedia needed)
#   - NO hostPath volumes — profile/tools/extensions loaded via ConfigMaps
#   - Traefik IngressRoute for LAN access at agent-zero.iamworkin.lan
#
# ConfigMaps (defined in configmaps-bluejay.yaml):
#   bluejay-tools-a/-b/-c 21 Python tool modules (~520K total, split across 3 ConfigMaps)
#   bluejay-profile      agent.json, agent.yaml, system_prompt.md (~20K)
#   bluejay-prompts      3 prompt templates (~11K)
#   flowercore-extensions 5 Python extension modules (~76K)
#   bluejay-theme        CSS theme (~7K)
#
# Apply: KUBECONFIG=~/.kube/rke2.yaml kubectl apply -f agent-zero-nuc.yaml
# =============================================================================

---
# Namespace used by the namespaced resources declared below
# (PVCs, ServiceAccount, Deployment, Service, IngressRoute, ...).
apiVersion: v1
kind: Namespace
metadata:
  name: agent-zero
  labels:
    app.kubernetes.io/part-of: agent-zero-stack

# =============================================================================
# Persistent Volume Claims (Longhorn)
# =============================================================================

---
# Workspace volume: mounted at /a0/work by the agent-zero Deployment.
# Holds the assembled Blue Jay profile tree and the cached kubectl binary.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: agent-zero-data
  namespace: agent-zero
spec:
  storageClassName: longhorn
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 5Gi

---
# Knowledge volume: mounted at /a0/knowledge/custom/main by the
# agent-zero Deployment.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: agent-zero-knowledge
  namespace: agent-zero
spec:
  storageClassName: longhorn
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi

# =============================================================================
# RBAC — Give Agent Zero kubectl access to the cluster
# =============================================================================

---
# Identity the agent-zero pod runs as (referenced by the Deployment's
# serviceAccountName and by the ClusterRoleBinding subject).
apiVersion: v1
kind: ServiceAccount
metadata:
  name: agent-zero
  namespace: agent-zero

---
# Grants the agent-zero ServiceAccount the cluster-admin ClusterRole so the
# in-pod kubectl can act on the whole cluster. This is cluster-wide,
# unrestricted access — keep the subject list limited to this one account.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: agent-zero-cluster-admin
subjects:
  - kind: ServiceAccount
    namespace: agent-zero
    name: agent-zero
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cluster-admin

# =============================================================================
# Agent Zero — AI Agent Web UI (NUC Edition, Blue Jay Profile)
# =============================================================================
# Chat / utility / embedding lanes route through fc-llm-bridge. Browser keeps
# a local nginx proxy to edge1 Pi 5 + AI HAT+ until the bridge grows a live
# Vision route and the in-pod tools stop calling Ollama directly.
# Blue Jay profile with 21 tools, 3 prompts, 4 extensions

---
# Agent Zero's API key for the FC LLM Bridge (ADR-088 chat / util / embed
# routing). Synced from the 1Password item "FC LLM Bridge API Keys"; the
# Deployment reads the field `agent-zero-k8s` from the resulting Secret for
# its OpenAI-compatible chat / utility / embedding lanes. The browser lane
# is not covered: it stays on the local Ollama sidecar until fc:vision is
# configured on the bridge.
apiVersion: onepassword.com/v1
kind: OnePasswordItem
metadata:
  name: fc-llm-bridge-api-keys
  namespace: agent-zero
spec:
  itemPath: 'vaults/IAmWorkin/items/FC LLM Bridge API Keys'

---
# API key used by Agent Zero's print_web.py Python tool.
# Synced from the 1Password item "Print.Web API Keys" (the password field is
# the API key). print_web.py reads it from the PRINT_WEB_API_KEY env var and
# sends it on every HTTP request to the thermal print service
# (GET /api/mcp/tools, POST /api/print/*, etc.).
# Print.Web speaks the legacy REST MCP shape (/api/mcp/tools/*) rather than
# the streamable-http MCP protocol; the print_web Python tool bridges that
# gap and already ships in the bluejay-tools ConfigMaps.
apiVersion: onepassword.com/v1
kind: OnePasswordItem
metadata:
  name: print-web-api-keys
  namespace: agent-zero
spec:
  itemPath: 'vaults/IAmWorkin/items/Print.Web API Keys'

---
# Agent Zero pod: two init containers (wait for edge1 Ollama, assemble the
# Blue Jay profile on the workspace PVC) followed by two long-running
# containers (nginx proxy to edge1 Ollama on 127.0.0.1:11434, and the
# Agent Zero web UI on port 80).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: agent-zero
  namespace: agent-zero
  labels:
    app: agent-zero
  annotations:
    agent-zero/deployment: "nuc"
    agent-zero/profile: "bluejay"
    agent-zero/ollama: "edge1 Pi 5 + AI HAT+ only (10.0.57.17:11434) — workstation Ollama is private dev hardware, not a cluster dependency"
spec:
  replicas: 1
  selector:
    matchLabels:
      app: agent-zero
  # Recreate stops the old pod before starting the new one; the workspace and
  # knowledge PVCs are ReadWriteOnce.
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: agent-zero
    spec:
      serviceAccountName: agent-zero
      initContainers:
        # Wait for edge1 Ollama to be reachable before starting Agent Zero.
        # (Workstation Ollama is intentionally NOT in the cluster path.)
        # Loops forever: the pod stays in Init until edge1 answers.
        - name: wait-for-ollama
          image: busybox:1.37
          command: ["sh", "-c"]
          args:
            - |
              echo "Waiting for edge1 Ollama (10.0.57.17:11434)..."
              until wget -qO- --timeout=2 http://10.0.57.17:11434/api/tags >/dev/null 2>&1; do
                echo "edge1 Ollama not ready yet, retrying in 5s..."
                sleep 5
              done
              echo "edge1 Ollama is reachable."
        # Assemble the Blue Jay profile directory structure from ConfigMaps.
        # ConfigMaps can't create nested dirs, so we copy into the workspace PVC.
        - name: setup-bluejay
          image: busybox:1.37
          command: ["sh", "-c"]
          args:
            - |
              echo "Setting up Blue Jay profile..."
              # Profile root files
              mkdir -p /a0/work/.bluejay/agents/bluejay/tools
              mkdir -p /a0/work/.bluejay/agents/bluejay/prompts
              cp /tmp/bluejay-profile/* /a0/work/.bluejay/agents/bluejay/
              # Tools (split across 3 ConfigMaps to stay under K8s 262K annotation limit)
              cp /tmp/bluejay-tools-a/* /a0/work/.bluejay/agents/bluejay/tools/
              cp /tmp/bluejay-tools-b/* /a0/work/.bluejay/agents/bluejay/tools/
              cp /tmp/bluejay-tools-c/* /a0/work/.bluejay/agents/bluejay/tools/
              # Prompts
              cp /tmp/bluejay-prompts/* /a0/work/.bluejay/agents/bluejay/prompts/
              # Extensions
              mkdir -p /a0/work/.bluejay/extensions/flowercore
              cp /tmp/flowercore-extensions/* /a0/work/.bluejay/extensions/flowercore/
              # Theme
              mkdir -p /a0/work/.bluejay/theme
              cp /tmp/bluejay-theme/* /a0/work/.bluejay/theme/
              echo "Blue Jay profile ready:"
              echo "  Tools: $(ls /a0/work/.bluejay/agents/bluejay/tools/*.py | wc -l)"
              echo "  Prompts: $(ls /a0/work/.bluejay/agents/bluejay/prompts/*.md | wc -l)"
              echo "  Extensions: $(ls /a0/work/.bluejay/extensions/flowercore/*.py | wc -l)"
          volumeMounts:
            - name: workspace
              mountPath: /a0/work
            - name: bluejay-tools-a
              mountPath: /tmp/bluejay-tools-a
            - name: bluejay-tools-b
              mountPath: /tmp/bluejay-tools-b
            - name: bluejay-tools-c
              mountPath: /tmp/bluejay-tools-c
            - name: bluejay-profile
              mountPath: /tmp/bluejay-profile
            - name: bluejay-prompts
              mountPath: /tmp/bluejay-prompts
            - name: flowercore-extensions
              mountPath: /tmp/flowercore-extensions
            - name: bluejay-theme
              mountPath: /tmp/bluejay-theme
      containers:
        # nginx sidecar: exposes edge1 Ollama to the pod on 127.0.0.1:11434.
        # The nginx.conf is written at container start from the heredoc below.
        - name: ollama-proxy
          image: nginx:1.27-alpine
          command: ["/bin/sh", "-c"]
          args:
            - |
              cat > /etc/nginx/nginx.conf <<'NGINX'
              worker_processes  1;
              events { worker_connections 1024; }
              http {
                upstream ollama_upstream {
                  # edge1 Pi 5 + AI HAT+ is the SOLE upstream.
                  # Workstation Ollama (BLUEJAY-WS) is private dev hardware and
                  # MUST NOT be added back here without explicit operator decision —
                  # adding it would expose the workstation to cluster traffic.
                  server 10.0.57.17:11434 max_fails=2 fail_timeout=10s;
                  keepalive 16;
                }
                server {
                  listen 11434;
                  # Local healthcheck — proves nginx itself is alive.
                  # Must NOT depend on upstream so liveness doesn't restart
                  # the container when edge1 is slow/offline.
                  location = /healthz {
                    access_log off;
                    return 200 'ok\n';
                    default_type text/plain;
                  }
                  location / {
                    proxy_http_version 1.1;
                    proxy_set_header Connection "";
                    proxy_set_header Host $host;
                    proxy_connect_timeout 5s;
                    proxy_read_timeout 600s;
                    proxy_send_timeout 600s;
                    proxy_next_upstream error timeout invalid_header http_502 http_503 http_504;
                    proxy_pass http://ollama_upstream;
                  }
                }
              }
              NGINX
              exec nginx -g 'daemon off;'
          ports:
            - containerPort: 11434
          # Readiness probe DOES check upstream so K8s only routes traffic
          # when edge1 Ollama is reachable. timeoutSeconds=5 absorbs the Pi's
          # slower TCP handshake under load (was timeoutSeconds=1 default →
          # 172 historic restarts when the workstation primary path went down,
          # before the cluster was repointed to edge1-only on 2026-04-27).
          readinessProbe:
            httpGet:
              path: /api/tags
              port: 11434
            initialDelaySeconds: 5
            periodSeconds: 15
            timeoutSeconds: 5
            failureThreshold: 3
          # Liveness probe hits ONLY local healthz — restarts the container
          # only when nginx itself is dead. Decoupling liveness from upstream
          # eliminates restart-loops caused by transient upstream outages.
          livenessProbe:
            httpGet:
              path: /healthz
              port: 11434
            initialDelaySeconds: 10
            periodSeconds: 30
            timeoutSeconds: 3
            failureThreshold: 3
        # Agent Zero web UI. The startup script installs kubectl, links the
        # Blue Jay profile, writes the model config, optionally builds the MCP
        # server JSON, then hands over to the image's own entrypoint.
        - name: agent-zero
          # NOTE(review): floating tag — consider pinning a version or digest
          # so rollouts are reproducible.
          image: agent0ai/agent-zero:latest
          command: ["/bin/bash", "-c"]
          args:
            - |
              # Install kubectl if not cached
              if [ -f /a0/work/kubectl ]; then
                cp /a0/work/kubectl /usr/local/bin/kubectl
              else
                # -f: fail on HTTP errors so an error page is never saved,
                # marked executable and cached on the PVC as "kubectl".
                curl -fsSLO "https://dl.k8s.io/release/v1.32.0/bin/linux/amd64/kubectl" && \
                chmod +x kubectl && mv kubectl /usr/local/bin/kubectl && \
                cp /usr/local/bin/kubectl /a0/work/kubectl
              fi
              # Link Blue Jay profile from workspace into Agent Zero's expected path
              ln -sfn /a0/work/.bluejay/agents/bluejay /a0/agents/bluejay
              # Write model config BEFORE initialize.sh loads it
              # The _model_config plugin reads config.json (NOT config.yaml).
              # chat_model: FlowerCore LLM Bridge (ADR-088) — OpenAI-compat,
              # spend-tracked, tier-aliased (fc:balanced → Claude Sonnet).
              # api_key comes from OPENAI_API_KEY / A0_SET_chat_model_api_key.
              # Utility + embedding now share the same bridge surface so Agent
              # Zero stops talking to Ollama directly for those model lanes.
              # Browser stays on the local 127.0.0.1 proxy until the bridge has
              # a live Vision route and the in-pod tools stop calling Ollama.
              mkdir -p /a0/usr/plugins/_model_config
              cat > /a0/usr/plugins/_model_config/config.json << 'MODELCFG'
              {"allow_chat_override":true,"chat_model":{"provider":"openai","name":"fc:balanced","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_history":0.7,"vision":false,"kwargs":{"temperature":0,"num_ctx":8192}},"utility_model":{"provider":"openai","name":"fc:cheap","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_input":0.7,"kwargs":{"num_ctx":8192}},"embedding_model":{"provider":"openai","name":"fc:embedding","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","kwargs":{}}}
              MODELCFG
              # Safeguard: strip any leading manifest indentation from the file.
              # (YAML already removes the block scalar's common indent, so this
              # normally matches nothing.)
              sed -i 's/^              //' /a0/usr/plugins/_model_config/config.json
              # Phase 0 Chat MCP pilot: Agent Zero does not interpolate env vars
              # inside A0_SET_mcp_servers JSON, so build the final JSON here from
              # the secret-backed CHAT_MCP_API_KEY env var before initialize.sh.
              # Use the in-cluster Chat service URL rather than the public
              # Traefik hostname so the pod stays off the private VIP lane that
              # the default egress rule blocks.
              if [ -n "${CHAT_MCP_API_KEY:-}" ]; then
                export A0_SET_mcp_servers="{\"mcpServers\":{\"fc-chat\":{\"type\":\"streamable-http\",\"url\":\"http://chat-web.fc-chat.svc/mcp\",\"headers\":{\"X-Api-Key\":\"${CHAT_MCP_API_KEY}\"}}}}"
              fi
              # Run the original entrypoint
              exec /exe/initialize.sh $BRANCH
          ports:
            - containerPort: 80
          env:
            # Agent identity
            - name: AGENT_NAME
              value: "Blue Jay (NUC)"
            # Chat model — routed through FlowerCore LLM Bridge (ADR-088)
            # so spend is tracked and tier aliases (fc:cheap/fc:balanced/fc:deep)
            # dispatch to Ollama or Anthropic via a single OpenAI-compat endpoint.
            # Utility + embedding now share the bridge/auth surface too.
            # Browser stays on local Ollama until the bridge has a live
            # Vision route and the in-pod tools stop calling Ollama directly.
            - name: A0_SET_chat_model_provider
              value: "openai"
            - name: A0_SET_chat_model_name
              value: "fc:balanced"
            - name: A0_SET_chat_model_api_base
              value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1"
            - name: A0_SET_chat_model_api_key
              valueFrom:
                secretKeyRef:
                  name: fc-llm-bridge-api-keys
                  key: agent-zero-k8s
            # Agent Zero's runtime still resolves provider keys from the
            # provider-level env names (models.get_api_key -> OPENAI_API_KEY /
            # API_KEY_OPENAI), not the slot-scoped A0_SET_* value alone.
            # Mirror the same secret here so real public chat runs can reach
            # the fc-llm-bridge chat_model path instead of failing before MCP.
            - name: OPENAI_API_KEY
              valueFrom:
                secretKeyRef:
                  name: fc-llm-bridge-api-keys
                  key: agent-zero-k8s
            - name: A0_SET_chat_model_ctx_length
              value: "8192"
            - name: A0_SET_chat_model_kwargs
              value: '{"temperature": 0, "num_ctx": 8192}'
            # Utility model — fast small helper tier through the same proxy
            - name: A0_SET_util_model_provider
              value: "openai"
            - name: A0_SET_util_model_name
              value: "fc:cheap"
            - name: A0_SET_util_model_api_base
              value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1"
            # NOTE(review): num_ctx is 2048 here but 8192 for utility_model in
            # the config.json written by the startup script — confirm which
            # source wins and align the two values.
            - name: A0_SET_util_model_kwargs
              value: '{"num_ctx": 2048}'
            # Embedding model — bridge alias to nomic-embed-text on edge1
            - name: A0_SET_embed_model_provider
              value: "openai"
            - name: A0_SET_embed_model_name
              value: "fc:embedding"
            - name: A0_SET_embed_model_api_base
              value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1"
            # Browser model — small Gemma candidate stays on the local proxy
            # until fc:vision is configured on the bridge.
            - name: A0_SET_browser_model_provider
              value: "ollama"
            - name: A0_SET_browser_model_name
              value: "gemma3:4b"
            - name: A0_SET_browser_model_api_base
              value: "http://127.0.0.1:11434"
            - name: A0_SET_browser_model_vision
              value: "true"
            # Agent profile — Blue Jay personality, tools, and system prompt
            - name: A0_SET_agent_profile
              value: "bluejay"
            # Memory settings
            - name: A0_SET_memory_memorize_enabled
              value: "true"
            - name: A0_SET_memory_memorize_consolidation
              value: "true"
            - name: A0_SET_memory_memorize_replace_threshold
              value: "0.85"
            - name: A0_SET_memory_recall_enabled
              value: "true"
            # Speech-to-text: select the smallest ("tiny") model size because
            # there is no GPU for Whisper. This sets the size only; it does not
            # by itself switch STT off.
            - name: A0_SET_stt_model_size
              value: "tiny"
            # FlowerCore.Chat MCP pilot (Phase 0)
            # Optional secret: when absent the startup script skips the MCP
            # server configuration.
            - name: CHAT_MCP_API_KEY
              valueFrom:
                secretKeyRef:
                  name: chat-mcp-api-key
                  key: api-key
                  optional: true
            # Print.Web — Thermal printer service on edge2.
            # PRINT_WEB_URL: internal HTTP (bypasses Traefik TLS — print_web.py
            # runs in-cluster and can reach edge2 directly on the PROD VLAN).
            # PRINT_WEB_API_KEY: from 1Password "Print.Web API Keys" password field,
            # synced by the print-web-api-keys OnePasswordItem CRD above.
            # The print_web.py Python tool reads both env vars for all HTTP calls.
            - name: PRINT_WEB_URL
              value: "http://10.0.57.16:5200"
            - name: PRINT_WEB_API_KEY
              valueFrom:
                secretKeyRef:
                  name: print-web-api-keys
                  key: password
            # Intranet search — use in-cluster HTTP (no step-ca TLS needed)
            # corpus_search.py reads FLOWERCORE_FLEET_VECTOR_DIR but that mount is not
            # on the cluster yet (BLUEJAY-WS only). The tool gracefully returns a
            # "no DB found" message with rebuild instructions rather than crashing.
            - name: FLOWERCORE_INTRANET_URL
              value: "http://intranet-web.intranet.svc:5300"
            # Kubernetes
            - name: KUBERNETES_SERVICE_HOST
              value: "kubernetes.default.svc"
            - name: KUBERNETES_SERVICE_PORT
              value: "443"
          volumeMounts:
            - name: workspace
              mountPath: /a0/work
            - name: knowledge
              mountPath: /a0/knowledge/custom/main
            - name: flowercore-extensions
              mountPath: /a0/extensions/flowercore
              readOnly: true
            - name: bluejay-theme
              mountPath: /a0/webui/static/css/custom
              readOnly: true
          # All three probes set timeoutSeconds explicitly. The Kubernetes
          # default is 1s, which is the same default that caused the restart
          # loops described on the ollama-proxy readiness probe.
          # Startup budget: 15s initial delay + 18 failures x 10s period.
          startupProbe:
            httpGet:
              path: /
              port: 80
            initialDelaySeconds: 15
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 18
          livenessProbe:
            httpGet:
              path: /
              port: 80
            periodSeconds: 30
            timeoutSeconds: 5
            failureThreshold: 3
          # Ready only when the UI answers AND the local proxy can reach
          # edge1 Ollama. Each curl is bounded with --max-time, and
          # timeoutSeconds exceeds their sum (3s + 5s) so the kubelet never
          # kills the exec probe before curl's own timeouts can fire.
          readinessProbe:
            exec:
              command:
                - /bin/bash
                - -c
                - "curl -sf --max-time 3 http://localhost:80/ > /dev/null && curl -sf --connect-timeout 3 --max-time 5 http://127.0.0.1:11434/api/tags > /dev/null"
            periodSeconds: 30
            timeoutSeconds: 10
            failureThreshold: 2
          resources:
            requests:
              memory: "2Gi"
              cpu: "1000m"
            limits:
              memory: "3Gi"
              cpu: "2000m"
      volumes:
        # Longhorn PVCs
        - name: workspace
          persistentVolumeClaim:
            claimName: agent-zero-data
        - name: knowledge
          persistentVolumeClaim:
            claimName: agent-zero-knowledge
        # Blue Jay ConfigMaps (copied into the workspace by setup-bluejay;
        # extensions and theme are also mounted read-only into agent-zero)
        - name: bluejay-tools-a
          configMap:
            name: bluejay-tools-a
        - name: bluejay-tools-b
          configMap:
            name: bluejay-tools-b
        - name: bluejay-tools-c
          configMap:
            name: bluejay-tools-c
        - name: bluejay-profile
          configMap:
            name: bluejay-profile
        - name: bluejay-prompts
          configMap:
            name: bluejay-prompts
        - name: flowercore-extensions
          configMap:
            name: flowercore-extensions
        - name: bluejay-theme
          configMap:
            name: bluejay-theme

---
# In-cluster endpoint for the Agent Zero web UI (container port 80).
# Targeted by the Traefik IngressRoute of the same name.
apiVersion: v1
kind: Service
metadata:
  name: agent-zero
  namespace: agent-zero
spec:
  type: ClusterIP
  ports:
    - protocol: TCP
      port: 80
      targetPort: 80
  selector:
    app: agent-zero

# =============================================================================
# Traefik IngressRoute — LAN access at agent-zero.iamworkin.lan
# =============================================================================

---
# Routes HTTPS requests for agent-zero.iamworkin.lan (websecure entry point)
# to the agent-zero Service on port 80, terminating TLS with the
# agent-zero-tls secret.
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
  name: agent-zero
  namespace: agent-zero
spec:
  entryPoints:
    - websecure
  tls:
    secretName: agent-zero-tls
  routes:
    - kind: Rule
      match: 'Host(`agent-zero.iamworkin.lan`)'
      services:
        - name: agent-zero
          port: 80

# =============================================================================
# TLS Certificate via cert-manager (step-ca ACME)
# =============================================================================

---
# Certificate for agent-zero.iamworkin.lan issued by the step-ca-acme
# ClusterIssuer and stored in the agent-zero-tls secret.
# Lifetime 720h (30 days); renewal starts 240h (10 days) before expiry.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: agent-zero-tls
  namespace: agent-zero
spec:
  dnsNames:
    - agent-zero.iamworkin.lan
  secretName: agent-zero-tls
  duration: "720h"
  renewBefore: "240h"
  issuerRef:
    kind: ClusterIssuer
    name: step-ca-acme

# =============================================================================
# NetworkPolicy — Restrict traffic
# =============================================================================

---
# Default-deny for pods labelled app=agent-zero, with the allowances listed
# below. Ports without an explicit protocol default to TCP; it is spelled out
# here for every rule.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: agent-zero-netpol
  namespace: agent-zero
spec:
  podSelector:
    matchLabels:
      app: agent-zero
  policyTypes:
    - Ingress
    - Egress
  ingress:
    # Web UI traffic arriving via Traefik
    - from:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: traefik-system
      ports:
        - port: 80
          protocol: TCP
    # Blackbox probes from the monitoring namespace
    - from:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: monitoring
      ports:
        - port: 80
          protocol: TCP
  egress:
    # DNS lookups (UDP and TCP) against kube-system
    - to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: kube-system
      ports:
        - port: 53
          protocol: UDP
        - port: 53
          protocol: TCP
    # edge1 Pi 5 + AI HAT+ Ollama — the only Ollama upstream. The workstation
    # is private dev hardware and is deliberately absent from this allowlist.
    - to:
        - ipBlock:
            cidr: 10.0.57.17/32
      ports:
        - port: 11434
          protocol: TCP
    # edge2 Print.Web service
    - to:
        - ipBlock:
            cidr: 10.0.57.16/32
      ports:
        - port: 5200
          protocol: TCP
    # Kubernetes API server
    - to:
        - ipBlock:
            cidr: 10.0.56.11/32
      ports:
        - port: 6443
          protocol: TCP
    # FlowerCore LLM Bridge (ADR-088 model routing) — ClusterIP service in
    # the fc-llm-bridge namespace, port 8080.
    - to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: fc-llm-bridge
      ports:
        - port: 8080
          protocol: TCP
    # FlowerCore.Chat MCP (Phase 0 pilot) — reached through the in-cluster
    # chat-web service rather than the public Traefik VIP, so MCP traffic
    # stays inside the cluster and is not caught by the private-range
    # exclusion on the internet rule.
    - to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: fc-chat
      ports:
        - port: 80
          protocol: TCP
        - port: 8080
          protocol: TCP
    # Intranet search API — in-cluster service for the same reason: traffic
    # stays inside the cluster and avoids the private-range exclusion.
    - to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: intranet
      ports:
        - port: 5300
          protocol: TCP
    # Public internet on any port (e.g. the in-pod kubectl download from
    # dl.k8s.io); the RFC 1918 private ranges are excluded.
    - to:
        - ipBlock:
            cidr: 0.0.0.0/0
            except:
              - 10.0.0.0/8
              - 172.16.0.0/12
              - 192.168.0.0/16