Compare commits

..

14 Commits

Author SHA1 Message Date
Andrew Stoltz
fbbc07023b deploy(fc-llm-bridge): roll fc:vision image v202604300022
Source: FlowerCore.LlmBridge@8dd181c (feat: fc:vision route + image
content forwarding). Adds:

- fc:vision tier alias parsing (TryParseTier handles fc:vision,
  FC:VISION, openai/fc:vision, vision)
- Image content forwarding: OpenAi image_url shape (https URL +
  data:[mediaType];base64,... URI) and Anthropic image/source
  passthrough are now promoted to LlmContentBlocks. Text-only
  content-parts arrays still flatten to the legacy joined string.
- DefaultRoutes seeder + appsettings.json gain Vision -> Anthropic +
  claude-sonnet-4-6.

Image built on BLUEJAY-WS, podman save + ctr import to all 3 RKE2
nodes (rke2-server, rke2-agent1, rke2-agent2). Bridge tests: 62/62
green (was 51/51, +11). Backwards-compatible with current chat /
util / embed callers; existing routes unchanged.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-30 00:26:45 -05:00
Andrew Stoltz
4b0eef0fb0 deploy(fc-llm-bridge): roll alias-fix image v20260430001132 2026-04-30 00:13:48 -05:00
Andrew Stoltz
bb09a3786f fix(knowledge): pin live manifest to bundled edition path 2026-04-29 23:37:02 -05:00
Andrew Stoltz
006dbcf671 fix(agent-zero): export knowledge mcp gate to python builder 2026-04-29 23:32:55 -05:00
Andrew Stoltz
1be71d6ba7 fix(agent-zero): export mcp servers without python indent errors 2026-04-29 23:19:48 -05:00
Andrew Stoltz
0c8026c912 fix(agent-zero): avoid heredoc break in mcp bootstrap 2026-04-29 23:16:54 -05:00
Andrew Stoltz
621ae47e00 fix(agent-zero): repair fc knowledge mcp manifest 2026-04-29 23:11:57 -05:00
Andrew Stoltz
ae6b8c0142 fix(knowledge): keep mcp key env on new token secret 2026-04-29 23:06:07 -05:00
Andrew Stoltz
da55220218 feat(agent-zero): wire fc_knowledge phase1 rollout 2026-04-29 22:59:19 -05:00
Andrew Stoltz
b1ad253dd6 fix(agent-zero): prefix bridge embedding alias for litellm 2026-04-29 21:14:12 -05:00
Andrew Stoltz
ee935f6e07 fix(agent-zero): keep internal util/embed on bridge v1 2026-04-29 21:09:04 -05:00
Andrew Stoltz
2853ee2024 chore(bridge): bump fc-llm-bridge image tag v202604292028 2026-04-29 20:50:55 -05:00
Andrew Stoltz
b4a34e16ca refactor(agent-zero): drop ollama-proxy sidecar (Phase 3) 2026-04-29 20:50:55 -05:00
Andrew Stoltz
0d5a1fd530 fix(agent-zero): route util and embed through llm bridge 2026-04-29 19:14:01 -05:00
4 changed files with 118 additions and 44 deletions

View File

@@ -92,13 +92,16 @@ subjects:
# =============================================================================
# Agent Zero — AI Agent Web UI (NUC Edition, Blue Jay Profile)
# =============================================================================
# Connects directly to fc-llm-bridge for chat + util + embeddings + browser.
# Connects directly to fc-llm-bridge for chat + internal util/embed + browser.
# Agent Zero's internal util/embed slots stay on the bridge's OpenAI-compatible
# /v1 surface, while browser + corpus-search use the Ollama-compatible /api/*
# surface through OLLAMA_HOST.
# Blue Jay profile with 21 tools, 3 prompts, 4 extensions.
---
# FC LLM Bridge API key for Agent Zero (ADR-088 chat/util/embed/browser routing).
# Syncs from 1Password item "FC LLM Bridge API Keys" (field: agent-zero-k8s).
# Consumed by chat, util, embeddings, browser, and corpus-search requests
# Consumed by chat, internal util/embed, browser, and corpus-search requests
# that traverse fc-llm-bridge.
apiVersion: onepassword.com/v1
kind: OnePasswordItem
@@ -124,6 +127,18 @@ metadata:
spec:
itemPath: "vaults/IAmWorkin/items/Print.Web API Keys"
---
# Knowledge MCP bearer token for the direct Agent Zero -> Knowledge.Web path.
# The 1Password item currently stores the raw token in its concealed PASSWORD
# field, which the operator syncs to Secret key `password`.
apiVersion: onepassword.com/v1
kind: OnePasswordItem
metadata:
name: knowledge-mcp-tokens
namespace: agent-zero
spec:
itemPath: "vaults/IAmWorkin/items/FlowerCore Knowledge MCP Tokens"
---
apiVersion: apps/v1
kind: Deployment
@@ -135,7 +150,7 @@ metadata:
annotations:
agent-zero/deployment: "nuc"
agent-zero/profile: "bluejay"
agent-zero/ollama: "edge1 Pi 5 + AI HAT+ only (10.0.57.17:11434) — workstation Ollama is private dev hardware, not a cluster dependency"
agent-zero/ollama: "fc-llm-bridge fronts edge1 Pi 5 + AI HAT+ Ollama for cluster browser/corpus-search traffic; internal chat/util/embed route through the bridge's authenticated OpenAI surface"
spec:
replicas: 1
selector:
@@ -228,23 +243,41 @@ spec:
# chat_model: FlowerCore LLM Bridge (ADR-088) — OpenAI-compat,
# spend-tracked, tier-aliased (fc:balanced → Claude Sonnet).
# api_key comes from A0_SET_chat_model_api_key env var (overrides
# config.json). Utility / embedding / browser all point at the
# same bridge root and use Ollama-compatible endpoints there.
# config.json). Utility + embedding stay on the authenticated
# OpenAI-compatible /v1 surface; browser and direct tool traffic
# use the bridge's Ollama-compatible root via OLLAMA_HOST.
mkdir -p /a0/usr/plugins/_model_config
cat > /a0/usr/plugins/_model_config/config.json << 'MODELCFG'
{"allow_chat_override":true,"chat_model":{"provider":"openai","name":"fc:balanced","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_history":0.7,"vision":false,"kwargs":{"temperature":0,"num_ctx":8192}},"utility_model":{"provider":"ollama","name":"qwen2.5:1.5b","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080","ctx_length":8192,"ctx_input":0.7,"kwargs":{"num_ctx":8192}},"embedding_model":{"provider":"ollama","name":"nomic-embed-text","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080","kwargs":{}}}
{"allow_chat_override":true,"chat_model":{"provider":"openai","name":"fc:balanced","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_history":0.7,"vision":false,"kwargs":{"temperature":0,"num_ctx":8192}},"utility_model":{"provider":"openai","name":"fc:cheap","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","ctx_length":8192,"ctx_input":0.7,"kwargs":{"num_ctx":8192}},"embedding_model":{"provider":"openai","name":"openai/fc:embedding","api_base":"http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1","kwargs":{}}}
MODELCFG
# Strip heredoc indentation
sed -i 's/^ //' /a0/usr/plugins/_model_config/config.json
# Phase 0 Chat MCP pilot: Agent Zero does not interpolate env vars
# inside A0_SET_mcp_servers JSON, so build the final JSON here from
# the secret-backed CHAT_MCP_API_KEY env var before initialize.sh.
# Use the in-cluster Chat service URL rather than the public
# Traefik hostname so the pod stays off the private VIP lane that
# the default egress rule blocks.
if [ -n "${CHAT_MCP_API_KEY:-}" ]; then
export A0_SET_mcp_servers="{\"mcpServers\":{\"fc-chat\":{\"type\":\"streamable-http\",\"url\":\"http://chat-web.fc-chat.svc/mcp\",\"headers\":{\"X-Api-Key\":\"${CHAT_MCP_API_KEY}\"}}}}"
# the secret-backed env vars before initialize.sh. Keep the local
# corpus_search.py tool mounted either way so outage fallback
# remains available even when fc_knowledge is not advertised.
export KNOWLEDGE_MCP_ENABLED=false
if [ -n "${KNOWLEDGE_MCP_BEARER_TOKEN:-}" ]; then
if curl -sf --connect-timeout 3 "${KNOWLEDGE_MCP_HEALTH_URL}" > /dev/null && \
curl -sf --connect-timeout 5 \
-H "Authorization: Bearer ${KNOWLEDGE_MCP_BEARER_TOKEN}" \
-H "Accept: application/json, text/event-stream" \
-H "Content-Type: application/json" \
-d '{"jsonrpc":"2.0","id":"fc-knowledge-bootstrap","method":"initialize","params":{"protocolVersion":"2025-03-26","capabilities":{},"clientInfo":{"name":"agent-zero-bootstrap","version":"1.0"}}}' \
"${KNOWLEDGE_MCP_URL}" > /dev/null; then
export KNOWLEDGE_MCP_ENABLED=true
echo "fc_knowledge enabled from ${KNOWLEDGE_MCP_URL}."
else
echo "fc_knowledge unavailable or unauthorized; keeping local corpus_search.py as the fallback path."
fi
else
echo "fc_knowledge token missing; keeping local corpus_search.py as the fallback path."
fi
export A0_SET_mcp_servers="$(
python3 -c 'import json, os; servers = {}; chat_key = os.getenv("CHAT_MCP_API_KEY"); knowledge_enabled = os.getenv("KNOWLEDGE_MCP_ENABLED", "false").lower() == "true"; token = os.getenv("KNOWLEDGE_MCP_BEARER_TOKEN", "") if knowledge_enabled else ""; chat_key and servers.setdefault("fc_chat", {"type": "streamable-http", "url": "http://chat-web.fc-chat.svc/mcp", "headers": {"X-Api-Key": chat_key}}); token and servers.setdefault("fc_knowledge", {"type": "streamable-http", "url": os.getenv("KNOWLEDGE_MCP_URL", "http://knowledge-web.knowledge.svc/mcp"), "headers": {"Authorization": f"Bearer {token}"}}); print(json.dumps({"mcpServers": servers}, separators=(",", ":")))'
)"
# Run the original entrypoint
exec /exe/initialize.sh $BRANCH
ports:
@@ -256,8 +289,9 @@ spec:
# Chat model — routed through FlowerCore LLM Bridge (ADR-088)
# so spend is tracked and tier aliases (fc:cheap/fc:balanced/fc:deep)
# dispatch to Ollama or Anthropic via a single OpenAI-compat endpoint.
# Utility / embedding / browser now traverse fc-llm-bridge too so
# Agent Zero no longer needs a local Ollama proxy sidecar.
# Internal utility + embedding use the authenticated OpenAI surface,
# while browser/corpus-search use the bridge's Ollama-compatible
# endpoints so Agent Zero no longer needs a local proxy sidecar.
- name: A0_SET_chat_model_provider
value: "openai"
- name: A0_SET_chat_model_name
@@ -288,32 +322,24 @@ spec:
value: "8192"
- name: A0_SET_chat_model_kwargs
value: '{"temperature": 0, "num_ctx": 8192}'
# Utility model — fast small helper tier through the same proxy
# Utility model — fast small helper tier through the OpenAI surface
- name: A0_SET_util_model_provider
value: "ollama"
value: "openai"
- name: A0_SET_util_model_name
value: "qwen2.5:1.5b"
value: "fc:cheap"
- name: A0_SET_util_model_api_base
value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080"
- name: A0_SET_util_model_api_key
valueFrom:
secretKeyRef:
name: fc-llm-bridge-api-keys
key: agent-zero-k8s
value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1"
- name: A0_SET_util_model_kwargs
value: '{"num_ctx": 2048}'
# Embedding model — nomic through the same proxy
# Embedding model — authenticated bridge alias to nomic-embed-text.
# LiteLLM's embedding() path needs an explicit provider prefix here
# even though the chat slot can use bare fc:* aliases.
- name: A0_SET_embed_model_provider
value: "ollama"
value: "openai"
- name: A0_SET_embed_model_name
value: "nomic-embed-text"
value: "openai/fc:embedding"
- name: A0_SET_embed_model_api_base
value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080"
- name: A0_SET_embed_model_api_key
valueFrom:
secretKeyRef:
name: fc-llm-bridge-api-keys
key: agent-zero-k8s
value: "http://fc-llm-bridge.fc-llm-bridge.svc:8080/v1"
# Browser model — small Gemma candidate through the same proxy
- name: A0_SET_browser_model_provider
value: "ollama"
@@ -354,6 +380,19 @@ spec:
name: chat-mcp-api-key
key: api-key
optional: true
# FlowerCore.Knowledge MCP Phase 1 — direct Agent Zero client path.
# Probe /healthz first, then try an authenticated initialize call.
# If either fails, Agent Zero boots without fc_knowledge and keeps
# the local corpus_search.py tool as the outage-safe path.
- name: KNOWLEDGE_MCP_URL
value: "http://knowledge-web.knowledge.svc/mcp"
- name: KNOWLEDGE_MCP_HEALTH_URL
value: "http://knowledge-web.knowledge.svc/healthz"
- name: KNOWLEDGE_MCP_BEARER_TOKEN
valueFrom:
secretKeyRef:
name: knowledge-mcp-tokens
key: password
# Print.Web — Thermal printer service on edge2.
# PRINT_WEB_URL: internal HTTP (bypasses Traefik TLS — print_web.py
# runs in-cluster and can reach edge2 directly on the PROD VLAN).
@@ -578,6 +617,17 @@ spec:
protocol: TCP
- port: 8080
protocol: TCP
# FlowerCore.Knowledge MCP (Phase 1) — in-cluster direct route with
# anonymous /healthz probe plus authenticated /mcp initialize/tool calls.
- to:
- namespaceSelector:
matchLabels:
kubernetes.io/metadata.name: knowledge
ports:
- port: 80
protocol: TCP
- port: 8080
protocol: TCP
# Intranet search API — use in-cluster svc so traffic stays inside
# the cluster and is not blocked by the private-range egress denylist.
- to:

View File

@@ -97,7 +97,7 @@ spec:
# dotnet.exe publish -c Release -o deploy/app \
# src/FlowerCore.LlmBridge.Web/FlowerCore.LlmBridge.Web.csproj
# podman build -t localhost/fc-llm-bridge:v<tag> -f deploy/Dockerfile.deploy deploy
image: localhost/fc-llm-bridge:v202604292028
image: localhost/fc-llm-bridge:v202604300022
imagePullPolicy: Never
ports:
- containerPort: 8080

View File

@@ -5,7 +5,9 @@ Phase 2.4 closed. Pod running, certificate issued (step-ca-acme), PVC
bound (Longhorn 20Gi RWO), ArgoCD `infra-knowledge` synced. `/healthz`
returns 200, `/api/v1/editions` returns `[]` (initial-deploy state — no
*.db files in the PVC yet; Phase 2.5+ admin UI handles bulk
population).
population). Phase 1 of the Agent Zero MCP rollout keeps `/healthz`
anonymous and gates `/mcp` behind `Authorization: Bearer <token>` built
from the 1Password item `FlowerCore Knowledge MCP Tokens`.
- Plan: [`../../../FlowerCore.Notes/docs/ai-agents/flowercore-knowledge-service-plan.md`](../../../FlowerCore.Notes/docs/ai-agents/flowercore-knowledge-service-plan.md)
- Sprint: [`../../../FlowerCore.Notes/docs/ai-station/sprint-e-xxl-plan.md`](../../../FlowerCore.Notes/docs/ai-station/sprint-e-xxl-plan.md) (Track B)
@@ -19,6 +21,12 @@ search to the rest of the FC ecosystem (Agent Zero, Chat.Web persona
memory, AiStation embeddings explorer, TtsReader chapter context, BMO
bot, Pi nodes via `fc-index sync`).
Phase 1 MCP routing is explicit:
- in-cluster Agent Zero → `http://knowledge-web.knowledge.svc/mcp`
- workstation Agent Zero → `https://knowledge.iamworkin.lan/mcp`
- probe URL for both lanes → `/healthz`
## Deployment order (do NOT skip / reorder)
### 1. FlowerCore.DNS public A record — knowledge.iamworkin.lan -> 10.0.56.200

View File

@@ -40,16 +40,16 @@ metadata:
labels:
app.kubernetes.io/part-of: bluejay-infra
---
# MCP API key — synced from 1Password so /mcp stays gated without baking
# secrets into Git. The PASSWORD category maps the concealed field to Secret
# key `password`, which the Deployment reads into FlowerCore:Mcp:ApiKey:Key.
# MCP bearer token for the read-only Agent Zero Phase 1 lane. The 1Password
# item currently stores the raw token in its concealed PASSWORD field, which
# the operator syncs into the namespaced Secret key `password`.
apiVersion: onepassword.com/v1
kind: OnePasswordItem
metadata:
name: knowledge-mcp-api-key
name: knowledge-mcp-tokens
namespace: knowledge
spec:
itemPath: "vaults/IAmWorkin/items/KnowledgeApiKey"
itemPath: "vaults/IAmWorkin/items/FlowerCore Knowledge MCP Tokens"
---
apiVersion: v1
kind: PersistentVolumeClaim
@@ -102,8 +102,17 @@ spec:
- name: web
# Placeholder tag — bump to the image you built + imported to ALL
# RKE2 nodes via scripts/deploy-knowledge.sh before applying.
image: localhost/fc-knowledge-web:v202604272200
image: localhost/fc-knowledge-web:v20260429232635
imagePullPolicy: Never
command:
- /bin/sh
- -c
args:
- |
if [ -n "${KNOWLEDGE_MCP_BEARER_TOKEN:-}" ]; then
export FlowerCore__Mcp__ApiKey__Key="Bearer ${KNOWLEDGE_MCP_BEARER_TOKEN}"
fi
exec dotnet FlowerCore.Knowledge.Web.dll
ports:
- containerPort: 8080
name: http
@@ -115,7 +124,7 @@ spec:
- name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
value: "false"
# Vector-store directory + embedding model + edition profile dir.
# Profile JSON is baked into the image at /app/editions via the
# Profile JSON is baked into the image at /home/app/editions via the
# csproj Content-link from FlowerCore.Common/editions/.
- name: Knowledge__VectorStoresDirectory
value: "/data/vector-stores"
@@ -126,7 +135,7 @@ spec:
- name: Knowledge__MaxLimit
value: "50"
- name: FlowerCore__Editions__ProfileDirectory
value: "/app/editions"
value: "/home/app/editions"
# Embed via edge1 Pi 5 + AI HAT+ (10.0.57.17:11434). Cluster
# services do not depend on BLUEJAY-WS (private dev hardware) per
# bluejay-infra@0f9d56e. Query-time embedding is fast enough on
@@ -138,7 +147,14 @@ spec:
- name: FlowerCore__Mcp__ApiKey__Key
valueFrom:
secretKeyRef:
name: knowledge-mcp-api-key
name: knowledge-mcp-tokens
key: password
- name: FlowerCore__Mcp__ApiKey__HeaderName
value: "Authorization"
- name: KNOWLEDGE_MCP_BEARER_TOKEN
valueFrom:
secretKeyRef:
name: knowledge-mcp-tokens
key: password
resources:
requests:
@@ -185,7 +201,7 @@ spec:
- name: tmp
mountPath: /tmp
- name: logs
mountPath: /app/logs
mountPath: /home/app/logs
volumes:
- name: vector-store
persistentVolumeClaim: