fix(agent-zero): move corpus_search + intranet_search into bluejay-tools-c
The prior commit b71f9e4 created a stray YAML document between the
bluejay-tools-c and bluejay-profile sections. kubectl applied the stray
block's data to bluejay-profile (wrong ConfigMap, wrong mount target).
The setup-bluejay initContainer copies bluejay-tools-{a,b,c} to the tools
directory; bluejay-profile is copied to the agent profile directory. Tools
must live in one of the three tools ConfigMaps.
Fix: insert corpus_search.py and intranet_search.py directly into the
bluejay-tools-c YAML document (before kind/metadata, matching the
data-first layout the rest of the file uses). Also fix the mojibake
characters (—, →, ·, … and ×) that were corrupted in the prior commit.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -13150,27 +13150,19 @@ data:
|
||||
- PowerShell 5.1 compatibility is assumed (no PowerShell 7+ features).
|
||||
- All commands run with `-NoProfile -NonInteractive` flags for clean execution.
|
||||
"""
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: bluejay-tools-c
|
||||
namespace: agent-zero
|
||||
|
||||
---
|
||||
apiVersion: v1
|
||||
data:
|
||||
corpus_search.py: |
|
||||
# FlowerCore Fleet Corpus Vector Search Tool
|
||||
#
|
||||
# Queries the AiStation-built SqliteVecVectorStore DB at /a0/usr/vectors/fleet.db
|
||||
# (bind-mounted read-only from /var/lib/flowercore/vector-stores/ on the host).
|
||||
# Embeds the query through Ollama's nomic-embed-text model, computes cosine
|
||||
# similarity against every stored chunk in pure Python (no numpy — not present
|
||||
# similarity against every stored chunk in pure Python (no numpy — not present
|
||||
# in the container), and returns the top-K nearest neighbors with source metadata.
|
||||
#
|
||||
# This is the offline-friendly counterpart to `intranet_search` (which hits the
|
||||
# Intranet's live REST API). Use it for Bible/Greek/Hebrew/Strong's lookups and
|
||||
# anywhere the workstation has a newer DB than the Intranet one. The store is
|
||||
# refreshed by `aistation-indexer build <edition>` — see the FlowerCore.Knowledge
|
||||
# refreshed by `aistation-indexer build <edition>` — see the FlowerCore.Knowledge
|
||||
# ADR at docs/ai-agents/flowercore-knowledge-service-plan.md.
|
||||
|
||||
import json
|
||||
@@ -13188,7 +13180,7 @@ data:
|
||||
"/a0/usr/vectors",
|
||||
)
|
||||
# When the caller doesn't pick an explicit DB, prefer the biggest fleet tier
|
||||
# present on disk. Workstation → pi-edge → bmo-bot.
|
||||
# present on disk. Workstation → pi-edge → bmo-bot.
|
||||
PREFERRED_DB_ORDER = [
|
||||
os.environ.get("FLOWERCORE_FLEET_VECTOR_DB", ""),
|
||||
"fleet-workstation-full.db",
|
||||
@@ -13291,10 +13283,10 @@ data:
|
||||
passage = h.get("passage") or ""
|
||||
lang = h.get("language") or ""
|
||||
meta_bits = [x for x in (h["index"], h["repo"], passage, lang) if x]
|
||||
meta = " · ".join(meta_bits)
|
||||
meta = " · ".join(meta_bits)
|
||||
preview = h["text"]
|
||||
if len(preview) > 320:
|
||||
preview = preview[:320].rstrip() + "…"
|
||||
preview = preview[:320].rstrip() + "…"
|
||||
lines.append(f"{rank}. **{h['score']:.3f}** {meta}")
|
||||
lines.append(f" `{h['source']}`")
|
||||
lines.append(f" {preview}")
|
||||
@@ -13349,8 +13341,8 @@ data:
|
||||
|
||||
|
||||
def _cosine(a: list, b: list) -> float:
|
||||
"""Cosine similarity in pure Python — no numpy in the A0 container."""
|
||||
# zip() stops at the shorter — AiStation DB guarantees same dim per index.
|
||||
"""Cosine similarity in pure Python — no numpy in the A0 container."""
|
||||
# zip() stops at the shorter — AiStation DB guarantees same dim per index.
|
||||
dot = 0.0
|
||||
na = 0.0
|
||||
nb = 0.0
|
||||
@@ -13373,8 +13365,8 @@ data:
|
||||
EstimatedTokens, EmbeddingJson)
|
||||
|
||||
Embeddings are stored as JSON arrays in EmbeddingJson; similarity is computed
|
||||
in Python. For ~100k chunks × 768 dims this takes a couple seconds on a
|
||||
workstation — acceptable for interactive A0 use.
|
||||
in Python. For ~100k chunks × 768 dims this takes a couple seconds on a
|
||||
workstation — acceptable for interactive A0 use.
|
||||
"""
|
||||
conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True)
|
||||
try:
|
||||
@@ -13398,7 +13390,7 @@ data:
|
||||
cursor = conn.execute("".join(sql), params)
|
||||
|
||||
# Min-heap by (score, ...) would be faster but for interactive use we
|
||||
# just sort at the end — simpler and readable.
|
||||
# just sort at the end — simpler and readable.
|
||||
scored = []
|
||||
for row in cursor:
|
||||
idx, chunk_id, text, repo, source_file, book, chapter, verses, lang, emb_json = row
|
||||
@@ -13455,7 +13447,7 @@ data:
|
||||
).fetchall())
|
||||
for name, dim, updated in idx_rows:
|
||||
count = counts.get(name, 0)
|
||||
lines.append(f"- **{name}** — {count:,} chunks × {dim}d (built {updated})")
|
||||
lines.append(f"- **{name}** — {count:,} chunks × {dim}d (built {updated})")
|
||||
finally:
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
@@ -13477,11 +13469,12 @@ data:
|
||||
).fetchall()
|
||||
if not rows:
|
||||
return "(no indexes)"
|
||||
return ", ".join(f"{r[0]}({r[2]}×{r[1]}d)" for r in rows)
|
||||
return ", ".join(f"{r[0]}({r[2]}×{r[1]}d)" for r in rows)
|
||||
finally:
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
return f"(couldn't list: {e})"
|
||||
|
||||
intranet_search.py: |
|
||||
# Intranet Vector Search Tool
|
||||
# Queries the Blue Jay Lab Intranet's Shared.Indexing RAG corpus over its
|
||||
@@ -13597,6 +13590,15 @@ data:
|
||||
lines.append("")
|
||||
|
||||
return Response(message="\n".join(lines), break_loop=False)
|
||||
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: bluejay-tools-c
|
||||
namespace: agent-zero
|
||||
|
||||
---
|
||||
apiVersion: v1
|
||||
data:
|
||||
agent.json: |
|
||||
{
|
||||
"title": "Blue Jay",
|
||||
|
||||
Reference in New Issue
Block a user