From c1a43c64b30cba36395cc1a229d6d6cdec5570c4 Mon Sep 17 00:00:00 2001 From: Andrew Stoltz Date: Thu, 11 Jun 2026 16:05:40 -0500 Subject: [PATCH] deploy(worldbuilder): enable live gpu backend --- apps/worldbuilder/README.md | 40 ++++++++++++----------------- apps/worldbuilder/worldbuilder.yaml | 22 ++++++++-------- 2 files changed, 29 insertions(+), 33 deletions(-) diff --git a/apps/worldbuilder/README.md b/apps/worldbuilder/README.md index 42ef8b3..4c641f0 100644 --- a/apps/worldbuilder/README.md +++ b/apps/worldbuilder/README.md @@ -12,28 +12,27 @@ Source: `D:\git\FlowerCore\FlowerCore.WorldBuilder` (master) in pfSense Unbound before this manifest is applied, or cert-manager HTTP-01 silently exponential-backs-off ~2h. Memory: `feedback_pfsense_dns_required_for_acme`. -2. **Image import to ALL RKE2 nodes** — pod can schedule to any of - `rke2-server` (10.0.56.11), `rke2-agent1` (10.0.56.12), - `rke2-agent2` (10.0.56.13). Build with: +2. **Image import to ALL Ready RKE2 nodes** — pod can currently schedule to + `rke2-server` (10.0.56.11) and `rke2-agent1` (10.0.56.12). Build with: ```bash bash deploy/build.sh # in FlowerCore.WorldBuilder repo - podman save localhost/fc-worldbuilder:v -o /tmp/fc-worldbuilder-v.tar - for h in 10.0.56.11 10.0.56.12 10.0.56.13; do - scp /tmp/fc-worldbuilder-v.tar fcadmin@$h:/tmp/ + mkdir -p artifacts/deploy + podman save localhost/fc-worldbuilder:v -o artifacts/deploy/fc-worldbuilder-v.tar + for h in 10.0.56.11 10.0.56.12; do + ssh fcadmin@$h "mkdir -p /home/fcadmin/.fcv" + scp artifacts/deploy/fc-worldbuilder-v.tar fcadmin@$h:/home/fcadmin/.fcv/ ssh fcadmin@$h \ "sudo /var/lib/rancher/rke2/bin/ctr -a /run/k3s/containerd/containerd.sock \ - -n k8s.io images import /tmp/fc-worldbuilder-v.tar" + -n k8s.io images import /home/fcadmin/.fcv/fc-worldbuilder-v.tar" done ``` Memory: `feedback_rke2_image_import_per_node_scp`. 3. **Bump image tag** in `worldbuilder.yaml` and git push. ArgoCD ApplicationSet picks up within ~3 minutes. -4. **First production render** — open - `https://worldbuilder.iamworkin.lan/studio/c32e0000-0000-4000-8000-000000000004` - and confirm the Cyberpunk Blue Jay demo prompt loads with five seeded fake - generated images. This Sprint 32 visitor-safe profile uses - `ClientMode=fake`; switch the image-generation env vars back to ComfyUI only - for an operator-owned GPU render lane. +4. **First production render** — verify + `https://worldbuilder.iamworkin.lan/healthz`, open + `https://worldbuilder.iamworkin.lan/settings`, and confirm the image backend + reports ComfyUI before running an operator-owned render lane. ## Health probes @@ -56,13 +55,8 @@ Source: `D:\git\FlowerCore\FlowerCore.WorldBuilder` (master) ## Image generation backend -Sprint 32 pins the Kubernetes profile to -`FlowerCore:WorldBuilder:ImageGeneration:ClientMode=fake` with -`BaseUrl=http://127.0.0.1:1`. That keeps the public/internal visitor demo -deterministic, avoids GPU exposure, and still exercises the studio/gallery -surface with persisted generated-image metadata. - -The previous ComfyUI backend target was `http://10.0.56.20:8188` on -BLUEJAY-WS (R9700 / gfx1201 / ROCm 7.2.1). Re-enable it only in an -operator-owned follow-up that also verifies workstation reachability and image -import freshness. +The live internal profile now uses +`FlowerCore:WorldBuilder:ImageGeneration:ClientMode=comfyui` with +`BaseUrl=http://10.0.56.20:8188` on BLUEJAY-WS (R9700 / gfx1201 / ROCm 7.2). +Keep the public host pre-staging disabled unless the five safe-to-expose gates +are rechecked; the live GPU lane is operator-owned and internal-only. diff --git a/apps/worldbuilder/worldbuilder.yaml b/apps/worldbuilder/worldbuilder.yaml index 7f4202f..bdb5847 100644 --- a/apps/worldbuilder/worldbuilder.yaml +++ b/apps/worldbuilder/worldbuilder.yaml @@ -5,10 +5,10 @@ # # Image build (BLUEJAY-WS): # bash deploy/build.sh # in FlowerCore.WorldBuilder repo -# podman save localhost/fc-worldbuilder:v -o /tmp/fc-worldbuilder-v.tar -# for h in 10.0.56.11 10.0.56.12 10.0.56.13; do -# scp /tmp/fc-worldbuilder-v.tar fcadmin@$h:/tmp/ -# ssh fcadmin@$h "sudo /var/lib/rancher/rke2/bin/ctr -a /run/k3s/containerd/containerd.sock -n k8s.io images import /tmp/fc-worldbuilder-v.tar" +# podman save localhost/fc-worldbuilder:v -o artifacts/deploy/fc-worldbuilder-v.tar +# for h in 10.0.56.11 10.0.56.12; do +# scp artifacts/deploy/fc-worldbuilder-v.tar fcadmin@$h:/home/fcadmin/.fcv/ +# ssh fcadmin@$h "sudo /var/lib/rancher/rke2/bin/ctr -a /run/k3s/containerd/containerd.sock -n k8s.io images import /home/fcadmin/.fcv/fc-worldbuilder-v.tar" # done --- apiVersion: v1 @@ -90,7 +90,7 @@ spec: containers: - name: web # Bump tag for each rebuild. Initial deploy: v202605062048 - image: localhost/fc-worldbuilder:v202605062048 + image: localhost/fc-worldbuilder:v20260611-b4a0025-gpu imagePullPolicy: Never ports: - containerPort: 8080 @@ -117,14 +117,16 @@ spec: value: "/data/gallery" - name: FlowerCore__WorldBuilder__Export__RootPath value: "/data/exports" - # Visitor-safe Sprint 32 profile: fake backend keeps public demo - # rendering deterministic and avoids exposing BLUEJAY-WS GPU. + # Operator-approved live GPU lane. Internal-only host targets + # BLUEJAY-WS ComfyUI; keep public host pre-staging disabled below. - name: FlowerCore__WorldBuilder__ImageGeneration__BaseUrl - value: "http://127.0.0.1:1" + value: "http://10.0.56.20:8188" - name: FlowerCore__WorldBuilder__ImageGeneration__ClientMode - value: "fake" + value: "comfyui" - name: FlowerCore__WorldBuilder__ImageGeneration__BackendId - value: "fake" + value: "comfyui" + - name: FlowerCore__WorldBuilder__ImageGeneration__VisitorSafe + value: "false" resources: # Cluster CPU-request budget runs hot (99% on all 3 nodes at deploy # time) while actual CPU usage is well below capacity. Idle Blazor