Compare commits
7 Commits
claude/blu
...
claude/blu
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6a89a76e39 | ||
|
|
4b777b16ac | ||
|
|
8c60e3a4d3 | ||
|
|
df02b4c3c3 | ||
|
|
c0dceafffd | ||
|
|
490db8f9e6 | ||
|
|
1926bdaf3b |
@@ -532,7 +532,7 @@ spec:
|
||||
fsGroupChangePolicy: OnRootMismatch
|
||||
containers:
|
||||
- name: web
|
||||
image: localhost/fc-ttsreader-web:v202604301236-b6ca2d5
|
||||
image: localhost/fc-ttsreader-web:v202605061500
|
||||
imagePullPolicy: Never
|
||||
ports:
|
||||
- containerPort: 5217
|
||||
|
||||
@@ -46,7 +46,7 @@ spec:
|
||||
spec:
|
||||
containers:
|
||||
- name: intranet-web
|
||||
image: localhost/fc-intranet-web:v20260505-1041
|
||||
image: localhost/fc-intranet-web:v20260505-1108
|
||||
imagePullPolicy: Never
|
||||
ports:
|
||||
- containerPort: 5300
|
||||
|
||||
@@ -1024,6 +1024,72 @@ data:
|
||||
summary: "Longhorn node {{ $labels.node }} not Ready"
|
||||
description: "Node {{ $labels.node }} reports ready=false (reason: {{ $labels.condition_reason }}). Volumes scheduled to this node will be unavailable until it recovers."
|
||||
|
||||
# ============================================================
|
||||
# FC Signage Marquee Performance — Track 3 + 8 (2026-05-06)
|
||||
# Live-mirrored from FlowerCore.Notes/scripts/monitoring/alerts.yml.
|
||||
# Source-of-truth for the live Podman Prometheus on noc1 is the
|
||||
# Notes file; this K8s ConfigMap exists so a future migration to
|
||||
# in-cluster Prometheus inherits the ruleset automatically.
|
||||
# See feedback_monitoring_k8s_target_vs_live_podman.
|
||||
# ============================================================
|
||||
- name: fc-signage-marquee
|
||||
rules:
|
||||
- alert: MarqueeDroppedFramesHigh
|
||||
expr: |
|
||||
(
|
||||
sum by (renderer, phase, node_id) (rate(marquee_dropped_frames_total[5m]))
|
||||
/
|
||||
sum by (renderer, phase, node_id) (rate(marquee_render_latency_ms_count[5m]))
|
||||
) > 0.05
|
||||
unless on()
|
||||
absent_over_time(marquee_dropped_frames_total[7d])
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
service: signage
|
||||
alert_channel: irc
|
||||
annotations:
|
||||
summary: "Marquee dropped-frame rate >5% on {{ $labels.renderer }}/{{ $labels.node_id }} ({{ $labels.phase }})"
|
||||
description: "Renderer {{ $labels.renderer }} on {{ $labels.node_id }} drops >5% of frames during {{ $labels.phase }}. Animation visibly stuttery."
|
||||
|
||||
- alert: MarqueeRenderLatencyP99High
|
||||
expr: |
|
||||
histogram_quantile(
|
||||
0.99,
|
||||
sum by (renderer, phase, node_id, le) (rate(marquee_render_latency_ms_bucket[5m]))
|
||||
) > 16
|
||||
unless on()
|
||||
absent_over_time(marquee_render_latency_ms_bucket[7d])
|
||||
for: 10m
|
||||
labels:
|
||||
severity: warning
|
||||
service: signage
|
||||
alert_channel: irc
|
||||
annotations:
|
||||
summary: "Marquee render latency p99 > 16ms on {{ $labels.renderer }}/{{ $labels.node_id }} ({{ $labels.phase }})"
|
||||
description: "Per-frame render latency p99 has exceeded the Pi-class 16ms budget for 10 minutes."
|
||||
|
||||
- alert: MarqueeAnimationDurationDrift
|
||||
expr: |
|
||||
abs(
|
||||
histogram_quantile(0.5, sum by (renderer, phase, le) (rate(marquee_animation_duration_ms_bucket[15m])))
|
||||
-
|
||||
on (phase) group_left() avg by (phase) (marquee_animation_duration_target_ms)
|
||||
)
|
||||
/
|
||||
on (phase) group_left() avg by (phase) (marquee_animation_duration_target_ms)
|
||||
> 0.10
|
||||
unless on()
|
||||
absent_over_time(marquee_animation_duration_ms_bucket[7d])
|
||||
for: 15m
|
||||
labels:
|
||||
severity: info
|
||||
service: signage
|
||||
alert_channel: irc
|
||||
annotations:
|
||||
summary: "Marquee animation duration drifting > 10% on {{ $labels.renderer }} ({{ $labels.phase }})"
|
||||
description: "Median observed cycle duration deviates from target DurationMs by >10%. Could indicate browser tab throttling, GPU pressure, or phase-advancement bug."
|
||||
|
||||
# =============================================================================
|
||||
# ConfigMap: Blackbox Exporter Configuration
|
||||
# =============================================================================
|
||||
|
||||
60
apps/worldbuilder/README.md
Normal file
60
apps/worldbuilder/README.md
Normal file
@@ -0,0 +1,60 @@
|
||||
# FlowerCore.WorldBuilder
|
||||
|
||||
ArgoCD-managed manifest for FlowerCore.WorldBuilder.Web — comic / storyboard
|
||||
authoring service that drives ComfyUI for panel image generation and
|
||||
QuestPDF for letter / A4 export.
|
||||
|
||||
Source: `D:\git\FlowerCore\FlowerCore.WorldBuilder` (master)
|
||||
|
||||
## Deployment order
|
||||
|
||||
1. **DNS preflight** — `worldbuilder.iamworkin.lan -> 10.0.56.200` MUST exist
|
||||
in pfSense Unbound before this manifest is applied, or cert-manager
|
||||
HTTP-01 silently backs off exponentially (~2h).
|
||||
Memory: `feedback_pfsense_dns_required_for_acme`.
|
||||
2. **Image import to ALL RKE2 nodes** — the pod can be scheduled onto any of
|
||||
`rke2-server` (10.0.56.11), `rke2-agent1` (10.0.56.12),
|
||||
`rke2-agent2` (10.0.56.13). Build with:
|
||||
```bash
|
||||
bash deploy/build.sh # in FlowerCore.WorldBuilder repo
|
||||
podman save localhost/fc-worldbuilder:v<TAG> -o /tmp/fc-worldbuilder-v<TAG>.tar
|
||||
for h in 10.0.56.11 10.0.56.12 10.0.56.13; do
|
||||
scp /tmp/fc-worldbuilder-v<TAG>.tar fcadmin@$h:/tmp/
|
||||
ssh fcadmin@$h \
|
||||
"sudo /var/lib/rancher/rke2/bin/ctr -a /run/k3s/containerd/containerd.sock \
|
||||
-n k8s.io images import /tmp/fc-worldbuilder-v<TAG>.tar"
|
||||
done
|
||||
```
|
||||
Memory: `feedback_rke2_image_import_per_node_scp`.
|
||||
3. **Bump image tag** in `worldbuilder.yaml` and git push.
|
||||
ArgoCD ApplicationSet picks up within ~3 minutes.
|
||||
4. **First production render** — open `https://worldbuilder.iamworkin.lan`,
|
||||
create World → Character → Storyboard → ExportJob, confirm artifact
|
||||
downloads. ComfyUI lives on BLUEJAY-WS at `http://10.0.56.20:8188`.
|
||||
|
||||
## Health probes
|
||||
|
||||
- `startupProbe` + `readinessProbe`: `httpGet /healthz` (registered explicitly
|
||||
in Program.cs — anonymous, no DB or OpenAPI dependency).
|
||||
- `livenessProbe`: `tcpSocket` as a cheap fallback.
|
||||
Memory: `feedback_k8s_probes_must_not_hit_openapi`,
|
||||
`feedback_k8s_probes_behind_auth_middleware`.
|
||||
|
||||
## Storage
|
||||
|
||||
- Longhorn RWO PVC `worldbuilder-data` (5Gi) mounted at `/data`. SQLite DB
|
||||
lives at `/data/worldbuilder.db`, generated images under `/data/gallery/`,
|
||||
PDF/PNG exports under `/data/exports/`.
|
||||
- DataProtection keys persist to the same SQLite via
|
||||
`AddFlowerCoreDataProtection<WorldBuilderDbContext>` — explicit migration
|
||||
`20260429133417_Initial` already creates `fc_dp_keys`.
|
||||
Memory: `feedback_dataprotection_keys_persist_to_app_dbcontext`,
|
||||
`feedback_intranet_dataprotection_table_must_have_explicit_migration`.
|
||||
|
||||
## Image generation backend
|
||||
|
||||
`FlowerCore:WorldBuilder:ImageGeneration:BaseUrl=http://10.0.56.20:8188` —
|
||||
ComfyUI runs on BLUEJAY-WS Windows (R9700 / gfx1201 / ROCm 7.2.1). Pod reaches
|
||||
the workstation directly across the 10.0.56.0/24 VLAN (no Podman-style host-
|
||||
filter issues — K8s pods route via Calico, which is L3-routed across the
|
||||
VLAN).
|
||||
203
apps/worldbuilder/worldbuilder.yaml
Normal file
203
apps/worldbuilder/worldbuilder.yaml
Normal file
@@ -0,0 +1,203 @@
|
||||
# FlowerCore.WorldBuilder — comic / storyboard authoring service.
|
||||
#
|
||||
# Deployment + Service + PVC + Certificate + IngressRoute. ArgoCD-managed
|
||||
# end-to-end. See apps/worldbuilder/README.md for the per-deploy runbook.
|
||||
#
|
||||
# Image build (BLUEJAY-WS):
|
||||
# bash deploy/build.sh # in FlowerCore.WorldBuilder repo
|
||||
# podman save localhost/fc-worldbuilder:v<TAG> -o /tmp/fc-worldbuilder-v<TAG>.tar
|
||||
# for h in 10.0.56.11 10.0.56.12 10.0.56.13; do
|
||||
# scp /tmp/fc-worldbuilder-v<TAG>.tar fcadmin@$h:/tmp/
|
||||
# ssh fcadmin@$h "sudo /var/lib/rancher/rke2/bin/ctr -a /run/k3s/containerd/containerd.sock -n k8s.io images import /tmp/fc-worldbuilder-v<TAG>.tar"
|
||||
# done
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: fc-worldbuilder
|
||||
labels:
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
---
|
||||
# SQLite DB + generated image gallery + PDF/PNG exports.
|
||||
# Longhorn RWO — a single replica with the `Recreate` rollout strategy avoids multi-attach overlap.
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: worldbuilder-data
|
||||
namespace: fc-worldbuilder
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
storageClassName: longhorn
|
||||
resources:
|
||||
requests:
|
||||
storage: 5Gi
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: worldbuilder-web
|
||||
namespace: fc-worldbuilder
|
||||
labels:
|
||||
app.kubernetes.io/name: worldbuilder-web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
spec:
|
||||
replicas: 1
|
||||
revisionHistoryLimit: 3
|
||||
strategy:
|
||||
# RWO PVC + single replica. Recreate avoids multi-attach overlap.
|
||||
type: Recreate
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: worldbuilder-web
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: worldbuilder-web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
annotations:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "8080"
|
||||
prometheus.io/path: "/metrics/prometheus"
|
||||
spec:
|
||||
securityContext:
|
||||
fsGroup: 1654
|
||||
fsGroupChangePolicy: OnRootMismatch
|
||||
containers:
|
||||
- name: web
|
||||
# Bump tag for each rebuild. Initial deploy: v202605062048
|
||||
image: localhost/fc-worldbuilder:v202605062048
|
||||
imagePullPolicy: Never
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
name: http
|
||||
env:
|
||||
- name: ASPNETCORE_URLS
|
||||
value: "http://+:8080"
|
||||
- name: ASPNETCORE_ENVIRONMENT
|
||||
value: "Production"
|
||||
- name: DOTNET_RUNNING_IN_CONTAINER
|
||||
value: "true"
|
||||
- name: DOTNET_SYSTEM_GLOBALIZATION_INVARIANT
|
||||
value: "false"
|
||||
# SQLite path overrides (default appsettings uses relative paths).
|
||||
- name: ConnectionStrings__DefaultConnection
|
||||
value: "Data Source=/data/worldbuilder.db"
|
||||
- name: FlowerCore__Database__Provider
|
||||
value: "Sqlite"
|
||||
- name: FlowerCore__Database__ConnectionStrings__Sqlite
|
||||
value: "Data Source=/data/worldbuilder.db"
|
||||
# Generated image gallery + exports persist on /data.
|
||||
- name: FlowerCore__WorldBuilder__ImageStore__RootPath
|
||||
value: "/data/gallery"
|
||||
- name: FlowerCore__WorldBuilder__Export__RootPath
|
||||
value: "/data/exports"
|
||||
# ComfyUI on BLUEJAY-WS (R9700 / gfx1201 / ROCm 7.2.1).
|
||||
- name: FlowerCore__WorldBuilder__ImageGeneration__BaseUrl
|
||||
value: "http://10.0.56.20:8188"
|
||||
- name: FlowerCore__WorldBuilder__ImageGeneration__ClientMode
|
||||
value: "comfyui"
|
||||
resources:
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 256Mi
|
||||
limits:
|
||||
cpu: 1000m
|
||||
memory: 768Mi
|
||||
# /healthz is registered explicitly in Program.cs (anonymous, no DB
|
||||
# or OpenAPI dependency). Liveness uses tcpSocket as a cheap fallback
|
||||
# in case future middleware changes accidentally gate /healthz.
|
||||
# Memory: feedback_k8s_probes_must_not_hit_openapi,
|
||||
# feedback_k8s_probes_behind_auth_middleware.
|
||||
startupProbe:
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: 8080
|
||||
initialDelaySeconds: 5
|
||||
periodSeconds: 5
|
||||
failureThreshold: 30
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: 8080
|
||||
periodSeconds: 10
|
||||
failureThreshold: 3
|
||||
livenessProbe:
|
||||
tcpSocket:
|
||||
port: 8080
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 30
|
||||
failureThreshold: 3
|
||||
securityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1654
|
||||
runAsGroup: 1654
|
||||
allowPrivilegeEscalation: false
|
||||
readOnlyRootFilesystem: true
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
volumeMounts:
|
||||
- name: data
|
||||
mountPath: /data
|
||||
- name: tmp
|
||||
mountPath: /tmp
|
||||
- name: logs
|
||||
mountPath: /app/logs
|
||||
volumes:
|
||||
- name: data
|
||||
persistentVolumeClaim:
|
||||
claimName: worldbuilder-data
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
- name: logs
|
||||
emptyDir: {}
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: worldbuilder-web
|
||||
namespace: fc-worldbuilder
|
||||
labels:
|
||||
app.kubernetes.io/name: worldbuilder-web
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app.kubernetes.io/name: worldbuilder-web
|
||||
ports:
|
||||
- name: http
|
||||
port: 80
|
||||
targetPort: 8080
|
||||
---
|
||||
apiVersion: cert-manager.io/v1
|
||||
kind: Certificate
|
||||
metadata:
|
||||
name: worldbuilder-web-tls
|
||||
namespace: fc-worldbuilder
|
||||
spec:
|
||||
secretName: worldbuilder-web-tls
|
||||
issuerRef:
|
||||
name: step-ca-acme
|
||||
kind: ClusterIssuer
|
||||
dnsNames:
|
||||
- worldbuilder.iamworkin.lan
|
||||
duration: 2160h # 90d
|
||||
renewBefore: 720h # 30d
|
||||
---
|
||||
apiVersion: traefik.io/v1alpha1
|
||||
kind: IngressRoute
|
||||
metadata:
|
||||
name: worldbuilder-web
|
||||
namespace: fc-worldbuilder
|
||||
spec:
|
||||
entryPoints:
|
||||
- websecure
|
||||
routes:
|
||||
- match: Host(`worldbuilder.iamworkin.lan`)
|
||||
kind: Rule
|
||||
services:
|
||||
- name: worldbuilder-web
|
||||
port: 80
|
||||
tls:
|
||||
secretName: worldbuilder-web-tls
|
||||
Reference in New Issue
Block a user