Compare commits
1 Commits
67064c4129
...
codex/sign
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5cf665e77d |
@@ -1,171 +0,0 @@
|
|||||||
# fc-redis — SignalR backplane for cross-product event bus
|
|
||||||
#
|
|
||||||
# Lands per Q-SO-1 resolution (2026-05-11 PM): SignalR backplane in Phase A,
|
|
||||||
# not Phase C as originally drafted. Operator directive: "Redis can be
|
|
||||||
# deployed just fine as it's another FlowerCore technology we'll want to
|
|
||||||
# manage."
|
|
||||||
#
|
|
||||||
# Phase A scope (this file):
|
|
||||||
# - Single Redis 7.x Alpine pod
|
|
||||||
# - 1Gi Longhorn RWO PVC for AOF persistence
|
|
||||||
# - ClusterIP Service at `redis.fc-redis.svc.cluster.local:6379`
|
|
||||||
# - No AUTH (in-cluster only; not exposed externally)
|
|
||||||
# - No IngressRoute (backplane is server-to-server only)
|
|
||||||
#
|
|
||||||
# Consumers (Phase A IMPL across FC services):
|
|
||||||
# - FlowerCore.Signage.Web (OpsConsoleHub)
|
|
||||||
# - FlowerCore.Scoreboard.Web (ScoreboardHub)
|
|
||||||
# - FlowerCore.SignalControl.Web
|
|
||||||
# - FlowerCore.DMS.Web
|
|
||||||
# - Any other product joining the cross-product event bus
|
|
||||||
#
|
|
||||||
# Each consumer adds:
|
|
||||||
# services.AddSignalR()
|
|
||||||
# .AddStackExchangeRedis(
|
|
||||||
# "redis.fc-redis.svc.cluster.local:6379",
|
|
||||||
# opts => opts.Configuration.ChannelPrefix =
|
|
||||||
# StackExchange.Redis.RedisChannel.Literal("fc-opsconsole"));
|
|
||||||
#
|
|
||||||
# Phase B / C follow-ons (out of scope here):
|
|
||||||
# - Redis Sentinel for HA (3-node)
|
|
||||||
# - AUTH password from 1Password Connect (rotate via /rotate-password)
|
|
||||||
# - redis_exporter sidecar for Prometheus scrape
|
|
||||||
# - Network policies restricting which namespaces can dial 6379
|
|
||||||
#
|
|
||||||
# Design: docs/signage/operations-console-phase-2-design.md §3.5
|
|
||||||
# Decision: Q-SO-1 (RESOLVED 2026-05-11 PM)
|
|
||||||
# Memory: feedback_blooming_ui_pattern_no_iframes
|
|
||||||
---
|
|
||||||
# Dedicated namespace for the shared Redis SignalR backplane.
kind: Namespace
apiVersion: v1
metadata:
  name: fc-redis
  labels:
    app.kubernetes.io/managed-by: argocd
    app.kubernetes.io/part-of: flowercore
|
|
||||||
---
|
|
||||||
# 1Gi Longhorn volume; the fc-redis Deployment mounts it at /data.
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  namespace: fc-redis
  name: fc-redis-data
spec:
  storageClassName: longhorn
  accessModes: [ReadWriteOnce]
  resources:
    requests:
      storage: 1Gi
|
|
||||||
---
|
|
||||||
# redis.conf for the Phase A backplane pod; mounted read-only at /etc/redis.
apiVersion: v1
kind: ConfigMap
metadata:
  name: fc-redis-config
  namespace: fc-redis
data:
  redis.conf: |
    # Phase A — minimal config; no AUTH, no replication.
    bind 0.0.0.0
    protected-mode no
    port 6379
    tcp-backlog 511
    timeout 0
    tcp-keepalive 300

    # Persistence: AOF (fsync every second is the standard SignalR-backplane
    # durability sweet spot — the backplane only needs to survive Redis
    # restarts, not absolute zero loss).
    # Pin the data directory to the PVC mount explicitly: the container root
    # filesystem is read-only, so AOF writes must not depend on the image's
    # working directory happening to be /data.
    dir /data
    appendonly yes
    appendfsync everysec
    auto-aof-rewrite-percentage 100
    auto-aof-rewrite-min-size 64mb

    # Reasonable defaults — let Redis pick most things.
    maxmemory-policy allkeys-lru
    maxmemory 256mb

    # Logging
    loglevel notice
|
|
||||||
---
|
|
||||||
# Single-replica Redis pod for the SignalR backplane.
# Runs as uid 999 with a read-only root filesystem; the only writable path is
# the PVC mounted at /data.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: fc-redis
  namespace: fc-redis
  labels:
    app: fc-redis
spec:
  replicas: 1
  strategy:
    type: Recreate  # RWO PVC; do not do rolling update
  selector:
    matchLabels:
      app: fc-redis
  template:
    metadata:
      labels:
        app: fc-redis
    spec:
      securityContext:
        runAsNonRoot: true
        runAsUser: 999  # redis:7-alpine default uid
        runAsGroup: 999
        fsGroup: 999
      containers:
        - name: redis
          # NOTE(review): floating tag + IfNotPresent means nodes can run
          # different 7.x builds; consider pinning a patch version or digest.
          image: redis:7-alpine
          imagePullPolicy: IfNotPresent
          command: ["redis-server", "/etc/redis/redis.conf"]
          ports:
            - name: redis
              containerPort: 6379
          resources:
            requests:
              cpu: "50m"
              memory: "128Mi"
            limits:
              cpu: "500m"
              memory: "384Mi"
          volumeMounts:
            - name: data
              mountPath: /data
            - name: config
              mountPath: /etc/redis
              readOnly: true
          livenessProbe:
            tcpSocket:
              port: 6379
            initialDelaySeconds: 5
            periodSeconds: 10
          readinessProbe:
            exec:
              # A bare `redis-cli ping` exits 0 even when the server answers
              # with an error reply (for example LOADING while the AOF is
              # replayed), so the pod would be marked Ready too early.
              # Require a literal PONG instead.
              command: ["sh", "-c", "redis-cli ping | grep -q PONG"]
            initialDelaySeconds: 2
            periodSeconds: 5
          securityContext:
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
            capabilities:
              drop: [ALL]
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: fc-redis-data
        - name: config
          configMap:
            name: fc-redis-config
|
|
||||||
---
|
|
||||||
# ClusterIP endpoint for the backplane; selects the fc-redis pod on 6379/TCP.
kind: Service
apiVersion: v1
metadata:
  namespace: fc-redis
  name: redis
spec:
  type: ClusterIP
  selector: {app: fc-redis}
  ports:
    - {name: redis, protocol: TCP, port: 6379, targetPort: 6379}
|
|
||||||
@@ -1,14 +0,0 @@
|
|||||||
# fc-signage-appletv
|
|
||||||
|
|
||||||
Apple TV signage is a sealed appliance running the `FlowerCore.Signage.Agent.AppleTv` tvOS app per ADR-134.
|
|
||||||
|
|
||||||
This ApplicationSet entry is documentation and inventory metadata only. It intentionally creates no `Deployment`, `Service`, or `Pod`.
|
|
||||||
|
|
||||||
The Apple TV app connects outbound to existing FC.Signage.Web surfaces:
|
|
||||||
|
|
||||||
- `https://signage.iamworkin.lan/hub/signage` for SignalR live status.
|
|
||||||
- `GET /api/v1/nodes/{nodeId}/state` for the 30 second polling fallback.
|
|
||||||
- `POST /api/v1/nodes/register` and `POST /api/v1/nodes/{nodeId}/enroll` for pairing and mTLS enrollment.
|
|
||||||
- `POST /api/v1/nodes/{nodeId}/heartbeat` for metrics, current content identity, and local audit excerpts.
|
|
||||||
|
|
||||||
Distribution is via Apple Developer Enterprise Program or TestFlight plus FC.Distribution / UpdateCenter publishing once Apple credentials are available.
|
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
# Kustomize entry point: the only resource is the sibling manifest.yaml.
kind: Kustomization
apiVersion: kustomize.config.k8s.io/v1beta1

resources: [manifest.yaml]
|
|
||||||
@@ -1,26 +0,0 @@
|
|||||||
# Apple TV signage is a sealed tvOS appliance. This ArgoCD app intentionally
# carries documentation metadata only; no Deployment, Service, or Pod resources
# are created for the player.
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: fc-signage-appletv-docs
  namespace: fc-signage
  labels:
    app.kubernetes.io/name: fc-signage-appletv
    app.kubernetes.io/part-of: flowercore-signage
    flowercore.io/manifest-kind: docs-only
data:
  README: |
    FlowerCore.Signage.Agent.AppleTv is distributed through Apple Developer
    Enterprise Program or TestFlight, not Kubernetes.

    The app connects outbound to FC.Signage.Web:
    - SignalR: https://signage.iamworkin.lan/hub/signage
    - Polling fallback: GET /api/v1/nodes/{nodeId}/state
    - Registration: POST /api/v1/nodes/register
    - Enrollment: POST /api/v1/nodes/{nodeId}/enroll
    - Heartbeat: POST /api/v1/nodes/{nodeId}/heartbeat

    This placeholder gives ArgoCD and inventory dashboards a first-class
    Apple TV signage app entry without creating runtime pods.
|
|
||||||
@@ -1,17 +0,0 @@
|
|||||||
# FlowerCore Signage Pi Player
|
|
||||||
|
|
||||||
Phase 1 Raspberry Pi signage player packaging for Chromium kiosk deployments.
|
|
||||||
This bundle is intentionally air-gap friendly: systemd units, shell scripts,
|
|
||||||
udev rules, and Chromium managed policy are all checked into the repo and are
|
|
||||||
installed by `FlowerCore.Puppet`.
|
|
||||||
|
|
||||||
## Scope
|
|
||||||
|
|
||||||
- Bootstrap a stable node identity and mTLS client certificate.
|
|
||||||
- Launch Chromium in kiosk mode against `FC.Signage.Web` player routes.
|
|
||||||
- Restart the kiosk on HDMI hotplug.
|
|
||||||
- Renew mTLS certificates daily when fewer than 30 days remain.
|
|
||||||
- Detect display capabilities at boot, daily, and on HDMI hotplug.
|
|
||||||
|
|
||||||
Phase 2 native Avalonia rendering is documented separately in Notes and remains
|
|
||||||
deferred.
|
|
||||||
@@ -1,15 +0,0 @@
|
|||||||
{
|
|
||||||
"AutofillAddressEnabled": false,
|
|
||||||
"AutofillCreditCardEnabled": false,
|
|
||||||
"PasswordManagerEnabled": false,
|
|
||||||
"BrowserSignin": 0,
|
|
||||||
"MetricsReportingEnabled": false,
|
|
||||||
"SafeBrowsingProtectionLevel": 0,
|
|
||||||
"DefaultNotificationsSetting": 2,
|
|
||||||
"DefaultPopupsSetting": 2,
|
|
||||||
"BackgroundModeEnabled": false,
|
|
||||||
"DefaultBrowserSettingEnabled": false,
|
|
||||||
"PromotionalTabsEnabled": false,
|
|
||||||
"CommandLineFlagSecurityWarningsEnabled": false,
|
|
||||||
"ExtensionInstallBlocklist": ["*"]
|
|
||||||
}
|
|
||||||
@@ -1,132 +0,0 @@
|
|||||||
#!/usr/bin/env bash
# fc-signage-detect-display
#
# Probes the first connected HDMI connector (EDID via edid-decode, falling
# back to kmsprint for the resolution), builds a capability profile as JSON
# and POSTs it to FC.Signage.Web with the node's client certificate. When no
# endpoint accepts the profile it is appended to capabilities.log instead.
# The profile is always echoed on stdout as the last output.
#
# Exit status: 0 once a profile has been produced (even if no endpoint
# accepted it); non-zero when the node has no nodeId yet or a tool fails.
set -euo pipefail

NODE_JSON="/etc/flowercore/signage-node.json"
CERT_DIR="/etc/fc-signage-player"
SIGNAGE_URL="${FC_SIGNAGE_URL:-https://signage.iamworkin.lan}"
LOG_DIR="/var/log/fc-signage-player"
LOG_FILE="${LOG_DIR}/capabilities.log"

# `// empty` keeps a missing nodeId from turning into the literal string
# "null", which would otherwise be posted as a node id.
NODE_ID=$(jq -r '.nodeId // empty' "$NODE_JSON")
if [[ -z "$NODE_ID" ]]; then
  echo "[$(date -Is)] capability declare: no nodeId in $NODE_JSON" >&2
  exit 1
fi

# Collect every HDMI connector that sysfs reports as connected.
CONNECTORS=()
for dir in /sys/class/drm/card*-HDMI-A-*; do
  [[ -e "$dir/status" ]] || continue
  if [[ "$(<"$dir/status")" == "connected" ]]; then
    CONNECTORS+=("$(basename "$dir")")
  fi
done

if [[ ${#CONNECTORS[@]} -eq 0 ]]; then
  CAPABILITIES_JSON=$(jq -n --arg id "$NODE_ID" '{
    nodeId: $id,
    platform: "linux-arm64-pi",
    displayConnected: false,
    detectedAt: (now | todate),
    note: "No HDMI display detected"
  }')
else
  PRIMARY="${CONNECTORS[0]}"
  EDID_PATH="/sys/class/drm/${PRIMARY}/edid"
  WIDTH=0
  HEIGHT=0
  REFRESH=60
  HDR=false
  AUDIO_HDMI=false
  MFG=""
  MODEL=""
  PHYSICAL_SIZE=null

  if [[ -s "$EDID_PATH" ]] && command -v edid-decode >/dev/null 2>&1; then
    EDID_INFO=$(edid-decode < "$EDID_PATH" 2>/dev/null || true)

    # Here-strings instead of `echo | grep`: with pipefail, a grep that exits
    # early (-q / -m1) can SIGPIPE the writer and make a successful match
    # look like a failure.
    MFG=$(grep -m1 -oP 'Manufacturer:\s*\K\S+' <<<"$EDID_INFO" || true)
    MODEL=$(grep -m1 -oP 'Model:\s*\K\S+' <<<"$EDID_INFO" || true)
    PREF=$(grep -m1 -oP '\d+x\d+\s*@\s*\d+(?:\.\d+)?\s*Hz' <<<"$EDID_INFO" || true)

    # PREF looks like "1920x1080 @ 60.000 Hz"; keep the integer refresh rate.
    mode_re='^([0-9]+)x([0-9]+)[[:space:]]*@[[:space:]]*([0-9]+)'
    if [[ -n "$PREF" && "$PREF" =~ $mode_re ]]; then
      WIDTH="${BASH_REMATCH[1]}"
      HEIGHT="${BASH_REMATCH[2]}"
      REFRESH="${BASH_REMATCH[3]}"
    fi

    if grep -qiE 'HDR (Static|Dynamic) Metadata Block' <<<"$EDID_INFO"; then HDR=true; fi
    if grep -qiE 'CEA Audio Block|Audio Format Descriptor' <<<"$EDID_INFO"; then AUDIO_HDMI=true; fi

    # "Maximum image size: 52 cm x 29 cm" -> diagonal in inches.
    PH_W=$(grep -m1 -oP 'Maximum image size:\s*\K\d+\s*cm\s*x\s*\d+' <<<"$EDID_INFO" || true)
    size_re='^([0-9]+)[[:space:]]*cm[[:space:]]*x[[:space:]]*([0-9]+)'
    if [[ -n "$PH_W" && "$PH_W" =~ $size_re ]]; then
      # 10# forces base 10 so a value with a leading zero is not read as octal.
      PH_CM_W=$((10#${BASH_REMATCH[1]}))
      PH_CM_H=$((10#${BASH_REMATCH[2]}))
      if (( PH_CM_W > 0 && PH_CM_H > 0 )); then
        PHYSICAL_SIZE=$(awk -v w="$PH_CM_W" -v h="$PH_CM_H" 'BEGIN { printf "%.1f", sqrt(w*w + h*h)/2.54 }')
      fi
    fi
  fi

  if [[ "$WIDTH" == "0" ]] && command -v kmsprint >/dev/null 2>&1; then
    # PRIMARY is the sysfs name ("card1-HDMI-A-1").
    # NOTE(review): kmsprint is expected to list connectors by their DRM name
    # ("HDMI-A-1"), so the "cardN-" prefix is stripped before matching —
    # confirm against kmsprint output on the target image.
    KMS_NAME="${PRIMARY#card*-}"
    KMS=$(kmsprint 2>/dev/null | grep -F -A2 -- "$KMS_NAME" | grep -oP '\d+x\d+' | head -1 || true)
    if [[ -n "$KMS" ]]; then
      WIDTH="${KMS%%x*}"
      HEIGHT="${KMS#*x}"
    fi
  fi

  # Audio is only reported when both the sink (EDID) and ALSA expose HDMI.
  AUDIO_ALSA=false
  APLAY_OUT=$(aplay -l 2>/dev/null || true)
  if grep -qi 'card.*HDMI' <<<"$APLAY_OUT"; then AUDIO_ALSA=true; fi
  HAS_AUDIO=false
  if [[ "$AUDIO_HDMI" == "true" && "$AUDIO_ALSA" == "true" ]]; then HAS_AUDIO=true; fi

  CAPABILITIES_JSON=$(jq -n \
    --arg id "$NODE_ID" \
    --argjson w "$WIDTH" \
    --argjson h "$HEIGHT" \
    --argjson r "$REFRESH" \
    --argjson hdr "$HDR" \
    --argjson audio "$HAS_AUDIO" \
    --arg connector "$PRIMARY" \
    --arg mfg "$MFG" \
    --arg model "$MODEL" \
    --argjson size "$PHYSICAL_SIZE" \
    '{
      nodeId: $id,
      platform: "linux-arm64-pi",
      displayConnected: true,
      detectedAt: (now | todate),
      hardware: {
        maxResolution: { width: $w, height: $h },
        nativeResolution: { width: $w, height: $h },
        refreshRateHz: $r,
        colorDepth: ($hdr | if . then "Color30Hdr" else "Color24" end),
        hasAudioOutput: $audio,
        audioChannelCount: ($audio | if . then 2 else 0 end),
        physicalSizeInches: $size,
        connector: $connector,
        manufacturer: $mfg,
        modelName: $model
      },
      render: { codecs: ["h264", "vp9", "mp4"] }
    }')
fi

# Endpoints are tried in order; the first 200/201/204 wins.
ENDPOINT_CANDIDATES=(
  "${SIGNAGE_URL}/api/v1/nodes/${NODE_ID}/capabilities"
  "${SIGNAGE_URL}/api/v1/displays/${NODE_ID}/capability-profile"
)

# Private response file instead of a fixed, guessable path in /tmp.
RESPONSE_FILE=$(mktemp)
trap 'rm -f "$RESPONSE_FILE"' EXIT

ACCEPTED_URL=""
for url in "${ENDPOINT_CANDIDATES[@]}"; do
  # NOTE(review): -k disables server certificate verification; confirm
  # whether ca-chain.pem can be passed via --cacert instead.
  # On a transport failure curl already prints "000" through -w, so the
  # fallback is assigned outside the substitution rather than echoed into it.
  HTTP_STATUS=$(curl -sk -o "$RESPONSE_FILE" -w "%{http_code}" \
    --max-time 10 \
    --cert "$CERT_DIR/client.crt" --key "$CERT_DIR/client.key" \
    -X POST "$url" \
    -H "Content-Type: application/json" \
    -d "$CAPABILITIES_JSON") || HTTP_STATUS="000"
  case "$HTTP_STATUS" in
    200|201|204)
      ACCEPTED_URL="$url"
      break
      ;;
  esac
done

mkdir -p "$LOG_DIR"
if [[ -z "$ACCEPTED_URL" ]]; then
  echo "[$(date -Is)] capability declare: no endpoint accepted the profile; logging locally" \
    | tee -a "$LOG_FILE"
  echo "$CAPABILITIES_JSON" | tee -a "$LOG_FILE"
else
  echo "[$(date -Is)] capability declare: ok ($ACCEPTED_URL)" | tee -a "$LOG_FILE"
fi

echo "$CAPABILITIES_JSON"
|
|
||||||
@@ -1,144 +0,0 @@
|
|||||||
#!/usr/bin/env bash
# flowercore-signage-bootstrap.sh — first-boot identity + mTLS enrollment.
#
# Steps:
#   1. Exit early when the node is already enrolled.
#   2. Create (or reuse) the local node identity in signage-node.json.
#   3. Register with FC.Signage.Web (two attempts) and store the nodeId.
#   4. Optionally self-approve with a setup code, then poll for approval for
#      up to 30 minutes.
#   5. Generate an EC key + CSR, enroll, and package key/cert/CA as PKCS#12.
#   6. Record enrolledAt/certExpiry and start the detect-display and player
#      units.
#
# Exit codes: 0 ok / already enrolled, 2 registration failed, 3 approval
# timed out, 4 enrollment failed.
set -euo pipefail

NODE_JSON="/etc/flowercore/signage-node.json"
CERT_DIR="/etc/fc-signage-player"
SIGNAGE_URL="${FC_SIGNAGE_URL:-https://signage.iamworkin.lan}"
SETUP_CODE_FILE="/etc/flowercore/signage-setup-code"

mkdir -p /etc/flowercore "$CERT_DIR" /var/log/fc-signage-player
chown fc-signage:fc-signage /etc/flowercore "$CERT_DIR" /var/log/fc-signage-player
chmod 0750 "$CERT_DIR"

if [[ -s "$NODE_JSON" && -s "$CERT_DIR/client.p12" ]]; then
  ENROLLED=$(jq -r '.enrolledAt // empty' "$NODE_JSON")
  if [[ -n "$ENROLLED" ]]; then
    echo "[$(date -Is)] bootstrap: already enrolled at $ENROLLED; skipping"
    exit 0
  fi
fi

# HTTP responses go to a private scratch directory rather than fixed names in
# /tmp (the unit sets no User=, so this script presumably runs as root).
WORK_DIR=$(mktemp -d)
trap 'rm -rf "$WORK_DIR"' EXIT

# Apply a jq filter to NODE_JSON and swap the result in, restoring the mode
# and owner the player expects (a bare `jq > tmp && mv` leaves the file owned
# by whoever ran this script).
update_node_json() {
  local tmp="${NODE_JSON}.tmp"
  jq "$@" "$NODE_JSON" > "$tmp"
  chmod 0640 "$tmp"
  chown fc-signage:fc-signage "$tmp"
  mv "$tmp" "$NODE_JSON"
}

if [[ -s "$NODE_JSON" ]]; then
  NODE_UUID=$(jq -r '.nodeUuid // empty' "$NODE_JSON")
  MACHINE_ID=$(jq -r '.machineId // empty' "$NODE_JSON")
else
  NODE_UUID=$(uuidgen)
  # machineId is the first 16 hex digits of the UUID.
  MACHINE_ID=$(echo "$NODE_UUID" | tr -d '-' | cut -c1-16)
  jq -n --arg uuid "$NODE_UUID" --arg machine "$MACHINE_ID" --arg host "$(hostname -f)" --arg ts "$(date -Is)" \
    '{nodeUuid: $uuid, machineId: $machine, hostname: $host, platform: "linux-arm64-pi", createdAt: $ts}' \
    > "$NODE_JSON"
  chmod 0640 "$NODE_JSON"
  chown fc-signage:fc-signage "$NODE_JSON"
fi

SETUP_CODE=""
if [[ -s "$SETUP_CODE_FILE" ]]; then
  SETUP_CODE=$(tr -d '\r\n\t ' < "$SETUP_CODE_FILE")
fi

MODEL=$(tr -d '\0' < /sys/firmware/devicetree/base/model 2>/dev/null || echo Unknown)
REG_PAYLOAD=$(jq -n \
  --arg machine "$MACHINE_ID" \
  --arg name "$(hostname -f)" \
  --arg setup "$SETUP_CODE" \
  --arg resolution "1920x1080" \
  --arg model "$MODEL" \
  '{
    machineId: $machine,
    name: $name,
    setupCode: ($setup | if . == "" then null else . end),
    resolution: $resolution,
    hardwareModel: $model,
    platform: "linux-arm64-pi"
  }')

# NOTE(review): -k is used throughout because no CA chain exists on disk
# before enrollment (trust-on-first-use); confirm this is acceptable.
REGISTER_RESPONSE="$WORK_DIR/register-response.json"
for attempt in 1 2; do
  # On a transport failure curl already prints "000" through -w, so the
  # fallback is assigned outside the substitution rather than echoed into it.
  HTTP_STATUS=$(curl -sk -o "$REGISTER_RESPONSE" -w "%{http_code}" \
    --max-time 15 \
    -X POST "${SIGNAGE_URL}/api/v1/nodes/register" \
    -H "Content-Type: application/json" \
    -d "$REG_PAYLOAD") || HTTP_STATUS="000"
  if [[ "$HTTP_STATUS" == "200" || "$HTTP_STATUS" == "201" ]]; then
    break
  fi
  echo "[$(date -Is)] bootstrap: register attempt $attempt returned $HTTP_STATUS" >&2
  sleep 5
done

if [[ "$HTTP_STATUS" != "200" && "$HTTP_STATUS" != "201" ]]; then
  echo "[$(date -Is)] bootstrap: register failed after 2 attempts" >&2
  exit 2
fi

NODE_ID=$(jq -r '.nodeId // empty' "$REGISTER_RESPONSE")
if [[ -z "$NODE_ID" ]]; then
  echo "[$(date -Is)] bootstrap: register response did not include nodeId" >&2
  exit 2
fi
update_node_json --arg id "$NODE_ID" '.nodeId = $id'

if [[ -s "$SETUP_CODE_FILE" ]]; then
  # Best effort. The body is built with jq so the setup code is always
  # correctly escaped JSON.
  curl -sk --max-time 15 -X POST "${SIGNAGE_URL}/api/v1/nodes/${NODE_ID}/approve-via-setup-code" \
    -H "Content-Type: application/json" \
    -d "$(jq -cn --arg setup "$SETUP_CODE" '{setupCode: $setup}')" \
    -o /dev/null || true
fi

STATUS=""
DEADLINE=$(( $(date +%s) + 1800 ))
while (( $(date +%s) < DEADLINE )); do
  # `|| true`: one failed request or unparsable body must not abort the whole
  # 30-minute wait under `set -e` + pipefail.
  STATUS=$(curl -sk --max-time 5 "${SIGNAGE_URL}/api/v1/nodes/${NODE_ID}/status" \
    | jq -r '.status // empty' 2>/dev/null || true)
  if [[ "$STATUS" == "Approved" || "$STATUS" == "Enrolled" || "$STATUS" == "Online" ]]; then
    break
  fi
  sleep 15
done

if [[ "$STATUS" != "Approved" && "$STATUS" != "Enrolled" && "$STATUS" != "Online" ]]; then
  echo "[$(date -Is)] bootstrap: approval not granted within 30min budget" >&2
  exit 3
fi

# From here on only secrets are written: create them private and loosen the
# mode explicitly afterwards.
umask 077

KEY_PATH="${CERT_DIR}/client.key"
CSR_PATH="${CERT_DIR}/client.csr"
openssl ecparam -genkey -name prime256v1 -out "$KEY_PATH"
openssl req -new -key "$KEY_PATH" -out "$CSR_PATH" \
  -subj "/CN=${NODE_ID}/O=FlowerCore/OU=SignagePlayer-Pi"

ENROLL_RESPONSE="$WORK_DIR/enroll-response.json"
ENROLL_PAYLOAD=$(jq -n --arg csr "$(cat "$CSR_PATH")" '{certificateSigningRequest: $csr}')
HTTP_STATUS=$(curl -sk -o "$ENROLL_RESPONSE" -w "%{http_code}" \
  --max-time 15 \
  -X POST "${SIGNAGE_URL}/api/v1/nodes/${NODE_ID}/enroll" \
  -H "Content-Type: application/json" \
  -d "$ENROLL_PAYLOAD") || HTTP_STATUS="000"

if [[ "$HTTP_STATUS" != "200" && "$HTTP_STATUS" != "201" ]]; then
  echo "[$(date -Is)] bootstrap: enroll failed with HTTP $HTTP_STATUS" >&2
  exit 4
fi

# `// empty` keeps a missing field from being written out as the text "null".
jq -r '.clientCertificatePem // .signedCertificatePem // empty' "$ENROLL_RESPONSE" > "${CERT_DIR}/client.crt"
jq -r '.caCertificatePem // empty' "$ENROLL_RESPONSE" > "${CERT_DIR}/ca-chain.pem"
if [[ ! -s "${CERT_DIR}/client.crt" || ! -s "${CERT_DIR}/ca-chain.pem" ]]; then
  echo "[$(date -Is)] bootstrap: enroll response did not include the client certificate and CA chain" >&2
  exit 4
fi

P12_PASS=$(openssl rand -hex 24)
printf '%s' "$P12_PASS" > "${CERT_DIR}/client.p12.pass"
chmod 0600 "${CERT_DIR}/client.p12.pass"

openssl pkcs12 -export \
  -inkey "$KEY_PATH" \
  -in "${CERT_DIR}/client.crt" \
  -certfile "${CERT_DIR}/ca-chain.pem" \
  -out "${CERT_DIR}/client.p12" \
  -password "pass:${P12_PASS}"

chown fc-signage:fc-signage "${CERT_DIR}"/* "$NODE_JSON"
chmod 0640 "${CERT_DIR}/client.p12" "${CERT_DIR}/client.crt" "${CERT_DIR}/ca-chain.pem" "$KEY_PATH"
chmod 0600 "${CERT_DIR}/client.p12.pass"

EXPIRY=$(openssl x509 -in "${CERT_DIR}/client.crt" -enddate -noout | sed 's/notAfter=//')
update_node_json --arg ts "$(date -Is)" --arg exp "$EXPIRY" \
  '.enrolledAt = $ts | .certExpiry = $exp'

systemctl start flowercore-signage-detect-display.service || true
systemctl start flowercore-signage-player-pi.service || true
echo "[$(date -Is)] bootstrap: enrolled and kiosk started (NodeId=${NODE_ID})"
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
#!/usr/bin/env bash
# HDMI hotplug responder: wait briefly, redeclare display capabilities
# (best effort), then restart the kiosk player.
set -euo pipefail

readonly SETTLE_SECONDS=2
readonly DETECT_UNIT="flowercore-signage-detect-display.service"
readonly PLAYER_UNIT="flowercore-signage-player-pi.service"

sleep "$SETTLE_SECONDS"

# A failed capability declaration must not block the kiosk restart.
if ! systemctl start "$DETECT_UNIT"; then
  :
fi

systemctl restart "$PLAYER_UNIT"
|
|
||||||
@@ -1,44 +0,0 @@
|
|||||||
#!/usr/bin/env bash
# flowercore-signage-launch.sh — starts Chromium in kiosk mode.
#
# Builds the player URL from the nodeId and the SHA-256 fingerprint of the
# client certificate, probes the /embed route with the client certificate,
# and falls back to /player/<nodeId> (logging the divergence) when /embed
# does not answer 200/301/302. Finally replaces itself with chromium-browser.
set -euo pipefail

NODE_JSON="/etc/flowercore/signage-node.json"
SIGNAGE_URL="${FC_SIGNAGE_URL:-https://signage.iamworkin.lan}"
CERT_DIR="/etc/fc-signage-player"
LOG_DIR="/var/log/fc-signage-player"

# `// empty` keeps a missing nodeId from becoming the literal "null" in the URL.
NODE_ID=$(jq -r '.nodeId // empty' "$NODE_JSON")
if [[ -z "$NODE_ID" ]]; then
  echo "[$(date -Is)] launch: no nodeId in $NODE_JSON" >&2
  exit 1
fi

# -clcerts (as in the prelaunch check) restricts the output to the client
# certificate, so the fingerprint can never be taken from a CA certificate
# bundled in the same PKCS#12 file.
CERT_THUMB=$(openssl pkcs12 -in "$CERT_DIR/client.p12" -passin file:"$CERT_DIR/client.p12.pass" -nodes -nokeys -clcerts 2>/dev/null \
  | openssl x509 -fingerprint -sha256 -noout \
  | sed 's/.*=//' \
  | tr -d ':')
if [[ -z "$CERT_THUMB" ]]; then
  echo "[$(date -Is)] launch: could not compute client certificate fingerprint" >&2
  exit 1
fi

PLAYER_URL="${SIGNAGE_URL}/player/${NODE_ID}/embed?token=${CERT_THUMB}"

# On a transport failure curl already prints "000" through -w, so the
# fallback is assigned outside the substitution rather than echoed into it.
HTTP_STATUS=$(curl -sk -o /dev/null -w "%{http_code}" --max-time 5 \
  --cert-type P12 --cert "$CERT_DIR/client.p12:$(cat "$CERT_DIR/client.p12.pass")" \
  "$PLAYER_URL") || HTTP_STATUS="000"

mkdir -p "$LOG_DIR"
case "$HTTP_STATUS" in
  200|301|302)
    ;;
  *)
    echo "[$(date -Is)] /embed returned $HTTP_STATUS; falling back to /player/${NODE_ID}" \
      >> "$LOG_DIR/url-divergence.log"
    PLAYER_URL="${SIGNAGE_URL}/player/${NODE_ID}?token=${CERT_THUMB}"
    ;;
esac

exec chromium-browser \
  --kiosk \
  --noerrdialogs \
  --disable-infobars \
  --disable-translate \
  --disable-features=TranslateUI,InfiniteSessionRestore \
  --autoplay-policy=no-user-gesture-required \
  --password-store=basic \
  --user-data-dir=/var/lib/fc-signage-player/profile \
  --disk-cache-dir=/var/lib/fc-signage-player/cache \
  --disk-cache-size=104857600 \
  --no-first-run \
  --no-default-browser-check \
  --check-for-update-interval=2592000 \
  --enable-features=OverlayScrollbar \
  --start-fullscreen \
  --window-position=0,0 \
  --window-size=1920,1080 \
  "$PLAYER_URL"
|
|
||||||
@@ -1,20 +0,0 @@
|
|||||||
#!/usr/bin/env bash
# flowercore-signage-prelaunch.sh — sanity checks before the kiosk starts.
#
# Fails (exit 1) when the node identity, the PKCS#12 bundle or its password
# file is missing or unreadable, or when no client certificate can be read
# from the bundle. A certificate that expires within 7 days only produces a
# warning on stderr.
set -euo pipefail

LOG_DIR="/var/log/fc-signage-player"
P12_FILE="/etc/fc-signage-player/client.p12"
P12_PASS_FILE="/etc/fc-signage-player/client.p12.pass"

mkdir -p "$LOG_DIR"

for f in /etc/flowercore/signage-node.json "$P12_FILE" "$P12_PASS_FILE"; do
  if [[ ! -r "$f" ]]; then
    echo "[$(date -Is)] prelaunch: missing or unreadable $f" >&2
    exit 1
  fi
done

# Extract the certificate first so that a corrupt bundle or wrong password is
# reported as such instead of being mistaken for an expiring certificate.
CERT_PEM=$(openssl pkcs12 -in "$P12_FILE" -passin "file:$P12_PASS_FILE" -nokeys -clcerts 2>/dev/null) || CERT_PEM=""
if [[ "$CERT_PEM" != *"BEGIN CERTIFICATE"* ]]; then
  echo "[$(date -Is)] prelaunch: cannot read client certificate from $P12_FILE" >&2
  exit 1
fi

if ! openssl x509 -checkend $((7*24*3600)) -noout <<<"$CERT_PEM"; then
  echo "[$(date -Is)] prelaunch: client cert expires within 7 days" >&2
fi

echo "[$(date -Is)] prelaunch: ok" | tee -a "$LOG_DIR/prelaunch.log"
|
|
||||||
@@ -1,46 +0,0 @@
|
|||||||
#!/usr/bin/env bash
# flowercore-signage-renew-cert.sh — renews the mTLS client certificate.
#
# Does nothing while the current certificate is valid for more than 30 days.
# Otherwise it generates a new EC key + CSR, asks FC.Signage.Web to sign it
# (authenticating with the current certificate), rebuilds the PKCS#12 bundle
# with the existing password, swaps the new files in, and restarts the player.
#
# Exit codes: 0 nothing to do / renewed, 1 no nodeId, 5 renewal failed (the
# old certificate is left in place).
#
# NOTE(review): bootstrap records .certExpiry in signage-node.json; this
# script does not refresh it after a renewal — confirm whether anything
# reads that field.
set -euo pipefail

CERT_DIR="/etc/fc-signage-player"
NODE_JSON="/etc/flowercore/signage-node.json"
SIGNAGE_URL="${FC_SIGNAGE_URL:-https://signage.iamworkin.lan}"

[[ -s "$CERT_DIR/client.crt" ]] || { echo "no cert to renew"; exit 0; }

if openssl x509 -in "$CERT_DIR/client.crt" -checkend $((30*24*3600)) -noout; then
  exit 0
fi

NODE_ID=$(jq -r '.nodeId // empty' "$NODE_JSON")
if [[ -z "$NODE_ID" ]]; then
  echo "[$(date -Is)] renew: no nodeId in $NODE_JSON" >&2
  exit 1
fi

NEW_KEY="$CERT_DIR/client.key.new"
NEW_CSR="$CERT_DIR/client.csr.new"
NEW_CRT="$CERT_DIR/client.crt.new"
NEW_CA="$CERT_DIR/ca-chain.pem.new"
NEW_P12="$CERT_DIR/client.p12.new"

# Private response file instead of a fixed name in /tmp; staged *.new files
# are removed on every exit path so a failed run leaves nothing half-written.
RESPONSE_FILE=$(mktemp)
cleanup() {
  rm -f "$RESPONSE_FILE" "$NEW_KEY" "$NEW_CSR" "$NEW_CRT" "$NEW_CA" "$NEW_P12"
}
trap cleanup EXIT

# Key material is created private; modes are loosened explicitly after the swap.
umask 077

openssl ecparam -genkey -name prime256v1 -out "$NEW_KEY"
openssl req -new -key "$NEW_KEY" -out "$NEW_CSR" \
  -subj "/CN=${NODE_ID}/O=FlowerCore/OU=SignagePlayer-Pi"

# On a transport failure curl already prints "000" through -w, so the
# fallback is assigned outside the substitution.
HTTP_STATUS=$(curl -sk -o "$RESPONSE_FILE" -w "%{http_code}" \
  --max-time 15 \
  --cert "$CERT_DIR/client.crt" --key "$CERT_DIR/client.key" \
  -X POST "${SIGNAGE_URL}/api/v1/nodes/${NODE_ID}/renew" \
  -H "Content-Type: application/json" \
  -d "$(jq -n --arg csr "$(cat "$NEW_CSR")" '{certificateSigningRequest: $csr}')") || HTTP_STATUS="000"

if [[ "$HTTP_STATUS" != "200" && "$HTTP_STATUS" != "201" ]]; then
  echo "[$(date -Is)] renew: failed HTTP $HTTP_STATUS; leaving old cert in place" >&2
  exit 5
fi

# `// empty` keeps a missing field from being written out as the text "null".
jq -r '.clientCertificatePem // .signedCertificatePem // empty' "$RESPONSE_FILE" > "$NEW_CRT"
jq -r '.caCertificatePem // empty' "$RESPONSE_FILE" > "$NEW_CA"
if [[ ! -s "$NEW_CRT" || ! -s "$NEW_CA" ]]; then
  echo "[$(date -Is)] renew: response did not include the client certificate and CA chain; leaving old cert in place" >&2
  exit 5
fi

P12_PASS=$(cat "$CERT_DIR/client.p12.pass")
openssl pkcs12 -export -inkey "$NEW_KEY" -in "$NEW_CRT" \
  -certfile "$NEW_CA" \
  -out "$NEW_P12" -password "pass:${P12_PASS}"

# Swap only after every new artifact has been produced successfully.
mv "$NEW_KEY" "$CERT_DIR/client.key"
mv "$NEW_CRT" "$CERT_DIR/client.crt"
mv "$NEW_CA" "$CERT_DIR/ca-chain.pem"
mv "$NEW_P12" "$CERT_DIR/client.p12"

# Same owner and modes that the bootstrap script applies.
chown fc-signage:fc-signage "$CERT_DIR"/client.* "$CERT_DIR/ca-chain.pem"
chmod 0640 "$CERT_DIR/client.p12" "$CERT_DIR/client.crt" "$CERT_DIR/ca-chain.pem" "$CERT_DIR/client.key"

systemctl restart flowercore-signage-player-pi.service
|
|
||||||
@@ -1,2 +0,0 @@
|
|||||||
# On an HDMI connector change, start the hotplug responder: it waits 2s for DRM to settle, redeclares display capabilities, then restarts Chromium.
|
|
||||||
SUBSYSTEM=="drm", KERNEL=="card?-HDMI-A-?", ACTION=="change", RUN+="/usr/bin/systemctl start flowercore-signage-player-pi-hdmi.service"
|
|
||||||
@@ -1,16 +0,0 @@
|
|||||||
# Oneshot that runs the bootstrap script once the network is online and
# before the kiosk player unit.
[Unit]
Description=FlowerCore Signage Pi: first-boot identity + mTLS enrollment
Before=flowercore-signage-player-pi.service
After=network-online.target
Wants=network-online.target

[Service]
Type=oneshot
RemainAfterExit=yes
ExecStart=/usr/local/bin/flowercore-signage-bootstrap.sh
# The bootstrap script polls for approval for up to 1800s; leave headroom.
TimeoutStartSec=2100
StandardError=journal
StandardOutput=journal

[Install]
WantedBy=multi-user.target
|
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
# Oneshot that runs the display detection script as the fc-signage user;
# ordered after the bootstrap unit.
[Unit]
Description=FlowerCore Signage Pi: detect connected display + declare capabilities
After=flowercore-signage-bootstrap.service

[Service]
User=fc-signage
Type=oneshot
ExecStart=/usr/local/bin/fc-signage-detect-display
|
|
||||||
@@ -1,11 +0,0 @@
|
|||||||
# Fires 30s after boot and then daily (with up to 1h of random delay);
# missed runs are caught up because the timer is persistent.
[Unit]
Description=Daily FlowerCore Signage Pi display capability redeclaration

[Timer]
OnBootSec=30s
OnCalendar=daily
Persistent=true
RandomizedDelaySec=1h

[Install]
WantedBy=timers.target
|
|
||||||
@@ -1,7 +0,0 @@
|
|||||||
# Oneshot that runs the HDMI hotplug responder script.
[Unit]
DefaultDependencies=no
Description=FlowerCore Signage Pi Player HDMI hotplug responder

[Service]
ExecStart=/usr/local/bin/flowercore-signage-hdmi-respond.sh
Type=oneshot
|
|
||||||
@@ -1,30 +0,0 @@
|
|||||||
# Chromium kiosk player. Only starts once the node identity and the PKCS#12
# client bundle exist; runs sandboxed as fc-signage and is restarted on exit.
[Unit]
Description=FlowerCore Digital Signage Pi Player (Chromium kiosk)
Documentation=https://github.com/astoltz/FlowerCore.Notes/blob/master/docs/standards/appletv-pi-signage-agents-design.md
Wants=network-online.target
After=network-online.target graphical.target
ConditionPathExists=/etc/flowercore/signage-node.json
ConditionPathExists=/etc/fc-signage-player/client.p12
# Start rate limiting is a [Unit] setting. Placed in [Service],
# StartLimitIntervalSec= is not a recognised key, so the intended
# "5 starts per 300s" window would not apply.
StartLimitIntervalSec=300s
StartLimitBurst=5

[Service]
Type=simple
User=fc-signage
Group=fc-signage
WorkingDirectory=/var/lib/fc-signage-player
# Leading "-": the environment file is optional.
EnvironmentFile=-/etc/flowercore/signage-player.env
ExecStartPre=/usr/local/bin/flowercore-signage-prelaunch.sh
ExecStart=/usr/local/bin/flowercore-signage-launch.sh
Restart=always
RestartSec=10s
MemoryMax=2G
MemoryHigh=1500M
ProtectSystem=strict
ProtectHome=true
# With ProtectSystem=strict these are the only writable paths.
ReadWritePaths=/var/lib/fc-signage-player /var/log/fc-signage-player
PrivateTmp=true
NoNewPrivileges=true

[Install]
WantedBy=graphical.target
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
# Oneshot that runs the certificate renewal script.
[Unit]
Description=FlowerCore Signage Pi: cert renewal worker

[Service]
ExecStart=/usr/local/bin/flowercore-signage-renew-cert.sh
Type=oneshot
|
|
||||||
@@ -1,10 +0,0 @@
|
|||||||
# Fires daily with up to 2h of random delay; missed runs are caught up
# because the timer is persistent.
[Unit]
Description=Daily check for FlowerCore Signage Pi cert renewal

[Timer]
Persistent=true
RandomizedDelaySec=2h
OnCalendar=daily

[Install]
WantedBy=timers.target
|
|
||||||
@@ -1,22 +0,0 @@
|
|||||||
#!/usr/bin/env bats
|
|
||||||
|
|
||||||
setup() {
|
|
||||||
APP_ROOT="$(cd "$BATS_TEST_DIRNAME/.." && pwd)"
|
|
||||||
DETECT="$APP_ROOT/scripts/fc-signage-detect-display"
|
|
||||||
}
|
|
||||||
|
|
||||||
@test "display detection emits graceful disconnected profile when no hdmi connector is present" {
|
|
||||||
script="$(cat "$DETECT")"
|
|
||||||
[[ "$script" == *"displayConnected: false"* ]]
|
|
||||||
[[ "$script" == *"No HDMI display detected"* ]]
|
|
||||||
}
|
|
||||||
|
|
||||||
@test "display detection parses edid, falls back to kmsprint, and logs endpoint failures locally" {
|
|
||||||
script="$(cat "$DETECT")"
|
|
||||||
[[ "$script" == *"edid-decode"* ]]
|
|
||||||
[[ "$script" == *"HDR (Static|Dynamic) Metadata Block"* ]]
|
|
||||||
[[ "$script" == *"kmsprint"* ]]
|
|
||||||
[[ "$script" == *"/api/v1/nodes/\${NODE_ID}/capabilities"* ]]
|
|
||||||
[[ "$script" == *"/api/v1/displays/\${NODE_ID}/capability-profile"* ]]
|
|
||||||
[[ "$script" == *"capabilities.log"* ]]
|
|
||||||
}
|
|
||||||
@@ -1,64 +0,0 @@
|
|||||||
#!/usr/bin/env bats
|
|
||||||
|
|
||||||
setup() {
|
|
||||||
APP_ROOT="$(cd "$BATS_TEST_DIRNAME/.." && pwd)"
|
|
||||||
BOOTSTRAP="$APP_ROOT/scripts/flowercore-signage-bootstrap.sh"
|
|
||||||
RENEW="$APP_ROOT/scripts/flowercore-signage-renew-cert.sh"
|
|
||||||
}
|
|
||||||
|
|
||||||
@test "bootstrap is idempotent when node is already enrolled" {
|
|
||||||
script="$(cat "$BOOTSTRAP")"
|
|
||||||
[[ "$script" == *'[[ -s "$NODE_JSON" && -s "$CERT_DIR/client.p12" ]]'* ]]
|
|
||||||
[[ "$script" == *"already enrolled"* ]]
|
|
||||||
[[ "$script" == *"exit 0"* ]]
|
|
||||||
}
|
|
||||||
|
|
||||||
@test "bootstrap generates a stable node uuid and machine id" {
|
|
||||||
script="$(cat "$BOOTSTRAP")"
|
|
||||||
[[ "$script" == *"uuidgen"* ]]
|
|
||||||
[[ "$script" == *"nodeUuid"* ]]
|
|
||||||
[[ "$script" == *"machineId"* ]]
|
|
||||||
[[ "$script" == *"cut -c1-16"* ]]
|
|
||||||
}
|
|
||||||
|
|
||||||
@test "bootstrap posts to the canonical register endpoint" {
|
|
||||||
grep -q '/api/v1/nodes/register' "$BOOTSTRAP"
|
|
||||||
grep -q '"linux-arm64-pi"' "$BOOTSTRAP"
|
|
||||||
}
|
|
||||||
|
|
||||||
@test "bootstrap retries registration once for first-call races" {
|
|
||||||
script="$(cat "$BOOTSTRAP")"
|
|
||||||
[[ "$script" == *"for attempt in 1 2"* ]]
|
|
||||||
[[ "$script" == *"register attempt \$attempt returned"* ]]
|
|
||||||
[[ "$script" == *"sleep 5"* ]]
|
|
||||||
}
|
|
||||||
|
|
||||||
@test "bootstrap supports setup-code approval with manual polling fallback" {
|
|
||||||
script="$(cat "$BOOTSTRAP")"
|
|
||||||
[[ "$script" == *"signage-setup-code"* ]]
|
|
||||||
[[ "$script" == *"approve-via-setup-code"* ]]
|
|
||||||
[[ "$script" == *"+ 1800"* ]]
|
|
||||||
[[ "$script" == *"sleep 15"* ]]
|
|
||||||
}
|
|
||||||
|
|
||||||
@test "bootstrap generates an ecdsa p256 csr for the signage pi subject" {
|
|
||||||
script="$(cat "$BOOTSTRAP")"
|
|
||||||
[[ "$script" == *"ecparam -genkey -name prime256v1"* ]]
|
|
||||||
[[ "$script" == *'/CN=${NODE_ID}/O=FlowerCore/OU=SignagePlayer-Pi'* ]]
|
|
||||||
}
|
|
||||||
|
|
||||||
@test "bootstrap writes pkcs12 bundle with restrictive permissions" {
|
|
||||||
script="$(cat "$BOOTSTRAP")"
|
|
||||||
[[ "$script" == *"openssl pkcs12 -export"* ]]
|
|
||||||
[[ "$script" == *"client.p12.pass"* ]]
|
|
||||||
[[ "$script" == *"chmod 0640"* ]]
|
|
||||||
[[ "$script" == *"chmod 0600"* ]]
|
|
||||||
}
|
|
||||||
|
|
||||||
@test "renewal only calls renew endpoint inside the thirty-day window and swaps atomically" {
|
|
||||||
script="$(cat "$RENEW")"
|
|
||||||
[[ "$script" == *'-checkend $((30*24*3600))'* ]]
|
|
||||||
[[ "$script" == *"/api/v1/nodes/\${NODE_ID}/renew"* ]]
|
|
||||||
[[ "$script" == *"client.key.new"* ]]
|
|
||||||
[[ "$script" == *'mv "$CERT_DIR/client.p12.new" "$CERT_DIR/client.p12"'* ]]
|
|
||||||
}
|
|
||||||
@@ -1,68 +0,0 @@
|
|||||||
#!/usr/bin/env bats
|
|
||||||
|
|
||||||
setup() {
|
|
||||||
APP_ROOT="$(cd "$BATS_TEST_DIRNAME/.." && pwd)"
|
|
||||||
}
|
|
||||||
|
|
||||||
@test "player unit exists" {
|
|
||||||
[ -f "$APP_ROOT/systemd/flowercore-signage-player-pi.service" ]
|
|
||||||
}
|
|
||||||
|
|
||||||
@test "player unit uses simple chromium service with restart backoff" {
|
|
||||||
unit="$(cat "$APP_ROOT/systemd/flowercore-signage-player-pi.service")"
|
|
||||||
[[ "$unit" == *"Type=simple"* ]]
|
|
||||||
[[ "$unit" == *"Restart=always"* ]]
|
|
||||||
[[ "$unit" == *"RestartSec=10s"* ]]
|
|
||||||
[[ "$unit" == *"StartLimitBurst=5"* ]]
|
|
||||||
[[ "$unit" == *"StartLimitIntervalSec=300s"* ]]
|
|
||||||
}
|
|
||||||
|
|
||||||
@test "player unit caps chromium memory at two gigabytes" {
|
|
||||||
grep -q '^MemoryMax=2G$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
|
|
||||||
grep -q '^MemoryHigh=1500M$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
|
|
||||||
}
|
|
||||||
|
|
||||||
@test "player unit condition-gates startup on identity and p12 certificate" {
|
|
||||||
grep -q '^ConditionPathExists=/etc/flowercore/signage-node.json$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
|
|
||||||
grep -q '^ConditionPathExists=/etc/fc-signage-player/client.p12$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
|
|
||||||
}
|
|
||||||
|
|
||||||
@test "player unit runs prelaunch checks before chromium" {
|
|
||||||
grep -q '^ExecStartPre=/usr/local/bin/flowercore-signage-prelaunch.sh$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
|
|
||||||
grep -q '^ExecStart=/usr/local/bin/flowercore-signage-launch.sh$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
|
|
||||||
}
|
|
||||||
|
|
||||||
@test "hdmi udev rule routes through the two-second settle service" {
|
|
||||||
rule="$(cat "$APP_ROOT/systemd/99-flowercore-signage-hdmi.rules")"
|
|
||||||
[[ "$rule" == *'KERNEL=="card?-HDMI-A-?"'* ]]
|
|
||||||
[[ "$rule" == *"systemctl start flowercore-signage-player-pi-hdmi.service"* ]]
|
|
||||||
[[ "$rule" != *"systemctl restart flowercore-signage-player-pi.service"* ]]
|
|
||||||
}
|
|
||||||
|
|
||||||
@test "hdmi responder settles, declares display, then restarts chromium" {
|
|
||||||
responder="$(cat "$APP_ROOT/scripts/flowercore-signage-hdmi-respond.sh")"
|
|
||||||
[[ "$responder" == *"sleep 2"* ]]
|
|
||||||
[[ "$responder" == *"systemctl start flowercore-signage-detect-display.service"* ]]
|
|
||||||
[[ "$responder" == *"systemctl restart flowercore-signage-player-pi.service"* ]]
|
|
||||||
}
|
|
||||||
|
|
||||||
@test "chromium policy json is valid and disables credential prompts" {
|
|
||||||
command -v jq >/dev/null || skip "jq not installed"
|
|
||||||
jq -e '.AutofillAddressEnabled == false and .AutofillCreditCardEnabled == false and .PasswordManagerEnabled == false' \
|
|
||||||
"$APP_ROOT/chromium-policies/flowercore-signage.json" >/dev/null
|
|
||||||
}
|
|
||||||
|
|
||||||
@test "launch script tries embed URL and logs bare-player fallback" {
|
|
||||||
launch="$(cat "$APP_ROOT/scripts/flowercore-signage-launch.sh")"
|
|
||||||
[[ "$launch" == *'/player/${NODE_ID}/embed?token=${CERT_THUMB}'* ]]
|
|
||||||
[[ "$launch" == *"url-divergence.log"* ]]
|
|
||||||
[[ "$launch" == *'/player/${NODE_ID}?token=${CERT_THUMB}'* ]]
|
|
||||||
}
|
|
||||||
|
|
||||||
@test "prelaunch script validates required node and cert files" {
|
|
||||||
prelaunch="$(cat "$APP_ROOT/scripts/flowercore-signage-prelaunch.sh")"
|
|
||||||
[[ "$prelaunch" == *"/etc/flowercore/signage-node.json"* ]]
|
|
||||||
[[ "$prelaunch" == *"/etc/fc-signage-player/client.p12"* ]]
|
|
||||||
[[ "$prelaunch" == *"/etc/fc-signage-player/client.p12.pass"* ]]
|
|
||||||
[[ "$prelaunch" == *"exit 1"* ]]
|
|
||||||
}
|
|
||||||
@@ -58,7 +58,7 @@ spec:
|
|||||||
nodeName: rke2-server
|
nodeName: rke2-server
|
||||||
containers:
|
containers:
|
||||||
- name: web
|
- name: web
|
||||||
image: localhost/fc-updater-web:v20260509-4162dca-authgate
|
image: localhost/fc-updater-web:v20260508-pub3-deepening-2bdf108
|
||||||
imagePullPolicy: Never
|
imagePullPolicy: Never
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 8080
|
- containerPort: 8080
|
||||||
|
|||||||
@@ -1,61 +0,0 @@
|
|||||||
# GitHub Runner Fleet
|
|
||||||
|
|
||||||
ArgoCD owns `apps/github-runner/github-runner.yaml`. Do not patch live runner
|
|
||||||
Deployments with `kubectl`; update this manifest and let ArgoCD reconcile.
|
|
||||||
|
|
||||||
## Runner Shape
|
|
||||||
|
|
||||||
All repo-scoped Linux runners use:
|
|
||||||
|
|
||||||
- `ACCESS_TOKEN` from the `github-runner-token` Secret
|
|
||||||
- `RUN_AS_ROOT=false`
|
|
||||||
- `EPHEMERAL=true`
|
|
||||||
- `LABELS=self-hosted,linux,fc-build-linux`
|
|
||||||
- writable non-root paths under `/home/runner` for .NET, NuGet, XDG cache, and
|
|
||||||
Actions tool cache
|
|
||||||
|
|
||||||
`github-runner` for `FlowerCore.Common` is single-replica because it retains the
|
|
||||||
original Longhorn ReadWriteOnce NuGet PVC. `github-runner-sharedpos` and the top
|
|
||||||
Linux-cost repo runners use two replicas with per-pod `emptyDir` caches. That is
|
|
||||||
the safe backlog-drain strategy: no two pods share one RWO PVC.
|
|
||||||
|
|
||||||
## Post-Merge Proof
|
|
||||||
|
|
||||||
After the PR is merged and ArgoCD syncs, verify the runner fleet:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
kubectl -n github-runner get deploy,pods,pvc
|
|
||||||
```
|
|
||||||
|
|
||||||
Verify GitHub registration for the repo-scoped runners:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
for repo in FlowerCore.Common FlowerCore.Shared.Pos FlowerCore.Puppet FlowerCore.Signage \
|
|
||||||
FlowerCore.DMS FlowerCore.Telephony FlowerCore.Print.Web FlowerCore.Chat \
|
|
||||||
FlowerCore.MySQL FlowerCore.Kiosk.Linux; do
|
|
||||||
echo "=== $repo ==="
|
|
||||||
gh api "/repos/astoltz/$repo/actions/runners" \
|
|
||||||
--jq '.runners[] | select(.labels[].name == "fc-build-linux") | {name,status,busy,labels:[.labels[].name]}'
|
|
||||||
done
|
|
||||||
```
|
|
||||||
|
|
||||||
Shared.Pos publish proof after the runner pod is online:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
gh run list --repo astoltz/FlowerCore.Shared.Pos \
|
|
||||||
--workflow "Build, Test & Publish" --branch main --limit 5
|
|
||||||
```
|
|
||||||
|
|
||||||
If the latest run is still queued after runner registration, rerun the workflow
|
|
||||||
from GitHub Actions and verify it lands on an `rke2-linux-*` runner.
|
|
||||||
|
|
||||||
## Failure Notes
|
|
||||||
|
|
||||||
- `actions/setup-dotnet` permission error at `/usr/share/dotnet`: check that
|
|
||||||
`DOTNET_INSTALL_DIR=/home/runner/.dotnet` and related cache env vars are
|
|
||||||
present on the runner pod.
|
|
||||||
- `404` during runner registration: the fine-grained PAT is valid but missing
|
|
||||||
repository access for that repo. Add the repo to the PAT access list; the PAT
|
|
||||||
value does not change.
|
|
||||||
- `Multi-Attach` volume error: only the Common runner uses a RWO PVC and it must
|
|
||||||
stay single-replica. New multi-replica runners use `emptyDir`.
|
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -466,11 +466,11 @@ spec:
|
|||||||
itemPath: vaults/IAmWorkin/items/Guacamole JSON Auth
|
itemPath: vaults/IAmWorkin/items/Guacamole JSON Auth
|
||||||
---
|
---
|
||||||
---
|
---
|
||||||
# 1Password-backed credentials for Mac mini VNC access (Phase 1 — 2026-04-28)
|
# 1Password-backed credentials for Mac mini VNC access (Phase 1 — 2026-04-28)
|
||||||
# The operator mints Secret 'macmini-vnc-creds' with keys: username, password, VNC Password
|
# The operator mints Secret 'macmini-vnc-creds' with keys: username, password, VNC Password
|
||||||
# Note: '1Password' field label 'VNC Password' -> K8s Secret key 'VNC Password' (space retained)
|
# Note: '1Password' field label 'VNC Password' -> K8s Secret key 'VNC Password' (space retained)
|
||||||
# Guacamole VNC connection password is sourced from the 'VNC Password' field.
|
# Guacamole VNC connection password is sourced from the 'VNC Password' field.
|
||||||
# Actual IP is 10.0.56.115 (INFRA VLAN) — the 1P item 'IP' field is kept as backup reference.
|
# Actual IP is 10.0.56.115 (INFRA VLAN) — the 1P item 'IP' field is kept as backup reference.
|
||||||
apiVersion: onepassword.com/v1
|
apiVersion: onepassword.com/v1
|
||||||
kind: OnePasswordItem
|
kind: OnePasswordItem
|
||||||
metadata:
|
metadata:
|
||||||
@@ -481,7 +481,6 @@ metadata:
|
|||||||
app.kubernetes.io/part-of: flowercore
|
app.kubernetes.io/part-of: flowercore
|
||||||
spec:
|
spec:
|
||||||
itemPath: vaults/IAmWorkin/items/Mac Mini
|
itemPath: vaults/IAmWorkin/items/Mac Mini
|
||||||
---
|
|
||||||
# Blue Jay Branding Extension (CSS + translations)
|
# Blue Jay Branding Extension (CSS + translations)
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: ConfigMap
|
kind: ConfigMap
|
||||||
|
|||||||
@@ -1,9 +1,51 @@
|
|||||||
# =============================================================================
|
# =============================================================================
|
||||||
# ci1 - Windows Server 2025 KubeVirt VM (GitHub Actions Self-Hosted Runner)
|
# ci1 — Windows Server 2025 KubeVirt VM (GitHub Actions Self-Hosted Runner)
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Boots from the sysprepped containerDisk template built by the Windows VM
|
# Purpose: dedicated CI runner for FlowerCore.Updater Sandbox E2E nightly +
|
||||||
# sysprep pipeline. See docs/infrastructure/windows-vm-sysprep-pipeline.md.
|
# future fleet WPF AAT lanes. Replaces the never-registered
|
||||||
# Path A/B/C install history is preserved in git log only.
|
# `bluejay-ws-sandbox-1` runner placeholder. Andrew explicitly does NOT want
|
||||||
|
# BLUEJAY-WS registered as a runner (workstation has personal/operator state).
|
||||||
|
#
|
||||||
|
# Storage layout (2026-05-08):
|
||||||
|
# * ISO is now sourced from Synology NFS (Path B) — see
|
||||||
|
# win2025-iso-nfs-pv.yaml. The Longhorn Filesystem PVC
|
||||||
|
# `windows-server-2025-iso` below is RETAINED but UNUSED so the prior
|
||||||
|
# CDI upload state is preserved as a fallback (and so ArgoCD doesn't
|
||||||
|
# prune it on this commit). It can be deleted in a follow-up commit
|
||||||
|
# after the NFS path is proven on a successful Windows install.
|
||||||
|
#
|
||||||
|
# Status (2026-05-08): LIVE — Phase 1 prereqs satisfied:
|
||||||
|
# * Multus CNI v4.2.2 thick-plugin DaemonSet running on all 3 RKE2 nodes
|
||||||
|
# (apps/multus/multus.yaml; ApplicationSet `infra-multus` Synced/Healthy)
|
||||||
|
# * CDI v1.65.0 operator + CR Deployed (apps/cdi/; ApplicationSet
|
||||||
|
# `infra-cdi` Synced/Healthy; uploadproxy reachable via kubectl port-forward)
|
||||||
|
# * Windows Server 2025 ISO uploaded via CDI virtctl image-upload to
|
||||||
|
# PVC windows-server-2025-iso (7.7 GiB → 10Gi PVC, Bound, Upload Complete)
|
||||||
|
# * Local Administrator password generated, stored in 1Password vault
|
||||||
|
# IAmWorkin (qaphopopkryhbg353ukzhhuqoq) item id h3ix4mgfk65gmkcmvh6ly3d3hu
|
||||||
|
# * NetworkAttachmentDefinition prod-vlan57 registered (apps/kubevirt-vms/
|
||||||
|
# prod-vlan57-nad.yaml). VM still uses pod-network masquerade until Phase 1.5
|
||||||
|
# host bridge work lands (Puppet br-prod + enp86s0.57); switching is a
|
||||||
|
# one-line YAML edit + git push.
|
||||||
|
#
|
||||||
|
# See docs/infrastructure/windows-server-build-runner-plan.md "Phase 1 readiness gate".
|
||||||
|
#
|
||||||
|
# Network choice in this draft: **pod-network fallback** (Calico default).
|
||||||
|
# Outbound-only is fine for the Updater Sandbox E2E runner workload (the runner
|
||||||
|
# polls GitHub Actions over HTTPS; no inbound listener needed). Switch to a
|
||||||
|
# Multus PROD VLAN NetworkAttachmentDefinition once Multus is installed and the
|
||||||
|
# operator wants L2 access from `ci1` to other PROD VLAN services.
|
||||||
|
#
|
||||||
|
# Sizing: 8 vCPU / 16 GB RAM / 200 GB disk on Longhorn (default storageClass).
|
||||||
|
# Capacity check 2026-05-08: each RKE2 node has 16 vCPU / ~64Gi allocatable;
|
||||||
|
# 8 vCPU is 50% of one node's 16 vCPU (~17% of the 3-node total), so it fits.
|
||||||
|
#
|
||||||
|
# Apply (after operator approval + ISO loaded):
|
||||||
|
# kubectl --kubeconfig $env:USERPROFILE\.kube\rke2.yaml apply -f apps/kubevirt-vms/ci1.yaml
|
||||||
|
#
|
||||||
|
# Connect to console for Windows install:
|
||||||
|
# virtctl --kubeconfig $env:USERPROFILE\.kube\rke2.yaml vnc ci1 -n kubevirt-vms
|
||||||
|
# (Or via Guacamole once a connection profile is added.)
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
@@ -15,6 +57,248 @@ metadata:
|
|||||||
pod-security.kubernetes.io/enforce: privileged
|
pod-security.kubernetes.io/enforce: privileged
|
||||||
|
|
||||||
---
|
---
|
||||||
|
# ISO PVC — populated via CDI virtctl image-upload (CDI is now installed).
|
||||||
|
#
|
||||||
|
# **Volume mode (2026-05-08 status):** Filesystem-mode PVC. A migration to
|
||||||
|
# `volumeMode: Block` via DataVolume was attempted to address an OVMF SATA
|
||||||
|
# CDROM read timeout, but CDI v1.65.0's upload-target pod runs as uid 107
|
||||||
|
# with `capabilities.drop: [ALL]` and cannot open the underlying block
|
||||||
|
# device (`blockdev: cannot open /dev/cdi-block-volume: Permission denied`).
|
||||||
|
# Reverted to Filesystem PVC pending one of:
|
||||||
|
# - CDI deployment override granting CAP_SYS_RAWIO to upload pod
|
||||||
|
# - Pre-populated PVC via privileged init pod that dd's the ISO directly
|
||||||
|
# - Migration to a different storage class that exposes block devices
|
||||||
|
# differently (e.g. iSCSI, where Longhorn's CSI mount path may behave
|
||||||
|
# differently)
|
||||||
|
#
|
||||||
|
# Population workflow (this PVC, Filesystem mode):
|
||||||
|
# 1. virtctl --kubeconfig $env:USERPROFILE\.kube\rke2.yaml image-upload pvc \
|
||||||
|
# windows-server-2025-iso -n kubevirt-vms \
|
||||||
|
# --image-path "$env:USERPROFILE\Downloads\en-us_windows_server_2025_updated_march_2026_x64_dvd_8e06425a.iso" \
|
||||||
|
# --size 10Gi --storage-class longhorn --access-mode ReadWriteOnce \
|
||||||
|
# --uploadproxy-url https://localhost:8443 --insecure
|
||||||
|
# (--uploadproxy-url uses port-forward in practice: `kubectl port-forward
|
||||||
|
# -n cdi service/cdi-uploadproxy 8443:443 &` first.)
|
||||||
|
#
|
||||||
|
# **Open boot issue:** even with the ISO at bootOrder:1, OVMF console showed:
|
||||||
|
# BdsDxe: starting Boot0001 "UEFI QEMU DVD-ROM QM00001 " from ... Sata(...)
|
||||||
|
# BdsDxe: failed to start Boot0001 ... Time out
|
||||||
|
# Diagnosis confirmed PVC content IS a valid bootable ISO9660 image — the
|
||||||
|
# timeout is in OVMF reading from the SATA-CDROM-backed-by-filesystem-PVC.
|
||||||
|
# Block mode would likely fix it; see CDI permission issue above.
|
||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: windows-server-2025-iso
|
||||||
|
namespace: kubevirt-vms
|
||||||
|
labels:
|
||||||
|
app: ci-runner
|
||||||
|
flowercore.io/managed-by: bluejay-infra
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteOnce # Bump to ReadOnlyMany after population for multi-VM use
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 10Gi # Server 2025 ISO is 7.7GB; 10Gi for headroom
|
||||||
|
storageClassName: longhorn
|
||||||
|
|
||||||
|
---
|
||||||
|
# Root disk PVC — empty 200Gi volume that Windows installs into.
|
||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: ci1-rootdisk
|
||||||
|
namespace: kubevirt-vms
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteOnce
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 200Gi
|
||||||
|
storageClassName: longhorn
|
||||||
|
|
||||||
|
---
|
||||||
|
# Sysprep ConfigMap — autounattend.xml for hands-off Windows install.
|
||||||
|
# Sets the local Administrator password (encoded value; plaintext is kept in 1Password), enables RDP,
|
||||||
|
# enables WinRM, sets hostname, and configures static-ish networking via DHCP.
|
||||||
|
# The ISO + VirtIO drivers handle the rest.
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: ci1-autounattend
|
||||||
|
namespace: kubevirt-vms
|
||||||
|
data:
|
||||||
|
autounattend.xml: |
|
||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<unattend xmlns="urn:schemas-microsoft-com:unattend">
|
||||||
|
|
||||||
|
<!-- Pass 1: WindowsPE — Disk setup and VirtIO driver injection -->
|
||||||
|
<settings pass="windowsPE">
|
||||||
|
<component name="Microsoft-Windows-International-Core-WinPE"
|
||||||
|
processorArchitecture="amd64"
|
||||||
|
publicKeyToken="31bf3856ad364e35"
|
||||||
|
language="neutral" versionScope="nonSxS">
|
||||||
|
<SetupUILanguage>
|
||||||
|
<UILanguage>en-US</UILanguage>
|
||||||
|
</SetupUILanguage>
|
||||||
|
<InputLocale>en-US</InputLocale>
|
||||||
|
<SystemLocale>en-US</SystemLocale>
|
||||||
|
<UILanguage>en-US</UILanguage>
|
||||||
|
<UserLocale>en-US</UserLocale>
|
||||||
|
</component>
|
||||||
|
|
||||||
|
<component name="Microsoft-Windows-PnpCustomizationsWinPE"
|
||||||
|
processorArchitecture="amd64"
|
||||||
|
publicKeyToken="31bf3856ad364e35"
|
||||||
|
language="neutral" versionScope="nonSxS">
|
||||||
|
<DriverPaths>
|
||||||
|
<PathAndCredentials wcm:action="add" wcm:keyValue="1">
|
||||||
|
<Path>E:\amd64\2k25</Path>
|
||||||
|
</PathAndCredentials>
|
||||||
|
</DriverPaths>
|
||||||
|
</component>
|
||||||
|
|
||||||
|
<component name="Microsoft-Windows-Setup"
|
||||||
|
processorArchitecture="amd64"
|
||||||
|
publicKeyToken="31bf3856ad364e35"
|
||||||
|
language="neutral" versionScope="nonSxS">
|
||||||
|
<DiskConfiguration>
|
||||||
|
<Disk wcm:action="add">
|
||||||
|
<DiskID>0</DiskID>
|
||||||
|
<WillWipeDisk>true</WillWipeDisk>
|
||||||
|
<CreatePartitions>
|
||||||
|
<CreatePartition wcm:action="add">
|
||||||
|
<Order>1</Order>
|
||||||
|
<Size>260</Size>
|
||||||
|
<Type>EFI</Type>
|
||||||
|
</CreatePartition>
|
||||||
|
<CreatePartition wcm:action="add">
|
||||||
|
<Order>2</Order>
|
||||||
|
<Size>128</Size>
|
||||||
|
<Type>MSR</Type>
|
||||||
|
</CreatePartition>
|
||||||
|
<CreatePartition wcm:action="add">
|
||||||
|
<Order>3</Order>
|
||||||
|
<Extend>true</Extend>
|
||||||
|
<Type>Primary</Type>
|
||||||
|
</CreatePartition>
|
||||||
|
</CreatePartitions>
|
||||||
|
<ModifyPartitions>
|
||||||
|
<ModifyPartition wcm:action="add">
|
||||||
|
<Order>1</Order>
|
||||||
|
<PartitionID>1</PartitionID>
|
||||||
|
<Format>FAT32</Format>
|
||||||
|
<Label>EFI</Label>
|
||||||
|
</ModifyPartition>
|
||||||
|
<ModifyPartition wcm:action="add">
|
||||||
|
<Order>2</Order>
|
||||||
|
<PartitionID>2</PartitionID>
|
||||||
|
</ModifyPartition>
|
||||||
|
<ModifyPartition wcm:action="add">
|
||||||
|
<Order>3</Order>
|
||||||
|
<PartitionID>3</PartitionID>
|
||||||
|
<Format>NTFS</Format>
|
||||||
|
<Label>Windows</Label>
|
||||||
|
</ModifyPartition>
|
||||||
|
</ModifyPartitions>
|
||||||
|
</Disk>
|
||||||
|
</DiskConfiguration>
|
||||||
|
|
||||||
|
<ImageInstall>
|
||||||
|
<OSImage>
|
||||||
|
<InstallTo>
|
||||||
|
<DiskID>0</DiskID>
|
||||||
|
<PartitionID>3</PartitionID>
|
||||||
|
</InstallTo>
|
||||||
|
<!-- Index 2 = Standard Desktop Experience. Use 4 for Datacenter Desktop. -->
|
||||||
|
<InstallFrom>
|
||||||
|
<MetaData wcm:action="add">
|
||||||
|
<Key>/IMAGE/INDEX</Key>
|
||||||
|
<Value>2</Value>
|
||||||
|
</MetaData>
|
||||||
|
</InstallFrom>
|
||||||
|
</OSImage>
|
||||||
|
</ImageInstall>
|
||||||
|
|
||||||
|
<UserData>
|
||||||
|
<AcceptEula>true</AcceptEula>
|
||||||
|
<FullName>FlowerCore CI Runner</FullName>
|
||||||
|
<Organization>FlowerCore</Organization>
|
||||||
|
<!-- Eval install — no product key needed for 180-day evaluation -->
|
||||||
|
</UserData>
|
||||||
|
</component>
|
||||||
|
</settings>
|
||||||
|
|
||||||
|
<!-- Pass 4: Specialize — Hostname, RDP, WinRM -->
|
||||||
|
<settings pass="specialize">
|
||||||
|
<component name="Microsoft-Windows-Shell-Setup"
|
||||||
|
processorArchitecture="amd64"
|
||||||
|
publicKeyToken="31bf3856ad364e35"
|
||||||
|
language="neutral" versionScope="nonSxS">
|
||||||
|
<ComputerName>CI1</ComputerName>
|
||||||
|
<TimeZone>Central Standard Time</TimeZone>
|
||||||
|
</component>
|
||||||
|
|
||||||
|
<component name="Microsoft-Windows-TerminalServices-LocalSessionManager"
|
||||||
|
processorArchitecture="amd64"
|
||||||
|
publicKeyToken="31bf3856ad364e35"
|
||||||
|
language="neutral" versionScope="nonSxS">
|
||||||
|
<fDenyTSConnections>false</fDenyTSConnections>
|
||||||
|
</component>
|
||||||
|
</settings>
|
||||||
|
|
||||||
|
<!-- Pass 7: OOBE — Admin account, RDP firewall, WinRM -->
|
||||||
|
<settings pass="oobeSystem">
|
||||||
|
<component name="Microsoft-Windows-Shell-Setup"
|
||||||
|
processorArchitecture="amd64"
|
||||||
|
publicKeyToken="31bf3856ad364e35"
|
||||||
|
language="neutral" versionScope="nonSxS">
|
||||||
|
<OOBE>
|
||||||
|
<HideEULAPage>true</HideEULAPage>
|
||||||
|
<HideLocalAccountScreen>true</HideLocalAccountScreen>
|
||||||
|
<HideOEMRegistrationScreen>true</HideOEMRegistrationScreen>
|
||||||
|
<HideOnlineAccountScreens>true</HideOnlineAccountScreens>
|
||||||
|
<HideWirelessSetupInOOBE>true</HideWirelessSetupInOOBE>
|
||||||
|
<ProtectYourPC>3</ProtectYourPC>
|
||||||
|
</OOBE>
|
||||||
|
<UserAccounts>
|
||||||
|
<AdministratorPassword>
|
||||||
|
<!-- Real password is in 1Password — vault qaphopopkryhbg353ukzhhuqoq,
|
||||||
|
item id h3ix4mgfk65gmkcmvh6ly3d3hu, title:
|
||||||
|
"ci1 Administrator (Windows Server 2025 KubeVirt VM)".
|
||||||
|
Field "autounattend AdministratorPassword Value (UTF-16-LE base64)"
|
||||||
|
matches the Value below.
|
||||||
|
To rotate: regenerate, recompute base64
|
||||||
|
$combined = $pw + "AdministratorPassword"
|
||||||
|
[Convert]::ToBase64String([Text.Encoding]::Unicode.GetBytes($combined))
|
||||||
|
then update both 1P item AND this Value field, recreate VM. -->
|
||||||
|
<Value>bAA3AGsANABOAHcAcgBMAG4AeQBTAHUAYgBBAHQAaQBzAFUAcAB6AEMAWQAhADkAYQBCAEEAZABtAGkAbgBpAHMAdAByAGEAdABvAHIAUABhAHMAcwB3AG8AcgBkAA==</Value>
|
||||||
|
<PlainText>false</PlainText>
|
||||||
|
</AdministratorPassword>
|
||||||
|
</UserAccounts>
|
||||||
|
<FirstLogonCommands>
|
||||||
|
<SynchronousCommand wcm:action="add">
|
||||||
|
<Order>1</Order>
|
||||||
|
<CommandLine>powershell.exe -ExecutionPolicy Bypass -Command "Set-NetFirewallRule -DisplayGroup 'Remote Desktop' -Enabled True"</CommandLine>
|
||||||
|
<Description>Enable RDP firewall rule</Description>
|
||||||
|
</SynchronousCommand>
|
||||||
|
<SynchronousCommand wcm:action="add">
|
||||||
|
<Order>2</Order>
|
||||||
|
<CommandLine>powershell.exe -ExecutionPolicy Bypass -Command "Enable-PSRemoting -Force; Set-Item WSMan:\localhost\Service\Auth\Basic $true; Set-Item WSMan:\localhost\Service\AllowUnencrypted $true"</CommandLine>
|
||||||
|
<Description>Enable WinRM (Phase 2 will pivot to HTTPS via step-ca cert)</Description>
|
||||||
|
</SynchronousCommand>
|
||||||
|
<SynchronousCommand wcm:action="add">
|
||||||
|
<Order>3</Order>
|
||||||
|
<CommandLine>cmd.exe /c reg add "HKLM\SOFTWARE\Microsoft\Windows\CurrentVersion\Policies\System" /v EnableLUA /t REG_DWORD /d 0 /f</CommandLine>
|
||||||
|
<Description>Disable UAC (Phase 2 Puppet will re-evaluate)</Description>
|
||||||
|
</SynchronousCommand>
|
||||||
|
</FirstLogonCommands>
|
||||||
|
</component>
|
||||||
|
</settings>
|
||||||
|
</unattend>
|
||||||
|
|
||||||
|
---
|
||||||
|
# VirtualMachine — Windows Server 2025 CI runner.
|
||||||
apiVersion: kubevirt.io/v1
|
apiVersion: kubevirt.io/v1
|
||||||
kind: VirtualMachine
|
kind: VirtualMachine
|
||||||
metadata:
|
metadata:
|
||||||
@@ -25,7 +309,33 @@ metadata:
|
|||||||
role: github-actions-runner
|
role: github-actions-runner
|
||||||
flowercore.io/managed-by: bluejay-infra
|
flowercore.io/managed-by: bluejay-infra
|
||||||
spec:
|
spec:
|
||||||
runStrategy: Always
|
# `running: true` is deprecated in favor of `runStrategy`. They are mutually
|
||||||
|
# exclusive — KubeVirt's validating webhook rejects any VM that sets both:
|
||||||
|
# admission webhook "virtualmachine-validator.kubevirt.io" denied the request:
|
||||||
|
# Running and RunStrategy are mutually exclusive.
|
||||||
|
# `Always` keeps a VMI running and restarts it if it crashes/exits — same
|
||||||
|
# semantics as the old `running: true`.
|
||||||
|
#
|
||||||
|
# **2026-05-08 status: VM cannot start due to a stale QEMU flock on the
|
||||||
|
# rootdisk PVC** (qemu reports `Failed to get "write" lock` on
|
||||||
|
# `/var/run/kubevirt-private/vmi-disks/rootdisk/disk.img`). The flock was
|
||||||
|
# left by a previous QEMU process during a force-deleted launcher pod
|
||||||
|
# cycle. Recovery requires either (a) a Longhorn engine restart on
|
||||||
|
# rke2-agent2, (b) a Longhorn volume detach via the longhorn-manager API
|
||||||
|
# (kubectl patch on `volume.longhorn.io/<pvc-name>` does not work — the
|
||||||
|
# spec.nodeID is reconciled back), or (c) a node reboot of rke2-agent2.
|
||||||
|
#
|
||||||
|
# **Confirmed working:** the bootOrder swap (windows-iso=1, rootdisk=2)
|
||||||
|
# and the runStrategy migration (above). The ISO PVC was successfully
|
||||||
|
# repopulated via virtctl image-upload pvc on the Filesystem-mode PVC.
|
||||||
|
#
|
||||||
|
# **Open: SATA CDROM read timeout** — even with bootOrder=1, OVMF reported
|
||||||
|
# `BdsDxe: failed to start Boot0001 ... Time out` reading the SATA CDROM
|
||||||
|
# backed by the Filesystem-mode PVC. A switch to Block-mode DataVolume
|
||||||
|
# was attempted but blocked by a CDI v1.65.0 upload-pod permission issue
|
||||||
|
# (capability drop prevents writing to the underlying block device).
|
||||||
|
# See header docstring on the ISO PVC.
|
||||||
|
runStrategy: Always # LIVE — ISO uploaded 2026-05-08, password in 1P
|
||||||
template:
|
template:
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
@@ -67,16 +377,46 @@ spec:
|
|||||||
firmware:
|
firmware:
|
||||||
bootloader:
|
bootloader:
|
||||||
efi:
|
efi:
|
||||||
secureBoot: false
|
secureBoot: true
|
||||||
devices:
|
devices:
|
||||||
tpm: {}
|
tpm: {} # Non-persistent vTPM — sufficient for runner; no BitLocker
|
||||||
disks:
|
disks:
|
||||||
|
# bootOrder: ISO must be 1 for first-boot install (the rootdisk has no
|
||||||
|
# EFI bootloader yet). After Windows installs, it writes its own UEFI
|
||||||
|
# Boot#### entries pointing at the rootdisk's EFI partition; UEFI then
|
||||||
|
# boots from rootdisk going forward and the ISO at bootOrder:2 acts as
|
||||||
|
# a fallback for re-install scenarios.
|
||||||
|
#
|
||||||
|
# Original (broken) order had rootdisk=1, windows-iso=2 — UEFI tried
|
||||||
|
# the empty virtio disk first, got nothing, fell back to the SATA
|
||||||
|
# CDROM at Boot0001 with a short timeout, and timed out before the
|
||||||
|
# CDROM enumerated. Console showed:
|
||||||
|
# BdsDxe: failed to start Boot0001 ... Time out
|
||||||
|
# BdsDxe: No bootable option or device was found.
|
||||||
|
# Confirmed via debug pod: PVC content IS a real bootable ISO9660
|
||||||
|
# (file: "ISO 9660 CD-ROM filesystem data ... (bootable)"), so the
|
||||||
|
# image was not at fault. Boot priority was one bug; the SATA-CDROM timeout persisted at bootOrder 1.
|
||||||
|
- name: windows-iso
|
||||||
|
bootOrder: 1
|
||||||
|
cdrom:
|
||||||
|
bus: sata
|
||||||
- name: rootdisk
|
- name: rootdisk
|
||||||
|
bootOrder: 2
|
||||||
disk:
|
disk:
|
||||||
bus: virtio
|
bus: virtio
|
||||||
|
- name: virtio-drivers
|
||||||
|
cdrom:
|
||||||
|
bus: sata
|
||||||
|
- name: sysprep
|
||||||
|
cdrom:
|
||||||
|
bus: sata
|
||||||
interfaces:
|
interfaces:
|
||||||
# Pod-network fallback for CI runner outbound traffic. Switch to
|
# Pod-network fallback for Phase 1. To switch to PROD VLAN once Multus
|
||||||
# prod-vlan57 once the bridge/NAD lane is ready for L2 access.
|
# + the prod-vlan57 NAD exist, replace this block with:
|
||||||
|
# - name: prod-net
|
||||||
|
# bridge: {}
|
||||||
|
# model: virtio
|
||||||
|
# and update the networks: stanza to use multus.networkName: kubevirt-vms/prod-vlan57
|
||||||
- name: default
|
- name: default
|
||||||
masquerade: {}
|
masquerade: {}
|
||||||
model: virtio
|
model: virtio
|
||||||
@@ -87,7 +427,32 @@ spec:
|
|||||||
pod: {}
|
pod: {}
|
||||||
volumes:
|
volumes:
|
||||||
- name: rootdisk
|
- name: rootdisk
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: ci1-rootdisk
|
||||||
|
- name: windows-iso
|
||||||
|
# Path B (2026-05-08): mount ISO from Synology NFS instead of
|
||||||
|
# Longhorn Filesystem PVC. The Filesystem-PVC path was confirmed to
|
||||||
|
# contain a valid bootable ISO9660 image but caused OVMF's
|
||||||
|
# SATA-CDROM read window to time out:
|
||||||
|
# BdsDxe: failed to start Boot0001 ... Time out
|
||||||
|
# Block-mode DataVolume was attempted as Path A but blocked by CDI
|
||||||
|
# v1.65.0's upload pod capability drop. NFS-mounted ISO bypasses
|
||||||
|
# both issues. See win2025-iso-nfs-pv.yaml header for full rationale
|
||||||
|
# and Synology layout.
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: windows-server-2025-iso-nfs
|
||||||
|
- name: virtio-drivers
|
||||||
containerDisk:
|
containerDisk:
|
||||||
image: localhost/fc-win-server-2025:v1
|
# Pinned to v1.8.2 (latest stable as of 2026-05-08).
|
||||||
imagePullPolicy: Never
|
# The :latest tag uses Docker manifest v1 schema which containerd
|
||||||
|
# 2.1 (RKE2 v1.34.5) refuses to pull with:
|
||||||
|
# "media type application/vnd.docker.distribution.manifest.v1+prettyjws
|
||||||
|
# is no longer supported since containerd v2.1"
|
||||||
|
# v1.8.2 is rebuilt with manifest v2/OCI and works on containerd 2.1.
|
||||||
|
# Bump available: https://quay.io/repository/kubevirt/virtio-container-disk?tab=tags
|
||||||
|
image: quay.io/kubevirt/virtio-container-disk:v1.8.2
|
||||||
|
- name: sysprep
|
||||||
|
sysprep:
|
||||||
|
configMap:
|
||||||
|
name: ci1-autounattend
|
||||||
terminationGracePeriodSeconds: 3600
|
terminationGracePeriodSeconds: 3600
|
||||||
|
|||||||
@@ -1,3 +0,0 @@
|
|||||||
resources:
|
|
||||||
- ci1.yaml
|
|
||||||
- prod-vlan57-nad.yaml
|
|
||||||
@@ -75,20 +75,6 @@ data:
|
|||||||
cluster: "rke2"
|
cluster: "rke2"
|
||||||
role: "agent"
|
role: "agent"
|
||||||
|
|
||||||
# Mac mini macOS runner node (INFRA VLAN)
|
|
||||||
- job_name: "macmini-node"
|
|
||||||
scrape_timeout: 15s
|
|
||||||
static_configs:
|
|
||||||
- targets: ["10.0.56.115:9100"]
|
|
||||||
labels:
|
|
||||||
instance: "macmini"
|
|
||||||
host: "macmini.iamworkin.lan"
|
|
||||||
vlan: "infra"
|
|
||||||
arch: "arm64"
|
|
||||||
role: "macos-runner"
|
|
||||||
puppet_managed: "true"
|
|
||||||
puppet_server: "puppet.iamworkin.lan"
|
|
||||||
|
|
||||||
# In-cluster node-exporter DaemonSet
|
# In-cluster node-exporter DaemonSet
|
||||||
- job_name: "k8s-node-exporter"
|
- job_name: "k8s-node-exporter"
|
||||||
kubernetes_sd_configs:
|
kubernetes_sd_configs:
|
||||||
@@ -711,36 +697,6 @@ data:
|
|||||||
summary: "Print.Web Ollama runner held for >10m ({{ $labels.model }})"
|
summary: "Print.Web Ollama runner held for >10m ({{ $labels.model }})"
|
||||||
description: "Print.Web reports model {{ $labels.model }} with {{ $value | printf \"%.0f\" }}s of keep-alive remaining. Check concurrent requests before the Pi 5 Ollama lane thrashes."
|
description: "Print.Web reports model {{ $labels.model }} with {{ $value | printf \"%.0f\" }}s of keep-alive remaining. Check concurrent requests before the Pi 5 Ollama lane thrashes."
|
||||||
|
|
||||||
- name: macmini-runners
|
|
||||||
rules:
|
|
||||||
- alert: MacMiniRunnerOffline
|
|
||||||
expr: (flowercore_github_runner_online{runner=~"macmini-.*"} == 0) or absent(flowercore_github_runner_online{runner=~"macmini-.*"})
|
|
||||||
for: 10m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
service: github-runner
|
|
||||||
annotations:
|
|
||||||
summary: "Mac mini GitHub runner offline ({{ $labels.runner }})"
|
|
||||||
description: "A macmini-* GitHub Actions runner has not reported online for more than 10 minutes. Puppet manages its LaunchDaemon under /Library/LaunchDaemons/io.flowercore.github-runner-<slug>.plist; runners survive reboot and do not require a GUI session."
|
|
||||||
|
|
||||||
- name: linux-runners
|
|
||||||
rules:
|
|
||||||
- alert: LinuxRunnerOffline
|
|
||||||
expr: |
|
|
||||||
kube_deployment_status_replicas_ready{
|
|
||||||
namespace="github-runner",
|
|
||||||
deployment=~"github-runner(|-(sharedpos|puppet|signage|dms|telephony|print-web|chat|mysql|kiosk-linux))"
|
|
||||||
} == 0
|
|
||||||
for: 5m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
alert_channel: irc
|
|
||||||
service: github-runner
|
|
||||||
team: ci
|
|
||||||
annotations:
|
|
||||||
summary: "Linux CI runner offline: {{ $labels.deployment }}"
|
|
||||||
description: "Deployment {{ $labels.deployment }} in namespace github-runner has 0 ready replicas for more than 5 minutes. CI jobs targeting this repo will queue until the runner pod restarts and re-registers with GitHub. Check pods with: kubectl -n github-runner get pods -l app.kubernetes.io/name={{ $labels.deployment }}. Check logs with: kubectl -n github-runner logs -l app.kubernetes.io/name={{ $labels.deployment }} --tail=50. Common causes: PAT missing repo access, runner CrashLoopBackOff, or node/resource pressure."
|
|
||||||
|
|
||||||
- name: remote-desktop
|
- name: remote-desktop
|
||||||
rules:
|
rules:
|
||||||
- alert: RemoteDesktopWebDown
|
- alert: RemoteDesktopWebDown
|
||||||
@@ -1018,39 +974,6 @@ data:
|
|||||||
summary: "Deployment {{ $labels.namespace }}/{{ $labels.deployment }} replica mismatch"
|
summary: "Deployment {{ $labels.namespace }}/{{ $labels.deployment }} replica mismatch"
|
||||||
description: "Spec wants {{ $labels.spec_replicas }} but only {{ $value }} available. Likely a rollout stuck on probe failure, scheduling, or PVC."
|
description: "Spec wants {{ $labels.spec_replicas }} but only {{ $value }} available. Likely a rollout stuck on probe failure, scheduling, or PVC."
|
||||||
|
|
||||||
# Q-MR-3 (2026-05-11): multus memory pressure — catches the next OOM
|
|
||||||
# cascade BEFORE multus is OOM-killed cluster-wide. The 2026-05-10
|
|
||||||
# outage (21h) hit because no alert fired on the rising multus working
|
|
||||||
# set — only downstream blackbox / Traefik / service alerts. With
|
|
||||||
# 1Gi limit (bluejay-infra@eb8693e), 80% = ~800MiB; steady-state
|
|
||||||
# runs ~150-250MiB so this only fires when an avalanche starts.
|
|
||||||
- alert: MultusMemoryPressure
|
|
||||||
expr: |
|
|
||||||
container_memory_working_set_bytes{container="kube-multus"}
|
|
||||||
/ container_spec_memory_limit_bytes{container="kube-multus"} > 0.8
|
|
||||||
for: 5m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
alert_channel: thermal_print
|
|
||||||
annotations:
|
|
||||||
summary: "kube-multus memory >80% of limit on {{ $labels.node }} for 5m"
|
|
||||||
description: "kube-multus working set is {{ $value | humanizePercentage }} of its memory limit on node {{ $labels.node }}. If this keeps climbing, multus will OOM and all new pod networking will halt cluster-wide (precedent: 2026-05-10 outage)."
|
|
||||||
|
|
||||||
# Q-MR-3 (2026-05-11): namespace pending-pod backlog — catches the
|
|
||||||
# operator-leak avalanche pattern BEFORE it cascades into a multus
|
|
||||||
# CNI OOM. Any FC operator (RemoteDesktop / Distribution / WorldBuilder)
|
|
||||||
# emitting pods without ownerReferences will accumulate them when
|
|
||||||
# the operator crashes. >25 pending pods in any namespace for 30m
|
|
||||||
# is the signal to investigate the reconciler.
|
|
||||||
- alert: NamespacePendingPodBacklog
|
|
||||||
expr: sum by (namespace) (kube_pod_status_phase{phase="Pending"}) > 25
|
|
||||||
for: 30m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
summary: "Namespace {{ $labels.namespace }} has {{ $value }} Pending pods for 30m"
|
|
||||||
description: "Pending pod count in {{ $labels.namespace }} exceeds 25 sustained for 30m. Likely operator-leak avalanche pattern — children emitted without ownerReferences. Risk of multus CNI OOM cascade."
|
|
||||||
|
|
||||||
# Longhorn storage health alerts. Required: longhorn scrape job
|
# Longhorn storage health alerts. Required: longhorn scrape job
|
||||||
# (added 2026-04-26 — see scrape_configs above). The K8s events
|
# (added 2026-04-26 — see scrape_configs above). The K8s events
|
||||||
# for "snapshot becomes not ready to use" are transient lifecycle
|
# for "snapshot becomes not ready to use" are transient lifecycle
|
||||||
@@ -3440,30 +3363,29 @@ data:
|
|||||||
datasourceUid: __expr__
|
datasourceUid: __expr__
|
||||||
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [600], type: gt}}], refId: C}
|
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [600], type: gt}}], refId: C}
|
||||||
- orgId: 1
|
- orgId: 1
|
||||||
name: CI Runners
|
name: Signage Marquee
|
||||||
folder: CI Alerts
|
folder: AI Stack Alerts
|
||||||
interval: 1m
|
interval: 1m
|
||||||
rules:
|
rules:
|
||||||
- uid: linux-runner-offline
|
- uid: marquee-dropped-frames-high
|
||||||
title: LinuxRunnerOffline
|
title: MarqueeDroppedFramesHigh
|
||||||
condition: C
|
condition: C
|
||||||
for: 5m
|
for: 5m
|
||||||
noDataState: OK
|
noDataState: OK
|
||||||
execErrState: Error
|
execErrState: OK
|
||||||
annotations:
|
annotations:
|
||||||
summary: "Linux CI runner offline: {{ $labels.deployment }}"
|
summary: Marquee dropped-frame rate above 5%
|
||||||
description: "A github-runner namespace Deployment has 0 ready replicas for more than 5 minutes. CI jobs targeting that repo will queue until the runner pod restarts and re-registers."
|
description: "Dropped frames exceeded the IR-21 budget for a renderer/phase/node tuple. Grafana owns alert delivery to IRC #alerts; Prometheus rules remain only the visibility source."
|
||||||
runbook: "1. kubectl -n github-runner get pods -l app.kubernetes.io/name={{ $labels.deployment }} 2. kubectl -n github-runner logs -l app.kubernetes.io/name={{ $labels.deployment }} --tail=50 3. Verify PAT repo access if registration returns 404 4. Verify no RWO PVC is shared by scaled runners"
|
runbook: "1. Open /d/fc-marquee-perf/marquee-animation-performance 2. Filter renderer/node/phase 3. Compare latest AAT baseline diff 4. Restart only the affected player if the issue is node-local"
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
service: github-runner
|
service: signage
|
||||||
alert_channel: irc
|
alert_channel: irc
|
||||||
team: ci
|
|
||||||
data:
|
data:
|
||||||
- refId: A
|
- refId: A
|
||||||
relativeTimeRange: {from: 300, to: 0}
|
relativeTimeRange: {from: 300, to: 0}
|
||||||
datasourceUid: prometheus
|
datasourceUid: prometheus
|
||||||
model: {expr: 'kube_deployment_status_replicas_ready{namespace="github-runner",deployment=~"github-runner(|-(sharedpos|puppet|signage|dms|telephony|print-web|chat|mysql|kiosk-linux))"} == 0', instant: true, refId: A}
|
model: {expr: '(sum by (renderer, node_id, phase) (rate(marquee_dropped_frames_total[5m])) / sum by (renderer, node_id, phase) (rate(marquee_render_latency_ms_count[5m]))) * 100', instant: true, refId: A}
|
||||||
- refId: B
|
- refId: B
|
||||||
relativeTimeRange: {from: 300, to: 0}
|
relativeTimeRange: {from: 300, to: 0}
|
||||||
datasourceUid: __expr__
|
datasourceUid: __expr__
|
||||||
@@ -3471,7 +3393,61 @@ data:
|
|||||||
- refId: C
|
- refId: C
|
||||||
relativeTimeRange: {from: 300, to: 0}
|
relativeTimeRange: {from: 300, to: 0}
|
||||||
datasourceUid: __expr__
|
datasourceUid: __expr__
|
||||||
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [0], type: gt}}], refId: C}
|
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [5], type: gt}}], refId: C}
|
||||||
|
- uid: marquee-render-latency-p99-high
|
||||||
|
title: MarqueeRenderLatencyP99High
|
||||||
|
condition: C
|
||||||
|
for: 5m
|
||||||
|
noDataState: OK
|
||||||
|
execErrState: OK
|
||||||
|
annotations:
|
||||||
|
summary: Marquee render latency p99 above 16ms
|
||||||
|
description: "Renderer p99 latency exceeded the Pi-class 16ms budget. Grafana delivers this alert to IRC #alerts."
|
||||||
|
runbook: "1. Open /d/fc-marquee-perf/marquee-animation-performance 2. Check render latency p99 by renderer/node/phase 3. Compare with dropped frames and node CPU 4. If isolated to WPF, capture current Player.Wpf frame set before restart"
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
service: signage
|
||||||
|
alert_channel: irc
|
||||||
|
data:
|
||||||
|
- refId: A
|
||||||
|
relativeTimeRange: {from: 300, to: 0}
|
||||||
|
datasourceUid: prometheus
|
||||||
|
model: {expr: 'histogram_quantile(0.99, sum by (renderer, node_id, phase, le) (rate(marquee_render_latency_ms_bucket[5m])))', instant: true, refId: A}
|
||||||
|
- refId: B
|
||||||
|
relativeTimeRange: {from: 300, to: 0}
|
||||||
|
datasourceUid: __expr__
|
||||||
|
model: {type: reduce, expression: A, reducer: last, refId: B}
|
||||||
|
- refId: C
|
||||||
|
relativeTimeRange: {from: 300, to: 0}
|
||||||
|
datasourceUid: __expr__
|
||||||
|
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [16], type: gt}}], refId: C}
|
||||||
|
- uid: marquee-animation-duration-drift
|
||||||
|
title: MarqueeAnimationDurationDrift
|
||||||
|
condition: C
|
||||||
|
for: 10m
|
||||||
|
noDataState: OK
|
||||||
|
execErrState: OK
|
||||||
|
annotations:
|
||||||
|
summary: Marquee animation duration drift above 10%
|
||||||
|
description: "Observed cycle duration has drifted more than 10% from target for a renderer/phase pair. Grafana delivers this alert to IRC #alerts."
|
||||||
|
runbook: "1. Open /d/fc-marquee-perf/marquee-animation-performance 2. Compare observed vs target duration 3. Check recent theme/preset changes 4. Re-run MarqueeHolidayBrandTrajectoryTests before promoting a baseline"
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
service: signage
|
||||||
|
alert_channel: irc
|
||||||
|
data:
|
||||||
|
- refId: A
|
||||||
|
relativeTimeRange: {from: 900, to: 0}
|
||||||
|
datasourceUid: prometheus
|
||||||
|
model: {expr: 'abs((histogram_quantile(0.5, sum by (renderer, phase, le) (rate(marquee_animation_duration_ms_bucket[15m]))) - avg by (renderer, phase) (marquee_animation_duration_target_ms)) / avg by (renderer, phase) (marquee_animation_duration_target_ms))', instant: true, refId: A}
|
||||||
|
- refId: B
|
||||||
|
relativeTimeRange: {from: 900, to: 0}
|
||||||
|
datasourceUid: __expr__
|
||||||
|
model: {type: reduce, expression: A, reducer: last, refId: B}
|
||||||
|
- refId: C
|
||||||
|
relativeTimeRange: {from: 900, to: 0}
|
||||||
|
datasourceUid: __expr__
|
||||||
|
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [0.1], type: gt}}], refId: C}
|
||||||
- orgId: 1
|
- orgId: 1
|
||||||
name: Infrastructure
|
name: Infrastructure
|
||||||
folder: AI Stack Alerts
|
folder: AI Stack Alerts
|
||||||
@@ -3504,32 +3480,6 @@ data:
|
|||||||
relativeTimeRange: {from: 120, to: 0}
|
relativeTimeRange: {from: 120, to: 0}
|
||||||
datasourceUid: __expr__
|
datasourceUid: __expr__
|
||||||
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [1], type: lt}}], refId: C}
|
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [1], type: lt}}], refId: C}
|
||||||
- uid: macmini-runner-offline
|
|
||||||
title: MacMiniRunnerOffline
|
|
||||||
condition: C
|
|
||||||
for: 10m
|
|
||||||
noDataState: Alerting
|
|
||||||
execErrState: OK
|
|
||||||
annotations:
|
|
||||||
summary: Mac mini GitHub runner offline
|
|
||||||
description: "One or more macmini-* GitHub Actions runners have not reported online for more than 10 minutes. LaunchDaemons survive reboot and do not require the bluejay GUI session."
|
|
||||||
runbook: "1. ssh fcadmin@macmini.iamworkin.lan 2. launchctl print system/io.flowercore.github-runner-<slug> 3. Check /Users/fcadmin/Library/Logs/github-runners/<slug>/stderr.log 4. Re-register the repo runner if .runner is missing"
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
service: github-runner
|
|
||||||
data:
|
|
||||||
- refId: A
|
|
||||||
relativeTimeRange: {from: 600, to: 0}
|
|
||||||
datasourceUid: prometheus
|
|
||||||
model: {expr: 'min(flowercore_github_runner_online{runner=~"macmini-.*"} or vector(0))', instant: true, refId: A}
|
|
||||||
- refId: B
|
|
||||||
relativeTimeRange: {from: 600, to: 0}
|
|
||||||
datasourceUid: __expr__
|
|
||||||
model: {type: reduce, expression: A, reducer: last, refId: B}
|
|
||||||
- refId: C
|
|
||||||
relativeTimeRange: {from: 600, to: 0}
|
|
||||||
datasourceUid: __expr__
|
|
||||||
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [1], type: lt}}], refId: C}
|
|
||||||
- uid: high-cpu
|
- uid: high-cpu
|
||||||
title: High CPU (>85%)
|
title: High CPU (>85%)
|
||||||
condition: C
|
condition: C
|
||||||
|
|||||||
@@ -188,24 +188,13 @@ spec:
|
|||||||
- name: kube-multus
|
- name: kube-multus
|
||||||
image: ghcr.io/k8snetworkplumbingwg/multus-cni:snapshot-thick
|
image: ghcr.io/k8snetworkplumbingwg/multus-cni:snapshot-thick
|
||||||
command: [ "/usr/src/multus-cni/bin/multus-daemon" ]
|
command: [ "/usr/src/multus-cni/bin/multus-daemon" ]
|
||||||
# 2026-05-11: upstream default of 50Mi memory limit OOM-cascades when
|
|
||||||
# an operator-owned namespace accumulates >100 pending pods retrying
|
|
||||||
# CNI ADD. RemoteDesktop emitted 219 orphan rd-browser-only pods
|
|
||||||
# (missing OwnerReferences), kubelet's CNI ADD avalanche pushed multus
|
|
||||||
# over 50Mi, OOMKilled, restarted with even bigger backlog → loop.
|
|
||||||
# 21h cluster outage. See FlowerCore.Notes:
|
|
||||||
# feedback_multus_50mi_limit_oom_orphan_pod_avalanche.md
|
|
||||||
# 1Gi limit / 512Mi request comfortably handles a 200+ pod CNI
|
|
||||||
# catchup burst on 64GB nodes (nodes are <25% used in steady-state).
|
|
||||||
# Drop back toward 256Mi only after MultusMemoryPressure alert
|
|
||||||
# proves steady-state working set sits well below 200Mi.
|
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: "100m"
|
cpu: "100m"
|
||||||
memory: "512Mi"
|
memory: "50Mi"
|
||||||
limits:
|
limits:
|
||||||
cpu: "100m"
|
cpu: "100m"
|
||||||
memory: "1Gi"
|
memory: "50Mi"
|
||||||
securityContext:
|
securityContext:
|
||||||
privileged: true
|
privileged: true
|
||||||
terminationMessagePolicy: FallbackToLogsOnError
|
terminationMessagePolicy: FallbackToLogsOnError
|
||||||
|
|||||||
@@ -127,13 +127,10 @@ spec:
|
|||||||
initContainers:
|
initContainers:
|
||||||
- name: fix-data-perms
|
- name: fix-data-perms
|
||||||
image: busybox:latest
|
image: busybox:latest
|
||||||
# Must run as root to chown the hostPath /tmp/tts-audio that may be
|
# Also chown /shared-tts (hostPath /tmp/tts-audio) so the non-root
|
||||||
# root-owned after node reboot. Pod-level runAsNonRoot:true would
|
# app user (uid 1654) can write Piper .sln16 files that Asterisk
|
||||||
# otherwise inherit and chown would fail with EPERM (see Notes memory
|
# reads at /var/lib/asterisk/sounds/tts. World-readable (755) is
|
||||||
# feedback_hostpath_initcontainer_chown_perms).
|
# fine — Asterisk runs as a different uid in the other pod.
|
||||||
securityContext:
|
|
||||||
runAsUser: 0
|
|
||||||
runAsNonRoot: false
|
|
||||||
command: ["sh", "-c", "chown -R 1654:1654 /data && chown 1654:1654 /shared-tts && chmod 0755 /shared-tts"]
|
command: ["sh", "-c", "chown -R 1654:1654 /data && chown 1654:1654 /shared-tts && chmod 0755 /shared-tts"]
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
- name: telephony-data
|
- name: telephony-data
|
||||||
|
|||||||
@@ -305,17 +305,15 @@ spec:
|
|||||||
path: /
|
path: /
|
||||||
port: 8080
|
port: 8080
|
||||||
initialDelaySeconds: 60
|
initialDelaySeconds: 60
|
||||||
timeoutSeconds: 15
|
timeoutSeconds: 5
|
||||||
periodSeconds: 10
|
periodSeconds: 10
|
||||||
failureThreshold: 3
|
|
||||||
readinessProbe:
|
readinessProbe:
|
||||||
httpGet:
|
httpGet:
|
||||||
path: /
|
path: /
|
||||||
port: 8080
|
port: 8080
|
||||||
initialDelaySeconds: 30
|
initialDelaySeconds: 30
|
||||||
periodSeconds: 5
|
periodSeconds: 5
|
||||||
timeoutSeconds: 15
|
timeoutSeconds: 5
|
||||||
failureThreshold: 3
|
|
||||||
---
|
---
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: Service
|
kind: Service
|
||||||
|
|||||||
@@ -54,43 +54,6 @@ public sealed class FleetManifestLintTests
|
|||||||
"ttsreader-piper",
|
"ttsreader-piper",
|
||||||
};
|
};
|
||||||
|
|
||||||
private static readonly IReadOnlyDictionary<string, string> LinuxRunnerRepos = new Dictionary<string, string>(StringComparer.Ordinal)
|
|
||||||
{
|
|
||||||
["github-runner"] = "https://github.com/astoltz/FlowerCore.Common",
|
|
||||||
["github-runner-sharedpos"] = "https://github.com/astoltz/FlowerCore.Shared.Pos",
|
|
||||||
["github-runner-puppet"] = "https://github.com/astoltz/FlowerCore.Puppet",
|
|
||||||
["github-runner-signage"] = "https://github.com/astoltz/FlowerCore.Signage",
|
|
||||||
["github-runner-dms"] = "https://github.com/astoltz/FlowerCore.DMS",
|
|
||||||
["github-runner-telephony"] = "https://github.com/astoltz/FlowerCore.Telephony",
|
|
||||||
["github-runner-print-web"] = "https://github.com/astoltz/FlowerCore.Print.Web",
|
|
||||||
["github-runner-chat"] = "https://github.com/astoltz/FlowerCore.Chat",
|
|
||||||
["github-runner-mysql"] = "https://github.com/astoltz/FlowerCore.MySQL",
|
|
||||||
["github-runner-kiosk-linux"] = "https://github.com/astoltz/FlowerCore.Kiosk.Linux",
|
|
||||||
};
|
|
||||||
|
|
||||||
private static readonly HashSet<string> ScaledLinuxRunnerDeployments = new(StringComparer.Ordinal)
|
|
||||||
{
|
|
||||||
"github-runner-sharedpos",
|
|
||||||
"github-runner-puppet",
|
|
||||||
"github-runner-signage",
|
|
||||||
"github-runner-dms",
|
|
||||||
"github-runner-telephony",
|
|
||||||
"github-runner-print-web",
|
|
||||||
"github-runner-chat",
|
|
||||||
"github-runner-mysql",
|
|
||||||
"github-runner-kiosk-linux",
|
|
||||||
};
|
|
||||||
|
|
||||||
private static readonly IReadOnlyDictionary<string, string> WritableRunnerEnv = new Dictionary<string, string>(StringComparer.Ordinal)
|
|
||||||
{
|
|
||||||
["HOME"] = "/home/runner",
|
|
||||||
["DOTNET_INSTALL_DIR"] = "/home/runner/.dotnet",
|
|
||||||
["DOTNET_CLI_HOME"] = "/home/runner",
|
|
||||||
["NUGET_PACKAGES"] = "/home/runner/.nuget/packages",
|
|
||||||
["XDG_CACHE_HOME"] = "/home/runner/.cache",
|
|
||||||
["RUNNER_TOOL_CACHE"] = "/home/runner/_tool",
|
|
||||||
};
|
|
||||||
|
|
||||||
[Fact]
|
[Fact]
|
||||||
public void IngressRoutes_MustKeepServiceReferencesInTheSameNamespace()
|
public void IngressRoutes_MustKeepServiceReferencesInTheSameNamespace()
|
||||||
{
|
{
|
||||||
@@ -224,98 +187,6 @@ public sealed class FleetManifestLintTests
|
|||||||
violations.Should().BeEmpty();
|
violations.Should().BeEmpty();
|
||||||
}
|
}
|
||||||
|
|
||||||
[Fact]
|
|
||||||
public void GitHubRunnerFleet_MustRegisterRequiredReposAsRepoScopedDeployments()
|
|
||||||
{
|
|
||||||
var deployments = GitHubRunnerDeployments();
|
|
||||||
|
|
||||||
foreach (var expectedRunner in LinuxRunnerRepos)
|
|
||||||
{
|
|
||||||
deployments.Should().ContainKey(expectedRunner.Key);
|
|
||||||
|
|
||||||
var container = deployments[expectedRunner.Key].ContainerMappings().Should().ContainSingle().Subject;
|
|
||||||
EnvValue(container, "REPO_URL").Should().Be(expectedRunner.Value);
|
|
||||||
EnvValue(container, "EPHEMERAL").Should().Be("true");
|
|
||||||
EnvValue(container, "LABELS").Should().Be("self-hosted,linux,fc-build-linux");
|
|
||||||
EnvValue(container, "RUN_AS_ROOT").Should().Be("false");
|
|
||||||
EnvValue(container, "ACCESS_TOKEN").Should().BeNull("ACCESS_TOKEN must come from github-runner-token Secret, not a literal");
|
|
||||||
EnvSecretName(container, "ACCESS_TOKEN").Should().Be("github-runner-token");
|
|
||||||
EnvSecretKey(container, "ACCESS_TOKEN").Should().Be("credential");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
[Fact]
|
|
||||||
public void GitHubRunnerFleet_MustSetWritableNonRootDotnetAndCachePaths()
|
|
||||||
{
|
|
||||||
foreach (var deployment in GitHubRunnerDeployments().Values)
|
|
||||||
{
|
|
||||||
var container = deployment.ContainerMappings().Should().ContainSingle().Subject;
|
|
||||||
|
|
||||||
foreach (var expectedEnv in WritableRunnerEnv)
|
|
||||||
{
|
|
||||||
EnvValue(container, expectedEnv.Key).Should().Be(expectedEnv.Value, $"{deployment.Name} must keep .NET paths writable for uid 1001");
|
|
||||||
}
|
|
||||||
|
|
||||||
var mounts = ManifestNodeExtensions.MappingSequence(container, "volumeMounts")
|
|
||||||
.ToDictionary(
|
|
||||||
mount => ManifestNodeExtensions.Scalar(mount, "name") ?? string.Empty,
|
|
||||||
mount => ManifestNodeExtensions.Scalar(mount, "mountPath") ?? string.Empty,
|
|
||||||
StringComparer.Ordinal);
|
|
||||||
|
|
||||||
mounts.Should().Contain("runner-home", "/home/runner");
|
|
||||||
mounts.Should().Contain("nuget-cache", "/home/runner/.nuget/packages");
|
|
||||||
mounts.Should().Contain("tmp", "/tmp");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
[Fact]
|
|
||||||
public void GitHubRunnerFleet_MustAvoidRwoMultiAttachForScaledDeployments()
|
|
||||||
{
|
|
||||||
var deployments = GitHubRunnerDeployments();
|
|
||||||
|
|
||||||
foreach (var deploymentName in ScaledLinuxRunnerDeployments)
|
|
||||||
{
|
|
||||||
var deployment = deployments[deploymentName];
|
|
||||||
ReplicaCount(deployment).Should().Be(2);
|
|
||||||
|
|
||||||
var volumes = deployment.MappingSequence("spec", "template", "spec", "volumes");
|
|
||||||
var claimNames = volumes
|
|
||||||
.Select(volume => ManifestNodeExtensions.Scalar(volume, "persistentVolumeClaim", "claimName"))
|
|
||||||
.Where(value => !string.IsNullOrWhiteSpace(value))
|
|
||||||
.ToList();
|
|
||||||
|
|
||||||
claimNames.Should().BeEmpty($"{deploymentName} is scaled and must not share a RWO PVC");
|
|
||||||
volumes.Should().Contain(volume =>
|
|
||||||
string.Equals(ManifestNodeExtensions.Scalar(volume, "name"), "nuget-cache", StringComparison.Ordinal)
|
|
||||||
&& ManifestNodeExtensions.Mapping(volume, "emptyDir") != null);
|
|
||||||
}
|
|
||||||
|
|
||||||
var common = deployments["github-runner"];
|
|
||||||
ReplicaCount(common).Should().Be(1);
|
|
||||||
common.MappingSequence("spec", "template", "spec", "volumes")
|
|
||||||
.Select(volume => ManifestNodeExtensions.Scalar(volume, "persistentVolumeClaim", "claimName"))
|
|
||||||
.Where(value => !string.IsNullOrWhiteSpace(value))
|
|
||||||
.Should()
|
|
||||||
.ContainSingle()
|
|
||||||
.Which
|
|
||||||
.Should()
|
|
||||||
.Be("github-runner-nuget-cache");
|
|
||||||
}
|
|
||||||
|
|
||||||
[Fact]
|
|
||||||
public void Monitoring_MustAlertWhenLinuxRunnerDeploymentIsUnavailable()
|
|
||||||
{
|
|
||||||
var monitoring = File.ReadAllText(Path.Combine(Inventory.BluejayRoot, "apps", "monitoring", "noc-monitoring.yaml"));
|
|
||||||
|
|
||||||
monitoring.Should().Contain("MacMiniRunnerOffline");
|
|
||||||
monitoring.Should().Contain("LinuxRunnerOffline");
|
|
||||||
monitoring.Should().Contain("kube_deployment_status_replicas_ready");
|
|
||||||
monitoring.Should().Contain("github-runner(|-(sharedpos|puppet|signage|dms|telephony|print-web|chat|mysql|kiosk-linux))");
|
|
||||||
monitoring.Should().Contain("folder: CI Alerts");
|
|
||||||
monitoring.Should().Contain("uid: linux-runner-offline");
|
|
||||||
monitoring.Should().Contain("alert_channel: irc");
|
|
||||||
}
|
|
||||||
|
|
||||||
[Fact]
|
[Fact]
|
||||||
public void StatefulSets_WithVolumeClaimTemplates_MustDeclareFilesystemDefaults()
|
public void StatefulSets_WithVolumeClaimTemplates_MustDeclareFilesystemDefaults()
|
||||||
{
|
{
|
||||||
@@ -443,44 +314,6 @@ public sealed class FleetManifestLintTests
|
|||||||
$"{document.Descriptor} container '{containerName}' still uses {probeKey}.httpGet on /health.",
|
$"{document.Descriptor} container '{containerName}' still uses {probeKey}.httpGet on /health.",
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
private static IReadOnlyDictionary<string, ManifestDocument> GitHubRunnerDeployments()
|
|
||||||
{
|
|
||||||
return Inventory.Documents
|
|
||||||
.Where(document => document.Kind == "Deployment")
|
|
||||||
.Where(document => document.Namespace == "github-runner")
|
|
||||||
.ToDictionary(document => document.Name, StringComparer.Ordinal);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static int ReplicaCount(ManifestDocument document)
|
|
||||||
{
|
|
||||||
return int.TryParse(document.Scalar("spec", "replicas"), out var replicas) ? replicas : 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static string? EnvValue(YamlMappingNode container, string name)
|
|
||||||
{
|
|
||||||
return EnvMapping(container, name) is { } env ? ManifestNodeExtensions.Scalar(env, "value") : null;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static string? EnvSecretName(YamlMappingNode container, string name)
|
|
||||||
{
|
|
||||||
return EnvMapping(container, name) is { } env
|
|
||||||
? ManifestNodeExtensions.Scalar(env, "valueFrom", "secretKeyRef", "name")
|
|
||||||
: null;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static string? EnvSecretKey(YamlMappingNode container, string name)
|
|
||||||
{
|
|
||||||
return EnvMapping(container, name) is { } env
|
|
||||||
? ManifestNodeExtensions.Scalar(env, "valueFrom", "secretKeyRef", "key")
|
|
||||||
: null;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static YamlMappingNode? EnvMapping(YamlMappingNode container, string name)
|
|
||||||
{
|
|
||||||
return ManifestNodeExtensions.MappingSequence(container, "env")
|
|
||||||
.SingleOrDefault(env => string.Equals(ManifestNodeExtensions.Scalar(env, "name"), name, StringComparison.Ordinal));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
internal sealed class ManifestInventory
|
internal sealed class ManifestInventory
|
||||||
|
|||||||
@@ -1,269 +0,0 @@
|
|||||||
using System.Text.Json;
|
|
||||||
using FluentAssertions;
|
|
||||||
using Xunit;
|
|
||||||
|
|
||||||
namespace BluejayInfraLint.Tests;
|
|
||||||
|
|
||||||
[Trait("Category", "Unit")]
|
|
||||||
/// <summary>
/// Static artifact checks for the <c>apps/fc-signage-pi-player</c> directory.
/// </summary>
/// <remarks>
/// These tests never execute the shell scripts or systemd units. Each one reads a
/// file from the repository checkout and asserts that specific text (or, for the
/// Chromium policy, specific JSON values) is present. They guard against accidental
/// removal of those lines; they do not prove the runtime behaviour that the test
/// names describe.
/// </remarks>
public sealed class PiSignagePlayerArtifactTests
{
    // Declaration order matters: AppRoot's initializer reads Root, and static
    // field initializers run in textual order.
    private static readonly string Root = FindRepoRoot();
    private static readonly string AppRoot = Path.Combine(Root, "apps", "fc-signage-pi-player");

    /// <summary>
    /// Paths (relative to the app root, '/'-separated) that must exist in the checkout.
    /// </summary>
    public static TheoryData<string> RequiredArtifacts => new()
    {
        "README.md",
        "systemd/flowercore-signage-player-pi.service",
        "systemd/flowercore-signage-player-pi-hdmi.service",
        "systemd/flowercore-signage-bootstrap.service",
        "systemd/flowercore-signage-renew.service",
        "systemd/flowercore-signage-renew.timer",
        "systemd/flowercore-signage-detect-display.service",
        "systemd/flowercore-signage-detect-display.timer",
        "systemd/99-flowercore-signage-hdmi.rules",
        "chromium-policies/flowercore-signage.json",
        "scripts/flowercore-signage-launch.sh",
        "scripts/flowercore-signage-prelaunch.sh",
        "scripts/flowercore-signage-bootstrap.sh",
        "scripts/flowercore-signage-renew-cert.sh",
        "scripts/flowercore-signage-hdmi-respond.sh",
        "scripts/fc-signage-detect-display",
    };

    /// <summary>Every entry in <see cref="RequiredArtifacts"/> exists on disk.</summary>
    /// <param name="relativePath">Artifact path relative to the app root.</param>
    [Theory]
    [MemberData(nameof(RequiredArtifacts))]
    public void RequiredArtifacts_ArePresent(string relativePath)
    {
        // Resolve through the same helper as Read so both lookups agree on the path.
        var fullPath = ResolvePath(relativePath);

        // Pass the paths as format arguments so a failure names the missing artifact
        // and the location that was probed.
        File.Exists(fullPath).Should().BeTrue(
            "required artifact '{0}' should exist at '{1}'", relativePath, fullPath);
    }

    /// <summary>The player unit declares the expected restart and memory directives.</summary>
    [Fact]
    public void PlayerService_UsesExpectedRestartAndMemoryGuards()
    {
        var unit = Read("systemd/flowercore-signage-player-pi.service");

        unit.Should().Contain("Restart=always");
        unit.Should().Contain("RestartSec=10s");
        unit.Should().Contain("StartLimitBurst=5");
        unit.Should().Contain("StartLimitIntervalSec=300s");
        unit.Should().Contain("MemoryMax=2G");
    }

    /// <summary>The player unit is conditioned on the node file and client certificate.</summary>
    [Fact]
    public void PlayerService_IsGatedByNodeIdentityAndMtlsCertificate()
    {
        var unit = Read("systemd/flowercore-signage-player-pi.service");

        unit.Should().Contain("ConditionPathExists=/etc/flowercore/signage-node.json");
        unit.Should().Contain("ConditionPathExists=/etc/fc-signage-player/client.p12");
        unit.Should().Contain("ExecStartPre=/usr/local/bin/flowercore-signage-prelaunch.sh");
    }

    /// <summary>The launch script references both the embed and the bare player URLs.</summary>
    [Fact]
    public void LaunchScript_TriesEmbedThenFallsBackToBarePlayerRoute()
    {
        var script = Read("scripts/flowercore-signage-launch.sh");

        script.Should().Contain("/player/${NODE_ID}/embed?token=${CERT_THUMB}");
        script.Should().Contain("url-divergence.log");
        script.Should().Contain("/player/${NODE_ID}?token=${CERT_THUMB}");
    }

    /// <summary>The launch script passes the expected Chromium command-line flags.</summary>
    [Fact]
    public void LaunchScript_DisablesChromiumPromptsAndRuntimeUpdates()
    {
        var script = Read("scripts/flowercore-signage-launch.sh");

        script.Should().Contain("--noerrdialogs");
        script.Should().Contain("--disable-infobars");
        script.Should().Contain("--password-store=basic");
        script.Should().Contain("--check-for-update-interval=2592000");
    }

    /// <summary>The prelaunch script loops over the required files and checks certificate expiry.</summary>
    [Fact]
    public void PrelaunchScript_AbortsWhenRequiredFilesAreMissing()
    {
        var script = Read("scripts/flowercore-signage-prelaunch.sh");

        script.Should().Contain("for f in /etc/flowercore/signage-node.json /etc/fc-signage-player/client.p12 /etc/fc-signage-player/client.p12.pass");
        script.Should().Contain("exit 1");
        script.Should().Contain("-checkend $((7*24*3600))");
    }

    /// <summary>The bootstrap script contains its "already enrolled" early-exit markers.</summary>
    [Fact]
    public void BootstrapScript_IsIdempotentWhenAlreadyEnrolled()
    {
        var script = Read("scripts/flowercore-signage-bootstrap.sh");

        script.Should().Contain("already enrolled");
        script.Should().Contain("exit 0");
        script.Should().Contain(".enrolledAt");
    }

    /// <summary>The bootstrap script contains the uuidgen-based machine id derivation.</summary>
    [Fact]
    public void BootstrapScript_GeneratesStableMachineIdFromUuid()
    {
        var script = Read("scripts/flowercore-signage-bootstrap.sh");

        script.Should().Contain("uuidgen");
        script.Should().Contain("cut -c1-16");
        script.Should().Contain("machineId");
    }

    /// <summary>The bootstrap script contains a two-attempt register loop with a pause.</summary>
    [Fact]
    public void BootstrapScript_RetriesRegisterOnceForFirstCallRace()
    {
        var script = Read("scripts/flowercore-signage-bootstrap.sh");

        script.Should().Contain("for attempt in 1 2");
        script.Should().Contain("register attempt $attempt returned");
        script.Should().Contain("sleep 5");
    }

    /// <summary>The bootstrap script references the setup-code flow and its polling constants.</summary>
    [Fact]
    public void BootstrapScript_SupportsSetupCodeAndApprovalPollingBudget()
    {
        var script = Read("scripts/flowercore-signage-bootstrap.sh");

        script.Should().Contain("signage-setup-code");
        script.Should().Contain("approve-via-setup-code");
        script.Should().Contain("+ 1800");
        script.Should().Contain("sleep 15");
    }

    /// <summary>The bootstrap script uses the expected CSR subject string.</summary>
    [Fact]
    public void BootstrapScript_CsrSubjectIdentifiesPiPlayer()
    {
        var script = Read("scripts/flowercore-signage-bootstrap.sh");

        script.Should().Contain("/CN=${NODE_ID}/O=FlowerCore/OU=SignagePlayer-Pi");
    }

    /// <summary>The bootstrap script exports a PKCS#12 bundle and sets file modes.</summary>
    [Fact]
    public void BootstrapScript_PersistsCertificateAsP12WithRestrictivePermissions()
    {
        var script = Read("scripts/flowercore-signage-bootstrap.sh");

        script.Should().Contain("openssl pkcs12 -export");
        script.Should().Contain("client.p12.pass");
        script.Should().Contain("chmod 0600");
        script.Should().Contain("chmod 0640");
    }

    /// <summary>The renew script contains the 30-day expiry check and the renew endpoint.</summary>
    [Fact]
    public void RenewScript_OnlyRunsWhenCertHasLessThanThirtyDays()
    {
        var script = Read("scripts/flowercore-signage-renew-cert.sh");

        script.Should().Contain("-checkend $((30*24*3600))");
        script.Should().Contain("exit 0");
        script.Should().Contain("/renew");
    }

    /// <summary>The renew script moves the ".new" key and bundle over the live files.</summary>
    [Fact]
    public void RenewScript_AtomicallySwapsNewCertificateFiles()
    {
        var script = Read("scripts/flowercore-signage-renew-cert.sh");

        script.Should().Contain("client.key.new");
        script.Should().Contain("mv \"$CERT_DIR/client.key.new\" \"$CERT_DIR/client.key\"");
        script.Should().Contain("mv \"$CERT_DIR/client.p12.new\" \"$CERT_DIR/client.p12\"");
    }

    /// <summary>
    /// The HDMI rules file starts the HDMI responder unit, and the responder script
    /// starts display detection and restarts the player.
    /// </summary>
    [Fact]
    public void HdmiRule_RestartsPlayerAndRunsCapabilityDetection()
    {
        var rule = Read("systemd/99-flowercore-signage-hdmi.rules");
        var responder = Read("scripts/flowercore-signage-hdmi-respond.sh");

        rule.Should().Contain("KERNEL==\"card?-HDMI-A-?\"");
        rule.Should().Contain("start flowercore-signage-player-pi-hdmi.service");
        responder.Should().Contain("sleep 2");
        responder.Should().Contain("start flowercore-signage-detect-display.service");
        responder.Should().Contain("restart flowercore-signage-player-pi.service");
    }

    /// <summary>The detect-display service and timer declare the expected directives.</summary>
    [Fact]
    public void DetectDisplayServiceAndTimer_RunAtBootAndDaily()
    {
        var service = Read("systemd/flowercore-signage-detect-display.service");
        var timer = Read("systemd/flowercore-signage-detect-display.timer");

        service.Should().Contain("ExecStart=/usr/local/bin/fc-signage-detect-display");
        timer.Should().Contain("OnBootSec=30s");
        timer.Should().Contain("OnCalendar=daily");
        timer.Should().Contain("RandomizedDelaySec=1h");
    }

    /// <summary>The detect-display script contains its "no display" profile and message.</summary>
    [Fact]
    public void DetectDisplayScript_EmitsDisconnectedProfileWhenNoHdmiIsPresent()
    {
        var script = Read("scripts/fc-signage-detect-display");

        script.Should().Contain("displayConnected: false");
        script.Should().Contain("No HDMI display detected");
    }

    /// <summary>The detect-display script references edid-decode and the profile fields.</summary>
    [Fact]
    public void DetectDisplayScript_ParsesEdidForHdrResolutionAndAudio()
    {
        var script = Read("scripts/fc-signage-detect-display");

        script.Should().Contain("edid-decode");
        script.Should().Contain("HDR (Static|Dynamic) Metadata Block");
        script.Should().Contain("maxResolution");
        script.Should().Contain("hasAudioOutput");
    }

    /// <summary>The detect-display script references both capability endpoints and the failure message.</summary>
    [Fact]
    public void DetectDisplayScript_TriesBothForwardCompatibleCapabilityEndpoints()
    {
        var script = Read("scripts/fc-signage-detect-display");

        script.Should().Contain("/api/v1/nodes/${NODE_ID}/capabilities");
        script.Should().Contain("/api/v1/displays/${NODE_ID}/capability-profile");
        script.Should().Contain("no endpoint accepted the profile");
    }

    /// <summary>
    /// The Chromium policy file parses as JSON, disables autofill and the password
    /// manager, and lists "*" as the first blocked extension.
    /// </summary>
    [Fact]
    public void ChromiumPolicy_IsValidJsonAndDisablesCredentialPrompts()
    {
        // JsonDocument.Parse throws on malformed JSON, which fails the test.
        using var doc = JsonDocument.Parse(Read("chromium-policies/flowercore-signage.json"));
        var root = doc.RootElement;

        root.GetProperty("AutofillAddressEnabled").GetBoolean().Should().BeFalse();
        root.GetProperty("AutofillCreditCardEnabled").GetBoolean().Should().BeFalse();
        root.GetProperty("PasswordManagerEnabled").GetBoolean().Should().BeFalse();
        root.GetProperty("ExtensionInstallBlocklist")[0].GetString().Should().Be("*");
    }

    /// <summary>The renewal timer declares a daily, persistent schedule with a 2h random delay.</summary>
    [Fact]
    public void RenewalTimer_UsesDailyCadenceWithTwoHourJitter()
    {
        var timer = Read("systemd/flowercore-signage-renew.timer");

        timer.Should().Contain("OnCalendar=daily");
        timer.Should().Contain("RandomizedDelaySec=2h");
        timer.Should().Contain("Persistent=true");
    }

    /// <summary>Reads the full text of an artifact under the app root.</summary>
    /// <param name="relativePath">'/'-separated path relative to the app root.</param>
    /// <returns>The file contents.</returns>
    private static string Read(string relativePath)
        => File.ReadAllText(ResolvePath(relativePath));

    /// <summary>
    /// Maps a '/'-separated relative artifact path to a path under the app root,
    /// using the platform directory separator.
    /// </summary>
    /// <param name="relativePath">'/'-separated path relative to the app root.</param>
    /// <returns>The combined path under the app root.</returns>
    private static string ResolvePath(string relativePath)
        => Path.Combine(AppRoot, relativePath.Replace('/', Path.DirectorySeparatorChar));

    /// <summary>
    /// Walks upward from the test binary's base directory until it finds a directory
    /// that contains both an <c>apps</c> folder and a <c>README.md</c> file.
    /// </summary>
    /// <returns>The full path of the first matching ancestor directory.</returns>
    /// <exception cref="DirectoryNotFoundException">
    /// No ancestor of the base directory matches.
    /// </exception>
    private static string FindRepoRoot()
    {
        var current = new DirectoryInfo(AppContext.BaseDirectory);
        while (current is not null)
        {
            if (Directory.Exists(Path.Combine(current.FullName, "apps"))
                && File.Exists(Path.Combine(current.FullName, "README.md")))
            {
                return current.FullName;
            }

            current = current.Parent;
        }

        throw new DirectoryNotFoundException("Could not find bluejay-infra root.");
    }
}
|
|
||||||
Reference in New Issue
Block a user