Compare commits

..

1 Commits

Author SHA1 Message Date
Andrew Stoltz
7256cfe38e feat(github-runner): harden Linux runner fleet 2026-05-17 16:27:41 -05:00
6 changed files with 5 additions and 432 deletions

View File

@@ -1,2 +1,3 @@
# Settle DRM for 2s before restarting Chromium, then redeclare capabilities.
SUBSYSTEM=="drm", KERNEL=="card?-HDMI-A-?", ACTION=="change", RUN+="/usr/bin/systemctl start flowercore-signage-player-pi-hdmi.service"
# Restart kiosk and redeclare capabilities when HDMI connect/disconnect changes DRM state.
SUBSYSTEM=="drm", KERNEL=="card?-HDMI-A-?", ACTION=="change", RUN+="/usr/bin/systemctl restart flowercore-signage-player-pi.service"
SUBSYSTEM=="drm", KERNEL=="card?-HDMI-A-?", ACTION=="change", RUN+="/usr/bin/systemctl start flowercore-signage-detect-display.service"

View File

@@ -1,22 +0,0 @@
#!/usr/bin/env bats
setup() {
APP_ROOT="$(cd "$BATS_TEST_DIRNAME/.." && pwd)"
DETECT="$APP_ROOT/scripts/fc-signage-detect-display"
}
@test "display detection emits graceful disconnected profile when no hdmi connector is present" {
script="$(cat "$DETECT")"
[[ "$script" == *"displayConnected: false"* ]]
[[ "$script" == *"No HDMI display detected"* ]]
}
@test "display detection parses edid, falls back to kmsprint, and logs endpoint failures locally" {
script="$(cat "$DETECT")"
[[ "$script" == *"edid-decode"* ]]
[[ "$script" == *"HDR (Static|Dynamic) Metadata Block"* ]]
[[ "$script" == *"kmsprint"* ]]
[[ "$script" == *"/api/v1/nodes/\${NODE_ID}/capabilities"* ]]
[[ "$script" == *"/api/v1/displays/\${NODE_ID}/capability-profile"* ]]
[[ "$script" == *"capabilities.log"* ]]
}

View File

@@ -1,64 +0,0 @@
#!/usr/bin/env bats
setup() {
APP_ROOT="$(cd "$BATS_TEST_DIRNAME/.." && pwd)"
BOOTSTRAP="$APP_ROOT/scripts/flowercore-signage-bootstrap.sh"
RENEW="$APP_ROOT/scripts/flowercore-signage-renew-cert.sh"
}
@test "bootstrap is idempotent when node is already enrolled" {
script="$(cat "$BOOTSTRAP")"
[[ "$script" == *'[[ -s "$NODE_JSON" && -s "$CERT_DIR/client.p12" ]]'* ]]
[[ "$script" == *"already enrolled"* ]]
[[ "$script" == *"exit 0"* ]]
}
@test "bootstrap generates a stable node uuid and machine id" {
script="$(cat "$BOOTSTRAP")"
[[ "$script" == *"uuidgen"* ]]
[[ "$script" == *"nodeUuid"* ]]
[[ "$script" == *"machineId"* ]]
[[ "$script" == *"cut -c1-16"* ]]
}
@test "bootstrap posts to the canonical register endpoint" {
grep -q '/api/v1/nodes/register' "$BOOTSTRAP"
grep -q '"linux-arm64-pi"' "$BOOTSTRAP"
}
@test "bootstrap retries registration once for first-call races" {
script="$(cat "$BOOTSTRAP")"
[[ "$script" == *"for attempt in 1 2"* ]]
[[ "$script" == *"register attempt \$attempt returned"* ]]
[[ "$script" == *"sleep 5"* ]]
}
@test "bootstrap supports setup-code approval with manual polling fallback" {
script="$(cat "$BOOTSTRAP")"
[[ "$script" == *"signage-setup-code"* ]]
[[ "$script" == *"approve-via-setup-code"* ]]
[[ "$script" == *"+ 1800"* ]]
[[ "$script" == *"sleep 15"* ]]
}
@test "bootstrap generates an ecdsa p256 csr for the signage pi subject" {
script="$(cat "$BOOTSTRAP")"
[[ "$script" == *"ecparam -genkey -name prime256v1"* ]]
[[ "$script" == *'/CN=${NODE_ID}/O=FlowerCore/OU=SignagePlayer-Pi'* ]]
}
@test "bootstrap writes pkcs12 bundle with restrictive permissions" {
script="$(cat "$BOOTSTRAP")"
[[ "$script" == *"openssl pkcs12 -export"* ]]
[[ "$script" == *"client.p12.pass"* ]]
[[ "$script" == *"chmod 0640"* ]]
[[ "$script" == *"chmod 0600"* ]]
}
@test "renewal only calls renew endpoint inside the thirty-day window and swaps atomically" {
script="$(cat "$RENEW")"
[[ "$script" == *'-checkend $((30*24*3600))'* ]]
[[ "$script" == *"/api/v1/nodes/\${NODE_ID}/renew"* ]]
[[ "$script" == *"client.key.new"* ]]
[[ "$script" == *'mv "$CERT_DIR/client.p12.new" "$CERT_DIR/client.p12"'* ]]
}

View File

@@ -1,68 +0,0 @@
#!/usr/bin/env bats
setup() {
APP_ROOT="$(cd "$BATS_TEST_DIRNAME/.." && pwd)"
}
@test "player unit exists" {
[ -f "$APP_ROOT/systemd/flowercore-signage-player-pi.service" ]
}
@test "player unit uses simple chromium service with restart backoff" {
unit="$(cat "$APP_ROOT/systemd/flowercore-signage-player-pi.service")"
[[ "$unit" == *"Type=simple"* ]]
[[ "$unit" == *"Restart=always"* ]]
[[ "$unit" == *"RestartSec=10s"* ]]
[[ "$unit" == *"StartLimitBurst=5"* ]]
[[ "$unit" == *"StartLimitIntervalSec=300s"* ]]
}
@test "player unit caps chromium memory at two gigabytes" {
grep -q '^MemoryMax=2G$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
grep -q '^MemoryHigh=1500M$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
}
@test "player unit condition-gates startup on identity and p12 certificate" {
grep -q '^ConditionPathExists=/etc/flowercore/signage-node.json$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
grep -q '^ConditionPathExists=/etc/fc-signage-player/client.p12$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
}
@test "player unit runs prelaunch checks before chromium" {
grep -q '^ExecStartPre=/usr/local/bin/flowercore-signage-prelaunch.sh$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
grep -q '^ExecStart=/usr/local/bin/flowercore-signage-launch.sh$' "$APP_ROOT/systemd/flowercore-signage-player-pi.service"
}
@test "hdmi udev rule routes through the two-second settle service" {
rule="$(cat "$APP_ROOT/systemd/99-flowercore-signage-hdmi.rules")"
[[ "$rule" == *'KERNEL=="card?-HDMI-A-?"'* ]]
[[ "$rule" == *"systemctl start flowercore-signage-player-pi-hdmi.service"* ]]
[[ "$rule" != *"systemctl restart flowercore-signage-player-pi.service"* ]]
}
@test "hdmi responder settles, declares display, then restarts chromium" {
responder="$(cat "$APP_ROOT/scripts/flowercore-signage-hdmi-respond.sh")"
[[ "$responder" == *"sleep 2"* ]]
[[ "$responder" == *"systemctl start flowercore-signage-detect-display.service"* ]]
[[ "$responder" == *"systemctl restart flowercore-signage-player-pi.service"* ]]
}
@test "chromium policy json is valid and disables credential prompts" {
command -v jq >/dev/null || skip "jq not installed"
jq -e '.AutofillAddressEnabled == false and .AutofillCreditCardEnabled == false and .PasswordManagerEnabled == false' \
"$APP_ROOT/chromium-policies/flowercore-signage.json" >/dev/null
}
@test "launch script tries embed URL and logs bare-player fallback" {
launch="$(cat "$APP_ROOT/scripts/flowercore-signage-launch.sh")"
[[ "$launch" == *'/player/${NODE_ID}/embed?token=${CERT_THUMB}'* ]]
[[ "$launch" == *"url-divergence.log"* ]]
[[ "$launch" == *'/player/${NODE_ID}?token=${CERT_THUMB}'* ]]
}
@test "prelaunch script validates required node and cert files" {
prelaunch="$(cat "$APP_ROOT/scripts/flowercore-signage-prelaunch.sh")"
[[ "$prelaunch" == *"/etc/flowercore/signage-node.json"* ]]
[[ "$prelaunch" == *"/etc/fc-signage-player/client.p12"* ]]
[[ "$prelaunch" == *"/etc/fc-signage-player/client.p12.pass"* ]]
[[ "$prelaunch" == *"exit 1"* ]]
}

View File

@@ -137,32 +137,6 @@ spec:
runAsUser: 1001
runAsGroup: 1001
fsGroup: 1001
# Sprint 30 Cl-1 pod-env fix (2026-05-21): pre-create + chown
# /home/runner/.dotnet + /home/runner/.nuget so the non-root runner
# (UID 1001) can host setup-dotnet@v4 + dotnet restore writes without
# the per-workflow DOTNET_INSTALL_DIR patch ~25 flipped Linux repos
# have been carrying. Runs as root so chown succeeds; the main
# container then runs as 1001 against an emptyDir mounted at
# /home/runner. The PVC mount at /home/runner/.nuget/packages
# (Common runner) still wins at its nested path because Kubernetes
# honors the deeper mount.
initContainers:
- name: setup-runner-home
image: busybox:1.36
command:
- sh
- -c
- |
set -e
mkdir -p /home/runner/.dotnet /home/runner/.nuget/packages /home/runner/.nuget/NuGet
chown -R 1001:1001 /home/runner/.dotnet /home/runner/.nuget
chmod -R 755 /home/runner/.dotnet /home/runner/.nuget
securityContext:
runAsUser: 0
runAsNonRoot: false
volumeMounts:
- name: runner-home
mountPath: /home/runner
containers:
- name: runner
image: myoung34/github-runner:latest
@@ -214,22 +188,6 @@ spec:
# setup steps (cache prewarm + apt updates — both already baked).
- name: RUN_AS_ROOT
value: "false"
# Sprint 30 Cl-1 pod-env fix (2026-05-21): retire the per-workflow
# DOTNET_INSTALL_DIR patch by setting it (+ siblings) here so ALL
# flipped Linux repos pick it up automatically. setup-dotnet@v4
# default writes to /usr/share/dotnet (root-only) or HOME-relative
# ~/.dotnet without HOME guard; pin both explicitly to the chowned
# emptyDir at /home/runner.
- name: DOTNET_INSTALL_DIR
value: "/home/runner/.dotnet"
- name: DOTNET_CLI_TELEMETRY_OPTOUT
value: "1"
- name: NUGET_PACKAGES
value: "/home/runner/.nuget/packages"
- name: DOTNET_NOLOGO
value: "1"
- name: DOTNET_GENERATE_ASPNET_CERTIFICATE
value: "false"
resources:
requests:
cpu: "500m"
@@ -238,12 +196,6 @@ spec:
cpu: "2000m"
memory: "4Gi"
volumeMounts:
# /home/runner emptyDir — owned by the non-root runner thanks
# to the setup-runner-home initContainer chown. Hosts .dotnet
# (setup-dotnet@v4 target) and provides a writable HOME without
# forcing a PVC. The PVC mount below at .nuget/packages wins
# at that nested path (deeper mount overrides), so the Common
# NuGet cache continues to persist across ephemeral pod restarts.
- name: runner-home
mountPath: /home/runner
- name: nuget-cache
@@ -294,9 +246,6 @@ metadata:
flowercore.io/runner-repo: sharedpos
flowercore.io/github-repo: FlowerCore.Shared.Pos
spec:
# UN-PARKED 2026-05-21: Shared.Pos #5 fixed the non-root setup-dotnet path
# (DOTNET_INSTALL_DIR step-scoped). Sprint 30 Cl-8 capacity Q-CI-52: raised
# to replicas: 2 to absorb top-8 burst load per substrate-recommended default.
replicas: 2
selector:
matchLabels:
@@ -319,27 +268,6 @@ spec:
runAsUser: 1001
runAsGroup: 1001
fsGroup: 1001
# Sprint 30 Cl-1 pod-env fix (2026-05-21): see github-runner Deployment
# above for full rationale. Mirrored on the Shared.Pos runner so the
# per-workflow DOTNET_INSTALL_DIR patch can be retired fleet-wide
# rather than re-applied per repo as flipped lanes land.
initContainers:
- name: setup-runner-home
image: busybox:1.36
command:
- sh
- -c
- |
set -e
mkdir -p /home/runner/.dotnet /home/runner/.nuget/packages /home/runner/.nuget/NuGet
chown -R 1001:1001 /home/runner/.dotnet /home/runner/.nuget
chmod -R 755 /home/runner/.dotnet /home/runner/.nuget
securityContext:
runAsUser: 0
runAsNonRoot: false
volumeMounts:
- name: runner-home
mountPath: /home/runner
containers:
- name: runner
image: myoung34/github-runner:latest
@@ -374,17 +302,6 @@ spec:
key: credential
- name: RUN_AS_ROOT
value: "false"
# Sprint 30 Cl-1 pod-env fix (2026-05-21): retire per-workflow patch.
- name: DOTNET_INSTALL_DIR
value: "/home/runner/.dotnet"
- name: DOTNET_CLI_TELEMETRY_OPTOUT
value: "1"
- name: NUGET_PACKAGES
value: "/home/runner/.nuget/packages"
- name: DOTNET_NOLOGO
value: "1"
- name: DOTNET_GENERATE_ASPNET_CERTIFICATE
value: "false"
resources:
requests:
cpu: "500m"
@@ -393,10 +310,6 @@ spec:
cpu: "2000m"
memory: "4Gi"
volumeMounts:
# Shared.Pos runner uses emptyDir for nuget cache (no node pin
# via RWO PVC). /home/runner emptyDir hosts .dotnet via the
# setup-runner-home initContainer chown; the .nuget/packages
# emptyDir mount still wins at its nested path.
- name: runner-home
mountPath: /home/runner
- name: nuget-cache
@@ -460,23 +373,6 @@ spec:
runAsUser: 1001
runAsGroup: 1001
fsGroup: 1001
initContainers:
- name: setup-runner-home
image: busybox:1.36
command:
- sh
- -c
- |
set -e
mkdir -p /home/runner/.dotnet /home/runner/.nuget/packages /home/runner/.nuget/NuGet
chown -R 1001:1001 /home/runner/.dotnet /home/runner/.nuget
chmod -R 755 /home/runner/.dotnet /home/runner/.nuget
securityContext:
runAsUser: 0
runAsNonRoot: false
volumeMounts:
- name: runner-home
mountPath: /home/runner
containers:
- name: runner
image: myoung34/github-runner:latest
@@ -496,12 +392,6 @@ spec:
value: "/home/runner"
- name: DOTNET_INSTALL_DIR
value: "/home/runner/.dotnet"
- name: DOTNET_CLI_TELEMETRY_OPTOUT
value: "1"
- name: DOTNET_NOLOGO
value: "1"
- name: DOTNET_GENERATE_ASPNET_CERTIFICATE
value: "false"
- name: DOTNET_CLI_HOME
value: "/home/runner"
- name: NUGET_PACKAGES
@@ -588,23 +478,6 @@ spec:
runAsUser: 1001
runAsGroup: 1001
fsGroup: 1001
initContainers:
- name: setup-runner-home
image: busybox:1.36
command:
- sh
- -c
- |
set -e
mkdir -p /home/runner/.dotnet /home/runner/.nuget/packages /home/runner/.nuget/NuGet
chown -R 1001:1001 /home/runner/.dotnet /home/runner/.nuget
chmod -R 755 /home/runner/.dotnet /home/runner/.nuget
securityContext:
runAsUser: 0
runAsNonRoot: false
volumeMounts:
- name: runner-home
mountPath: /home/runner
containers:
- name: runner
image: myoung34/github-runner:latest
@@ -624,12 +497,6 @@ spec:
value: "/home/runner"
- name: DOTNET_INSTALL_DIR
value: "/home/runner/.dotnet"
- name: DOTNET_CLI_TELEMETRY_OPTOUT
value: "1"
- name: DOTNET_NOLOGO
value: "1"
- name: DOTNET_GENERATE_ASPNET_CERTIFICATE
value: "false"
- name: DOTNET_CLI_HOME
value: "/home/runner"
- name: NUGET_PACKAGES
@@ -716,23 +583,6 @@ spec:
runAsUser: 1001
runAsGroup: 1001
fsGroup: 1001
initContainers:
- name: setup-runner-home
image: busybox:1.36
command:
- sh
- -c
- |
set -e
mkdir -p /home/runner/.dotnet /home/runner/.nuget/packages /home/runner/.nuget/NuGet
chown -R 1001:1001 /home/runner/.dotnet /home/runner/.nuget
chmod -R 755 /home/runner/.dotnet /home/runner/.nuget
securityContext:
runAsUser: 0
runAsNonRoot: false
volumeMounts:
- name: runner-home
mountPath: /home/runner
containers:
- name: runner
image: myoung34/github-runner:latest
@@ -752,12 +602,6 @@ spec:
value: "/home/runner"
- name: DOTNET_INSTALL_DIR
value: "/home/runner/.dotnet"
- name: DOTNET_CLI_TELEMETRY_OPTOUT
value: "1"
- name: DOTNET_NOLOGO
value: "1"
- name: DOTNET_GENERATE_ASPNET_CERTIFICATE
value: "false"
- name: DOTNET_CLI_HOME
value: "/home/runner"
- name: NUGET_PACKAGES
@@ -844,23 +688,6 @@ spec:
runAsUser: 1001
runAsGroup: 1001
fsGroup: 1001
initContainers:
- name: setup-runner-home
image: busybox:1.36
command:
- sh
- -c
- |
set -e
mkdir -p /home/runner/.dotnet /home/runner/.nuget/packages /home/runner/.nuget/NuGet
chown -R 1001:1001 /home/runner/.dotnet /home/runner/.nuget
chmod -R 755 /home/runner/.dotnet /home/runner/.nuget
securityContext:
runAsUser: 0
runAsNonRoot: false
volumeMounts:
- name: runner-home
mountPath: /home/runner
containers:
- name: runner
image: myoung34/github-runner:latest
@@ -880,12 +707,6 @@ spec:
value: "/home/runner"
- name: DOTNET_INSTALL_DIR
value: "/home/runner/.dotnet"
- name: DOTNET_CLI_TELEMETRY_OPTOUT
value: "1"
- name: DOTNET_NOLOGO
value: "1"
- name: DOTNET_GENERATE_ASPNET_CERTIFICATE
value: "false"
- name: DOTNET_CLI_HOME
value: "/home/runner"
- name: NUGET_PACKAGES
@@ -972,23 +793,6 @@ spec:
runAsUser: 1001
runAsGroup: 1001
fsGroup: 1001
initContainers:
- name: setup-runner-home
image: busybox:1.36
command:
- sh
- -c
- |
set -e
mkdir -p /home/runner/.dotnet /home/runner/.nuget/packages /home/runner/.nuget/NuGet
chown -R 1001:1001 /home/runner/.dotnet /home/runner/.nuget
chmod -R 755 /home/runner/.dotnet /home/runner/.nuget
securityContext:
runAsUser: 0
runAsNonRoot: false
volumeMounts:
- name: runner-home
mountPath: /home/runner
containers:
- name: runner
image: myoung34/github-runner:latest
@@ -1008,12 +812,6 @@ spec:
value: "/home/runner"
- name: DOTNET_INSTALL_DIR
value: "/home/runner/.dotnet"
- name: DOTNET_CLI_TELEMETRY_OPTOUT
value: "1"
- name: DOTNET_NOLOGO
value: "1"
- name: DOTNET_GENERATE_ASPNET_CERTIFICATE
value: "false"
- name: DOTNET_CLI_HOME
value: "/home/runner"
- name: NUGET_PACKAGES
@@ -1100,23 +898,6 @@ spec:
runAsUser: 1001
runAsGroup: 1001
fsGroup: 1001
initContainers:
- name: setup-runner-home
image: busybox:1.36
command:
- sh
- -c
- |
set -e
mkdir -p /home/runner/.dotnet /home/runner/.nuget/packages /home/runner/.nuget/NuGet
chown -R 1001:1001 /home/runner/.dotnet /home/runner/.nuget
chmod -R 755 /home/runner/.dotnet /home/runner/.nuget
securityContext:
runAsUser: 0
runAsNonRoot: false
volumeMounts:
- name: runner-home
mountPath: /home/runner
containers:
- name: runner
image: myoung34/github-runner:latest
@@ -1136,12 +917,6 @@ spec:
value: "/home/runner"
- name: DOTNET_INSTALL_DIR
value: "/home/runner/.dotnet"
- name: DOTNET_CLI_TELEMETRY_OPTOUT
value: "1"
- name: DOTNET_NOLOGO
value: "1"
- name: DOTNET_GENERATE_ASPNET_CERTIFICATE
value: "false"
- name: DOTNET_CLI_HOME
value: "/home/runner"
- name: NUGET_PACKAGES
@@ -1228,23 +1003,6 @@ spec:
runAsUser: 1001
runAsGroup: 1001
fsGroup: 1001
initContainers:
- name: setup-runner-home
image: busybox:1.36
command:
- sh
- -c
- |
set -e
mkdir -p /home/runner/.dotnet /home/runner/.nuget/packages /home/runner/.nuget/NuGet
chown -R 1001:1001 /home/runner/.dotnet /home/runner/.nuget
chmod -R 755 /home/runner/.dotnet /home/runner/.nuget
securityContext:
runAsUser: 0
runAsNonRoot: false
volumeMounts:
- name: runner-home
mountPath: /home/runner
containers:
- name: runner
image: myoung34/github-runner:latest
@@ -1264,12 +1022,6 @@ spec:
value: "/home/runner"
- name: DOTNET_INSTALL_DIR
value: "/home/runner/.dotnet"
- name: DOTNET_CLI_TELEMETRY_OPTOUT
value: "1"
- name: DOTNET_NOLOGO
value: "1"
- name: DOTNET_GENERATE_ASPNET_CERTIFICATE
value: "false"
- name: DOTNET_CLI_HOME
value: "/home/runner"
- name: NUGET_PACKAGES
@@ -1356,23 +1108,6 @@ spec:
runAsUser: 1001
runAsGroup: 1001
fsGroup: 1001
initContainers:
- name: setup-runner-home
image: busybox:1.36
command:
- sh
- -c
- |
set -e
mkdir -p /home/runner/.dotnet /home/runner/.nuget/packages /home/runner/.nuget/NuGet
chown -R 1001:1001 /home/runner/.dotnet /home/runner/.nuget
chmod -R 755 /home/runner/.dotnet /home/runner/.nuget
securityContext:
runAsUser: 0
runAsNonRoot: false
volumeMounts:
- name: runner-home
mountPath: /home/runner
containers:
- name: runner
image: myoung34/github-runner:latest
@@ -1392,12 +1127,6 @@ spec:
value: "/home/runner"
- name: DOTNET_INSTALL_DIR
value: "/home/runner/.dotnet"
- name: DOTNET_CLI_TELEMETRY_OPTOUT
value: "1"
- name: DOTNET_NOLOGO
value: "1"
- name: DOTNET_GENERATE_ASPNET_CERTIFICATE
value: "false"
- name: DOTNET_CLI_HOME
value: "/home/runner"
- name: NUGET_PACKAGES

View File

@@ -174,13 +174,10 @@ public sealed class PiSignagePlayerArtifactTests
public void HdmiRule_RestartsPlayerAndRunsCapabilityDetection()
{
var rule = Read("systemd/99-flowercore-signage-hdmi.rules");
var responder = Read("scripts/flowercore-signage-hdmi-respond.sh");
rule.Should().Contain("KERNEL==\"card?-HDMI-A-?\"");
rule.Should().Contain("start flowercore-signage-player-pi-hdmi.service");
responder.Should().Contain("sleep 2");
responder.Should().Contain("start flowercore-signage-detect-display.service");
responder.Should().Contain("restart flowercore-signage-player-pi.service");
rule.Should().Contain("restart flowercore-signage-player-pi.service");
rule.Should().Contain("start flowercore-signage-detect-display.service");
}
[Fact]