# GitHub Actions self-hosted Linux runner — Phase 2 K8s deployment
#
# Phase 1 (current): BLUEJAY-WS registered manually as a Windows runner
# with label "fc-build-windows" via config.cmd (see docs/infrastructure/
# self-hosted-runner-fleet.md §WPF Build Runner).
#
# Phase 2 (this file): ephemeral Linux runner in RKE2 for non-WPF builds
# (Blazor Server, class libraries, operators, integration tests). Reduces
# billing for ubuntu-24.04 jobs that run on GitHub-hosted runners today.
#
# Runner image: myoung34/github-runner:latest
#   EPHEMERAL=true — each runner runs exactly one job then exits; the
#   kubelet immediately restarts the container (restartPolicy: Always)
#   and it re-registers. Prevents job queue starvation when two jobs
#   overlap.
#
# NuGet cache: 5Gi Longhorn RWO PVC mounted at /home/runner/.nuget/packages
#   Persists NuGet packages across ephemeral runner restarts (not shared
#   across simultaneous runner pods; single-replica constraint below).
#
# Credentials:
#   OnePasswordItem "GitHub PAT (Runner Registration)" → Secret
#   github-runner-token with field "credential", used as ACCESS_TOKEN.
#   Operator must create/rotate the 1P item manually. Pre-minted
#   registration tokens expire after 1h, so store a fine-grained PAT with
#   Administration:read/write scope on the target repos (the runner image
#   mints its own registration token from it), or use a re-registration
#   script. See docs/infrastructure/self-hosted-runner-fleet.md §Security.
#
# Security model:
#   - No ClusterRole / ClusterRoleBinding — runner has no K8s API access.
#   - securityContext: runAsNonRoot with read-only root filesystem where
#     possible (runner image needs /tmp and /home/runner writable).
#     NOTE(review): readOnlyRootFilesystem does not appear to be set on
#     the runner containers yet — confirm.
#   - Fork pull-request approval required on the GitHub repo settings.
#   - RUNNER_ALLOW_RUNASROOT=false is the default.
#
# Cost: Phase 2 eliminates GitHub-hosted ubuntu-24.04 billing; break-even
# vs electricity is ~1 000 min/month at current TOU rates.
#
# Node placement: rke2-server (10.0.56.11) only — Longhorn RWO PVC must
# land on the same node as the volume, and the server node has the most
# spare capacity for burst CI workloads.
#
# Designs: docs/infrastructure/self-hosted-runner-fleet.md
# Questions: Q-CI-1..5 (all Recommended defaults)
---
# Namespace that holds every runner resource defined in this file.
apiVersion: v1
kind: Namespace
metadata:
  name: github-runner
  labels:
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
---
# 1Password secret sync — creates github-runner-token K8s Secret.
# Fields expected in the 1Password item:
#   credential — GitHub fine-grained PAT (Administration:read/write on
#                target repos) used by the runner image to mint a fresh
#                short-lived registration token at pod start.
# Item path: IAmWorkin vault > "GitHub PAT (Runner Registration)"
# Operator MUST create this item before the Deployment will start cleanly.
apiVersion: onepassword.com/v1
kind: OnePasswordItem
metadata:
  name: github-runner-token
  namespace: github-runner
  labels:
    app.kubernetes.io/component: credentials
    app.kubernetes.io/part-of: flowercore
spec:
  # Quoted: the item name contains spaces and parentheses.
  itemPath: "vaults/IAmWorkin/items/GitHub PAT (Runner Registration)"
---
# NuGet package cache claimed by the runner that mounts it at
# /home/runner/.nuget/packages. ReadWriteOnce: one node at a time.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: github-runner-nuget-cache
  namespace: github-runner
  labels:
    app.kubernetes.io/component: cache
    app.kubernetes.io/part-of: flowercore
spec:
  accessModes:
    - ReadWriteOnce
  storageClassName: longhorn
  resources:
    requests:
      storage: 5Gi
  volumeMode: Filesystem
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: github-runner
  namespace: github-runner
  labels:
    app.kubernetes.io/component: runner
    app.kubernetes.io/part-of: flowercore
# No ClusterRole or ClusterRoleBinding — runner has zero K8s API privileges.
# CI jobs that need kubectl must supply their own kubeconfig via a secret
# injected at the job level, not via this service account.
#
# Do not project this account's API token into pods by default: runner pods
# execute CI job code and have no use for an in-cluster credential. A pod
# can still opt back in explicitly via its own automountServiceAccountToken.
automountServiceAccountToken: false
---
# Common repo-scoped Linux runner: a single ephemeral runner pod pinned to
# rke2-server, with a persistent NuGet cache on a Longhorn RWO PVC.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: github-runner
  namespace: github-runner
  labels:
    app.kubernetes.io/name: github-runner
    app.kubernetes.io/component: runner
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/created-by: argocd
spec:
  # Single replica enforced: the Longhorn RWO PVC can only be mounted by
  # one pod at a time. Each pod re-registers as an ephemeral runner after
  # completing a job (EPHEMERAL=true restarts the container, not the pod,
  # so the PVC stays attached between jobs).
  #
  # 2026-05-16: bumped 0 -> 1 after operator provisioned the
  # "GitHub PAT (Runner Registration)" 1P item with field=credential.
  # Unblocks CI fleet-wide (was budget-exhausted on GH-hosted runners).
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: github-runner
  # Use Recreate to avoid the Multi-Attach RWO error during rollouts.
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app.kubernetes.io/name: github-runner
        app.kubernetes.io/component: runner
        app.kubernetes.io/part-of: flowercore
        flowercore.io/created-by: argocd
    spec:
      serviceAccountName: github-runner
      # The runner has no K8s API privileges and never calls the API, so do
      # not mount the ServiceAccount token into a pod that runs CI job code.
      automountServiceAccountToken: false
      # Pin to rke2-server so the Longhorn RWO volume is always on the same node.
      nodeSelector:
        kubernetes.io/hostname: rke2-server
      securityContext:
        runAsNonRoot: true
        runAsUser: 1001
        runAsGroup: 1001
        fsGroup: 1001
      # Sprint 30 Cl-1 pod-env fix (2026-05-21): pre-create + chown
      # /home/runner/.dotnet + /home/runner/.nuget so the non-root runner
      # (UID 1001) can host setup-dotnet@v4 + dotnet restore writes without
      # the per-workflow DOTNET_INSTALL_DIR patch ~25 flipped Linux repos
      # have been carrying. Runs as root so chown succeeds; the main
      # container then runs as 1001 against an emptyDir mounted at
      # /home/runner. The PVC mount at /home/runner/.nuget/packages
      # (Common runner) still wins at its nested path because Kubernetes
      # honors the deeper mount.
      initContainers:
        - name: setup-runner-home
          image: busybox:1.36
          command:
            - sh
            - -c
            - |
              set -e
              mkdir -p /home/runner/.dotnet /home/runner/.nuget/packages /home/runner/.nuget/NuGet
              chown -R 1001:1001 /home/runner/.dotnet /home/runner/.nuget
              chmod -R 755 /home/runner/.dotnet /home/runner/.nuget
          # Container-level override of the pod-wide non-root policy; only
          # this short-lived setup step runs as root.
          securityContext:
            runAsUser: 0
            runAsNonRoot: false
          volumeMounts:
            - name: runner-home
              mountPath: /home/runner
      containers:
        - name: runner
          # NOTE(review): ":latest" + Always means every container restart
          # may pull a different runner build — consider pinning a tag or
          # digest once a known-good version is chosen.
          image: myoung34/github-runner:latest
          imagePullPolicy: Always
          env:
            # GitHub org/repo targeting.
            # Set REPO_URL for a repo-scoped runner (cheaper, simpler).
            # Switch to ORG_NAME + empty REPO_URL for an org-scoped runner.
            - name: REPO_URL
              value: "https://github.com/astoltz/FlowerCore.Common"
            - name: RUNNER_NAME_PREFIX
              value: "rke2-linux"
            - name: RUNNER_WORKDIR
              value: "/tmp/runner/work"
            # EPHEMERAL=true: runner deregisters after one job; container
            # exits with code 0; the kubelet restarts it (restartPolicy:
            # Always) and a fresh registration occurs. Prevents stale
            # runner accumulation.
            - name: EPHEMERAL
              value: "true"
            # Labels used by workflow files: runs-on: [self-hosted, linux, fc-build-linux]
            - name: LABELS
              value: "self-hosted,linux,fc-build-linux"
            # PAT (not pre-minted registration token) — myoung34/github-runner
            # mints registration tokens itself via GitHub API when ACCESS_TOKEN
            # is set. Switched from RUNNER_TOKEN -> ACCESS_TOKEN on 2026-05-16
            # because the 1P "GitHub PAT (Runner Registration)" item stores a
            # fine-grained PAT, not a short-lived registration token.
            - name: ACCESS_TOKEN
              valueFrom:
                secretKeyRef:
                  name: github-runner-token
                  key: credential
            # myoung34/github-runner default entrypoint expects root for some
            # setup steps. With securityContext.runAsUser=1001 the entrypoint
            # short-circuits with "RUN_AS_ROOT env var is set to true but the
            # user has been overridden and is not running as root". Tell the
            # entrypoint we're explicitly NOT root so it skips the root-only
            # setup steps (cache prewarm + apt updates — both already baked).
            - name: RUN_AS_ROOT
              value: "false"
            # Sprint 30 Cl-1 pod-env fix (2026-05-21): retire the per-workflow
            # DOTNET_INSTALL_DIR patch by setting it (+ siblings) here so ALL
            # flipped Linux repos pick it up automatically. setup-dotnet@v4
            # default writes to /usr/share/dotnet (root-only) or HOME-relative
            # ~/.dotnet without HOME guard; pin both explicitly to the chowned
            # emptyDir at /home/runner.
            - name: DOTNET_INSTALL_DIR
              value: "/home/runner/.dotnet"
            - name: DOTNET_CLI_TELEMETRY_OPTOUT
              value: "1"
            - name: NUGET_PACKAGES
              value: "/home/runner/.nuget/packages"
            - name: DOTNET_NOLOGO
              value: "1"
            - name: DOTNET_GENERATE_ASPNET_CERTIFICATE
              value: "false"
          resources:
            requests:
              cpu: "500m"
              memory: "1Gi"
            limits:
              cpu: "2000m"
              memory: "4Gi"
          volumeMounts:
            # /home/runner emptyDir — owned by the non-root runner thanks
            # to the setup-runner-home initContainer chown. Hosts .dotnet
            # (setup-dotnet@v4 target) and provides a writable HOME without
            # forcing a PVC. The PVC mount below at .nuget/packages wins
            # at that nested path (deeper mount overrides), so the Common
            # NuGet cache continues to persist across ephemeral pod restarts.
            - name: runner-home
              mountPath: /home/runner
            - name: nuget-cache
              mountPath: /home/runner/.nuget/packages
            - name: tmp
              mountPath: /tmp
          # Liveness: runner process is alive. The "[R]" bracket expression
          # still matches "Runner.Listener" but keeps `pgrep -f` from
          # matching the probe's own `sh -c` command line, which could
          # otherwise make the check succeed even with no runner running.
          livenessProbe:
            exec:
              command:
                - /bin/sh
                - -c
                - "pgrep -f '[R]unner.Listener' > /dev/null"
            initialDelaySeconds: 30
            periodSeconds: 30
            failureThreshold: 3
      volumes:
        - name: runner-home
          emptyDir: {}
        - name: nuget-cache
          persistentVolumeClaim:
            claimName: github-runner-nuget-cache
        - name: tmp
          emptyDir: {}
      # Restart policy: Always — the kubelet restarts the runner container
      # after each ephemeral job completes, which triggers re-registration.
      restartPolicy: Always
---
# Shared.Pos repo-scoped Linux runner.
# Added 2026-05-20 to unstick the
# FlowerCore.Shared.Pos "Build, Test & Publish" workflow, which had been
# queued indefinitely after the Sprint 26 Mac POS Phase 1/2 PRs merged
# (no fc-build-linux runner was registered to Shared.Pos — GitHub user
# accounts have only repo-scoped runners). First concrete instance of the
# Sprint 29 Cx-1 Linux-runner-fleet pattern; the full per-repo fleet is
# codified by that lane. emptyDir nuget cache (no RWO PVC) so it shares no
# volume with the Common runner and needs no node pin.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: github-runner-sharedpos
  namespace: github-runner
  labels:
    app.kubernetes.io/name: github-runner-sharedpos
    app.kubernetes.io/component: runner
    app.kubernetes.io/part-of: flowercore
    app.kubernetes.io/managed-by: argocd
    flowercore.io/created-by: argocd
spec:
  # UN-PARKED 2026-05-21: Shared.Pos #5 fixed the non-root setup-dotnet path
  # (DOTNET_INSTALL_DIR step-scoped). Re-enabled to run the now-fixable build.
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: github-runner-sharedpos
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app.kubernetes.io/name: github-runner-sharedpos
        app.kubernetes.io/component: runner
        app.kubernetes.io/part-of: flowercore
        flowercore.io/created-by: argocd
    spec:
      serviceAccountName: github-runner
      # The runner never calls the K8s API, so do not mount the
      # ServiceAccount token into a pod that runs CI job code.
      automountServiceAccountToken: false
      securityContext:
        runAsNonRoot: true
        runAsUser: 1001
        runAsGroup: 1001
        fsGroup: 1001
      # Sprint 30 Cl-1 pod-env fix (2026-05-21): pre-create + chown
      # /home/runner/.dotnet + /home/runner/.nuget as root so the non-root
      # runner (UID 1001) can write there. Mirrored on the Shared.Pos runner
      # so the per-workflow DOTNET_INSTALL_DIR patch can be retired
      # fleet-wide rather than re-applied per repo as flipped lanes land.
      initContainers:
        - name: setup-runner-home
          image: busybox:1.36
          command:
            - sh
            - -c
            - |
              set -e
              mkdir -p /home/runner/.dotnet /home/runner/.nuget/packages /home/runner/.nuget/NuGet
              chown -R 1001:1001 /home/runner/.dotnet /home/runner/.nuget
              chmod -R 755 /home/runner/.dotnet /home/runner/.nuget
          # Container-level override of the pod-wide non-root policy; only
          # this short-lived setup step runs as root.
          securityContext:
            runAsUser: 0
            runAsNonRoot: false
          volumeMounts:
            - name: runner-home
              mountPath: /home/runner
      containers:
        - name: runner
          # NOTE(review): ":latest" + Always means every container restart
          # may pull a different runner build — consider pinning a tag or
          # digest once a known-good version is chosen.
          image: myoung34/github-runner:latest
          imagePullPolicy: Always
          env:
            # Repo-scoped registration target for this runner.
            - name: REPO_URL
              value: "https://github.com/astoltz/FlowerCore.Shared.Pos"
            - name: RUNNER_NAME_PREFIX
              value: "rke2-linux-sharedpos"
            - name: RUNNER_WORKDIR
              value: "/tmp/runner/work"
            - name: EPHEMERAL
              value: "true"
            - name: LABELS
              value: "self-hosted,linux,fc-build-linux"
            # PAT read from the 1Password-synced Secret, field "credential".
            - name: ACCESS_TOKEN
              valueFrom:
                secretKeyRef:
                  name: github-runner-token
                  key: credential
            - name: RUN_AS_ROOT
              value: "false"
            # Sprint 30 Cl-1 pod-env fix (2026-05-21): retire per-workflow patch.
            - name: DOTNET_INSTALL_DIR
              value: "/home/runner/.dotnet"
            - name: DOTNET_CLI_TELEMETRY_OPTOUT
              value: "1"
            - name: NUGET_PACKAGES
              value: "/home/runner/.nuget/packages"
            - name: DOTNET_NOLOGO
              value: "1"
            - name: DOTNET_GENERATE_ASPNET_CERTIFICATE
              value: "false"
          resources:
            requests:
              cpu: "500m"
              memory: "1Gi"
            limits:
              cpu: "2000m"
              memory: "4Gi"
          volumeMounts:
            # Shared.Pos runner uses emptyDir for nuget cache (no node pin
            # via RWO PVC). /home/runner emptyDir hosts .dotnet via the
            # setup-runner-home initContainer chown; the .nuget/packages
            # emptyDir mount still wins at its nested path.
            - name: runner-home
              mountPath: /home/runner
            - name: nuget-cache
              mountPath: /home/runner/.nuget/packages
            - name: tmp
              mountPath: /tmp
          # Liveness: runner process is alive. The "[R]" bracket expression
          # still matches "Runner.Listener" but keeps `pgrep -f` from
          # matching the probe's own `sh -c` command line, which could
          # otherwise make the check succeed even with no runner running.
          livenessProbe:
            exec:
              command:
                - /bin/sh
                - -c
                - "pgrep -f '[R]unner.Listener' > /dev/null"
            initialDelaySeconds: 30
            periodSeconds: 30
            failureThreshold: 3
      volumes:
        - name: runner-home
          emptyDir: {}
        - name: nuget-cache
          emptyDir: {}
        - name: tmp
          emptyDir: {}
      # The kubelet restarts the runner container after each ephemeral job.
      restartPolicy: Always