Compare commits
1 Commits
codex/s59-
...
codex/spri
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
266b9cb8be |
38
apps/github-runner/README.md
Normal file
38
apps/github-runner/README.md
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
# github-runner
|
||||||
|
|
||||||
|
ArgoCD-managed repo-scoped Linux GitHub Actions runners for FlowerCore.
|
||||||
|
|
||||||
|
`astoltz` is a GitHub user account, not an organization, so each repository
|
||||||
|
needs its own runner registration. The existing Common runner remains
|
||||||
|
`Deployment/github-runner`; Sprint 29 adds one single-replica Deployment for
|
||||||
|
each top Linux-cost repo:
|
||||||
|
|
||||||
|
- `FlowerCore.Puppet`
|
||||||
|
- `FlowerCore.Signage`
|
||||||
|
- `FlowerCore.DMS`
|
||||||
|
- `FlowerCore.Telephony`
|
||||||
|
- `FlowerCore.Print.Web`
|
||||||
|
- `FlowerCore.Chat`
|
||||||
|
- `FlowerCore.MySQL`
|
||||||
|
- `FlowerCore.Kiosk.Linux`
|
||||||
|
|
||||||
|
Each runner uses `myoung34/github-runner:latest`, `EPHEMERAL=true`, and labels
|
||||||
|
`self-hosted,linux,fc-build-linux`. The shared `github-runner-token` Secret is
|
||||||
|
synced from the existing 1Password item `GitHub PAT (Runner Registration)` and
|
||||||
|
is consumed as `ACCESS_TOKEN`.
|
||||||
|
|
||||||
|
Do not `kubectl apply` this app directly; it is managed by ArgoCD. Merge to `main`, let
|
||||||
|
`infra-github-runner` sync, then verify from `noc1`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
kubectl -n github-runner get deploy,pods,pvc
|
||||||
|
|
||||||
|
for repo in FlowerCore.Puppet FlowerCore.Signage FlowerCore.DMS FlowerCore.Telephony FlowerCore.Print.Web FlowerCore.Chat FlowerCore.MySQL FlowerCore.Kiosk.Linux; do
|
||||||
|
gh api "/repos/astoltz/$repo/actions/runners" \
|
||||||
|
--jq '.runners[] | select((.labels[].name == "fc-build-linux") and (.status == "online")) | {name,status,busy,labels:[.labels[].name]}'
|
||||||
|
done
|
||||||
|
```
|
||||||
|
|
||||||
|
`LinuxRunnerOffline` is declared in `apps/monitoring/noc-monitoring.yaml` and
|
||||||
|
fires when any Common or top-8 Linux runner deployment has no available replica
|
||||||
|
for 10 minutes.
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -974,6 +974,19 @@ data:
|
|||||||
summary: "Deployment {{ $labels.namespace }}/{{ $labels.deployment }} replica mismatch"
|
summary: "Deployment {{ $labels.namespace }}/{{ $labels.deployment }} replica mismatch"
|
||||||
description: "Spec wants {{ $labels.spec_replicas }} but only {{ $value }} available. Likely a rollout stuck on probe failure, scheduling, or PVC."
|
description: "Spec wants {{ $labels.spec_replicas }} but only {{ $value }} available. Likely a rollout stuck on probe failure, scheduling, or PVC."
|
||||||
|
|
||||||
|
- alert: LinuxRunnerOffline
|
||||||
|
expr: |
|
||||||
|
kube_deployment_status_replicas_available{namespace="github-runner",deployment=~"github-runner(|-(puppet|signage|dms|telephony|print-web|chat|mysql|kiosk-linux))"} < 1
|
||||||
|
for: 10m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
service: github-runner
|
||||||
|
alert_channel: thermal_print
|
||||||
|
annotations:
|
||||||
|
summary: "Linux GitHub Actions runner offline: {{ $labels.deployment }}"
|
||||||
|
description: "{{ $labels.deployment }} has no available runner pod for 10 minutes. GitHub jobs using [self-hosted, linux, fc-build-linux] for its repo will queue at $0 until the runner returns."
|
||||||
|
runbook_url: "https://gitea.iamworkin.lan/bluejay/FlowerCore.Notes/src/branch/master/docs/infrastructure/self-hosted-runner-fleet.md"
|
||||||
|
|
||||||
# Q-MR-3 (2026-05-11): multus memory pressure — catches the next OOM
|
# Q-MR-3 (2026-05-11): multus memory pressure — catches the next OOM
|
||||||
# cascade BEFORE multus is OOM-killed cluster-wide. The 2026-05-10
|
# cascade BEFORE multus is OOM-killed cluster-wide. The 2026-05-10
|
||||||
# outage (21h) hit because no alert fired on the rising multus working
|
# outage (21h) hit because no alert fired on the rising multus working
|
||||||
@@ -3427,6 +3440,33 @@ data:
|
|||||||
relativeTimeRange: {from: 120, to: 0}
|
relativeTimeRange: {from: 120, to: 0}
|
||||||
datasourceUid: __expr__
|
datasourceUid: __expr__
|
||||||
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [1], type: lt}}], refId: C}
|
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [1], type: lt}}], refId: C}
|
||||||
|
- uid: linux-runner-offline
|
||||||
|
title: LinuxRunnerOffline
|
||||||
|
condition: C
|
||||||
|
for: 10m
|
||||||
|
noDataState: Alerting
|
||||||
|
execErrState: OK
|
||||||
|
annotations:
|
||||||
|
summary: Linux GitHub Actions runner offline
|
||||||
|
description: "A repo-scoped fc-build-linux runner deployment has no available pod. Jobs will queue at $0 until ArgoCD/K8s returns the runner."
|
||||||
|
runbook_url: "https://gitea.iamworkin.lan/bluejay/FlowerCore.Notes/src/branch/master/docs/infrastructure/self-hosted-runner-fleet.md"
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
service: github-runner
|
||||||
|
alert_channel: thermal_print
|
||||||
|
data:
|
||||||
|
- refId: A
|
||||||
|
relativeTimeRange: {from: 600, to: 0}
|
||||||
|
datasourceUid: prometheus
|
||||||
|
model: {expr: 'min by(deployment) (kube_deployment_status_replicas_available{namespace="github-runner",deployment=~"github-runner(|-(puppet|signage|dms|telephony|print-web|chat|mysql|kiosk-linux))"})', instant: true, refId: A}
|
||||||
|
- refId: B
|
||||||
|
relativeTimeRange: {from: 600, to: 0}
|
||||||
|
datasourceUid: __expr__
|
||||||
|
model: {type: reduce, expression: A, reducer: last, refId: B}
|
||||||
|
- refId: C
|
||||||
|
relativeTimeRange: {from: 600, to: 0}
|
||||||
|
datasourceUid: __expr__
|
||||||
|
model: {type: threshold, expression: B, conditions: [{evaluator: {params: [1], type: lt}}], refId: C}
|
||||||
- uid: high-cpu
|
- uid: high-cpu
|
||||||
title: High CPU (>85%)
|
title: High CPU (>85%)
|
||||||
condition: C
|
condition: C
|
||||||
|
|||||||
@@ -54,6 +54,18 @@ public sealed class FleetManifestLintTests
|
|||||||
"ttsreader-piper",
|
"ttsreader-piper",
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// <summary>
/// Repo-scoped runner Deployments the lint tests expect to find, keyed by Deployment
/// name, with the GitHub repository URL each one is expected to carry in <c>REPO_URL</c>.
/// </summary>
private static readonly IReadOnlyDictionary<string, string> TopLinuxRunnerRepos =
    new Dictionary<string, string>(StringComparer.Ordinal)
    {
        { "github-runner-puppet", "https://github.com/astoltz/FlowerCore.Puppet" },
        { "github-runner-signage", "https://github.com/astoltz/FlowerCore.Signage" },
        { "github-runner-dms", "https://github.com/astoltz/FlowerCore.DMS" },
        { "github-runner-telephony", "https://github.com/astoltz/FlowerCore.Telephony" },
        { "github-runner-print-web", "https://github.com/astoltz/FlowerCore.Print.Web" },
        { "github-runner-chat", "https://github.com/astoltz/FlowerCore.Chat" },
        { "github-runner-mysql", "https://github.com/astoltz/FlowerCore.MySQL" },
        { "github-runner-kiosk-linux", "https://github.com/astoltz/FlowerCore.Kiosk.Linux" },
    };
|
||||||
|
|
||||||
[Fact]
|
[Fact]
|
||||||
public void IngressRoutes_MustKeepServiceReferencesInTheSameNamespace()
|
public void IngressRoutes_MustKeepServiceReferencesInTheSameNamespace()
|
||||||
{
|
{
|
||||||
@@ -187,6 +199,76 @@ public sealed class FleetManifestLintTests
|
|||||||
violations.Should().BeEmpty();
|
violations.Should().BeEmpty();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Checks that every entry in <see cref="TopLinuxRunnerRepos"/> has a Deployment in the
/// <c>github-runner</c> namespace whose single container registers against the expected
/// repository, runs ephemerally with the shared label set, and reads <c>ACCESS_TOKEN</c>
/// from the <c>github-runner-token</c> Secret (key <c>credential</c>) rather than a literal.
/// </summary>
[Fact]
public void GitHubRunnerFleet_MustRegisterTopLinuxReposAsRepoScopedDeployments()
{
    var deployments = Inventory.Documents
        .Where(document => document.Kind == "Deployment")
        .Where(document => document.Namespace == "github-runner")
        .ToDictionary(document => document.Name, StringComparer.Ordinal);

    foreach (var expectedRunner in TopLinuxRunnerRepos)
    {
        var deploymentName = expectedRunner.Key;
        var repoUrl = expectedRunner.Value;

        // Every assertion below names the deployment: the same checks run for each
        // runner, so an unnamed failure would not say which manifest is wrong.
        deployments.Should().ContainKey(
            deploymentName,
            "{0} needs its own repo-scoped runner Deployment",
            repoUrl);

        var container = deployments[deploymentName]
            .ContainerMappings()
            .Should()
            .ContainSingle("{0} must run exactly one container", deploymentName)
            .Subject;

        EnvValue(container, "REPO_URL").Should().Be(
            repoUrl,
            "{0} must register against its own repository",
            deploymentName);
        EnvValue(container, "EPHEMERAL").Should().Be(
            "true",
            "{0} must run as an ephemeral runner",
            deploymentName);
        EnvValue(container, "LABELS").Should().Be(
            "self-hosted,linux,fc-build-linux",
            "{0} must advertise the shared Linux build labels",
            deploymentName);
        EnvValue(container, "ACCESS_TOKEN").Should().BeNull(
            "ACCESS_TOKEN must come from github-runner-token Secret, not a literal ({0})",
            deploymentName);
        EnvSecretName(container, "ACCESS_TOKEN").Should().Be(
            "github-runner-token",
            "{0} must read ACCESS_TOKEN from the shared Secret",
            deploymentName);
        EnvSecretKey(container, "ACCESS_TOKEN").Should().Be(
            "credential",
            "{0} must read the 'credential' key of the shared Secret",
            deploymentName);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Guards the pre-existing Common runner: the <c>github-runner</c> Deployment must keep
/// its repository URL, runner-name prefix, label set, and its NuGet cache claim.
/// </summary>
[Fact]
public void GitHubRunnerFleet_MustPreserveExistingCommonRunnerShape()
{
    var commonRunner = Inventory.Documents.Single(candidate =>
        candidate.Kind == "Deployment"
        && candidate.Namespace == "github-runner"
        && candidate.Name == "github-runner");

    var runnerContainer = commonRunner.ContainerMappings().Should().ContainSingle().Subject;

    EnvValue(runnerContainer, "REPO_URL").Should().Be("https://github.com/astoltz/FlowerCore.Common");
    EnvValue(runnerContainer, "RUNNER_NAME_PREFIX").Should().Be("rke2-linux");
    EnvValue(runnerContainer, "LABELS").Should().Be("self-hosted,linux,fc-build-linux");

    // Collect the claim name of every pod volume that is backed by a PVC;
    // volumes without a persistentVolumeClaim.claimName are skipped.
    var mountedClaims = new List<string>();
    foreach (var volume in commonRunner.MappingSequence("spec", "template", "spec", "volumes"))
    {
        var claimName = ManifestNodeExtensions.Scalar(volume, "persistentVolumeClaim", "claimName");
        if (!string.IsNullOrWhiteSpace(claimName))
        {
            mountedClaims.Add(claimName);
        }
    }

    mountedClaims.Should().Contain("github-runner-nuget-cache");
}
|
||||||
|
|
||||||
|
/// <summary>
/// Checks that the <c>github-runner</c> namespace declares a
/// <c>github-runner-&lt;repo&gt;-nuget-cache</c> PersistentVolumeClaim for every
/// Deployment named in <see cref="TopLinuxRunnerRepos"/>.
/// </summary>
/// <remarks>
/// Only the claim names are inspected here; access mode and which Deployment mounts
/// the claim are not asserted by this test.
/// </remarks>
[Fact]
public void GitHubRunnerFleet_MustUseOneRwoCachePerRepoScopedDeployment()
{
    const string RunnerPrefix = "github-runner-";

    var declaredClaims = Inventory.Documents
        .Where(candidate => candidate.Kind == "PersistentVolumeClaim"
            && candidate.Namespace == "github-runner")
        .Select(candidate => candidate.Name)
        .ToHashSet(StringComparer.Ordinal);

    foreach (var deploymentName in TopLinuxRunnerRepos.Keys)
    {
        // Strip the shared prefix to get the per-repo part, then rebuild the claim name around it.
        var repoPart = deploymentName.Substring(RunnerPrefix.Length);
        var expectedClaim = RunnerPrefix + repoPart + "-nuget-cache";

        declaredClaims.Should().Contain(expectedClaim);
    }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Text-level check that <c>apps/monitoring/noc-monitoring.yaml</c> still contains the
/// <c>LinuxRunnerOffline</c> alert, its metric selector, the deployment-name regex, and
/// the runbook link.
/// </summary>
[Fact]
public void Monitoring_MustAlertWhenTopLinuxRunnerDeploymentIsUnavailable()
{
    var monitoringPath = Path.Combine(Inventory.BluejayRoot, "apps", "monitoring", "noc-monitoring.yaml");
    var monitoring = File.ReadAllText(monitoringPath);

    // Each fragment must appear verbatim somewhere in the manifest text.
    string[] requiredFragments =
    {
        "LinuxRunnerOffline",
        "kube_deployment_status_replicas_available{namespace=\"github-runner\"",
        "github-runner(|-(puppet|signage|dms|telephony|print-web|chat|mysql|kiosk-linux))",
        "runbook_url: \"https://gitea.iamworkin.lan/bluejay/FlowerCore.Notes/src/branch/master/docs/infrastructure/self-hosted-runner-fleet.md\"",
    };

    foreach (var fragment in requiredFragments)
    {
        monitoring.Should().Contain(fragment);
    }
}
|
||||||
|
|
||||||
[Fact]
|
[Fact]
|
||||||
public void StatefulSets_WithVolumeClaimTemplates_MustDeclareFilesystemDefaults()
|
public void StatefulSets_WithVolumeClaimTemplates_MustDeclareFilesystemDefaults()
|
||||||
{
|
{
|
||||||
@@ -314,6 +396,31 @@ public sealed class FleetManifestLintTests
|
|||||||
$"{document.Descriptor} container '{containerName}' still uses {probeKey}.httpGet on /health.",
|
$"{document.Descriptor} container '{containerName}' still uses {probeKey}.httpGet on /health.",
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Reads the <c>value</c> scalar of the named entry in a container's <c>env</c> list.
/// </summary>
/// <param name="container">Container mapping whose <c>env</c> sequence is searched.</param>
/// <param name="name">Environment variable name, matched ordinally.</param>
/// <returns>
/// The entry's <c>value</c> scalar, or <c>null</c> when no entry has that name.
/// </returns>
private static string? EnvValue(YamlMappingNode container, string name)
{
    var entry = EnvMapping(container, name);
    if (entry is null)
    {
        return null;
    }

    return ManifestNodeExtensions.Scalar(entry, "value");
}
|
||||||
|
|
||||||
|
/// <summary>
/// Reads <c>valueFrom.secretKeyRef.name</c> of the named entry in a container's <c>env</c> list.
/// </summary>
/// <param name="container">Container mapping whose <c>env</c> sequence is searched.</param>
/// <param name="name">Environment variable name, matched ordinally.</param>
/// <returns>
/// The referenced Secret name, or <c>null</c> when no entry has that name.
/// </returns>
private static string? EnvSecretName(YamlMappingNode container, string name)
{
    var entry = EnvMapping(container, name);
    if (entry is null)
    {
        return null;
    }

    return ManifestNodeExtensions.Scalar(entry, "valueFrom", "secretKeyRef", "name");
}
|
||||||
|
|
||||||
|
/// <summary>
/// Reads <c>valueFrom.secretKeyRef.key</c> of the named entry in a container's <c>env</c> list.
/// </summary>
/// <param name="container">Container mapping whose <c>env</c> sequence is searched.</param>
/// <param name="name">Environment variable name, matched ordinally.</param>
/// <returns>
/// The referenced key within the Secret, or <c>null</c> when no entry has that name.
/// </returns>
private static string? EnvSecretKey(YamlMappingNode container, string name)
{
    var entry = EnvMapping(container, name);
    return entry is null
        ? null
        : ManifestNodeExtensions.Scalar(entry, "valueFrom", "secretKeyRef", "key");
}
|
||||||
|
|
||||||
|
/// <summary>
/// Finds the entry of a container's <c>env</c> list whose <c>name</c> scalar equals
/// <paramref name="name"/> (ordinal comparison).
/// </summary>
/// <param name="container">Container mapping whose <c>env</c> sequence is searched.</param>
/// <param name="name">Environment variable name to look for.</param>
/// <returns>The matching entry, or <c>null</c> when none matches.</returns>
/// <exception cref="InvalidOperationException">More than one entry has the given name.</exception>
private static YamlMappingNode? EnvMapping(YamlMappingNode container, string name)
{
    var entries = ManifestNodeExtensions.MappingSequence(container, "env");

    return entries.SingleOrDefault(entry =>
    {
        var entryName = ManifestNodeExtensions.Scalar(entry, "name");
        return string.Equals(entryName, name, StringComparison.Ordinal);
    });
}
|
||||||
}
|
}
|
||||||
|
|
||||||
internal sealed class ManifestInventory
|
internal sealed class ManifestInventory
|
||||||
|
|||||||
Reference in New Issue
Block a user