Compare commits
1 Commits
sprint41/c
...
sprint37/c
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9a4a8264d9 |
27
apps/brochure/README.md
Normal file
27
apps/brochure/README.md
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
# FlowerCore Brochure
|
||||||
|
|
||||||
|
`apps/brochure` hosts the public brochure split from `FlowerCore.Intranet.Web`.
|
||||||
|
ArgoCD's `apps/*` ApplicationSet will create `infra-brochure` after this
|
||||||
|
directory lands on `main`.
|
||||||
|
|
||||||
|
## Runtime
|
||||||
|
|
||||||
|
- Host: `https://brochure.flowercore.io`
|
||||||
|
- Namespace: `brochure`
|
||||||
|
- Deployment: `brochure-web`
|
||||||
|
- Image: `localhost/fc-brochure-web:v20260524-sprint32`
|
||||||
|
- Port: `8080`
|
||||||
|
- Public route method allowlist: `GET` and `HEAD`
|
||||||
|
|
||||||
|
## Operator Actions
|
||||||
|
|
||||||
|
1. Publish and import `localhost/fc-brochure-web:v20260524-sprint32` to every
|
||||||
|
RKE2 node before sync, using the same podman save + `ctr images import`
|
||||||
|
flow as the Intranet deployment.
|
||||||
|
2. Create the Cloudflare DNS record for `brochure.flowercore.io` pointing at
|
||||||
|
the FlowerCore public edge.
|
||||||
|
3. Verify `infra-brochure` appears in ArgoCD, the certificate becomes Ready,
|
||||||
|
and `GET https://brochure.flowercore.io/` returns `200`.
|
||||||
|
|
||||||
|
The route intentionally does not expose `/ops/*` or `/admin/*`; the Brochure
|
||||||
|
web app returns `404` for those paths and Traefik only forwards read methods.
|
||||||
131
apps/brochure/brochure.yaml
Normal file
131
apps/brochure/brochure.yaml
Normal file
@@ -0,0 +1,131 @@
|
|||||||
|
# FlowerCore Brochure public host
|
||||||
|
#
|
||||||
|
# Thin Blazor host for public What's New, walkthrough, and gallery content
|
||||||
|
# carved out of FlowerCore.Intranet.Web. The ApplicationSet creates
|
||||||
|
# infra-brochure from this directory after merge.
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Namespace
|
||||||
|
metadata:
|
||||||
|
name: brochure
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: brochure-web
|
||||||
|
namespace: brochure
|
||||||
|
labels:
|
||||||
|
app: brochure-web
|
||||||
|
app.kubernetes.io/name: brochure-web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
revisionHistoryLimit: 3
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: brochure-web
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: brochure-web
|
||||||
|
app.kubernetes.io/name: brochure-web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: brochure-web
|
||||||
|
image: localhost/fc-brochure-web:v20260524-sprint32
|
||||||
|
imagePullPolicy: Never
|
||||||
|
ports:
|
||||||
|
- containerPort: 8080
|
||||||
|
name: http
|
||||||
|
env:
|
||||||
|
- name: ASPNETCORE_ENVIRONMENT
|
||||||
|
value: Production
|
||||||
|
- name: ASPNETCORE_URLS
|
||||||
|
value: "http://+:8080"
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: "25m"
|
||||||
|
memory: "128Mi"
|
||||||
|
limits:
|
||||||
|
cpu: "500m"
|
||||||
|
memory: "512Mi"
|
||||||
|
readinessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /health
|
||||||
|
port: http
|
||||||
|
initialDelaySeconds: 10
|
||||||
|
periodSeconds: 10
|
||||||
|
livenessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /health
|
||||||
|
port: http
|
||||||
|
initialDelaySeconds: 30
|
||||||
|
periodSeconds: 30
|
||||||
|
securityContext:
|
||||||
|
runAsNonRoot: true
|
||||||
|
runAsUser: 1654
|
||||||
|
runAsGroup: 1654
|
||||||
|
allowPrivilegeEscalation: false
|
||||||
|
readOnlyRootFilesystem: true
|
||||||
|
capabilities:
|
||||||
|
drop:
|
||||||
|
- ALL
|
||||||
|
volumeMounts:
|
||||||
|
- name: tmp
|
||||||
|
mountPath: /tmp
|
||||||
|
volumes:
|
||||||
|
- name: tmp
|
||||||
|
emptyDir: {}
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: brochure-web
|
||||||
|
namespace: brochure
|
||||||
|
labels:
|
||||||
|
app: brochure-web
|
||||||
|
app.kubernetes.io/name: brochure-web
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: brochure-web
|
||||||
|
ports:
|
||||||
|
- name: http
|
||||||
|
port: 8080
|
||||||
|
targetPort: http
|
||||||
|
---
|
||||||
|
apiVersion: cert-manager.io/v1
|
||||||
|
kind: Certificate
|
||||||
|
metadata:
|
||||||
|
name: brochure-web-tls
|
||||||
|
namespace: brochure
|
||||||
|
spec:
|
||||||
|
secretName: brochure-web-tls
|
||||||
|
issuerRef:
|
||||||
|
name: step-ca-acme
|
||||||
|
kind: ClusterIssuer
|
||||||
|
dnsNames:
|
||||||
|
- brochure.flowercore.io
|
||||||
|
duration: 720h
|
||||||
|
renewBefore: 240h
|
||||||
|
---
|
||||||
|
apiVersion: traefik.io/v1alpha1
|
||||||
|
kind: IngressRoute
|
||||||
|
metadata:
|
||||||
|
name: brochure-web-public
|
||||||
|
namespace: brochure
|
||||||
|
spec:
|
||||||
|
entryPoints:
|
||||||
|
- websecure
|
||||||
|
routes:
|
||||||
|
- match: Host(`brochure.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
|
||||||
|
kind: Rule
|
||||||
|
services:
|
||||||
|
- name: brochure-web
|
||||||
|
port: 8080
|
||||||
|
tls:
|
||||||
|
secretName: brochure-web-tls
|
||||||
@@ -1,31 +0,0 @@
|
|||||||
# Step issuer for FlowerCore.DeviceManagement runtime mTLS leaves.
|
|
||||||
#
|
|
||||||
# Requires the smallstep step-issuer CRDs/controller:
|
|
||||||
# stepclusterissuers.certmanager.step.sm
|
|
||||||
# The provisioner password lives in the live cert-manager Secret below; do not
|
|
||||||
# commit the password or generated private key material to this repo.
|
|
||||||
apiVersion: certmanager.step.sm/v1beta1
|
|
||||||
kind: StepClusterIssuer
|
|
||||||
metadata:
|
|
||||||
name: step-ca-agent
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/name: step-ca-agent
|
|
||||||
app.kubernetes.io/component: pki
|
|
||||||
app.kubernetes.io/part-of: flowercore
|
|
||||||
app.kubernetes.io/managed-by: argocd
|
|
||||||
flowercore.io/tenant-id: system
|
|
||||||
flowercore.io/created-by: bluejay-infra
|
|
||||||
annotations:
|
|
||||||
flowercore.io/traceability-standard: k8s-pod-ownership-and-traceability-standard
|
|
||||||
flowercore.io/provisioner-source: profile::pki::stepca
|
|
||||||
flowercore.io/secret-source: cert-manager/step-ca-agent-provisioner-password
|
|
||||||
spec:
|
|
||||||
url: https://10.0.56.10:9443
|
|
||||||
caBundle: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSUJ4RENDQVdxZ0F3SUJBZ0lSQVBZMzU3RzZvdzZ6TUFMNSs0YlMya2t3Q2dZSUtvWkl6ajBFQXdJd1FERWEKTUJnR0ExVUVDaE1SU1VGdFYyOXlhMmx1SUVGRFRVVWdRMEV4SWpBZ0JnTlZCQU1UR1VsQmJWZHZjbXRwYmlCQgpRMDFGSUVOQklGSnZiM1FnUTBFd0hoY05Nall3TXpBNE1UZ3dOekV4V2hjTk16WXdNekExTVRnd056RXhXakJBCk1Sb3dHQVlEVlFRS0V4RkpRVzFYYjNKcmFXNGdRVU5OUlNCRFFURWlNQ0FHQTFVRUF4TVpTVUZ0VjI5eWEybHUKSUVGRFRVVWdRMEVnVW05dmRDQkRRVEJaTUJNR0J5cUdTTTQ5QWdFR0NDcUdTTTQ5QXdFSEEwSUFCSjJuMDRYMQpKWm81WmRxL2kxSWR2OCtmcXdaeUF6Qmg3d2hicWowU1dzSkw4VVdSYWJDTXFZQ3M3K2RYTzB4UlN6cWt3RkRMCngrdm9vT2FpOFJnUk5oYWpSVEJETUE0R0ExVWREd0VCL3dRRUF3SUJCakFTQmdOVkhSTUJBZjhFQ0RBR0FRSC8KQWdFQk1CMEdBMVVkRGdRV0JCUm51UFBRUjZpTS9INnZPbHVpVTNTeWdheXo4akFLQmdncWhrak9QUVFEQWdOSQpBREJGQWlFQXJRSzlkWVBHbUFac2RZbmp6aXVGVlZFNU5LWlVjY2VZdkdmR0MrdExYVXNDSUF1ZEYyekpyQ1JxCjNtSzUwWlpFVC9md1RrSndpRUY0ODI0bWpQOHAxQ0tNCi0tLS0tRU5EIENFUlRJRklDQVRFLS0tLS0K
|
|
||||||
provisioner:
|
|
||||||
name: step-ca-agent
|
|
||||||
kid: RF3A9welUYVOWBX8tr19aWyA2kQlxoGZN1dRwTElUEM
|
|
||||||
passwordRef:
|
|
||||||
name: step-ca-agent-provisioner-password
|
|
||||||
namespace: cert-manager
|
|
||||||
key: password
|
|
||||||
@@ -47,7 +47,7 @@ spec:
|
|||||||
fsGroupChangePolicy: OnRootMismatch
|
fsGroupChangePolicy: OnRootMismatch
|
||||||
containers:
|
containers:
|
||||||
- name: operator
|
- name: operator
|
||||||
image: localhost/fc-devicemgmt-operator:v20260519-sp34cl3-fix
|
image: localhost/fc-devicemgmt-operator:v20260512-cx5
|
||||||
imagePullPolicy: Never
|
imagePullPolicy: Never
|
||||||
ports:
|
ports:
|
||||||
- name: metrics
|
- name: metrics
|
||||||
|
|||||||
@@ -4,22 +4,6 @@
|
|||||||
# Sprint 9+ lane. This manifest is static-valid without requiring the image to
|
# Sprint 9+ lane. This manifest is static-valid without requiring the image to
|
||||||
# exist yet; import localhost/fc-devicemgmt-web:<tag> to all schedulable RKE2
|
# exist yet; import localhost/fc-devicemgmt-web:<tag> to all schedulable RKE2
|
||||||
# nodes before letting ArgoCD sync a live rollout.
|
# nodes before letting ArgoCD sync a live rollout.
|
||||||
#
|
|
||||||
# SCALED TO 0 — 2026-05-19 morning-routine cleanup.
|
|
||||||
# The Web pod cannot start until TWO upstream gaps close:
|
|
||||||
# 1. MySQL DB instance `flowercore_devicemgmt` (user `fc_devicemgmt`) is
|
|
||||||
# provisioned via fc-mysql Manager. The cluster currently has ZERO
|
|
||||||
# MySqlInstanceCrds and no `mysql.fc-mysql.svc:3306` Service, so the
|
|
||||||
# deployment-web container env `FlowerCore__Database__Host=mysql.fc-mysql.svc`
|
|
||||||
# points at nothing. Provision via the fc-mysql Manager UI/REST/MCP.
|
|
||||||
# 2. 1Password vault item `IAmWorkin/FlowerCore DeviceManagement Runtime`
|
|
||||||
# with 5 fields (DB-Password, mtls-ca.pem, mtls-client.crt, mtls-client.key,
|
|
||||||
# mtls-chain.pem) — see apps/fc-devicemgmt/1password-item.yaml. Mint mTLS
|
|
||||||
# from step-ca-agent ClusterIssuer per ADR-126; DB-Password must match the
|
|
||||||
# password configured for the MySQL user.
|
|
||||||
# Re-enable: change replicas back to 2 after both gaps close. The image tag
|
|
||||||
# in this file (v20260512-cx5) MAY also need a refresh — it predates the
|
|
||||||
# Sprint 34 Cl-3 operator fix; Web may have an analogous bug.
|
|
||||||
apiVersion: apps/v1
|
apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
metadata:
|
metadata:
|
||||||
@@ -36,7 +20,7 @@ metadata:
|
|||||||
annotations:
|
annotations:
|
||||||
flowercore.io/traceability-standard: k8s-pod-ownership-and-traceability-standard
|
flowercore.io/traceability-standard: k8s-pod-ownership-and-traceability-standard
|
||||||
spec:
|
spec:
|
||||||
replicas: 0
|
replicas: 2
|
||||||
revisionHistoryLimit: 3
|
revisionHistoryLimit: 3
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
|
|||||||
@@ -28,6 +28,10 @@ Sprint 32 final long-tail wave adds 16 two-replica Deployments:
|
|||||||
`FlowerCore.Provisioning`, `FlowerCore.Redis`, `FlowerCore.MessageBoard`, and
|
`FlowerCore.Provisioning`, `FlowerCore.Redis`, `FlowerCore.MessageBoard`, and
|
||||||
`FlowerCore.MenuBoard`.
|
`FlowerCore.MenuBoard`.
|
||||||
|
|
||||||
|
Sprint 37 Cx-2 closes the audited Linux runner gaps for
|
||||||
|
`FlowerCore.DeviceManagement` and `FlowerCore.WorldBuilder` with the same
|
||||||
|
two-replica `emptyDir` pattern.
|
||||||
|
|
||||||
## Post-Merge Proof
|
## Post-Merge Proof
|
||||||
|
|
||||||
After the PR is merged and ArgoCD syncs, verify the runner fleet:
|
After the PR is merged and ArgoCD syncs, verify the runner fleet:
|
||||||
@@ -47,7 +51,7 @@ for repo in FlowerCore.Common FlowerCore.Shared.Pos FlowerCore.Puppet FlowerCore
|
|||||||
FlowerCore.Distribution FlowerCore.Scoreboard FlowerCore.SegmentDisplay \
|
FlowerCore.Distribution FlowerCore.Scoreboard FlowerCore.SegmentDisplay \
|
||||||
FlowerCore.Signage.Contracts FlowerCore.SignalControl FlowerCore.Intranet.Web \
|
FlowerCore.Signage.Contracts FlowerCore.SignalControl FlowerCore.Intranet.Web \
|
||||||
FlowerCore.Provisioning FlowerCore.Redis FlowerCore.MessageBoard \
|
FlowerCore.Provisioning FlowerCore.Redis FlowerCore.MessageBoard \
|
||||||
FlowerCore.MenuBoard; do
|
FlowerCore.MenuBoard FlowerCore.DeviceManagement FlowerCore.WorldBuilder; do
|
||||||
echo "=== $repo ==="
|
echo "=== $repo ==="
|
||||||
gh api "/repos/astoltz/$repo/actions/runners" \
|
gh api "/repos/astoltz/$repo/actions/runners" \
|
||||||
--jq '.runners[] | select(.labels[].name == "fc-build-linux") | {name,status,busy,labels:[.labels[].name]}'
|
--jq '.runners[] | select(.labels[].name == "fc-build-linux") | {name,status,busy,labels:[.labels[].name]}'
|
||||||
@@ -64,6 +68,20 @@ gh run list --repo astoltz/FlowerCore.Shared.Pos \
|
|||||||
If the latest run is still queued after runner registration, rerun the workflow
|
If the latest run is still queued after runner registration, rerun the workflow
|
||||||
from GitHub Actions and verify it lands on an `rke2-linux-*` runner.
|
from GitHub Actions and verify it lands on an `rke2-linux-*` runner.
|
||||||
|
|
||||||
|
## Sprint 37 Cx-2 Gap Audit
|
||||||
|
|
||||||
|
The 2026-05-18 GitHub workflow scan found these remaining repos with
|
||||||
|
`runs-on: [self-hosted, linux, fc-build-linux]` but no K8s runner Deployment:
|
||||||
|
`FlowerCore.AiStation.Linux`, `FlowerCore.PHP`, `FlowerCore.PiManager`,
|
||||||
|
`FlowerCore.Shared.Barcodes`, `FlowerCore.Shared.Lookup`,
|
||||||
|
`FlowerCore.Shared.Nodes`, `FlowerCore.Shared.PrintClient`,
|
||||||
|
`FlowerCore.Shared.Relay`, `FlowerCore.Shared.ShowRunner`, and
|
||||||
|
`FlowerCore.Shared.Storage`.
|
||||||
|
|
||||||
|
Mixed/platform repos also have Linux workflow legs but need owner review before
|
||||||
|
adding Linux runner Deployments: `FlowerCore.Library.Mac`,
|
||||||
|
`FlowerCore.Signage.Agent.AppleTv`, and `FlowerCore.Signage.Player.Wpf`.
|
||||||
|
|
||||||
## Failure Notes
|
## Failure Notes
|
||||||
|
|
||||||
- `actions/setup-dotnet` permission error at `/usr/share/dotnet`: check that
|
- `actions/setup-dotnet` permission error at `/usr/share/dotnet`: check that
|
||||||
|
|||||||
@@ -16,6 +16,8 @@
|
|||||||
# DNS, Distribution, Scoreboard, SegmentDisplay, Signage.Contracts,
|
# DNS, Distribution, Scoreboard, SegmentDisplay, Signage.Contracts,
|
||||||
# SignalControl, Intranet.Web, Provisioning, Redis, MessageBoard, MenuBoard
|
# SignalControl, Intranet.Web, Provisioning, Redis, MessageBoard, MenuBoard
|
||||||
# (Sprint 32 final long-tail wave; two replicas each, emptyDir cache)
|
# (Sprint 32 final long-tail wave; two replicas each, emptyDir cache)
|
||||||
|
# FlowerCore.DeviceManagement, WorldBuilder (Sprint 37 Cx-2 runner gap
|
||||||
|
# closure; two replicas each, emptyDir cache)
|
||||||
#
|
#
|
||||||
# Non-root CI safety:
|
# Non-root CI safety:
|
||||||
# Runner pods run as uid 1001. HOME, DOTNET_INSTALL_DIR, DOTNET_CLI_HOME,
|
# Runner pods run as uid 1001. HOME, DOTNET_INSTALL_DIR, DOTNET_CLI_HOME,
|
||||||
@@ -3767,9 +3769,271 @@ spec:
|
|||||||
- name: tmp
|
- name: tmp
|
||||||
emptyDir: {}
|
emptyDir: {}
|
||||||
restartPolicy: Always
|
restartPolicy: Always
|
||||||
|
---
|
||||||
|
# Runner for FlowerCore.DeviceManagement. Added 2026-05-18 (Sprint 37 Cx-2)
|
||||||
|
# to close the Linux CI capacity gap for the DM service-tier workflows. Mirrors
|
||||||
|
# the Sprint 32 long-tail emptyDir pattern: two replicas, shared
|
||||||
|
# 1Password-backed ACCESS_TOKEN, and the common ServiceAccount.
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: github-runner-devicemgmt
|
||||||
|
namespace: github-runner
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: github-runner-devicemgmt
|
||||||
|
app.kubernetes.io/component: runner
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/created-by: argocd
|
||||||
|
flowercore.io/runner-repo: devicemgmt
|
||||||
|
flowercore.io/github-repo: FlowerCore.DeviceManagement
|
||||||
|
spec:
|
||||||
|
replicas: 2
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: github-runner-devicemgmt
|
||||||
|
strategy:
|
||||||
|
type: Recreate
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: github-runner-devicemgmt
|
||||||
|
app.kubernetes.io/component: runner
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
flowercore.io/created-by: argocd
|
||||||
|
flowercore.io/runner-repo: devicemgmt
|
||||||
|
flowercore.io/github-repo: FlowerCore.DeviceManagement
|
||||||
|
spec:
|
||||||
|
serviceAccountName: github-runner
|
||||||
|
securityContext:
|
||||||
|
runAsNonRoot: true
|
||||||
|
runAsUser: 1001
|
||||||
|
runAsGroup: 1001
|
||||||
|
fsGroup: 1001
|
||||||
|
initContainers:
|
||||||
|
- name: setup-runner-home
|
||||||
|
image: busybox:1.36
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- |
|
||||||
|
set -e
|
||||||
|
mkdir -p /home/runner/.dotnet /home/runner/.nuget/packages /home/runner/.nuget/NuGet
|
||||||
|
chown -R 1001:1001 /home/runner/.dotnet /home/runner/.nuget
|
||||||
|
chmod -R 755 /home/runner/.dotnet /home/runner/.nuget
|
||||||
|
securityContext:
|
||||||
|
runAsUser: 0
|
||||||
|
runAsNonRoot: false
|
||||||
|
volumeMounts:
|
||||||
|
- name: runner-home
|
||||||
|
mountPath: /home/runner
|
||||||
|
containers:
|
||||||
|
- name: runner
|
||||||
|
image: myoung34/github-runner:latest
|
||||||
|
imagePullPolicy: Always
|
||||||
|
env:
|
||||||
|
- name: REPO_URL
|
||||||
|
value: "https://github.com/astoltz/FlowerCore.DeviceManagement"
|
||||||
|
- name: RUNNER_NAME_PREFIX
|
||||||
|
value: "rke2-linux-devicemgmt"
|
||||||
|
- name: RUNNER_WORKDIR
|
||||||
|
value: "/tmp/runner/work"
|
||||||
|
- name: EPHEMERAL
|
||||||
|
value: "true"
|
||||||
|
- name: LABELS
|
||||||
|
value: "self-hosted,linux,fc-build-linux"
|
||||||
|
- name: HOME
|
||||||
|
value: "/home/runner"
|
||||||
|
- name: DOTNET_INSTALL_DIR
|
||||||
|
value: "/home/runner/.dotnet"
|
||||||
|
- name: DOTNET_CLI_TELEMETRY_OPTOUT
|
||||||
|
value: "1"
|
||||||
|
- name: DOTNET_NOLOGO
|
||||||
|
value: "1"
|
||||||
|
- name: DOTNET_GENERATE_ASPNET_CERTIFICATE
|
||||||
|
value: "false"
|
||||||
|
- name: DOTNET_CLI_HOME
|
||||||
|
value: "/home/runner"
|
||||||
|
- name: NUGET_PACKAGES
|
||||||
|
value: "/home/runner/.nuget/packages"
|
||||||
|
- name: XDG_CACHE_HOME
|
||||||
|
value: "/home/runner/.cache"
|
||||||
|
- name: RUNNER_TOOL_CACHE
|
||||||
|
value: "/home/runner/_tool"
|
||||||
|
- name: ACCESS_TOKEN
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: github-runner-token
|
||||||
|
key: credential
|
||||||
|
- name: RUN_AS_ROOT
|
||||||
|
value: "false"
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: "500m"
|
||||||
|
memory: "1Gi"
|
||||||
|
limits:
|
||||||
|
cpu: "2000m"
|
||||||
|
memory: "4Gi"
|
||||||
|
volumeMounts:
|
||||||
|
- name: runner-home
|
||||||
|
mountPath: /home/runner
|
||||||
|
- name: nuget-cache
|
||||||
|
mountPath: /home/runner/.nuget/packages
|
||||||
|
- name: tmp
|
||||||
|
mountPath: /tmp
|
||||||
|
livenessProbe:
|
||||||
|
exec:
|
||||||
|
command:
|
||||||
|
- /bin/sh
|
||||||
|
- -c
|
||||||
|
- "pgrep -f Runner.Listener > /dev/null"
|
||||||
|
initialDelaySeconds: 30
|
||||||
|
periodSeconds: 30
|
||||||
|
failureThreshold: 3
|
||||||
|
volumes:
|
||||||
|
- name: runner-home
|
||||||
|
emptyDir: {}
|
||||||
|
- name: nuget-cache
|
||||||
|
emptyDir:
|
||||||
|
sizeLimit: 2Gi
|
||||||
|
- name: tmp
|
||||||
|
emptyDir: {}
|
||||||
|
restartPolicy: Always
|
||||||
|
---
|
||||||
|
# Runner for FlowerCore.WorldBuilder. Added 2026-05-18 (Sprint 37 Cx-2)
|
||||||
|
# to unblock WorldBuilder Linux CI jobs after the runner fleet audit found no
|
||||||
|
# repo-scoped deployment for the GitHub repo. Mirrors the Sprint 32 long-tail
|
||||||
|
# emptyDir pattern: two replicas, shared 1Password-backed ACCESS_TOKEN, and
|
||||||
|
# the common ServiceAccount.
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: github-runner-worldbuilder
|
||||||
|
namespace: github-runner
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: github-runner-worldbuilder
|
||||||
|
app.kubernetes.io/component: runner
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
app.kubernetes.io/managed-by: argocd
|
||||||
|
flowercore.io/created-by: argocd
|
||||||
|
flowercore.io/runner-repo: worldbuilder
|
||||||
|
flowercore.io/github-repo: FlowerCore.WorldBuilder
|
||||||
|
spec:
|
||||||
|
replicas: 2
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: github-runner-worldbuilder
|
||||||
|
strategy:
|
||||||
|
type: Recreate
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: github-runner-worldbuilder
|
||||||
|
app.kubernetes.io/component: runner
|
||||||
|
app.kubernetes.io/part-of: flowercore
|
||||||
|
flowercore.io/created-by: argocd
|
||||||
|
flowercore.io/runner-repo: worldbuilder
|
||||||
|
flowercore.io/github-repo: FlowerCore.WorldBuilder
|
||||||
|
spec:
|
||||||
|
serviceAccountName: github-runner
|
||||||
|
securityContext:
|
||||||
|
runAsNonRoot: true
|
||||||
|
runAsUser: 1001
|
||||||
|
runAsGroup: 1001
|
||||||
|
fsGroup: 1001
|
||||||
|
initContainers:
|
||||||
|
- name: setup-runner-home
|
||||||
|
image: busybox:1.36
|
||||||
|
command:
|
||||||
|
- sh
|
||||||
|
- -c
|
||||||
|
- |
|
||||||
|
set -e
|
||||||
|
mkdir -p /home/runner/.dotnet /home/runner/.nuget/packages /home/runner/.nuget/NuGet
|
||||||
|
chown -R 1001:1001 /home/runner/.dotnet /home/runner/.nuget
|
||||||
|
chmod -R 755 /home/runner/.dotnet /home/runner/.nuget
|
||||||
|
securityContext:
|
||||||
|
runAsUser: 0
|
||||||
|
runAsNonRoot: false
|
||||||
|
volumeMounts:
|
||||||
|
- name: runner-home
|
||||||
|
mountPath: /home/runner
|
||||||
|
containers:
|
||||||
|
- name: runner
|
||||||
|
image: myoung34/github-runner:latest
|
||||||
|
imagePullPolicy: Always
|
||||||
|
env:
|
||||||
|
- name: REPO_URL
|
||||||
|
value: "https://github.com/astoltz/FlowerCore.WorldBuilder"
|
||||||
|
- name: RUNNER_NAME_PREFIX
|
||||||
|
value: "rke2-linux-worldbuilder"
|
||||||
|
- name: RUNNER_WORKDIR
|
||||||
|
value: "/tmp/runner/work"
|
||||||
|
- name: EPHEMERAL
|
||||||
|
value: "true"
|
||||||
|
- name: LABELS
|
||||||
|
value: "self-hosted,linux,fc-build-linux"
|
||||||
|
- name: HOME
|
||||||
|
value: "/home/runner"
|
||||||
|
- name: DOTNET_INSTALL_DIR
|
||||||
|
value: "/home/runner/.dotnet"
|
||||||
|
- name: DOTNET_CLI_TELEMETRY_OPTOUT
|
||||||
|
value: "1"
|
||||||
|
- name: DOTNET_NOLOGO
|
||||||
|
value: "1"
|
||||||
|
- name: DOTNET_GENERATE_ASPNET_CERTIFICATE
|
||||||
|
value: "false"
|
||||||
|
- name: DOTNET_CLI_HOME
|
||||||
|
value: "/home/runner"
|
||||||
|
- name: NUGET_PACKAGES
|
||||||
|
value: "/home/runner/.nuget/packages"
|
||||||
|
- name: XDG_CACHE_HOME
|
||||||
|
value: "/home/runner/.cache"
|
||||||
|
- name: RUNNER_TOOL_CACHE
|
||||||
|
value: "/home/runner/_tool"
|
||||||
|
- name: ACCESS_TOKEN
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: github-runner-token
|
||||||
|
key: credential
|
||||||
|
- name: RUN_AS_ROOT
|
||||||
|
value: "false"
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: "500m"
|
||||||
|
memory: "1Gi"
|
||||||
|
limits:
|
||||||
|
cpu: "2000m"
|
||||||
|
memory: "4Gi"
|
||||||
|
volumeMounts:
|
||||||
|
- name: runner-home
|
||||||
|
mountPath: /home/runner
|
||||||
|
- name: nuget-cache
|
||||||
|
mountPath: /home/runner/.nuget/packages
|
||||||
|
- name: tmp
|
||||||
|
mountPath: /tmp
|
||||||
|
livenessProbe:
|
||||||
|
exec:
|
||||||
|
command:
|
||||||
|
- /bin/sh
|
||||||
|
- -c
|
||||||
|
- "pgrep -f Runner.Listener > /dev/null"
|
||||||
|
initialDelaySeconds: 30
|
||||||
|
periodSeconds: 30
|
||||||
|
failureThreshold: 3
|
||||||
|
volumes:
|
||||||
|
- name: runner-home
|
||||||
|
emptyDir: {}
|
||||||
|
- name: nuget-cache
|
||||||
|
emptyDir:
|
||||||
|
sizeLimit: 2Gi
|
||||||
|
- name: tmp
|
||||||
|
emptyDir: {}
|
||||||
|
restartPolicy: Always
|
||||||
|
|
||||||
# Long-tail runner pattern:
|
# Long-tail runner pattern:
|
||||||
#
|
#
|
||||||
# Sprint 32 added the final 16 long-tail repo-scoped Deployments above. Keep
|
# Sprint 32 added the final 16 long-tail repo-scoped Deployments, and Sprint 37
|
||||||
# Common as the only PVC-backed runner at replicas: 1. Any future multi-replica
|
# added the DM + WorldBuilder runner gap closures above. Keep Common as the
|
||||||
# runner must use per-pod emptyDir caches, not a shared ReadWriteOnce PVC.
|
# only PVC-backed runner at replicas: 1. Any future multi-replica runner must
|
||||||
|
# use per-pod emptyDir caches, not a shared ReadWriteOnce PVC.
|
||||||
|
|||||||
@@ -729,7 +729,7 @@ data:
|
|||||||
expr: |
|
expr: |
|
||||||
kube_deployment_status_replicas_ready{
|
kube_deployment_status_replicas_ready{
|
||||||
namespace="github-runner",
|
namespace="github-runner",
|
||||||
deployment=~"github-runner(|-(sharedpos|puppet|signage|dms|telephony|print-web|chat|mysql|kiosk-linux))"
|
deployment=~"github-runner(|-.+)"
|
||||||
} == 0
|
} == 0
|
||||||
for: 5m
|
for: 5m
|
||||||
labels:
|
labels:
|
||||||
@@ -1273,55 +1273,24 @@ metadata:
|
|||||||
data:
|
data:
|
||||||
notify.py: |
|
notify.py: |
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""HTTP->IRC alert relay with thermal-printer DIGEST forwarding.
|
"""HTTP->IRC alert relay with thermal printer forwarding for Grafana webhooks.
|
||||||
|
Listens on :9119, posts to #alerts on UnrealIRCd via raw IRC protocol.
|
||||||
Listens on :9119, posts to #alerts on UnrealIRCd, forwards to Print.Web
|
Alerts tagged alert_channel=thermal_print also POST to Print.Web /api/print/alert.
|
||||||
/api/print/alert. Thermal printing is BATCHED into hourly digests by
|
|
||||||
default so the printer no longer spam-fires per Grafana webhook.
|
|
||||||
|
|
||||||
Routing (per Grafana webhook alert):
|
|
||||||
- IRC: always per-event (operator likes the stream)
|
|
||||||
- Thermal printer:
|
|
||||||
* severity in {critical,disaster,page} OR
|
|
||||||
label alert_channel=thermal_print_immediate -> print NOW
|
|
||||||
* label alert_channel=thermal_print -> enqueue into hourly digest
|
|
||||||
* everything else -> IRC only
|
|
||||||
- RESOLVED webhooks remove the alert from the digest buffer
|
|
||||||
|
|
||||||
Env vars (defaults preserve old behavior on first deploy):
|
|
||||||
THERMAL_PRINT_ENABLED default "true" - master kill switch
|
|
||||||
BATCH_INTERVAL_MIN default "60" - minutes between digest prints
|
|
||||||
BATCH_MAX_PENDING default "50" - force-flush threshold
|
|
||||||
|
|
||||||
HTTP surface:
|
|
||||||
POST / - Grafana webhook entry
|
|
||||||
POST /flush - manual digest flush (idempotent)
|
|
||||||
GET / - status + config + buffer depth + stats
|
|
||||||
"""
|
"""
|
||||||
import json, os, socket, sys, threading, time
|
import json, socket, sys, time
|
||||||
from collections import defaultdict
|
|
||||||
from datetime import datetime, timezone
|
|
||||||
from http.server import HTTPServer, BaseHTTPRequestHandler
|
from http.server import HTTPServer, BaseHTTPRequestHandler
|
||||||
from urllib.request import Request, urlopen
|
from urllib.request import Request, urlopen
|
||||||
|
from urllib.error import URLError
|
||||||
|
|
||||||
THERMAL_PRINT_ENABLED = os.environ.get("THERMAL_PRINT_ENABLED", "true").lower() == "true"
|
IRC_HOST = "unrealircd.irc.svc" # short name: CoreDNS ndots:5 + iamworkin.lan template hijacks full .cluster.local (see memory)
|
||||||
BATCH_INTERVAL_MIN = int(os.environ.get("BATCH_INTERVAL_MIN", "60"))
|
IRC_PORT = 6667
|
||||||
BATCH_MAX_PENDING = int(os.environ.get("BATCH_MAX_PENDING", "50"))
|
IRC_NICK = "grafana-bot"
|
||||||
|
IRC_CHANNEL = "#alerts"
|
||||||
IRC_HOST = os.environ.get("IRC_HOST", "unrealircd.irc.svc")
|
PRINT_WEB_URL = "http://10.0.57.16:5200/api/print/alert"
|
||||||
IRC_PORT = int(os.environ.get("IRC_PORT", "6667"))
|
PRINT_ENABLED = True
|
||||||
IRC_NICK = os.environ.get("IRC_NICK", "grafana-bot")
|
|
||||||
IRC_CHANNEL = os.environ.get("IRC_CHANNEL", "#alerts")
|
|
||||||
PRINT_WEB_URL = os.environ.get("PRINT_WEB_URL", "http://10.0.57.16:5200/api/print/alert")
|
|
||||||
|
|
||||||
_buffer_lock = threading.Lock()
|
|
||||||
_buffer = {} # fingerprint -> {"alert": dict, "first_seen": float, "last_seen": float}
|
|
||||||
_last_flush_time = time.time()
|
|
||||||
_stats = {"webhooks_received": 0, "irc_sent": 0, "print_immediate": 0,
|
|
||||||
"digest_flushed": 0, "buffer_dedup": 0, "buffer_added": 0,
|
|
||||||
"buffer_resolved": 0, "started_at": time.time()}
|
|
||||||
|
|
||||||
def send_irc(message):
|
def send_irc(message):
|
||||||
|
"""Connect, handle PING, join, send, quit."""
|
||||||
try:
|
try:
|
||||||
sock = socket.create_connection((IRC_HOST, IRC_PORT), timeout=15)
|
sock = socket.create_connection((IRC_HOST, IRC_PORT), timeout=15)
|
||||||
sock.sendall(f"NICK {IRC_NICK}\r\n".encode())
|
sock.sendall(f"NICK {IRC_NICK}\r\n".encode())
|
||||||
@@ -1354,137 +1323,52 @@ data:
|
|||||||
time.sleep(0.5)
|
time.sleep(0.5)
|
||||||
sock.sendall(b"QUIT :alert delivered\r\n")
|
sock.sendall(b"QUIT :alert delivered\r\n")
|
||||||
sock.close()
|
sock.close()
|
||||||
_stats["irc_sent"] += 1
|
|
||||||
return True
|
return True
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[irc-notify] IRC send failed: {e}", file=sys.stderr)
|
print(f"[irc-notify] IRC send failed: {e}", file=sys.stderr)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def post_thermal(payload, kind):
|
def send_thermal_print(alert):
|
||||||
if not THERMAL_PRINT_ENABLED:
|
if not PRINT_ENABLED: return
|
||||||
print(f"[irc-notify] thermal disabled; skip {kind} ({payload.get('title','?')[:40]})", file=sys.stderr)
|
labels = alert.get("labels", {})
|
||||||
return False
|
annotations = alert.get("annotations", {})
|
||||||
|
status = alert.get("status", "firing").upper()
|
||||||
|
summary = annotations.get("summary", "")
|
||||||
|
description = annotations.get("description", "")
|
||||||
|
runbook = annotations.get("runbook", "")
|
||||||
|
# Build a useful message: summary + description + runbook steps
|
||||||
|
parts = []
|
||||||
|
if summary: parts.append(summary)
|
||||||
|
if description and description != summary: parts.append(description)
|
||||||
|
if runbook: parts.append("STEPS: " + runbook)
|
||||||
|
message = " | ".join(parts) if parts else labels.get("alertname", "Unknown alert")
|
||||||
|
payload = {
|
||||||
|
"title": labels.get("alertname", "Unknown"),
|
||||||
|
"severity": labels.get("severity", "warning").capitalize(),
|
||||||
|
"host": labels.get("instance", labels.get("host", "unknown")),
|
||||||
|
"message": message,
|
||||||
|
"eventId": alert.get("fingerprint", ""),
|
||||||
|
"source": "Grafana",
|
||||||
|
"status": "RESOLVED" if status == "RESOLVED" else "PROBLEM",
|
||||||
|
"acknowledged": False
|
||||||
|
}
|
||||||
try:
|
try:
|
||||||
req = Request(PRINT_WEB_URL, data=json.dumps(payload).encode("utf-8"),
|
req = Request(PRINT_WEB_URL, data=json.dumps(payload).encode("utf-8"),
|
||||||
headers={"Content-Type": "application/json"}, method="POST")
|
headers={"Content-Type": "application/json"}, method="POST")
|
||||||
resp = urlopen(req, timeout=10)
|
resp = urlopen(req, timeout=10)
|
||||||
if kind == "immediate": _stats["print_immediate"] += 1
|
print(f"[irc-notify] Thermal print sent: {resp.read().decode()}", file=sys.stderr)
|
||||||
print(f"[irc-notify] thermal {kind} sent: {payload.get('title','?')[:50]}", file=sys.stderr)
|
|
||||||
return True
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[irc-notify] thermal {kind} failed: {e}", file=sys.stderr)
|
print(f"[irc-notify] Thermal print failed: {e}", file=sys.stderr)
|
||||||
return False
|
|
||||||
|
|
||||||
def fingerprint_of(alert):
|
def should_print(alert):
|
||||||
fp = alert.get("fingerprint", "")
|
|
||||||
if fp: return fp
|
|
||||||
labels = alert.get("labels", {})
|
labels = alert.get("labels", {})
|
||||||
target = labels.get("pod") or labels.get("instance") or labels.get("deployment") or labels.get("statefulset") or labels.get("namespace") or ""
|
if labels.get("alert_channel") == "thermal_print": return True
|
||||||
return f"{labels.get('alertname','?')}/{labels.get('namespace','')}/{target}"
|
if labels.get("severity", "").lower() in ("critical", "disaster"): return True
|
||||||
|
if alert.get("status", "").upper() == "RESOLVED": return False
|
||||||
def is_critical(alert):
|
return False
|
||||||
return alert.get("labels", {}).get("severity", "").lower() in ("critical", "disaster", "page")
|
|
||||||
|
|
||||||
def is_immediate_label(alert):
    """Return True when the alert's `alert_channel` label is exactly
    "thermal_print_immediate"; a missing `labels` mapping or label yields False.
    """
    labels = alert.get("labels", {})
    channel = labels.get("alert_channel")
    return channel == "thermal_print_immediate"
|
|
||||||
|
|
||||||
def is_batched_label(alert):
    """Return True when the alert's `alert_channel` label is exactly
    "thermal_print"; a missing `labels` mapping or label yields False.
    """
    labels = alert.get("labels", {})
    channel = labels.get("alert_channel")
    return channel == "thermal_print"
|
|
||||||
|
|
||||||
def add_to_digest(alert):
    """Record `alert` in the digest buffer, keyed by `fingerprint_of(alert)`.

    Returns True only when a fingerprint that was not yet buffered is stored.
    Returns False when thermal printing is disabled, when the alert is
    resolved (its buffered entry, if any, is dropped), or when the
    fingerprint is already buffered (the entry is refreshed in place).
    """
    if not THERMAL_PRINT_ENABLED:
        return False

    key = fingerprint_of(alert)
    # Status defaults to "firing" when the webhook omits it.
    is_resolved = alert.get("status", "firing").lower() == "resolved"

    with _buffer_lock:
        already_buffered = key in _buffer

        if is_resolved:
            # A resolution never grows the buffer; it only evicts.
            if already_buffered:
                del _buffer[key]
                _stats["buffer_resolved"] += 1
            return False

        if already_buffered:
            # Repeat webhook for a known fingerprint: keep first_seen,
            # refresh last_seen and the stored alert body.
            entry = _buffer[key]
            entry["last_seen"] = time.time()
            entry["alert"] = alert
            _stats["buffer_dedup"] += 1
            return False

        _buffer[key] = {
            "alert": alert,
            "first_seen": time.time(),
            "last_seen": time.time(),
        }
        _stats["buffer_added"] += 1
        return True
|
|
||||||
|
|
||||||
def build_digest_payload():
    """Render the currently buffered alerts as a single print payload.

    Returns None when the buffer is empty. Otherwise returns a dict whose
    "message" holds one summary line per alertname (sorted by name), listing
    the severities seen, the count, and up to five targets per group.
    The buffer itself is only read (under `_buffer_lock`), never modified.
    """
    with _buffer_lock:
        entries = list(_buffer.values())
    if not entries:
        return None

    def labels_of(entry):
        return entry["alert"].get("labels", {})

    def target_of(labels):
        # First non-empty label wins, in this priority order; "?" if none.
        for label_key in ("pod", "instance", "deployment", "statefulset", "namespace"):
            value = labels.get(label_key)
            if value:
                return value
        return "?"

    grouped = defaultdict(list)
    for entry in entries:
        grouped[labels_of(entry).get("alertname", "Unknown")].append(entry)

    summary_rows = []
    for alertname in sorted(grouped):
        members = grouped[alertname]
        shown = [target_of(labels_of(member)) for member in members[:5]]
        # Only five targets are spelled out; the rest is shown as a count.
        overflow = f" (+{len(members)-5})" if len(members) > 5 else ""
        severities = sorted({labels_of(member).get("severity", "warning") for member in members})
        summary_rows.append(
            f"[{'/'.join(severities)}] {alertname} x{len(members)}: {', '.join(shown)}{overflow}"
        )

    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    title = f"Alert digest: {len(entries)} firing"

    report = [f"=== {title} ===", f"as of {stamp}", ""]
    report.extend(summary_rows)
    report.append("")
    report.append("Stream: #alerts (IRC) | Triage: grafana-noc1.iamworkin.lan")
    report.append("Force-flush: POST irc-notify.monitoring.svc:9119/flush")

    return {
        "title": title,
        "severity": "Warning",
        "host": "monitoring",
        "message": "\n".join(report),
        "eventId": f"digest-{int(time.time())}",
        "source": "Grafana digest",
        "status": "PROBLEM",
        "acknowledged": False,
    }
|
|
||||||
|
|
||||||
def flush_digest():
    """Send the buffered alerts as one digest and drop them from the buffer.

    Returns True when the digest was posted successfully, False when the
    buffer was empty or the post failed.

    Only entries that were buffered *before* the payload was built are
    removed afterwards. The previous implementation called `_buffer.clear()`
    after the HTTP POST, which silently discarded any alert that arrived
    while the POST was in flight, so it was never printed.

    As before, flushed entries are dropped even when the POST fails
    (best-effort delivery, no retry loop against a dead printer).
    """
    # Snapshot first: anything added after this point stays buffered for the
    # next digest. At worst it is printed twice, never lost.
    with _buffer_lock:
        flushed = dict(_buffer)

    payload = build_digest_payload()
    if payload is None:
        print("[irc-notify] flush: buffer empty, no digest sent", file=sys.stderr)
        return False

    sent = post_thermal(payload, "digest")

    with _buffer_lock:
        for fp, entry in flushed.items():
            # Identity check: an alert that resolved and re-fired during the
            # POST has a fresh entry object and must survive this flush.
            if _buffer.get(fp) is entry:
                del _buffer[fp]
        if sent:
            _stats["digest_flushed"] += 1
    return sent
|
|
||||||
|
|
||||||
def digest_loop():
    """Background loop that decides when the digest buffer is flushed.

    Every 15 seconds it flushes if either `BATCH_INTERVAL_MIN` minutes have
    passed since `_last_flush_time`, or the buffer holds at least
    `BATCH_MAX_PENDING` entries. Any exception is logged to stderr and the
    loop backs off for 60 seconds. Never returns.
    """
    global _last_flush_time
    while True:
        try:
            tick_started = time.time()
            interval_due = (tick_started - _last_flush_time) >= BATCH_INTERVAL_MIN * 60

            # The interval check takes precedence over the size check.
            if interval_due:
                notice = f"[irc-notify] digest tick: interval reached ({BATCH_INTERVAL_MIN}m); buffer={len(_buffer)}"
            elif len(_buffer) >= BATCH_MAX_PENDING:
                notice = f"[irc-notify] digest tick: buffer full ({len(_buffer)}); force flush"
            else:
                notice = None

            if notice is not None:
                print(notice, file=sys.stderr)
                flush_digest()
                # Stamped with the tick's start time, not the post-flush time.
                _last_flush_time = tick_started

            time.sleep(15)
        except Exception as e:
            print(f"[irc-notify] digest loop error: {e}", file=sys.stderr)
            time.sleep(60)
|
|
||||||
|
|
||||||
class Handler(BaseHTTPRequestHandler):
|
class Handler(BaseHTTPRequestHandler):
|
||||||
def do_POST(self):
|
def do_POST(self):
|
||||||
if self.path == "/flush":
|
|
||||||
ok = flush_digest()
|
|
||||||
self.send_response(200); self.send_header("Content-Type", "application/json"); self.end_headers()
|
|
||||||
self.wfile.write(json.dumps({"flushed": ok, "buffer_after": len(_buffer)}).encode())
|
|
||||||
return
|
|
||||||
_stats["webhooks_received"] += 1
|
|
||||||
length = int(self.headers.get("Content-Length", 0))
|
length = int(self.headers.get("Content-Length", 0))
|
||||||
body = json.loads(self.rfile.read(length)) if length else {}
|
body = json.loads(self.rfile.read(length)) if length else {}
|
||||||
for alert in body.get("alerts", []):
|
for alert in body.get("alerts", []):
|
||||||
@@ -1499,56 +1383,22 @@ data:
|
|||||||
msg = f"{icon}{sev_tag} {name}: {summary}"
|
msg = f"{icon}{sev_tag} {name}: {summary}"
|
||||||
if desc: msg += f"\n {desc}"
|
if desc: msg += f"\n {desc}"
|
||||||
send_irc(msg)
|
send_irc(msg)
|
||||||
# Thermal routing — EVERYTHING (including criticals) goes into
|
if should_print(alert): send_thermal_print(alert)
|
||||||
# the hourly digest. Only the explicit `alert_channel=thermal_print_immediate`
|
self.send_response(200)
|
||||||
# label bypasses, and even that flushes-the-current-digest rather
|
self.send_header("Content-Type", "application/json")
|
||||||
# than printing a standalone job, so the same fingerprint can't
|
self.end_headers()
|
||||||
# spam the printer per webhook cycle.
|
|
||||||
if status == "RESOLVED":
|
|
||||||
add_to_digest(alert) # removes from buffer
|
|
||||||
continue
|
|
||||||
if is_immediate_label(alert):
|
|
||||||
# Explicit opt-in for "paper this NOW" — first arrival of a
|
|
||||||
# new fingerprint triggers an immediate digest flush; repeat
|
|
||||||
# webhooks for the same fingerprint dedupe in the buffer
|
|
||||||
# until the next interval or until the alert resolves.
|
|
||||||
new_in_buffer = add_to_digest(alert)
|
|
||||||
if new_in_buffer:
|
|
||||||
global _last_flush_time
|
|
||||||
flush_digest()
|
|
||||||
_last_flush_time = time.time()
|
|
||||||
elif is_critical(alert) or is_batched_label(alert):
|
|
||||||
add_to_digest(alert)
|
|
||||||
# else: IRC-only (warnings without thermal_print label)
|
|
||||||
self.send_response(200); self.send_header("Content-Type", "application/json"); self.end_headers()
|
|
||||||
self.wfile.write(b'{"status":"ok"}')
|
self.wfile.write(b'{"status":"ok"}')
|
||||||
|
|
||||||
def do_GET(self):
|
def do_GET(self):
|
||||||
self.send_response(200); self.send_header("Content-Type", "application/json"); self.end_headers()
|
self.send_response(200)
|
||||||
with _buffer_lock:
|
self.send_header("Content-Type", "application/json")
|
||||||
alertnames = sorted({it["alert"].get("labels", {}).get("alertname", "?") for it in _buffer.values()})
|
self.end_headers()
|
||||||
depth = len(_buffer)
|
self.wfile.write(json.dumps({"service":"irc-notify","thermal_print":PRINT_ENABLED}).encode())
|
||||||
info = {
|
|
||||||
"service": "irc-notify",
|
|
||||||
"config": {"thermal_print_enabled": THERMAL_PRINT_ENABLED,
|
|
||||||
"batch_interval_min": BATCH_INTERVAL_MIN,
|
|
||||||
"batch_max_pending": BATCH_MAX_PENDING,
|
|
||||||
"irc_target": f"{IRC_HOST}:{IRC_PORT} {IRC_CHANNEL}",
|
|
||||||
"print_web_url": PRINT_WEB_URL},
|
|
||||||
"buffer": {"depth": depth, "alertnames": alertnames,
|
|
||||||
"seconds_since_last_flush": int(time.time() - _last_flush_time),
|
|
||||||
"seconds_until_next_flush": max(0, int(BATCH_INTERVAL_MIN*60 - (time.time() - _last_flush_time)))},
|
|
||||||
"stats": _stats,
|
|
||||||
}
|
|
||||||
self.wfile.write(json.dumps(info, indent=2).encode())
|
|
||||||
|
|
||||||
def log_message(self, format, *args):
|
def log_message(self, format, *args):
|
||||||
print(f"[irc-notify] {args[0]}", file=sys.stderr)
|
print(f"[irc-notify] {args[0]}", file=sys.stderr)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
threading.Thread(target=digest_loop, daemon=True).start()
|
|
||||||
server = HTTPServer(("0.0.0.0", 9119), Handler)
|
server = HTTPServer(("0.0.0.0", 9119), Handler)
|
||||||
print(f"[irc-notify] :9119 -> IRC {IRC_HOST}:{IRC_PORT} {IRC_CHANNEL} | thermal={'ON' if THERMAL_PRINT_ENABLED else 'OFF'} | digest={BATCH_INTERVAL_MIN}m max={BATCH_MAX_PENDING}", file=sys.stderr)
|
print(f"IRC alert relay :9119 -> {IRC_HOST}:{IRC_PORT} {IRC_CHANNEL} (thermal: {PRINT_ENABLED})")
|
||||||
server.serve_forever()
|
server.serve_forever()
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
@@ -3659,7 +3509,7 @@ data:
|
|||||||
- refId: A
|
- refId: A
|
||||||
relativeTimeRange: {from: 300, to: 0}
|
relativeTimeRange: {from: 300, to: 0}
|
||||||
datasourceUid: prometheus
|
datasourceUid: prometheus
|
||||||
model: {expr: 'kube_deployment_status_replicas_ready{namespace="github-runner",deployment=~"github-runner(|-(sharedpos|puppet|signage|dms|telephony|print-web|chat|mysql|kiosk-linux))"} == 0', instant: true, refId: A}
|
model: {expr: 'kube_deployment_status_replicas_ready{namespace="github-runner",deployment=~"github-runner(|-.+)"} == 0', instant: true, refId: A}
|
||||||
- refId: B
|
- refId: B
|
||||||
relativeTimeRange: {from: 300, to: 0}
|
relativeTimeRange: {from: 300, to: 0}
|
||||||
datasourceUid: __expr__
|
datasourceUid: __expr__
|
||||||
|
|||||||
@@ -67,6 +67,8 @@ public sealed class FleetManifestLintTests
|
|||||||
["github-runner-chat"] = "https://github.com/astoltz/FlowerCore.Chat",
|
["github-runner-chat"] = "https://github.com/astoltz/FlowerCore.Chat",
|
||||||
["github-runner-mysql"] = "https://github.com/astoltz/FlowerCore.MySQL",
|
["github-runner-mysql"] = "https://github.com/astoltz/FlowerCore.MySQL",
|
||||||
["github-runner-kiosk-linux"] = "https://github.com/astoltz/FlowerCore.Kiosk.Linux",
|
["github-runner-kiosk-linux"] = "https://github.com/astoltz/FlowerCore.Kiosk.Linux",
|
||||||
|
["github-runner-devicemgmt"] = "https://github.com/astoltz/FlowerCore.DeviceManagement",
|
||||||
|
["github-runner-worldbuilder"] = "https://github.com/astoltz/FlowerCore.WorldBuilder",
|
||||||
};
|
};
|
||||||
|
|
||||||
private static readonly HashSet<string> ScaledLinuxRunnerDeployments = new(StringComparer.Ordinal)
|
private static readonly HashSet<string> ScaledLinuxRunnerDeployments = new(StringComparer.Ordinal)
|
||||||
@@ -80,6 +82,8 @@ public sealed class FleetManifestLintTests
|
|||||||
"github-runner-chat",
|
"github-runner-chat",
|
||||||
"github-runner-mysql",
|
"github-runner-mysql",
|
||||||
"github-runner-kiosk-linux",
|
"github-runner-kiosk-linux",
|
||||||
|
"github-runner-devicemgmt",
|
||||||
|
"github-runner-worldbuilder",
|
||||||
};
|
};
|
||||||
|
|
||||||
private static readonly IReadOnlyDictionary<string, string> WritableRunnerEnv = new Dictionary<string, string>(StringComparer.Ordinal)
|
private static readonly IReadOnlyDictionary<string, string> WritableRunnerEnv = new Dictionary<string, string>(StringComparer.Ordinal)
|
||||||
@@ -234,7 +238,7 @@ public sealed class FleetManifestLintTests
|
|||||||
{
|
{
|
||||||
deployments.Should().ContainKey(expectedRunner.Key);
|
deployments.Should().ContainKey(expectedRunner.Key);
|
||||||
|
|
||||||
var container = deployments[expectedRunner.Key].ContainerMappings().Should().ContainSingle().Subject;
|
var container = RunnerContainer(deployments[expectedRunner.Key]);
|
||||||
EnvValue(container, "REPO_URL").Should().Be(expectedRunner.Value);
|
EnvValue(container, "REPO_URL").Should().Be(expectedRunner.Value);
|
||||||
EnvValue(container, "EPHEMERAL").Should().Be("true");
|
EnvValue(container, "EPHEMERAL").Should().Be("true");
|
||||||
EnvValue(container, "LABELS").Should().Be("self-hosted,linux,fc-build-linux");
|
EnvValue(container, "LABELS").Should().Be("self-hosted,linux,fc-build-linux");
|
||||||
@@ -250,7 +254,7 @@ public sealed class FleetManifestLintTests
|
|||||||
{
|
{
|
||||||
foreach (var deployment in GitHubRunnerDeployments().Values)
|
foreach (var deployment in GitHubRunnerDeployments().Values)
|
||||||
{
|
{
|
||||||
var container = deployment.ContainerMappings().Should().ContainSingle().Subject;
|
var container = RunnerContainer(deployment);
|
||||||
|
|
||||||
foreach (var expectedEnv in WritableRunnerEnv)
|
foreach (var expectedEnv in WritableRunnerEnv)
|
||||||
{
|
{
|
||||||
@@ -311,7 +315,7 @@ public sealed class FleetManifestLintTests
|
|||||||
monitoring.Should().Contain("MacMiniRunnerOffline");
|
monitoring.Should().Contain("MacMiniRunnerOffline");
|
||||||
monitoring.Should().Contain("LinuxRunnerOffline");
|
monitoring.Should().Contain("LinuxRunnerOffline");
|
||||||
monitoring.Should().Contain("kube_deployment_status_replicas_ready");
|
monitoring.Should().Contain("kube_deployment_status_replicas_ready");
|
||||||
monitoring.Should().Contain("github-runner(|-(sharedpos|puppet|signage|dms|telephony|print-web|chat|mysql|kiosk-linux))");
|
monitoring.Should().Contain("github-runner(|-.+)");
|
||||||
monitoring.Should().Contain("folder: CI Alerts");
|
monitoring.Should().Contain("folder: CI Alerts");
|
||||||
monitoring.Should().Contain("uid: linux-runner-offline");
|
monitoring.Should().Contain("uid: linux-runner-offline");
|
||||||
monitoring.Should().Contain("alert_channel: irc");
|
monitoring.Should().Contain("alert_channel: irc");
|
||||||
@@ -432,7 +436,6 @@ public sealed class FleetManifestLintTests
|
|||||||
"1password-item.yaml",
|
"1password-item.yaml",
|
||||||
"argocd-application.yaml",
|
"argocd-application.yaml",
|
||||||
"certificate-web.yaml",
|
"certificate-web.yaml",
|
||||||
"clusterissuer-step-ca-agent.yaml",
|
|
||||||
"clusterrole-operator.yaml",
|
"clusterrole-operator.yaml",
|
||||||
"clusterrolebinding-operator.yaml",
|
"clusterrolebinding-operator.yaml",
|
||||||
"deployment-operator.yaml",
|
"deployment-operator.yaml",
|
||||||
@@ -517,53 +520,6 @@ public sealed class FleetManifestLintTests
|
|||||||
.ContainSingle("devices.iamworkin.lan");
|
.ContainSingle("devices.iamworkin.lan");
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
/// The <c>step-ca-agent</c> StepClusterIssuer must pin the expected API version,
/// CA URL, a non-empty CA bundle, and the provisioner name and key id.
/// </summary>
[Fact]
public void FcDeviceManagement_StepCaAgentIssuerMustTargetNocProvisioner()
{
    var stepIssuer = FcDeviceManagementDocuments()
        .Single(doc => doc.Kind == "StepClusterIssuer" && doc.Name == "step-ca-agent");

    var apiVersion = stepIssuer.Scalar("apiVersion");
    apiVersion.Should().Be("certmanager.step.sm/v1beta1");

    var caUrl = stepIssuer.Scalar("spec", "url");
    caUrl.Should().Be("https://10.0.56.10:9443");

    var caBundle = stepIssuer.Scalar("spec", "caBundle");
    caBundle.Should().NotBeNullOrWhiteSpace();

    var provisionerName = stepIssuer.Scalar("spec", "provisioner", "name");
    provisionerName.Should().Be("step-ca-agent");

    var provisionerKeyId = stepIssuer.Scalar("spec", "provisioner", "kid");
    provisionerKeyId.Should().Be("RF3A9welUYVOWBX8tr19aWyA2kQlxoGZN1dRwTElUEM");
}
|
|
||||||
|
|
||||||
/// <summary>
/// The <c>step-ca-agent</c> issuer must point at the provisioner password
/// secret by reference, and its manifest file must not contain inline
/// secret material.
/// </summary>
[Fact]
public void FcDeviceManagement_StepCaAgentIssuerMustReferencePasswordSecretOnly()
{
    var stepIssuer = FcDeviceManagementDocuments()
        .Single(doc => doc.Kind == "StepClusterIssuer" && doc.Name == "step-ca-agent");

    var secretName = stepIssuer.Scalar("spec", "provisioner", "passwordRef", "name");
    secretName.Should().Be("step-ca-agent-provisioner-password");

    var secretNamespace = stepIssuer.Scalar("spec", "provisioner", "passwordRef", "namespace");
    secretNamespace.Should().Be("cert-manager");

    var secretKey = stepIssuer.Scalar("spec", "provisioner", "passwordRef", "key");
    secretKey.Should().Be("password");

    var manifestPath = Path.Combine(Inventory.BluejayRoot, "apps", "fc-devicemgmt", "clusterissuer-step-ca-agent.yaml");
    var issuerText = File.ReadAllText(manifestPath);

    // Raw-text markers that would indicate inline secret material.
    foreach (var forbidden in new[] { "stringData:", "password:", "privateKey" })
    {
        issuerText.Should().NotContain(forbidden);
    }
}
|
|
||||||
|
|
||||||
/// <summary>
/// The <c>step-ca-agent</c> issuer must carry the managed-by and tenant labels
/// plus the provisioner-source and secret-source annotations.
/// </summary>
[Fact]
public void FcDeviceManagement_StepCaAgentIssuerMustCarryTraceabilityMetadata()
{
    var stepIssuer = FcDeviceManagementDocuments()
        .Single(doc => doc.Kind == "StepClusterIssuer" && doc.Name == "step-ca-agent");

    var managedBy = stepIssuer.Scalar("metadata", "labels", "app.kubernetes.io/managed-by");
    managedBy.Should().Be("argocd");

    var tenantId = stepIssuer.Scalar("metadata", "labels", "flowercore.io/tenant-id");
    tenantId.Should().Be("system");

    var provisionerSource = stepIssuer.Scalar("metadata", "annotations", "flowercore.io/provisioner-source");
    provisionerSource.Should().Be("profile::pki::stepca");

    var secretSource = stepIssuer.Scalar("metadata", "annotations", "flowercore.io/secret-source");
    secretSource.Should().Be("cert-manager/step-ca-agent-provisioner-password");
}
|
|
||||||
|
|
||||||
[Fact]
|
[Fact]
|
||||||
public void FcDeviceManagement_OperatorRbacMustCoverDevicesAndOwnerLookup()
|
public void FcDeviceManagement_OperatorRbacMustCoverDevicesAndOwnerLookup()
|
||||||
{
|
{
|
||||||
@@ -689,6 +645,15 @@ public sealed class FleetManifestLintTests
|
|||||||
return EnvMapping(container, name) is { } env ? ManifestNodeExtensions.Scalar(env, "value") : null;
|
return EnvMapping(container, name) is { } env ? ManifestNodeExtensions.Scalar(env, "value") : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Returns the container named <c>runner</c> from the deployment, asserting
/// that exactly one container carries that name (ordinal comparison).
/// </summary>
private static YamlMappingNode RunnerContainer(ManifestDocument deployment)
{
    var runnerContainers = deployment
        .ContainerMappings()
        .Where(candidate => string.Equals(
            ManifestNodeExtensions.Scalar(candidate, "name"),
            "runner",
            StringComparison.Ordinal));

    var because = $"{deployment.Name} must keep exactly one main runner container";

    return runnerContainers.Should().ContainSingle(because).Subject;
}
|
||||||
|
|
||||||
private static string? EnvSecretName(YamlMappingNode container, string name)
|
private static string? EnvSecretName(YamlMappingNode container, string name)
|
||||||
{
|
{
|
||||||
return EnvMapping(container, name) is { } env
|
return EnvMapping(container, name) is { } env
|
||||||
|
|||||||
Reference in New Issue
Block a user