Compare commits
9 Commits
sprint37/c
...
sprint42/c
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6e581d2879 | ||
| ea73f00461 | |||
|
|
25ace30a03 | ||
|
|
ca574c2280 | ||
|
|
09387f90e1 | ||
|
|
e641ceab48 | ||
|
|
c263426ea5 | ||
|
|
bacac067cf | ||
| 914fed08d8 |
@@ -1,27 +0,0 @@
|
|||||||
# FlowerCore Brochure
|
|
||||||
|
|
||||||
`apps/brochure` hosts the public brochure split from `FlowerCore.Intranet.Web`.
|
|
||||||
ArgoCD's `apps/*` ApplicationSet will create `infra-brochure` after this
|
|
||||||
directory lands on `main`.
|
|
||||||
|
|
||||||
## Runtime
|
|
||||||
|
|
||||||
- Host: `https://brochure.flowercore.io`
|
|
||||||
- Namespace: `brochure`
|
|
||||||
- Deployment: `brochure-web`
|
|
||||||
- Image: `localhost/fc-brochure-web:v20260524-sprint32`
|
|
||||||
- Port: `8080`
|
|
||||||
- Public route method allowlist: `GET` and `HEAD`
|
|
||||||
|
|
||||||
## Operator Actions
|
|
||||||
|
|
||||||
1. Publish and import `localhost/fc-brochure-web:v20260524-sprint32` to every
|
|
||||||
RKE2 node before sync, using the same podman save + `ctr images import`
|
|
||||||
flow as the Intranet deployment.
|
|
||||||
2. Create the Cloudflare DNS record for `brochure.flowercore.io` pointing at
|
|
||||||
the FlowerCore public edge.
|
|
||||||
3. Verify `infra-brochure` appears in ArgoCD, the certificate becomes Ready,
|
|
||||||
and `GET https://brochure.flowercore.io/` returns `200`.
|
|
||||||
|
|
||||||
The route intentionally does not expose `/ops/*` or `/admin/*`; the Brochure
|
|
||||||
web app returns `404` for those paths and Traefik only forwards read methods.
|
|
||||||
@@ -1,131 +0,0 @@
|
|||||||
# FlowerCore Brochure public host
|
|
||||||
#
|
|
||||||
# Thin Blazor host for public What's New, walkthrough, and gallery content
|
|
||||||
# carved out of FlowerCore.Intranet.Web. The ApplicationSet creates
|
|
||||||
# infra-brochure from this directory after merge.
|
|
||||||
---
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Namespace
|
|
||||||
metadata:
|
|
||||||
name: brochure
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/part-of: flowercore
|
|
||||||
---
|
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
name: brochure-web
|
|
||||||
namespace: brochure
|
|
||||||
labels:
|
|
||||||
app: brochure-web
|
|
||||||
app.kubernetes.io/name: brochure-web
|
|
||||||
app.kubernetes.io/part-of: flowercore
|
|
||||||
spec:
|
|
||||||
replicas: 1
|
|
||||||
revisionHistoryLimit: 3
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app: brochure-web
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app: brochure-web
|
|
||||||
app.kubernetes.io/name: brochure-web
|
|
||||||
app.kubernetes.io/part-of: flowercore
|
|
||||||
spec:
|
|
||||||
containers:
|
|
||||||
- name: brochure-web
|
|
||||||
image: localhost/fc-brochure-web:v20260524-sprint32
|
|
||||||
imagePullPolicy: Never
|
|
||||||
ports:
|
|
||||||
- containerPort: 8080
|
|
||||||
name: http
|
|
||||||
env:
|
|
||||||
- name: ASPNETCORE_ENVIRONMENT
|
|
||||||
value: Production
|
|
||||||
- name: ASPNETCORE_URLS
|
|
||||||
value: "http://+:8080"
|
|
||||||
resources:
|
|
||||||
requests:
|
|
||||||
cpu: "25m"
|
|
||||||
memory: "128Mi"
|
|
||||||
limits:
|
|
||||||
cpu: "500m"
|
|
||||||
memory: "512Mi"
|
|
||||||
readinessProbe:
|
|
||||||
httpGet:
|
|
||||||
path: /health
|
|
||||||
port: http
|
|
||||||
initialDelaySeconds: 10
|
|
||||||
periodSeconds: 10
|
|
||||||
livenessProbe:
|
|
||||||
httpGet:
|
|
||||||
path: /health
|
|
||||||
port: http
|
|
||||||
initialDelaySeconds: 30
|
|
||||||
periodSeconds: 30
|
|
||||||
securityContext:
|
|
||||||
runAsNonRoot: true
|
|
||||||
runAsUser: 1654
|
|
||||||
runAsGroup: 1654
|
|
||||||
allowPrivilegeEscalation: false
|
|
||||||
readOnlyRootFilesystem: true
|
|
||||||
capabilities:
|
|
||||||
drop:
|
|
||||||
- ALL
|
|
||||||
volumeMounts:
|
|
||||||
- name: tmp
|
|
||||||
mountPath: /tmp
|
|
||||||
volumes:
|
|
||||||
- name: tmp
|
|
||||||
emptyDir: {}
|
|
||||||
---
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: brochure-web
|
|
||||||
namespace: brochure
|
|
||||||
labels:
|
|
||||||
app: brochure-web
|
|
||||||
app.kubernetes.io/name: brochure-web
|
|
||||||
app.kubernetes.io/part-of: flowercore
|
|
||||||
spec:
|
|
||||||
type: ClusterIP
|
|
||||||
selector:
|
|
||||||
app: brochure-web
|
|
||||||
ports:
|
|
||||||
- name: http
|
|
||||||
port: 8080
|
|
||||||
targetPort: http
|
|
||||||
---
|
|
||||||
apiVersion: cert-manager.io/v1
|
|
||||||
kind: Certificate
|
|
||||||
metadata:
|
|
||||||
name: brochure-web-tls
|
|
||||||
namespace: brochure
|
|
||||||
spec:
|
|
||||||
secretName: brochure-web-tls
|
|
||||||
issuerRef:
|
|
||||||
name: step-ca-acme
|
|
||||||
kind: ClusterIssuer
|
|
||||||
dnsNames:
|
|
||||||
- brochure.flowercore.io
|
|
||||||
duration: 720h
|
|
||||||
renewBefore: 240h
|
|
||||||
---
|
|
||||||
apiVersion: traefik.io/v1alpha1
|
|
||||||
kind: IngressRoute
|
|
||||||
metadata:
|
|
||||||
name: brochure-web-public
|
|
||||||
namespace: brochure
|
|
||||||
spec:
|
|
||||||
entryPoints:
|
|
||||||
- websecure
|
|
||||||
routes:
|
|
||||||
- match: Host(`brochure.flowercore.io`) && (Method(`GET`) || Method(`HEAD`))
|
|
||||||
kind: Rule
|
|
||||||
services:
|
|
||||||
- name: brochure-web
|
|
||||||
port: 8080
|
|
||||||
tls:
|
|
||||||
secretName: brochure-web-tls
|
|
||||||
263
apps/fc-build-windows/README.md
Normal file
263
apps/fc-build-windows/README.md
Normal file
@@ -0,0 +1,263 @@
|
|||||||
|
# fc-build-windows runner gate
|
||||||
|
|
||||||
|
Status: OPEN-WITH-OPERATOR-ACTION as of 2026-05-20.
|
||||||
|
|
||||||
|
This directory is intentionally not a live runner deployment. It records the
|
||||||
|
exact gate for bringing up the Windows self-hosted runner fleet without faking
|
||||||
|
capacity in GitHub or Kubernetes.
|
||||||
|
|
||||||
|
## Lane evidence
|
||||||
|
|
||||||
|
- `D:\git\FlowerCore\FlowerCore.Notes\docs\dashboards\decisions-waiting.html`
|
||||||
|
lines 15078-15085: Q-MR-82 says the Updater Windows Sandbox E2E run is
|
||||||
|
queued and `bluejay-ws-sandbox-1` is offline.
|
||||||
|
- `D:\git\FlowerCore\FlowerCore.Notes\memory\project_morning_routine_8_2026_05_20.md`:
|
||||||
|
Morning Routine #8 carries Q-MR-82 as the fleet-wide Windows runner gap.
|
||||||
|
- `D:\git\FlowerCore\FlowerCore.Notes\docs\standards\sprint-37-codex-dispatch-log-2026-05-19.md`
|
||||||
|
lines 76, 84-85, and 97: keep BLUEJAY-WS out of runner plans, merge Linux
|
||||||
|
runner expansion separately, and keep true Windows-only workflows parked on
|
||||||
|
the Windows runner host substrate path.
|
||||||
|
- `D:\git\FlowerCore\FlowerCore.Notes\docs\ai-agents\codex-prompts\2026-05-20-xxxxl-sprint-42-orchestrator-briefs.md`
|
||||||
|
lane Cx-5: land a deployment only if a Windows runner image/substrate is
|
||||||
|
ready; otherwise commit an operator-action gate.
|
||||||
|
- `D:\git\FlowerCore\FlowerCore.Notes\memory\feedback_bluejay_ws_never_a_github_runner.md`:
|
||||||
|
BLUEJAY-WS is operator-only territory; Windows runners belong on a dedicated
|
||||||
|
KubeVirt Windows VM such as `ci1` or a sibling VM.
|
||||||
|
|
||||||
|
## Live probe summary
|
||||||
|
|
||||||
|
Commands run on 2026-05-20 from `D:\git\FlowerCore\bluejay-infra`:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
$env:KUBECONFIG="$env:USERPROFILE\.kube\rke2.yaml"
|
||||||
|
kubectl get nodes -o jsonpath='{range .items[*]}{.metadata.name}{"`t"}{.metadata.labels.kubernetes\.io/os}{"`n"}{end}'
|
||||||
|
```
|
||||||
|
|
||||||
|
Result: `rke2-agent1`, `rke2-agent2`, and `rke2-server` all report
|
||||||
|
`kubernetes.io/os=linux`. There is no Windows Kubernetes node, so Windows
|
||||||
|
containers on RKE2 cannot satisfy `fc-build-windows`.
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
kubectl -n kubevirt-vms get vm,vmi,pods -o wide
|
||||||
|
```
|
||||||
|
|
||||||
|
Result: KubeVirt is healthy and `ci1` is `Running` / `Ready=True` on
|
||||||
|
`rke2-agent1` with VMI IP `10.42.103.35`.
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
virtctl --kubeconfig $env:USERPROFILE\.kube\rke2.yaml port-forward vm/ci1.kubevirt-vms 15985:5985
|
||||||
|
```
|
||||||
|
|
||||||
|
Result during port tests: `dial tcp 10.42.103.35:5985: connect: no route to
|
||||||
|
host`. The same result was seen for RDP 3389 and SSH 22. The VM exists, but it
|
||||||
|
is not remotely reachable for runner bootstrap from this lane.
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
gh api /repos/astoltz/FlowerCore.Updater/actions/runners `
|
||||||
|
--jq '.runners[]? | {name,status,busy,labels:[.labels[].name]}'
|
||||||
|
gh run list --repo astoltz/FlowerCore.Updater `
|
||||||
|
--workflow "Updater Windows Sandbox E2E" --limit 5
|
||||||
|
```
|
||||||
|
|
||||||
|
Result: GitHub has one Updater runner, `bluejay-ws-sandbox-1`, with
|
||||||
|
`status=offline`; run `26150689447` is still `queued`.
|
||||||
|
|
||||||
|
## Feasibility classification
|
||||||
|
|
||||||
|
### Option A: Windows containers on RKE2
|
||||||
|
|
||||||
|
Not feasible without operator-physical infrastructure work. Kubernetes Windows
|
||||||
|
containers require a Windows node. The current cluster has Linux-only RKE2
|
||||||
|
nodes.
|
||||||
|
|
||||||
|
### Option B: KubeVirt Windows VM
|
||||||
|
|
||||||
|
Partially present, not deployable from this lane.
|
||||||
|
|
||||||
|
`apps/kubevirt-vms/ci1.yaml` already defines a Windows Server 2025 KubeVirt VM
|
||||||
|
using `localhost/fc-win-server-2025:v1`, and the live VM is running. However:
|
||||||
|
|
||||||
|
- the guest is not reachable over RDP, WinRM, or SSH through `virtctl
|
||||||
|
port-forward`;
|
||||||
|
- the current root disk is a `containerDisk`, so runner installation inside the
|
||||||
|
running guest is not a durable fleet state unless the first-boot automation
|
||||||
|
re-registers on every boot or the VM is moved to a persistent PVC-backed
|
||||||
|
disk;
|
||||||
|
- FC.Updater `Updater Windows Sandbox E2E` uses
|
||||||
|
`[self-hosted, windows, windows-sandbox]`, while `fc-build-windows` build jobs
|
||||||
|
use `[self-hosted, windows, fc-build-windows]`. Do not advertise
|
||||||
|
`windows-sandbox` until Windows Sandbox has been proven in the guest.
|
||||||
|
|
||||||
|
### Option C: bluejay-ws-sandbox-1
|
||||||
|
|
||||||
|
Operator-only emergency fallback. GitHub shows it registered but offline. The
|
||||||
|
current memory note (`feedback_bluejay_ws_never_a_github_runner.md`) says BLUEJAY-WS must not be a fleet runner host, so this lane
|
||||||
|
does not start or re-register it. If the operator deliberately overrides the
|
||||||
|
policy to drain an emergency queue, start the existing visible runner console
|
||||||
|
from the BLUEJAY-WS desktop and treat that as temporary break-glass, not the
|
||||||
|
permanent Q-MR-82 closure.
|
||||||
|
|
||||||
|
## Operator action plan
|
||||||
|
|
||||||
|
### 1. Pick the Windows host class
|
||||||
|
|
||||||
|
Use `ci1` or a sibling Windows Server 2025 VM for WPF build/test jobs that need
|
||||||
|
`fc-build-windows`.
|
||||||
|
|
||||||
|
Use a Windows 11 Pro/Enterprise KubeVirt VM for Updater or WorldBuilder
|
||||||
|
Windows Sandbox gates, unless Windows Sandbox support is explicitly proven on
|
||||||
|
the selected guest. The workflow labels must match the real capability:
|
||||||
|
|
||||||
|
- WPF build runner: `self-hosted,windows,fc-build-windows,ci1`
|
||||||
|
- Sandbox runner: `self-hosted,windows,windows-sandbox,ci-sandbox1`
|
||||||
|
|
||||||
|
### 2. Make the VM reachable and durable
|
||||||
|
|
||||||
|
From BLUEJAY-WS:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
$env:KUBECONFIG="$env:USERPROFILE\.kube\rke2.yaml"
|
||||||
|
kubectl -n kubevirt-vms get vm,vmi,pods -o wide
|
||||||
|
virtctl --kubeconfig $env:KUBECONFIG vnc ci1 -n kubevirt-vms
|
||||||
|
virtctl --kubeconfig $env:KUBECONFIG port-forward vm/ci1.kubevirt-vms 13389:3389
|
||||||
|
virtctl --kubeconfig $env:KUBECONFIG port-forward vm/ci1.kubevirt-vms 15985:5985
|
||||||
|
```
|
||||||
|
|
||||||
|
Before runner registration, fix the current port-forward failure. The expected
|
||||||
|
state is that RDP or WinRM accepts a connection through the control plane.
|
||||||
|
|
||||||
|
For durability, either:
|
||||||
|
|
||||||
|
- move the runner VM to a persistent PVC-backed root disk; or
|
||||||
|
- keep `containerDisk` and bake first-boot runner registration into the sysprep
|
||||||
|
flow using a non-expiring credential lookup path.
|
||||||
|
|
||||||
|
Do not install a runner by hand into a transient VM and call Q-MR-82 closed.
|
||||||
|
|
||||||
|
### 3. Install runner prerequisites inside the VM
|
||||||
|
|
||||||
|
Run in an elevated PowerShell session in the Windows runner guest:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
winget install Microsoft.DotNet.SDK.10 --silent
|
||||||
|
winget install Microsoft.DotNet.DesktopRuntime.8 --silent
|
||||||
|
winget install Microsoft.PowerShell --silent
|
||||||
|
winget install Git.Git --silent
|
||||||
|
winget install Microsoft.VisualStudio.2022.BuildTools --silent
|
||||||
|
winget install Google.Chrome --silent
|
||||||
|
```
|
||||||
|
|
||||||
|
For a Sandbox-capable runner only:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
Enable-WindowsOptionalFeature -Online -FeatureName Containers-DisposableClientVM -All
|
||||||
|
Restart-Computer -Force
|
||||||
|
```
|
||||||
|
|
||||||
|
After reboot:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
Get-CimInstance -ClassName Win32_OptionalFeature -Filter "Name='Containers-DisposableClientVM'"
|
||||||
|
Test-Path C:\Windows\System32\WindowsSandbox.exe
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. Register repo-scoped GitHub runners
|
||||||
|
|
||||||
|
The `astoltz` account uses repo-scoped runners. Generate a fresh one-hour
|
||||||
|
registration token per repo immediately before `config.cmd`.
|
||||||
|
|
||||||
|
From a trusted operator shell with `gh` authenticated:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
# Register one repo-scoped `fc-build-windows` runner per repository on this
# guest and run each one as a Windows service.
$repos = @(
  "FlowerCore.Updater",
  "FlowerCore.WorldBuilder",
  "FlowerCore.DeviceManagement"
)

foreach ($repo in $repos) {
  # Registration tokens are short-lived, so mint one per repo immediately
  # before config.cmd consumes it.
  $token = gh api -X POST "/repos/astoltz/$repo/actions/runners/registration-token" --jq .token
  if (-not $token) {
    throw "No registration token returned for astoltz/$repo; check 'gh auth status'."
  }

  # e.g. FlowerCore.DeviceManagement -> devicemanagement
  $repoSlug = $repo.ToLowerInvariant().Replace("flowercore.", "").Replace(".", "-")
  $runnerDir = "C:\fc-ghr\$repoSlug-fc-build-windows"

  New-Item -ItemType Directory -Force -Path $runnerDir | Out-Null
  Set-Location $runnerDir

  # Download and unpack the runner package only on first use of this directory.
  if (-not (Test-Path ".\config.cmd")) {
    Invoke-WebRequest `
      -Uri "https://github.com/actions/runner/releases/download/v2.323.0/actions-runner-win-x64-2.323.0.zip" `
      -OutFile "actions-runner.zip"
    Add-Type -AssemblyName System.IO.Compression.FileSystem
    [System.IO.Compression.ZipFile]::ExtractToDirectory((Resolve-Path actions-runner.zip), $runnerDir)
  }

  # The Windows runner package has no svc.ps1 (svc.sh exists only on
  # Linux/macOS). --runasservice makes config.cmd install and start the
  # `actions.runner.*` Windows service itself.
  .\config.cmd `
    --url "https://github.com/astoltz/$repo" `
    --token $token `
    --name "ci1-$repoSlug-fc-build-windows" `
    --labels "self-hosted,windows,fc-build-windows,ci1" `
    --work "_work" `
    --unattended `
    --replace `
    --runasservice

  # Show the service state so a failed install is visible right away.
  Get-Service 'actions.runner.*' | Format-Table Name, Status
}
|
||||||
|
```
|
||||||
|
|
||||||
|
For Updater Sandbox E2E, register only after the guest proves Sandbox support,
|
||||||
|
and use `windows-sandbox` labels:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
$token = gh api -X POST "/repos/astoltz/FlowerCore.Updater/actions/runners/registration-token" --jq .token
|
||||||
|
.\config.cmd `
|
||||||
|
--url "https://github.com/astoltz/FlowerCore.Updater" `
|
||||||
|
--token $token `
|
||||||
|
--name "ci-sandbox1-updater" `
|
||||||
|
--labels "self-hosted,windows,windows-sandbox,ci-sandbox1" `
|
||||||
|
--work "_work" `
|
||||||
|
--unattended `
|
||||||
|
--replace
|
||||||
|
```
|
||||||
|
|
||||||
|
Keep registration tokens out of Git and logs. The durable credential source for
|
||||||
|
automation should be the existing 1Password item named `GitHub PAT (Runner
|
||||||
|
Registration)`, used only to mint short-lived repo registration tokens.
|
||||||
|
|
||||||
|
### 5. Verify GitHub and workflow pickup
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
gh api /repos/astoltz/FlowerCore.Updater/actions/runners `
|
||||||
|
--jq '.runners[] | select(.labels[].name == "windows-sandbox") | {name,status,busy,labels:[.labels[].name]}'
|
||||||
|
|
||||||
|
gh api /repos/astoltz/FlowerCore.DeviceManagement/actions/runners `
|
||||||
|
--jq '.runners[] | select(.labels[].name == "fc-build-windows") | {name,status,busy,labels:[.labels[].name]}'
|
||||||
|
|
||||||
|
gh run list --repo astoltz/FlowerCore.Updater `
|
||||||
|
--workflow "Updater Windows Sandbox E2E" --limit 3
|
||||||
|
```
|
||||||
|
|
||||||
|
Q-MR-82 can be marked resolved only after the Updater run moves from `queued` to
|
||||||
|
`in_progress` or `completed` on an online runner, or after the affected WPF
|
||||||
|
build repos show online `fc-build-windows` repo-scoped runners and their queued
|
||||||
|
jobs start.
|
||||||
|
|
||||||
|
## Break-glass BLUEJAY-WS command
|
||||||
|
|
||||||
|
Only if the operator explicitly overrides the "BLUEJAY-WS is not a runner"
|
||||||
|
policy to drain a queue:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
Set-Location C:\fc-ghr\updater-sandbox
|
||||||
|
.\run.cmd
|
||||||
|
```
|
||||||
|
|
||||||
|
If a Windows service exists:
|
||||||
|
|
||||||
|
```powershell
|
||||||
|
Get-Service 'actions.runner.*'
|
||||||
|
Start-Service 'actions.runner.*'
|
||||||
|
```
|
||||||
|
|
||||||
|
This does not close Q-MR-82 permanently. It is a temporary queue drain until a
|
||||||
|
dedicated VM runner is online.
|
||||||
4
apps/fc-build-windows/kustomization.yaml
Normal file
4
apps/fc-build-windows/kustomization.yaml
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||||
|
kind: Kustomization
|
||||||
|
resources:
|
||||||
|
- operator-gate-configmap.yaml
|
||||||
61
apps/fc-build-windows/operator-gate-configmap.yaml
Normal file
61
apps/fc-build-windows/operator-gate-configmap.yaml
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: fc-build-windows-operator-gate
|
||||||
|
namespace: kubevirt-vms
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: fc-build-windows
|
||||||
|
app.kubernetes.io/component: operator-gate
|
||||||
|
app.kubernetes.io/part-of: github-runner
|
||||||
|
flowercore.io/q-card: Q-MR-82
|
||||||
|
annotations:
|
||||||
|
flowercore.io/outcome: OPEN-WITH-OPERATOR-ACTION
|
||||||
|
flowercore.io/live-runner: "false"
|
||||||
|
data:
|
||||||
|
outcome: OPEN-WITH-OPERATOR-ACTION
|
||||||
|
gate.md: |
|
||||||
|
Do not treat this ConfigMap as runner capacity.
|
||||||
|
|
||||||
|
Current probe, 2026-05-20:
|
||||||
|
- RKE2 nodes are linux-only; Windows containers require a Windows node.
|
||||||
|
- KubeVirt `ci1` is Running/Ready, but RDP 3389, WinRM 5985, and SSH 22
|
||||||
|
through `virtctl port-forward` return `connect: no route to host`.
|
||||||
|
- GitHub Updater runner list has only `bluejay-ws-sandbox-1`, status
|
||||||
|
offline. Updater Windows Sandbox E2E run 26150689447 remains queued.
|
||||||
|
|
||||||
|
Required operator action:
|
||||||
|
1. Make a dedicated Windows VM reachable and durable.
|
||||||
|
2. Install .NET 10 SDK, .NET 8 Desktop Runtime, Git, VS Build Tools, and
|
||||||
|
PowerShell 7.
|
||||||
|
3. Register repo-scoped runners with short-lived GitHub registration tokens.
|
||||||
|
4. Add `fc-build-windows` labels only to WPF build-capable guests.
|
||||||
|
5. Add `windows-sandbox` labels only after Sandbox support is proven.
|
||||||
|
registration-token-pattern.ps1: |
|
||||||
|
# Pattern for registering a single repo-scoped fc-build-windows runner as a
# Windows service. Run inside the Windows runner guest.
$repo = "FlowerCore.Updater"
# Mint the registration token immediately before config.cmd consumes it.
$token = gh api -X POST "/repos/astoltz/$repo/actions/runners/registration-token" --jq .token
if (-not $token) {
  throw "No registration token returned for astoltz/$repo; check 'gh auth status'."
}
$runnerDir = "C:\fc-ghr\updater-fc-build-windows"

New-Item -ItemType Directory -Force -Path $runnerDir | Out-Null
Set-Location $runnerDir

# Install the Actions runner package here if config.cmd is absent.
# The Windows runner package has no svc.ps1 (svc.sh exists only on
# Linux/macOS). --runasservice makes config.cmd install and start the
# `actions.runner.*` Windows service itself.
.\config.cmd `
  --url "https://github.com/astoltz/$repo" `
  --token $token `
  --name "ci1-updater-fc-build-windows" `
  --labels "self-hosted,windows,fc-build-windows,ci1" `
  --work "_work" `
  --unattended `
  --replace `
  --runasservice

# Show the service state so a failed install is visible right away.
Get-Service 'actions.runner.*' | Format-Table Name, Status
|
||||||
|
verification.ps1: |
|
||||||
|
gh api /repos/astoltz/FlowerCore.Updater/actions/runners `
|
||||||
|
--jq '.runners[] | {name,status,busy,labels:[.labels[].name]}'
|
||||||
|
|
||||||
|
gh run list --repo astoltz/FlowerCore.Updater `
|
||||||
|
--workflow "Updater Windows Sandbox E2E" --limit 3
|
||||||
|
|
||||||
|
$env:KUBECONFIG="$env:USERPROFILE\.kube\rke2.yaml"
|
||||||
|
kubectl -n kubevirt-vms get vm,vmi,pods -o wide
|
||||||
@@ -1,33 +0,0 @@
|
|||||||
# Explicit ArgoCD Application shape for bootstrap/review.
|
|
||||||
#
|
|
||||||
# The live bluejay-infra ApplicationSet already discovers apps/* directories
|
|
||||||
# and creates this same Application name (`infra-fc-devicemgmt`) automatically.
|
|
||||||
# Keep repoURL on the internal Gitea ClusterIP URL; ArgoCD does not trust the
|
|
||||||
# external step-ca HTTPS endpoint.
|
|
||||||
apiVersion: argoproj.io/v1alpha1
|
|
||||||
kind: Application
|
|
||||||
metadata:
|
|
||||||
name: infra-fc-devicemgmt
|
|
||||||
namespace: argocd
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/name: fc-devicemgmt
|
|
||||||
app.kubernetes.io/part-of: flowercore
|
|
||||||
app.kubernetes.io/managed-by: argocd
|
|
||||||
flowercore.io/tenant-id: system
|
|
||||||
flowercore.io/created-by: bluejay-infra
|
|
||||||
spec:
|
|
||||||
project: default
|
|
||||||
source:
|
|
||||||
repoURL: http://gitea-clusterip.gitea.svc.cluster.local:3000/bluejay/bluejay-infra.git
|
|
||||||
targetRevision: main
|
|
||||||
path: apps/fc-devicemgmt
|
|
||||||
destination:
|
|
||||||
server: https://kubernetes.default.svc
|
|
||||||
namespace: fc-devicemgmt
|
|
||||||
syncPolicy:
|
|
||||||
automated:
|
|
||||||
prune: true
|
|
||||||
selfHeal: true
|
|
||||||
syncOptions:
|
|
||||||
- CreateNamespace=true
|
|
||||||
- ServerSideApply=true
|
|
||||||
@@ -47,7 +47,7 @@ spec:
|
|||||||
fsGroupChangePolicy: OnRootMismatch
|
fsGroupChangePolicy: OnRootMismatch
|
||||||
containers:
|
containers:
|
||||||
- name: operator
|
- name: operator
|
||||||
image: localhost/fc-devicemgmt-operator:v20260512-cx5
|
image: localhost/fc-devicemgmt-operator:v20260519-sp34cl3-fix
|
||||||
imagePullPolicy: Never
|
imagePullPolicy: Never
|
||||||
ports:
|
ports:
|
||||||
- name: metrics
|
- name: metrics
|
||||||
|
|||||||
@@ -4,6 +4,22 @@
|
|||||||
# Sprint 9+ lane. This manifest is static-valid without requiring the image to
|
# Sprint 9+ lane. This manifest is static-valid without requiring the image to
|
||||||
# exist yet; import localhost/fc-devicemgmt-web:<tag> to all schedulable RKE2
|
# exist yet; import localhost/fc-devicemgmt-web:<tag> to all schedulable RKE2
|
||||||
# nodes before letting ArgoCD sync a live rollout.
|
# nodes before letting ArgoCD sync a live rollout.
|
||||||
|
#
|
||||||
|
# SCALED TO 0 — 2026-05-19 morning-routine cleanup.
|
||||||
|
# The Web pod cannot start until TWO upstream gaps close:
|
||||||
|
# 1. MySQL DB instance `flowercore_devicemgmt` (user `fc_devicemgmt`) is
|
||||||
|
# provisioned via fc-mysql Manager. The cluster currently has ZERO
|
||||||
|
# MySqlInstanceCrds and no `mysql.fc-mysql.svc:3306` Service, so the
|
||||||
|
# deployment-web container env `FlowerCore__Database__Host=mysql.fc-mysql.svc`
|
||||||
|
# points at nothing. Provision via the fc-mysql Manager UI/REST/MCP.
|
||||||
|
# 2. 1Password vault item `IAmWorkin/FlowerCore DeviceManagement Runtime` exists
|
||||||
|
# with 5 fields (DB-Password, mtls-ca.pem, mtls-client.crt, mtls-client.key,
|
||||||
|
# mtls-chain.pem) — see apps/fc-devicemgmt/1password-item.yaml. Mint mTLS
|
||||||
|
# from step-ca-agent ClusterIssuer per ADR-126; DB-Password must match the
|
||||||
|
# password configured for the MySQL user.
|
||||||
|
# Re-enable: change replicas back to 2 after both gaps close. The image tag
|
||||||
|
# in this file (v20260512-cx5) MAY also need a refresh — it predates the
|
||||||
|
# Sprint 34 Cl-3 operator fix; Web may have an analogous bug.
|
||||||
apiVersion: apps/v1
|
apiVersion: apps/v1
|
||||||
kind: Deployment
|
kind: Deployment
|
||||||
metadata:
|
metadata:
|
||||||
@@ -20,7 +36,7 @@ metadata:
|
|||||||
annotations:
|
annotations:
|
||||||
flowercore.io/traceability-standard: k8s-pod-ownership-and-traceability-standard
|
flowercore.io/traceability-standard: k8s-pod-ownership-and-traceability-standard
|
||||||
spec:
|
spec:
|
||||||
replicas: 2
|
replicas: 0
|
||||||
revisionHistoryLimit: 3
|
revisionHistoryLimit: 3
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
|
|||||||
@@ -1273,24 +1273,55 @@ metadata:
|
|||||||
data:
|
data:
|
||||||
notify.py: |
|
notify.py: |
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""HTTP->IRC alert relay with thermal printer forwarding for Grafana webhooks.
|
"""HTTP->IRC alert relay with thermal-printer DIGEST forwarding.
|
||||||
Listens on :9119, posts to #alerts on UnrealIRCd via raw IRC protocol.
|
|
||||||
Alerts tagged alert_channel=thermal_print also POST to Print.Web /api/print/alert.
|
Listens on :9119, posts to #alerts on UnrealIRCd, forwards to Print.Web
|
||||||
|
/api/print/alert. Thermal printing is BATCHED into hourly digests by
|
||||||
|
default so the printer no longer spam-fires per Grafana webhook.
|
||||||
|
|
||||||
|
Routing (per Grafana webhook alert):
|
||||||
|
- IRC: always per-event (operator likes the stream)
|
||||||
|
- Thermal printer:
|
||||||
|
* severity in {critical,disaster,page} OR
|
||||||
|
label alert_channel=thermal_print_immediate -> print NOW
|
||||||
|
* label alert_channel=thermal_print -> enqueue into hourly digest
|
||||||
|
* everything else -> IRC only
|
||||||
|
- RESOLVED webhooks remove the alert from the digest buffer
|
||||||
|
|
||||||
|
Env vars (defaults preserve old behavior on first deploy):
|
||||||
|
THERMAL_PRINT_ENABLED default "true" - master kill switch
|
||||||
|
BATCH_INTERVAL_MIN default "60" - minutes between digest prints
|
||||||
|
BATCH_MAX_PENDING default "50" - force-flush threshold
|
||||||
|
|
||||||
|
HTTP surface:
|
||||||
|
POST / - Grafana webhook entry
|
||||||
|
POST /flush - manual digest flush (idempotent)
|
||||||
|
GET / - status + config + buffer depth + stats
|
||||||
"""
|
"""
|
||||||
import json, socket, sys, time
|
import json, os, socket, sys, threading, time
|
||||||
|
from collections import defaultdict
|
||||||
|
from datetime import datetime, timezone
|
||||||
from http.server import HTTPServer, BaseHTTPRequestHandler
|
from http.server import HTTPServer, BaseHTTPRequestHandler
|
||||||
from urllib.request import Request, urlopen
|
from urllib.request import Request, urlopen
|
||||||
from urllib.error import URLError
|
|
||||||
|
|
||||||
IRC_HOST = "unrealircd.irc.svc" # short name: CoreDNS ndots:5 + iamworkin.lan template hijacks full .cluster.local (see memory)
|
THERMAL_PRINT_ENABLED = os.environ.get("THERMAL_PRINT_ENABLED", "true").lower() == "true"
|
||||||
IRC_PORT = 6667
|
BATCH_INTERVAL_MIN = int(os.environ.get("BATCH_INTERVAL_MIN", "60"))
|
||||||
IRC_NICK = "grafana-bot"
|
BATCH_MAX_PENDING = int(os.environ.get("BATCH_MAX_PENDING", "50"))
|
||||||
IRC_CHANNEL = "#alerts"
|
|
||||||
PRINT_WEB_URL = "http://10.0.57.16:5200/api/print/alert"
|
IRC_HOST = os.environ.get("IRC_HOST", "unrealircd.irc.svc")
|
||||||
PRINT_ENABLED = True
|
IRC_PORT = int(os.environ.get("IRC_PORT", "6667"))
|
||||||
|
IRC_NICK = os.environ.get("IRC_NICK", "grafana-bot")
|
||||||
|
IRC_CHANNEL = os.environ.get("IRC_CHANNEL", "#alerts")
|
||||||
|
PRINT_WEB_URL = os.environ.get("PRINT_WEB_URL", "http://10.0.57.16:5200/api/print/alert")
|
||||||
|
|
||||||
|
_buffer_lock = threading.Lock()
|
||||||
|
_buffer = {} # fingerprint -> {"alert": dict, "first_seen": float, "last_seen": float}
|
||||||
|
_last_flush_time = time.time()
|
||||||
|
_stats = {"webhooks_received": 0, "irc_sent": 0, "print_immediate": 0,
|
||||||
|
"digest_flushed": 0, "buffer_dedup": 0, "buffer_added": 0,
|
||||||
|
"buffer_resolved": 0, "started_at": time.time()}
|
||||||
|
|
||||||
def send_irc(message):
|
def send_irc(message):
|
||||||
"""Connect, handle PING, join, send, quit."""
|
|
||||||
try:
|
try:
|
||||||
sock = socket.create_connection((IRC_HOST, IRC_PORT), timeout=15)
|
sock = socket.create_connection((IRC_HOST, IRC_PORT), timeout=15)
|
||||||
sock.sendall(f"NICK {IRC_NICK}\r\n".encode())
|
sock.sendall(f"NICK {IRC_NICK}\r\n".encode())
|
||||||
@@ -1323,52 +1354,137 @@ data:
|
|||||||
time.sleep(0.5)
|
time.sleep(0.5)
|
||||||
sock.sendall(b"QUIT :alert delivered\r\n")
|
sock.sendall(b"QUIT :alert delivered\r\n")
|
||||||
sock.close()
|
sock.close()
|
||||||
|
_stats["irc_sent"] += 1
|
||||||
return True
|
return True
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[irc-notify] IRC send failed: {e}", file=sys.stderr)
|
print(f"[irc-notify] IRC send failed: {e}", file=sys.stderr)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def send_thermal_print(alert):
|
def post_thermal(payload, kind):
|
||||||
if not PRINT_ENABLED: return
|
if not THERMAL_PRINT_ENABLED:
|
||||||
labels = alert.get("labels", {})
|
print(f"[irc-notify] thermal disabled; skip {kind} ({payload.get('title','?')[:40]})", file=sys.stderr)
|
||||||
annotations = alert.get("annotations", {})
|
return False
|
||||||
status = alert.get("status", "firing").upper()
|
|
||||||
summary = annotations.get("summary", "")
|
|
||||||
description = annotations.get("description", "")
|
|
||||||
runbook = annotations.get("runbook", "")
|
|
||||||
# Build a useful message: summary + description + runbook steps
|
|
||||||
parts = []
|
|
||||||
if summary: parts.append(summary)
|
|
||||||
if description and description != summary: parts.append(description)
|
|
||||||
if runbook: parts.append("STEPS: " + runbook)
|
|
||||||
message = " | ".join(parts) if parts else labels.get("alertname", "Unknown alert")
|
|
||||||
payload = {
|
|
||||||
"title": labels.get("alertname", "Unknown"),
|
|
||||||
"severity": labels.get("severity", "warning").capitalize(),
|
|
||||||
"host": labels.get("instance", labels.get("host", "unknown")),
|
|
||||||
"message": message,
|
|
||||||
"eventId": alert.get("fingerprint", ""),
|
|
||||||
"source": "Grafana",
|
|
||||||
"status": "RESOLVED" if status == "RESOLVED" else "PROBLEM",
|
|
||||||
"acknowledged": False
|
|
||||||
}
|
|
||||||
try:
|
try:
|
||||||
req = Request(PRINT_WEB_URL, data=json.dumps(payload).encode("utf-8"),
|
req = Request(PRINT_WEB_URL, data=json.dumps(payload).encode("utf-8"),
|
||||||
headers={"Content-Type": "application/json"}, method="POST")
|
headers={"Content-Type": "application/json"}, method="POST")
|
||||||
resp = urlopen(req, timeout=10)
|
resp = urlopen(req, timeout=10)
|
||||||
print(f"[irc-notify] Thermal print sent: {resp.read().decode()}", file=sys.stderr)
|
if kind == "immediate": _stats["print_immediate"] += 1
|
||||||
|
print(f"[irc-notify] thermal {kind} sent: {payload.get('title','?')[:50]}", file=sys.stderr)
|
||||||
|
return True
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[irc-notify] Thermal print failed: {e}", file=sys.stderr)
|
print(f"[irc-notify] thermal {kind} failed: {e}", file=sys.stderr)
|
||||||
|
return False
|
||||||
|
|
||||||
def should_print(alert):
|
def fingerprint_of(alert):
|
||||||
|
fp = alert.get("fingerprint", "")
|
||||||
|
if fp: return fp
|
||||||
labels = alert.get("labels", {})
|
labels = alert.get("labels", {})
|
||||||
if labels.get("alert_channel") == "thermal_print": return True
|
target = labels.get("pod") or labels.get("instance") or labels.get("deployment") or labels.get("statefulset") or labels.get("namespace") or ""
|
||||||
if labels.get("severity", "").lower() in ("critical", "disaster"): return True
|
return f"{labels.get('alertname','?')}/{labels.get('namespace','')}/{target}"
|
||||||
if alert.get("status", "").upper() == "RESOLVED": return False
|
|
||||||
return False
|
def is_critical(alert):
    """Tell whether the alert's ``severity`` label marks it as critical.

    The label is compared case-insensitively against ``critical``,
    ``disaster`` and ``page``.  An alert without a ``labels`` mapping or
    without a ``severity`` label is not critical.
    """
    labels = alert.get("labels", {})
    severity = labels.get("severity", "")
    return severity.lower() in ("critical", "disaster", "page")
|
||||||
|
|
||||||
|
def is_immediate_label(alert):
    """Tell whether the alert carries ``alert_channel=thermal_print_immediate``.

    The match is exact (case-sensitive); a missing ``labels`` mapping or
    ``alert_channel`` label yields False.
    """
    labels = alert.get("labels", {})
    channel = labels.get("alert_channel")
    return channel == "thermal_print_immediate"
|
||||||
|
|
||||||
|
def is_batched_label(alert):
    """Tell whether the alert carries ``alert_channel=thermal_print``.

    The match is exact (case-sensitive); a missing ``labels`` mapping or
    ``alert_channel`` label yields False.
    """
    labels = alert.get("labels", {})
    channel = labels.get("alert_channel")
    return channel == "thermal_print"
|
||||||
|
|
||||||
|
def add_to_digest(alert):
    """Record ``alert`` in the digest buffer, keyed by its fingerprint.

    Returns True only when a fingerprint that was not yet buffered has been
    inserted.  Returns False in every other case:

    * thermal printing is disabled (nothing is touched);
    * the alert's status is ``resolved`` (a buffered entry for the same
      fingerprint, if any, is removed);
    * the fingerprint is already buffered (the entry's ``last_seen`` and
      stored alert are refreshed in place).
    """
    if not THERMAL_PRINT_ENABLED:
        return False

    key = fingerprint_of(alert)
    resolved = alert.get("status", "firing").lower() == "resolved"

    with _buffer_lock:
        if resolved:
            # A resolution never grows the buffer; it only evicts.
            if key in _buffer:
                _buffer.pop(key)
                _stats["buffer_resolved"] += 1
            return False

        if key not in _buffer:
            _buffer[key] = {
                "alert": alert,
                "first_seen": time.time(),
                "last_seen": time.time(),
            }
            _stats["buffer_added"] += 1
            return True

        # Repeat webhook for a fingerprint we already hold: refresh it.
        entry = _buffer[key]
        entry["last_seen"] = time.time()
        entry["alert"] = alert
        _stats["buffer_dedup"] += 1
        return False
|
||||||
|
|
||||||
|
def build_digest_payload():
    """Render everything currently buffered into one thermal-print payload.

    Returns None when the buffer is empty.  Otherwise returns a dict with the
    keys ``title``, ``severity``, ``host``, ``message``, ``eventId``,
    ``source``, ``status`` and ``acknowledged``.  ``message`` holds one line
    per alertname listing its severities, its count, and up to five targets.
    """
    # Copy the entries under the lock; all formatting happens outside it.
    with _buffer_lock:
        entries = list(_buffer.values())
    if not entries:
        return None

    grouped = defaultdict(list)
    for entry in entries:
        alertname = entry["alert"].get("labels", {}).get("alertname", "Unknown")
        grouped[alertname].append(entry)

    # Label keys tried in order when naming the target of an alert.
    target_keys = ("pod", "instance", "deployment", "statefulset", "namespace")

    summary_lines = []
    for alertname in sorted(grouped):
        members = grouped[alertname]
        count = len(members)

        shown = []
        for member in members[:5]:
            member_labels = member["alert"].get("labels", {})
            target = "?"
            for key in target_keys:
                value = member_labels.get(key)
                if value:
                    target = value
                    break
            shown.append(target)

        overflow = f" (+{count-5})" if count > 5 else ""
        severities = sorted(
            {m["alert"].get("labels", {}).get("severity", "warning") for m in members}
        )
        summary_lines.append(
            f"[{'/'.join(severities)}] {alertname} x{count}: {', '.join(shown)}{overflow}"
        )

    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    title = f"Alert digest: {len(entries)} firing"

    text = [f"=== {title} ===", f"as of {stamp}", ""]
    text.extend(summary_lines)
    text.append("")
    text.append("Stream: #alerts (IRC) | Triage: grafana-noc1.iamworkin.lan")
    text.append("Force-flush: POST irc-notify.monitoring.svc:9119/flush")

    return {
        "title": title,
        "severity": "Warning",
        "host": "monitoring",
        "message": "\n".join(text),
        "eventId": f"digest-{int(time.time())}",
        "source": "Grafana digest",
        "status": "PROBLEM",
        "acknowledged": False,
    }
|
||||||
|
|
||||||
|
def flush_digest():
    """Print the current digest and drop the buffer entries it covered.

    Returns True when a digest was POSTed successfully, False when the
    buffer was empty or the POST failed.

    Only the entries that existed when the flush started are removed.  The
    previous implementation called ``_buffer.clear()`` after the POST, which
    also erased alerts that another thread had buffered while the POST was
    in flight, so they never appeared in any digest.
    """
    # Remember which entry objects exist before rendering.  Taking the
    # snapshot first means an alert added in the gap before rendering is at
    # worst printed twice (now and in the next digest), never lost.
    with _buffer_lock:
        snapshot = dict(_buffer)

    payload = build_digest_payload()
    if payload is None:
        print("[irc-notify] flush: buffer empty, no digest sent", file=sys.stderr)
        return False

    sent = post_thermal(payload, "digest")

    with _buffer_lock:
        for fp, entry in snapshot.items():
            # Identity check: an alert that resolved and re-fired during the
            # POST has a fresh entry object and must stay buffered.
            if _buffer.get(fp) is entry:
                del _buffer[fp]
        # NOTE(review): as before, entries are dropped even when the POST
        # failed -- confirm that discarding a failed digest is intended.

    if sent:
        _stats["digest_flushed"] += 1
    return sent
|
||||||
|
|
||||||
|
def digest_loop():
    """Run forever, flushing the digest on a timer or when the buffer fills.

    Every 15 seconds the loop checks two conditions, in this order:

    1. at least ``BATCH_INTERVAL_MIN`` minutes have passed since
       ``_last_flush_time``;
    2. the buffer holds at least ``BATCH_MAX_PENDING`` entries.

    When either holds, the reason is logged, ``flush_digest()`` is called and
    ``_last_flush_time`` is set to the time sampled at the start of the tick.
    Any exception is logged and the loop backs off for 60 seconds.
    """
    global _last_flush_time
    while True:
        try:
            tick = time.time()
            reason = None
            if tick - _last_flush_time >= BATCH_INTERVAL_MIN * 60:
                reason = f"[irc-notify] digest tick: interval reached ({BATCH_INTERVAL_MIN}m); buffer={len(_buffer)}"
            elif len(_buffer) >= BATCH_MAX_PENDING:
                reason = f"[irc-notify] digest tick: buffer full ({len(_buffer)}); force flush"

            if reason is not None:
                print(reason, file=sys.stderr)
                flush_digest()
                _last_flush_time = tick

            time.sleep(15)
        except Exception as err:
            print(f"[irc-notify] digest loop error: {err}", file=sys.stderr)
            time.sleep(60)
|
||||||
|
|
||||||
class Handler(BaseHTTPRequestHandler):
|
class Handler(BaseHTTPRequestHandler):
|
||||||
def do_POST(self):
|
def do_POST(self):
|
||||||
|
if self.path == "/flush":
|
||||||
|
ok = flush_digest()
|
||||||
|
self.send_response(200); self.send_header("Content-Type", "application/json"); self.end_headers()
|
||||||
|
self.wfile.write(json.dumps({"flushed": ok, "buffer_after": len(_buffer)}).encode())
|
||||||
|
return
|
||||||
|
_stats["webhooks_received"] += 1
|
||||||
length = int(self.headers.get("Content-Length", 0))
|
length = int(self.headers.get("Content-Length", 0))
|
||||||
body = json.loads(self.rfile.read(length)) if length else {}
|
body = json.loads(self.rfile.read(length)) if length else {}
|
||||||
for alert in body.get("alerts", []):
|
for alert in body.get("alerts", []):
|
||||||
@@ -1383,22 +1499,56 @@ data:
|
|||||||
msg = f"{icon}{sev_tag} {name}: {summary}"
|
msg = f"{icon}{sev_tag} {name}: {summary}"
|
||||||
if desc: msg += f"\n {desc}"
|
if desc: msg += f"\n {desc}"
|
||||||
send_irc(msg)
|
send_irc(msg)
|
||||||
if should_print(alert): send_thermal_print(alert)
|
# Thermal routing — EVERYTHING (including criticals) goes into
|
||||||
self.send_response(200)
|
# the hourly digest. Only the explicit `alert_channel=thermal_print_immediate`
|
||||||
self.send_header("Content-Type", "application/json")
|
# label bypasses, and even that flushes-the-current-digest rather
|
||||||
self.end_headers()
|
# than printing a standalone job, so the same fingerprint can't
|
||||||
|
# spam the printer per webhook cycle.
|
||||||
|
if status == "RESOLVED":
|
||||||
|
add_to_digest(alert) # removes from buffer
|
||||||
|
continue
|
||||||
|
if is_immediate_label(alert):
|
||||||
|
# Explicit opt-in for "paper this NOW" — first arrival of a
|
||||||
|
# new fingerprint triggers an immediate digest flush; repeat
|
||||||
|
# webhooks for the same fingerprint dedupe in the buffer
|
||||||
|
# until the next interval or until the alert resolves.
|
||||||
|
new_in_buffer = add_to_digest(alert)
|
||||||
|
if new_in_buffer:
|
||||||
|
global _last_flush_time
|
||||||
|
flush_digest()
|
||||||
|
_last_flush_time = time.time()
|
||||||
|
elif is_critical(alert) or is_batched_label(alert):
|
||||||
|
add_to_digest(alert)
|
||||||
|
# else: IRC-only (warnings without thermal_print label)
|
||||||
|
self.send_response(200); self.send_header("Content-Type", "application/json"); self.end_headers()
|
||||||
self.wfile.write(b'{"status":"ok"}')
|
self.wfile.write(b'{"status":"ok"}')
|
||||||
|
|
||||||
def do_GET(self):
|
def do_GET(self):
|
||||||
self.send_response(200)
|
self.send_response(200); self.send_header("Content-Type", "application/json"); self.end_headers()
|
||||||
self.send_header("Content-Type", "application/json")
|
with _buffer_lock:
|
||||||
self.end_headers()
|
alertnames = sorted({it["alert"].get("labels", {}).get("alertname", "?") for it in _buffer.values()})
|
||||||
self.wfile.write(json.dumps({"service":"irc-notify","thermal_print":PRINT_ENABLED}).encode())
|
depth = len(_buffer)
|
||||||
|
info = {
|
||||||
|
"service": "irc-notify",
|
||||||
|
"config": {"thermal_print_enabled": THERMAL_PRINT_ENABLED,
|
||||||
|
"batch_interval_min": BATCH_INTERVAL_MIN,
|
||||||
|
"batch_max_pending": BATCH_MAX_PENDING,
|
||||||
|
"irc_target": f"{IRC_HOST}:{IRC_PORT} {IRC_CHANNEL}",
|
||||||
|
"print_web_url": PRINT_WEB_URL},
|
||||||
|
"buffer": {"depth": depth, "alertnames": alertnames,
|
||||||
|
"seconds_since_last_flush": int(time.time() - _last_flush_time),
|
||||||
|
"seconds_until_next_flush": max(0, int(BATCH_INTERVAL_MIN*60 - (time.time() - _last_flush_time)))},
|
||||||
|
"stats": _stats,
|
||||||
|
}
|
||||||
|
self.wfile.write(json.dumps(info, indent=2).encode())
|
||||||
|
|
||||||
def log_message(self, format, *args):
    """Write a terse request log line to stderr.

    Only the first positional argument is printed, prefixed with
    ``[irc-notify]``; ``format`` and any further arguments are ignored.
    """
    line = f"[irc-notify] {args[0]}"
    print(line, file=sys.stderr)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
threading.Thread(target=digest_loop, daemon=True).start()
|
||||||
server = HTTPServer(("0.0.0.0", 9119), Handler)
|
server = HTTPServer(("0.0.0.0", 9119), Handler)
|
||||||
print(f"IRC alert relay :9119 -> {IRC_HOST}:{IRC_PORT} {IRC_CHANNEL} (thermal: {PRINT_ENABLED})")
|
print(f"[irc-notify] :9119 -> IRC {IRC_HOST}:{IRC_PORT} {IRC_CHANNEL} | thermal={'ON' if THERMAL_PRINT_ENABLED else 'OFF'} | digest={BATCH_INTERVAL_MIN}m max={BATCH_MAX_PENDING}", file=sys.stderr)
|
||||||
server.serve_forever()
|
server.serve_forever()
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|||||||
Reference in New Issue
Block a user