From 89b147bbdd4c80cb5f7022c3e1b957053ea8f7d1 Mon Sep 17 00:00:00 2001
From: bluejay
Date: Mon, 18 May 2026 04:53:02 +0000
Subject: [PATCH] docs(openvox): document quadlet durability smoke (#12)

---
 README.md                                     |  1 +
 .../openvoxserver-quadlet-durability.md       | 84 ++++++++++++++++
 scripts/monitoring/openvox-recreate-smoke.sh  | 48 +++++++++
 .../OpenVoxServerDurabilityTests.cs           | 99 +++++++++++++++++++
 4 files changed, 232 insertions(+)
 create mode 100644 docs/runbooks/openvoxserver-quadlet-durability.md
 create mode 100755 scripts/monitoring/openvox-recreate-smoke.sh
 create mode 100644 tests/bluejay-infra-lint/OpenVoxServerDurabilityTests.cs

diff --git a/README.md b/README.md
index 70bd9c8..fb17335 100644
--- a/README.md
+++ b/README.md
@@ -118,6 +118,7 @@ That test project sweeps `bluejay-infra/apps/**` plus the canonical sibling `Flo
 
 ## References
 
+- OpenVox noc1 durability runbook: `docs/runbooks/openvoxserver-quadlet-durability.md`
 - Cert-manager recovery playbook: `FlowerCore.Notes/memory/project_cert_manager_recovery_2026_04_22.md`
 - Why pfSense DNS is required: `FlowerCore.Notes/memory/feedback_pfsense_dns_required_for_acme.md`
 - Public DNS operator host: `https://dns.iamworkin.lan`
diff --git a/docs/runbooks/openvoxserver-quadlet-durability.md b/docs/runbooks/openvoxserver-quadlet-durability.md
new file mode 100644
index 0000000..7f6cead
--- /dev/null
+++ b/docs/runbooks/openvoxserver-quadlet-durability.md
@@ -0,0 +1,84 @@
+# openvoxserver Quadlet Durability
+
+This runbook documents the noc1 `openvoxserver` durability fix for the Puppet control-repo deploy path. The service is a noc1 host artifact, not an ArgoCD application, so discovery always starts on noc1 rather than in `apps/*`.
+
+## Current State
+
+As of the Sprint 32 Cx-12 apply on 2026-05-17:
+
+- `/etc/containers/systemd/openvoxserver.container` has a `GIT_SSH_COMMAND` environment entry that points at the persisted serverdata deploy key.
+- `/etc/systemd/system/openvoxserver-safeconfig.service` is enabled and active, and reapplies `git config --global --add safe.directory *` inside the running container.
+- `/opt/puppet/r10k-deploy.sh` self-heals before each fetch by setting `safe.directory`, the repo-local `core.sshCommand`, and the persisted `known_hosts` file when needed.
+- `puppet-deploy.service` exits `0/SUCCESS` after the apply and the control repo reports `HEAD == origin/master`.
+- `systemctl cat openvoxserver` does not currently resolve to a generated unit on noc1. The container is running through Podman with `restart=always`, so destructive recreate smoke must not run until the generated unit is present.
+
+## Discovery
+
+Run every command through noc1 as `fcadmin`; do not assume BLUEJAY-WS can reach container-local surfaces directly.
+
+```bash
+ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "hostname && sudo -n true"
+ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo find /etc/containers/systemd /usr/share/containers/systemd /etc/systemd/system -name 'openvoxserver*' 2>/dev/null"
+ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo sed -n '1,220p' /etc/containers/systemd/openvoxserver.container"
+ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo systemctl cat puppet-deploy.service"
+```
+
+If a future noc1 profile manages these files, update the Puppet control repo and let `puppet-deploy.service` apply the change. On 2026-05-17, host `puppet` was not installed, so Cx-12 used a direct noc1 host edit.
+
+## Durable Fix Shape
+
+The Quadlet keeps the deploy key as a path reference only:
+
+```ini
+Environment=GIT_SSH_COMMAND=ssh -i /opt/puppetlabs/server/data/puppetserver/.puppet-deploy-key -o StrictHostKeyChecking=yes -o IdentitiesOnly=yes -o UserKnownHostsFile=/opt/puppetlabs/server/data/puppetserver/.known_hosts
+```
+
+The safeconfig service is intentionally independent of `openvoxserver.service` until the generated unit exists. It waits for the `openvoxserver` container name and then runs:
+
+```bash
+/usr/bin/podman exec openvoxserver git config --global --add safe.directory '*'
+```
+
+The deploy script self-heals inside the container before it fetches the control repo:
+
+```bash
+git config --global --add safe.directory "*" 2>/dev/null || true
+DEPLOY_KEY="/opt/puppetlabs/server/data/puppetserver/.puppet-deploy-key"
+KNOWN_HOSTS="/opt/puppetlabs/server/data/puppetserver/.known_hosts"
+REPO="/etc/puppetlabs/code/environments/production"
+export GIT_SSH_COMMAND="ssh -i $DEPLOY_KEY -o StrictHostKeyChecking=yes -o IdentitiesOnly=yes -o UserKnownHostsFile=$KNOWN_HOSTS"
+git -C "$REPO" config core.sshCommand "$GIT_SSH_COMMAND" 2>/dev/null || true
+```
+
+## Validation
+
+Non-destructive validation:
+
+```bash
+ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo grep -n 'GIT_SSH_COMMAND' /etc/containers/systemd/openvoxserver.container"
+ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo systemctl status openvoxserver-safeconfig.service --no-pager -l"
+ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo systemctl start puppet-deploy.service && sudo systemctl status puppet-deploy.service --no-pager -l"
+ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo podman exec openvoxserver git -C /etc/puppetlabs/code/environments/production config --get core.sshCommand"
+```
+
+Destructive recreate smoke is opt-in only:
+
+```bash
+scp -i ~/.ssh/fcadmin_ed25519 scripts/monitoring/openvox-recreate-smoke.sh fcadmin@10.0.56.10:/tmp/openvox-recreate-smoke.sh
+ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "chmod +x /tmp/openvox-recreate-smoke.sh && sudo OPENVOX_RECREATE_SMOKE=1 /tmp/openvox-recreate-smoke.sh"
+```
+
+Do not run the smoke during normal sprint work. It stops and removes the production container before starting it again through systemd, and it now refuses to continue unless `systemctl cat openvoxserver` succeeds.
+ +## Credential Rotation Note + +When rotating the Puppet deploy key, update the persisted serverdata copy on noc1: + +```bash +sudo install -m 0600 -o root -g root /opt/puppet/serverdata/.puppet-deploy-key +sudo podman exec openvoxserver sh -c "ssh-keyscan github.com > /opt/puppetlabs/server/data/puppetserver/.known_hosts" +sudo systemctl start openvoxserver-safeconfig.service +sudo systemctl start puppet-deploy.service +``` + +Never commit the deploy key or print it in logs. diff --git a/scripts/monitoring/openvox-recreate-smoke.sh b/scripts/monitoring/openvox-recreate-smoke.sh new file mode 100755 index 0000000..6e70734 --- /dev/null +++ b/scripts/monitoring/openvox-recreate-smoke.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash +set -euo pipefail + +if [ "${OPENVOX_RECREATE_SMOKE:-}" != "1" ]; then + echo "SKIP: set OPENVOX_RECREATE_SMOKE=1 to run the destructive openvoxserver recreate smoke." >&2 + exit 64 +fi + +SUDO="${SUDO:-sudo}" +REPO="/etc/puppetlabs/code/environments/production" +CORE_SSH_COMMAND_FRAGMENT=".puppet-deploy-key" + +if ! $SUDO systemctl cat openvoxserver >/dev/null 2>&1; then + echo "SKIP: systemctl cat openvoxserver failed; refusing to remove a container without a verified systemd recreate path." 
>&2 + exit 65 +fi + +before="$($SUDO podman exec openvoxserver git -C "$REPO" rev-parse --short HEAD)" +echo "Before recreate: $before" + +$SUDO systemctl stop openvoxserver +$SUDO podman rm openvoxserver 2>/dev/null || true +$SUDO systemctl start openvoxserver + +sleep 50 + +$SUDO systemctl start puppet-deploy.service +sleep 5 + +$SUDO systemctl status puppet-deploy.service --no-pager -l + +after="$($SUDO podman exec openvoxserver git -C "$REPO" rev-parse --short origin/master)" +echo "After recreate origin/master: $after" + +$SUDO test -d /opt/puppet/code/environments/production/site-modules/profile/manifests + +core_ssh="$($SUDO podman exec openvoxserver git -C "$REPO" config --get core.sshCommand)" +case "$core_ssh" in + *"$CORE_SSH_COMMAND_FRAGMENT"*) ;; + *) + echo "FAIL: core.sshCommand does not reference the persisted deploy key." >&2 + exit 1 + ;; +esac + +$SUDO podman exec openvoxserver git -C "$REPO" status --short --branch + +echo "PASS: openvoxserver recreate smoke completed without git safety or deploy-key failure." 
diff --git a/tests/bluejay-infra-lint/OpenVoxServerDurabilityTests.cs b/tests/bluejay-infra-lint/OpenVoxServerDurabilityTests.cs
new file mode 100644
index 0000000..836dbe6
--- /dev/null
+++ b/tests/bluejay-infra-lint/OpenVoxServerDurabilityTests.cs
@@ -0,0 +1,132 @@
+using FluentAssertions;
+using Xunit;
+
+namespace BluejayInfraLint.Tests;
+
+/// <summary>
+/// Static lint checks for the openvoxserver durability runbook and its recreate smoke script.
+/// The tests only read the two files from the repository; the smoke script is never executed.
+/// </summary>
+[Trait("Category", "Unit")]
+public sealed class OpenVoxServerDurabilityTests
+{
+    private static readonly string Root = FindRepoRoot();
+    private static readonly string RunbookPath = Path.Combine(Root, "docs", "runbooks", "openvoxserver-quadlet-durability.md");
+    private static readonly string SmokePath = Path.Combine(Root, "scripts", "monitoring", "openvox-recreate-smoke.sh");
+
+    /// <summary>The runbook must say where the service lives and that it is not an ArgoCD app.</summary>
+    [Fact]
+    public void Runbook_DocumentsHostArtifactAndNonArgoPath()
+    {
+        var runbook = File.ReadAllText(RunbookPath);
+
+        runbook.Should().Contain("noc1 host artifact");
+        runbook.Should().Contain("not an ArgoCD application");
+        runbook.Should().Contain("systemctl cat openvoxserver");
+        runbook.Should().Contain("/etc/containers/systemd/openvoxserver.container");
+    }
+
+    /// <summary>The runbook must record the state left behind by the Cx-12 apply.</summary>
+    [Fact]
+    public void Runbook_DocumentsCx12LiveApplyState()
+    {
+        var runbook = File.ReadAllText(RunbookPath);
+
+        runbook.Should().Contain("Sprint 32 Cx-12");
+        runbook.Should().Contain("openvoxserver-safeconfig.service");
+        runbook.Should().Contain("/opt/puppet/r10k-deploy.sh");
+        runbook.Should().Contain("HEAD == origin/master");
+    }
+
+    /// <summary>The opt-in flag must be checked before the script touches the service.</summary>
+    [Fact]
+    public void SmokeScript_IsExplicitlyOptIn()
+    {
+        var smoke = File.ReadAllText(SmokePath);
+
+        smoke.Should().Contain("OPENVOX_RECREATE_SMOKE");
+        smoke.Should().Contain("exit 64");
+        AssertAppearsBefore(smoke, "OPENVOX_RECREATE_SMOKE", "systemctl stop openvoxserver");
+    }
+
+    /// <summary>The systemd-unit guard must precede both destructive steps.</summary>
+    [Fact]
+    public void SmokeScript_RequiresGeneratedSystemdUnitBeforeRemovingContainer()
+    {
+        var smoke = File.ReadAllText(SmokePath);
+
+        smoke.Should().Contain("systemctl cat openvoxserver");
+        smoke.Should().Contain("refusing to remove a container without a verified systemd recreate path");
+        AssertAppearsBefore(smoke, "systemctl cat openvoxserver", "systemctl stop openvoxserver");
+        AssertAppearsBefore(smoke, "systemctl cat openvoxserver", "podman rm openvoxserver");
+    }
+
+    /// <summary>Neither artifact may embed private-key material or paid runner labels.</summary>
+    [Fact]
+    public void Artifacts_DoNotStoreSecretsOrPaidRunnerLabels()
+    {
+        var forbidden = new[]
+        {
+            // PEM private-key headers: OpenSSH, PKCS#1 RSA, SEC1 EC, DSA, PKCS#8 plain and encrypted.
+            "BEGIN OPENSSH PRIVATE KEY",
+            "BEGIN RSA PRIVATE KEY",
+            "BEGIN EC PRIVATE KEY",
+            "BEGIN DSA PRIVATE KEY",
+            "BEGIN PRIVATE KEY",
+            "BEGIN ENCRYPTED PRIVATE KEY",
+            "ubuntu-latest",
+            "windows-latest",
+            "macos-latest",
+        };
+
+        var violations = new[] { RunbookPath, SmokePath }
+            .SelectMany(path =>
+            {
+                var text = File.ReadAllText(path);
+                return forbidden
+                    .Where(token => text.Contains(token, StringComparison.OrdinalIgnoreCase))
+                    .Select(token => $"{Path.GetRelativePath(Root, path)} contains forbidden token {token}");
+            })
+            .ToList();
+
+        violations.Should().BeEmpty();
+    }
+
+    /// <summary>
+    /// Asserts that both tokens occur in <paramref name="text"/> and that the first occurrence of
+    /// <paramref name="earlier"/> comes before the first occurrence of <paramref name="later"/>.
+    /// </summary>
+    private static void AssertAppearsBefore(string text, string earlier, string later)
+    {
+        text.Should().Contain(earlier);
+        text.Should().Contain(later);
+
+        text.IndexOf(earlier, StringComparison.Ordinal)
+            .Should().BeLessThan(
+                text.IndexOf(later, StringComparison.Ordinal),
+                $"'{earlier}' must appear before '{later}'");
+    }
+
+    /// <summary>
+    /// Walks up from the test binary directory until a directory containing <c>apps</c>,
+    /// <c>scripts</c> and <c>README.md</c> is found.
+    /// </summary>
+    /// <exception cref="DirectoryNotFoundException">No ancestor directory matches.</exception>
+    private static string FindRepoRoot()
+    {
+        var current = new DirectoryInfo(AppContext.BaseDirectory);
+        while (current is not null)
+        {
+            if (Directory.Exists(Path.Combine(current.FullName, "apps"))
+                && Directory.Exists(Path.Combine(current.FullName, "scripts"))
+                && File.Exists(Path.Combine(current.FullName, "README.md")))
+            {
+                return current.FullName;
+            }
+
+            current = current.Parent;
+        }
+
+        throw new DirectoryNotFoundException("Could not find bluejay-infra root.");
+    }
+}