docs(openvox): document quadlet durability smoke (#12)
This commit was merged in pull request #12.
This commit is contained in:
@@ -118,6 +118,7 @@ That test project sweeps `bluejay-infra/apps/**` plus the canonical sibling `Flo
|
|||||||
|
|
||||||
## References
|
## References
|
||||||
|
|
||||||
|
- OpenVox noc1 durability runbook: `docs/runbooks/openvoxserver-quadlet-durability.md`
|
||||||
- Cert-manager recovery playbook: `FlowerCore.Notes/memory/project_cert_manager_recovery_2026_04_22.md`
|
- Cert-manager recovery playbook: `FlowerCore.Notes/memory/project_cert_manager_recovery_2026_04_22.md`
|
||||||
- Why pfSense DNS is required: `FlowerCore.Notes/memory/feedback_pfsense_dns_required_for_acme.md`
|
- Why pfSense DNS is required: `FlowerCore.Notes/memory/feedback_pfsense_dns_required_for_acme.md`
|
||||||
- Public DNS operator host: `https://dns.iamworkin.lan`
|
- Public DNS operator host: `https://dns.iamworkin.lan`
|
||||||
|
|||||||
84
docs/runbooks/openvoxserver-quadlet-durability.md
Normal file
84
docs/runbooks/openvoxserver-quadlet-durability.md
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
# openvoxserver Quadlet Durability
|
||||||
|
|
||||||
|
This runbook documents the noc1 `openvoxserver` durability fix for the Puppet control-repo deploy path. The service is a noc1 host artifact, not an ArgoCD application, so discovery always starts on noc1 rather than in `apps/*`.
|
||||||
|
|
||||||
|
## Current State
|
||||||
|
|
||||||
|
As of the Sprint 32 Cx-12 apply on 2026-05-17:
|
||||||
|
|
||||||
|
- `/etc/containers/systemd/openvoxserver.container` has a `GIT_SSH_COMMAND` environment entry that points at the persisted serverdata deploy key.
|
||||||
|
- `/etc/systemd/system/openvoxserver-safeconfig.service` is enabled and active, and reapplies `git config --global --add safe.directory *` inside the running container.
|
||||||
|
- `/opt/puppet/r10k-deploy.sh` self-heals before each fetch by setting `safe.directory`, the repo-local `core.sshCommand`, and the persisted `known_hosts` file when needed.
|
||||||
|
- `puppet-deploy.service` exits `0/SUCCESS` after the apply and the control repo reports `HEAD == origin/master`.
|
||||||
|
- `systemctl cat openvoxserver` does not currently resolve to a generated unit on noc1. The container is running through Podman with `restart=always`, so destructive recreate smoke must not run until the generated unit is present.
|
||||||
|
|
||||||
|
## Discovery
|
||||||
|
|
||||||
|
Run every command through noc1 as `fcadmin`; do not assume BLUEJAY-WS can reach container-local surfaces directly.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "hostname && sudo -n true"
|
||||||
|
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo find /etc/containers/systemd /usr/share/containers/systemd /etc/systemd/system -name 'openvoxserver*' 2>/dev/null"
|
||||||
|
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo sed -n '1,220p' /etc/containers/systemd/openvoxserver.container"
|
||||||
|
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo systemctl cat puppet-deploy.service"
|
||||||
|
```
|
||||||
|
|
||||||
|
If a future noc1 profile manages these files, update the Puppet control repo and let `puppet-deploy.service` apply the change. On 2026-05-17, host `puppet` was not installed, so Cx-12 used a direct noc1 host edit.
|
||||||
|
|
||||||
|
## Durable Fix Shape
|
||||||
|
|
||||||
|
The Quadlet keeps the deploy key as a path reference only:
|
||||||
|
|
||||||
|
```ini
|
||||||
|
Environment=GIT_SSH_COMMAND=ssh -i /opt/puppetlabs/server/data/puppetserver/.puppet-deploy-key -o StrictHostKeyChecking=yes -o IdentitiesOnly=yes -o UserKnownHostsFile=/opt/puppetlabs/server/data/puppetserver/.known_hosts
|
||||||
|
```
|
||||||
|
|
||||||
|
The safeconfig service is intentionally independent of `openvoxserver.service` until the generated unit exists. It waits for the `openvoxserver` container name and then runs:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
/usr/bin/podman exec openvoxserver git config --global --add safe.directory "*"
|
||||||
|
```
|
||||||
|
|
||||||
|
The deploy script self-heals inside the container before it fetches the control repo:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git config --global --add safe.directory "*" 2>/dev/null || true
|
||||||
|
DEPLOY_KEY="/opt/puppetlabs/server/data/puppetserver/.puppet-deploy-key"
|
||||||
|
KNOWN_HOSTS="/opt/puppetlabs/server/data/puppetserver/.known_hosts"
|
||||||
|
REPO="/etc/puppetlabs/code/environments/production"
|
||||||
|
export GIT_SSH_COMMAND="ssh -i $DEPLOY_KEY -o StrictHostKeyChecking=yes -o IdentitiesOnly=yes -o UserKnownHostsFile=$KNOWN_HOSTS"
|
||||||
|
git -C "$REPO" config core.sshCommand "$GIT_SSH_COMMAND" 2>/dev/null || true
|
||||||
|
```
|
||||||
|
|
||||||
|
## Validation
|
||||||
|
|
||||||
|
Non-destructive validation:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo grep -n 'GIT_SSH_COMMAND' /etc/containers/systemd/openvoxserver.container"
|
||||||
|
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo systemctl status openvoxserver-safeconfig.service --no-pager -l"
|
||||||
|
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo systemctl start puppet-deploy.service && sudo systemctl status puppet-deploy.service --no-pager -l"
|
||||||
|
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "sudo podman exec openvoxserver git -C /etc/puppetlabs/code/environments/production config --get core.sshCommand"
|
||||||
|
```
|
||||||
|
|
||||||
|
Destructive recreate smoke is opt-in only:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
scp -i ~/.ssh/fcadmin_ed25519 scripts/monitoring/openvox-recreate-smoke.sh fcadmin@10.0.56.10:/tmp/openvox-recreate-smoke.sh
|
||||||
|
ssh -i ~/.ssh/fcadmin_ed25519 fcadmin@10.0.56.10 "chmod +x /tmp/openvox-recreate-smoke.sh && sudo OPENVOX_RECREATE_SMOKE=1 /tmp/openvox-recreate-smoke.sh"
|
||||||
|
```
|
||||||
|
|
||||||
|
Do not run the smoke during normal sprint work. It stops and removes the production container before starting it again through systemd, and it now refuses to continue unless `systemctl cat openvoxserver` succeeds.
|
||||||
|
|
||||||
|
## Credential Rotation Note
|
||||||
|
|
||||||
|
When rotating the Puppet deploy key, update the persisted serverdata copy on noc1:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
sudo install -m 0600 -o root -g root <new-deploy-key> /opt/puppet/serverdata/.puppet-deploy-key
|
||||||
|
sudo podman exec openvoxserver sh -c "ssh-keyscan github.com > /opt/puppetlabs/server/data/puppetserver/.known_hosts"
|
||||||
|
sudo systemctl start openvoxserver-safeconfig.service
|
||||||
|
sudo systemctl start puppet-deploy.service
|
||||||
|
```
|
||||||
|
|
||||||
|
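Never commit the deploy key or print it in logs. Because the deploy path uses `StrictHostKeyChecking=yes`, compare the host keys written by `ssh-keyscan` against the Git host's published SSH fingerprints before relying on the refreshed `.known_hosts` file.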
|
||||||
48
scripts/monitoring/openvox-recreate-smoke.sh
Executable file
48
scripts/monitoring/openvox-recreate-smoke.sh
Executable file
@@ -0,0 +1,48 @@
|
|||||||
|
#!/usr/bin/env bash
# Destructive recreate smoke for the openvoxserver container.
#
# The script is opt-in: it exits 64 unless OPENVOX_RECREATE_SMOKE=1 is set,
# and exits 65 unless `systemctl cat openvoxserver` resolves a unit, because
# without one there is no verified way to bring the container back.
#
# Environment:
#   OPENVOX_RECREATE_SMOKE  must be "1" to run anything destructive.
#   SUDO                    privilege-escalation command (default: sudo).
#
# Exit codes: 0 pass, 1 smoke failure, 64 not opted in, 65 no systemd unit.
set -euo pipefail

if [ "${OPENVOX_RECREATE_SMOKE:-}" != "1" ]; then
  echo "SKIP: set OPENVOX_RECREATE_SMOKE=1 to run the destructive openvoxserver recreate smoke." >&2
  exit 64
fi

# $SUDO is expanded unquoted on purpose so it may carry arguments (e.g. "sudo -n").
SUDO="${SUDO:-sudo}"
REPO="/etc/puppetlabs/code/environments/production"
CORE_SSH_COMMAND_FRAGMENT=".puppet-deploy-key"

# Guard: never remove the container unless systemd can recreate it.
if ! $SUDO systemctl cat openvoxserver >/dev/null 2>&1; then
  echo "SKIP: systemctl cat openvoxserver failed; refusing to remove a container without a verified systemd recreate path." >&2
  exit 65
fi

before="$($SUDO podman exec openvoxserver git -C "$REPO" rev-parse --short HEAD)"
echo "Before recreate: $before"

$SUDO systemctl stop openvoxserver
# The container may already be gone after the stop; a missing container is not an error.
$SUDO podman rm openvoxserver 2>/dev/null || true
$SUDO systemctl start openvoxserver

# Fixed settle time for the recreated container before the deploy is triggered.
sleep 50

$SUDO systemctl start puppet-deploy.service
sleep 5

# `systemctl status` exits non-zero for a unit that is no longer active, which
# includes a deploy that already ran to completion. Under `set -e` that would
# abort a healthy run, so status is printed for the log only and the pass/fail
# decision is taken from `is-failed`.
$SUDO systemctl status puppet-deploy.service --no-pager -l || true
if $SUDO systemctl is-failed --quiet puppet-deploy.service; then
  echo "FAIL: puppet-deploy.service is in a failed state after the recreate." >&2
  exit 1
fi

after="$($SUDO podman exec openvoxserver git -C "$REPO" rev-parse --short origin/master)"
echo "After recreate origin/master: $after"

$SUDO test -d /opt/puppet/code/environments/production/site-modules/profile/manifests

# `git config --get` exits 1 when the key is unset; tolerate that here so the
# case statement below reports the explicit FAIL message instead of `set -e`
# terminating the script silently.
core_ssh="$($SUDO podman exec openvoxserver git -C "$REPO" config --get core.sshCommand || true)"
case "$core_ssh" in
  *"$CORE_SSH_COMMAND_FRAGMENT"*) ;;
  *)
    echo "FAIL: core.sshCommand does not reference the persisted deploy key." >&2
    exit 1
    ;;
esac

$SUDO podman exec openvoxserver git -C "$REPO" status --short --branch

echo "PASS: openvoxserver recreate smoke completed without git safety or deploy-key failure."
|
||||||
99
tests/bluejay-infra-lint/OpenVoxServerDurabilityTests.cs
Normal file
99
tests/bluejay-infra-lint/OpenVoxServerDurabilityTests.cs
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
using FluentAssertions;
|
||||||
|
using Xunit;
|
||||||
|
|
||||||
|
namespace BluejayInfraLint.Tests;
|
||||||
|
|
||||||
|
/// <summary>
/// Lint-style tests that read the openvoxserver durability runbook and the
/// recreate smoke script from the repository and assert on their text.
/// Nothing here executes the script or contacts a host.
/// </summary>
[Trait("Category", "Unit")]
public sealed class OpenVoxServerDurabilityTests
{
    private static readonly string Root = FindRepoRoot();
    private static readonly string RunbookPath = Path.Combine(Root, "docs", "runbooks", "openvoxserver-quadlet-durability.md");
    private static readonly string SmokePath = Path.Combine(Root, "scripts", "monitoring", "openvox-recreate-smoke.sh");

    /// <summary>
    /// The runbook must state that the service is a host artifact outside ArgoCD
    /// and name the unit lookup command and the Quadlet file path.
    /// </summary>
    [Fact]
    public void Runbook_DocumentsHostArtifactAndNonArgoPath()
    {
        var runbook = File.ReadAllText(RunbookPath);

        runbook.Should().Contain("noc1 host artifact");
        runbook.Should().Contain("not an ArgoCD application");
        runbook.Should().Contain("systemctl cat openvoxserver");
        runbook.Should().Contain("/etc/containers/systemd/openvoxserver.container");
    }

    /// <summary>
    /// The runbook must record the Cx-12 apply state: the sprint marker, the
    /// safeconfig service, the deploy script path, and the HEAD/origin statement.
    /// </summary>
    [Fact]
    public void Runbook_DocumentsCx12LiveApplyState()
    {
        var runbook = File.ReadAllText(RunbookPath);

        runbook.Should().Contain("Sprint 32 Cx-12");
        runbook.Should().Contain("openvoxserver-safeconfig.service");
        runbook.Should().Contain("/opt/puppet/r10k-deploy.sh");
        runbook.Should().Contain("HEAD == origin/master");
    }

    /// <summary>
    /// The opt-in environment variable must appear in the script before the
    /// command that stops the service.
    /// </summary>
    [Fact]
    public void SmokeScript_IsExplicitlyOptIn()
    {
        var smoke = File.ReadAllText(SmokePath);

        smoke.Should().Contain("OPENVOX_RECREATE_SMOKE");
        smoke.Should().Contain("exit 64");
        // Assert presence first: a missing command makes IndexOf return -1 and
        // the ordering check below would fail with an unhelpful "less than -1".
        smoke.Should().Contain("systemctl stop openvoxserver");
        smoke.IndexOf("OPENVOX_RECREATE_SMOKE", StringComparison.Ordinal)
            .Should().BeLessThan(smoke.IndexOf("systemctl stop openvoxserver", StringComparison.Ordinal));
    }

    /// <summary>
    /// The unit lookup guard must appear in the script before the command that
    /// removes the container.
    /// </summary>
    [Fact]
    public void SmokeScript_RequiresGeneratedSystemdUnitBeforeRemovingContainer()
    {
        var smoke = File.ReadAllText(SmokePath);

        smoke.Should().Contain("systemctl cat openvoxserver");
        smoke.Should().Contain("refusing to remove a container without a verified systemd recreate path");
        // Same reasoning as above: make a missing removal command fail clearly.
        smoke.Should().Contain("podman rm openvoxserver");
        smoke.IndexOf("systemctl cat openvoxserver", StringComparison.Ordinal)
            .Should().BeLessThan(smoke.IndexOf("podman rm openvoxserver", StringComparison.Ordinal));
    }

    /// <summary>
    /// Neither artifact may contain a PEM private-key header or a hosted
    /// runner label. Matching is case-insensitive.
    /// </summary>
    [Fact]
    public void Artifacts_DoNotStoreSecretsOrPaidRunnerLabels()
    {
        var forbidden = new[]
        {
            // PEM private-key labels. OPENSSH and RSA were the original set; the
            // remaining labels cover PKCS#8, encrypted PKCS#8, EC, and DSA keys.
            "BEGIN OPENSSH PRIVATE KEY",
            "BEGIN RSA PRIVATE KEY",
            "BEGIN PRIVATE KEY",
            "BEGIN ENCRYPTED PRIVATE KEY",
            "BEGIN EC PRIVATE KEY",
            "BEGIN DSA PRIVATE KEY",
            "ubuntu-latest",
            "windows-latest",
            "macos-latest",
        };

        var violations = new[] { RunbookPath, SmokePath }
            .SelectMany(path =>
            {
                var text = File.ReadAllText(path);
                return forbidden
                    .Where(token => text.Contains(token, StringComparison.OrdinalIgnoreCase))
                    .Select(token => $"{Path.GetRelativePath(Root, path)} contains forbidden token {token}");
            })
            .ToList();

        violations.Should().BeEmpty();
    }

    /// <summary>
    /// Walks upward from <see cref="AppContext.BaseDirectory"/> until it finds a
    /// directory that has an <c>apps</c> directory, a <c>scripts</c> directory,
    /// and a <c>README.md</c> file, and returns that directory's full path.
    /// </summary>
    /// <exception cref="DirectoryNotFoundException">
    /// No ancestor directory matches those three markers.
    /// </exception>
    private static string FindRepoRoot()
    {
        var current = new DirectoryInfo(AppContext.BaseDirectory);
        while (current is not null)
        {
            if (Directory.Exists(Path.Combine(current.FullName, "apps"))
                && Directory.Exists(Path.Combine(current.FullName, "scripts"))
                && File.Exists(Path.Combine(current.FullName, "README.md")))
            {
                return current.FullName;
            }

            current = current.Parent;
        }

        throw new DirectoryNotFoundException("Could not find bluejay-infra root.");
    }
}
|
||||||
Reference in New Issue
Block a user