K8s manifest hardening + new bluejay-infra-lint test project
Manifest hardening (per documented memories):
- apps/asterisk/deployment.yaml: dnsPolicy: None + explicit dnsConfig with ndots:2 to prevent CoreDNS *.iamworkin.lan template from hijacking external egress (downloads.asterisk.org).
- apps/fc-llm-bridge/fc-llm-bridge.yaml: same dnsConfig pattern for api.anthropic.com egress.
- apps/fc-ttsreader/fc-ttsreader.yaml: same dnsConfig pattern for huggingface.co model seeding.
- apps/fc-messageboard/fc-messageboard.yaml: tcpSocket probes (replacing httpGet /health) per "Probes against /health 404 when app has global auth middleware".
- apps/fc-signalcontrol/fc-signalcontrol.yaml: same tcpSocket probe fix.

New lint project:
- tests/bluejay-infra-lint/BluejayInfraLint.Tests.csproj — local-first lint test sweep for the recurring K8s gotchas in the fleet.
- tests/bluejay-infra-lint/FleetManifestLintTests.cs — 7 lint tests covering tcpSocket probes, dnsConfig presence on egress-heavy pods, IngressRoute/Service namespace alignment, image pull policy, etc.
- tests/bluejay-infra-lint/conftest.dev/ — matching conftest policies for environments with conftest/opa.
- .gitignore — adds bin/ + obj/ + DS_Store/swp.

README.md adds a "Local manifest lint" section with the canonical test command, plus 4 new gotcha entries (IngressRoute namespace split, public read-only host method allowlists, Traefik VIP netpol backend ports, auth-safe probes).

Tests: 7 / 7 lint tests passed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -16,13 +16,25 @@ spec:
|
||||
metadata:
|
||||
labels:
|
||||
app: asterisk
|
||||
spec:
|
||||
nodeSelector:
|
||||
kubernetes.io/hostname: rke2-agent1
|
||||
hostNetwork: true
|
||||
dnsPolicy: ClusterFirstWithHostNet
|
||||
securityContext:
|
||||
fsGroup: 0
|
||||
spec:
|
||||
nodeSelector:
|
||||
kubernetes.io/hostname: rke2-agent1
|
||||
hostNetwork: true
|
||||
# Keep the search list free of iamworkin.lan so CoreDNS's wildcard
|
||||
# template cannot hijack public egress like downloads.asterisk.org.
|
||||
dnsPolicy: None
|
||||
dnsConfig:
|
||||
nameservers:
|
||||
- 10.43.0.10
|
||||
searches:
|
||||
- telephony.svc.cluster.local
|
||||
- svc.cluster.local
|
||||
- cluster.local
|
||||
options:
|
||||
- name: ndots
|
||||
value: "2"
|
||||
securityContext:
|
||||
fsGroup: 0
|
||||
# CoreDNS in this cluster has an iamworkin.lan wildcard that catches
|
||||
# any unresolved name and returns 10.0.56.200 (Traefik VIP), which
|
||||
# means downloads.asterisk.org inside the pod resolves to Traefik and
|
||||
|
||||
@@ -87,6 +87,20 @@ spec:
|
||||
prometheus.io/port: "8080"
|
||||
prometheus.io/path: "/metrics"
|
||||
spec:
|
||||
# Use an explicit DNS policy so external FQDNs like api.anthropic.com are
|
||||
# resolved directly instead of being expanded through the cluster search
|
||||
# path that includes iamworkin.lan.
|
||||
dnsPolicy: None
|
||||
dnsConfig:
|
||||
nameservers:
|
||||
- 10.43.0.10
|
||||
searches:
|
||||
- fc-llm-bridge.svc.cluster.local
|
||||
- svc.cluster.local
|
||||
- cluster.local
|
||||
options:
|
||||
- name: ndots
|
||||
value: "2"
|
||||
securityContext:
|
||||
fsGroup: 1654
|
||||
fsGroupChangePolicy: OnRootMismatch
|
||||
@@ -211,17 +225,6 @@ spec:
|
||||
port: 8080
|
||||
initialDelaySeconds: 15
|
||||
periodSeconds: 30
|
||||
# Lower ndots so external FQDNs like api.anthropic.com are tried BEFORE
|
||||
# the ndots:5 default expands them through the cluster search path, which
|
||||
# includes iamworkin.lan. CoreDNS has a `template IN A iamworkin.lan`
|
||||
# wildcard that answers `api.anthropic.com.iamworkin.lan` with the
|
||||
# Traefik VIP, which then serves a TRAEFIK-DEFAULT-CERT TLS cert and
|
||||
# breaks egress to the real Anthropic API (memory:
|
||||
# feedback_coredns_ndots_template_collision, generalized to external DNS).
|
||||
dnsConfig:
|
||||
options:
|
||||
- name: ndots
|
||||
value: "2"
|
||||
volumes:
|
||||
- name: data
|
||||
persistentVolumeClaim:
|
||||
|
||||
@@ -69,16 +69,14 @@ spec:
|
||||
memory: "512Mi"
|
||||
cpu: "500m"
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /health
|
||||
tcpSocket:
|
||||
port: 8080
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 30
|
||||
timeoutSeconds: 5
|
||||
failureThreshold: 3
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /health
|
||||
tcpSocket:
|
||||
port: 8080
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
|
||||
@@ -76,15 +76,13 @@ spec:
|
||||
memory: "512Mi"
|
||||
cpu: "500m"
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /health
|
||||
tcpSocket:
|
||||
port: http
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 30
|
||||
timeoutSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /health
|
||||
tcpSocket:
|
||||
port: http
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 10
|
||||
|
||||
@@ -37,6 +37,19 @@ spec:
|
||||
app.kubernetes.io/name: ttsreader-piper
|
||||
app.kubernetes.io/part-of: flowercore
|
||||
spec:
|
||||
# Bypass CoreDNS's *.iamworkin.lan wildcard so the init container reaches
|
||||
# huggingface.co directly when it seeds voice models.
|
||||
dnsPolicy: None
|
||||
dnsConfig:
|
||||
nameservers:
|
||||
- 10.43.0.10
|
||||
searches:
|
||||
- fc-ttsreader.svc.cluster.local
|
||||
- svc.cluster.local
|
||||
- cluster.local
|
||||
options:
|
||||
- name: ndots
|
||||
value: "2"
|
||||
initContainers:
|
||||
- name: seed-voices
|
||||
image: rhasspy/wyoming-piper:latest
|
||||
|
||||
Reference in New Issue
Block a user