# =============================================================================
# ci1 — Windows Server 2025 KubeVirt VM (GitHub Actions Self-Hosted Runner)
# =============================================================================
# Purpose: dedicated CI runner for FlowerCore.Updater Sandbox E2E nightly +
# future fleet WPF AAT lanes. Replaces the never-registered
# `bluejay-ws-sandbox-1` runner placeholder. Andrew explicitly does NOT want
# BLUEJAY-WS registered as a runner (workstation has personal/operator state).
#
# Status (2026-05-08): LIVE — Phase 1 prereqs satisfied:
#   * Multus CNI v4.2.2 thick-plugin DaemonSet running on all 3 RKE2 nodes
#     (apps/multus/multus.yaml; ApplicationSet `infra-multus` Synced/Healthy)
#   * CDI v1.65.0 operator + CR Deployed (apps/cdi/; ApplicationSet
#     `infra-cdi` Synced/Healthy; uploadproxy reachable via kubectl port-forward)
#   * Windows Server 2025 ISO uploaded via CDI virtctl image-upload to
#     PVC windows-server-2025-iso (7.7 GiB → 10Gi PVC, Bound, Upload Complete)
#   * Local Administrator password generated, stored in 1Password vault
#     IAmWorkin (qaphopopkryhbg353ukzhhuqoq) item id h3ix4mgfk65gmkcmvh6ly3d3hu
#   * NetworkAttachmentDefinition prod-vlan57 registered (apps/kubevirt-vms/
#     prod-vlan57-nad.yaml). VM still uses pod-network masquerade until Phase 1.5
#     host bridge work lands (Puppet br-prod + enp86s0.57); switching is a
#     one-line YAML edit + git push.
#
# See docs/infrastructure/windows-server-build-runner-plan.md "Phase 1 readiness gate".
#
# Network choice (current): **pod-network fallback** (Calico default).
# Outbound-only is fine for the Updater Sandbox E2E runner workload (the runner
# polls GitHub Actions over HTTPS; no inbound listener needed). Multus and the
# prod-vlan57 NetworkAttachmentDefinition are already in place (see Status);
# switch the VM to the Multus PROD VLAN NetworkAttachmentDefinition once the
# Phase 1.5 host bridge work lands and the operator wants L2 access from `ci1`
# to other PROD VLAN services.
#
# Sizing: 8 vCPU / 16 GiB RAM / 200 GiB disk on Longhorn (default storageClass).
# Capacity check 2026-05-08: each RKE2 node has 16 vCPU / ~64Gi allocatable.
# 8 vCPU is 50% of one node's vCPU (the earlier "~17%" figure is the share of
# the 48 vCPU across all 3 nodes) and the 16Gi guest is ~25% of one node's
# memory, so the VM still fits on a single node.
#
# Apply (after operator approval + ISO loaded):
#   kubectl --kubeconfig $env:USERPROFILE\.kube\rke2.yaml apply -f apps/kubevirt-vms/ci1.yaml
#
# Connect to console for Windows install:
#   virtctl --kubeconfig $env:USERPROFILE\.kube\rke2.yaml vnc ci1 -n kubevirt-vms
#   (Or via Guacamole once a connection profile is added.)
# =============================================================================
# Namespace shared by every object in this file (PVCs, ConfigMap, VM).
apiVersion: v1
kind: Namespace
metadata:
  name: kubevirt-vms
  labels:
    app.kubernetes.io/part-of: kubevirt-stack
    # Pod Security Admission enforce level for the namespace.
    # NOTE(review): presumably required by the KubeVirt launcher pods — confirm.
    pod-security.kubernetes.io/enforce: privileged
---
# ISO PVC — populated via CDI virtctl image-upload (CDI is now installed).
#
# **Volume mode (2026-05-08 status):** Filesystem-mode PVC. A migration to
# `volumeMode: Block` via DataVolume was attempted to address an OVMF SATA
# CDROM read timeout, but CDI v1.65.0's upload-target pod runs as uid 107
# with `capabilities.drop: [ALL]` and cannot open the underlying block
# device (`blockdev: cannot open /dev/cdi-block-volume: Permission denied`).
# Reverted to Filesystem PVC pending one of:
#   - CDI deployment override granting CAP_SYS_RAWIO to the upload pod
#   - Pre-populated PVC via a privileged init pod that dd's the ISO directly
#   - Migration to a different storage class that exposes block devices
#     differently (e.g. iSCSI, where Longhorn's CSI mount path may behave
#     differently)
#
# Population workflow (this PVC, Filesystem mode):
#   1.
#      virtctl --kubeconfig $env:USERPROFILE\.kube\rke2.yaml image-upload pvc \
#        windows-server-2025-iso -n kubevirt-vms \
#        --image-path "$env:USERPROFILE\Downloads\en-us_windows_server_2025_updated_march_2026_x64_dvd_8e06425a.iso" \
#        --size 10Gi --storage-class longhorn --access-mode ReadWriteOnce \
#        --uploadproxy-url https://localhost:8443 --insecure
#      (--uploadproxy-url uses port-forward in practice: `kubectl port-forward
#      -n cdi service/cdi-uploadproxy 8443:443 &` first.)
#
# **Open boot issue:** even with the ISO at bootOrder:1, OVMF console showed:
#   BdsDxe: starting Boot0001 "UEFI QEMU DVD-ROM QM00001 " from ... Sata(...)
#   BdsDxe: failed to start Boot0001 ... Time out
# Diagnosis confirmed PVC content IS a valid bootable ISO9660 image — the
# timeout is in OVMF reading from the SATA-CDROM-backed-by-filesystem-PVC.
# Block mode would likely fix it; see CDI permission issue above.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: windows-server-2025-iso
  namespace: kubevirt-vms
  labels:
    app: ci-runner
    flowercore.io/managed-by: bluejay-infra
spec:
  # ReadWriteOnce is enough while a single VM mounts the ISO.
  # NOTE(review): the earlier note said "bump to ReadOnlyMany after population
  # for multi-VM use", but accessModes of an existing claim cannot be edited in
  # place — sharing the ISO would need a new or cloned PVC. Confirm before
  # relying on it.
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 10Gi  # Server 2025 ISO is 7.7 GiB; 10Gi for headroom
  storageClassName: longhorn
---
# Root disk PVC — empty 200Gi volume that Windows installs into.
# Carries the same app / managed-by labels as the ISO PVC and the VM so that
# label selectors (e.g. `-l app=ci-runner`) return all of the runner's storage.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ci1-rootdisk
  namespace: kubevirt-vms
  labels:
    app: ci-runner
    flowercore.io/managed-by: bluejay-infra
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 200Gi
  storageClassName: longhorn
---
# Sysprep ConfigMap — autounattend.xml for hands-off Windows install.
# Sets local Administrator password (REPLACE the placeholder), enables RDP,
# enables WinRM, sets hostname, and configures static-ish networking via DHCP.
# NOTE(review): the file header reports the Administrator password as already
# generated and stored in 1Password, so "REPLACE the placeholder" looks stale —
# confirm and drop it if the committed value is the real one.
# The ISO + VirtIO drivers handle the rest.
apiVersion: v1
kind: ConfigMap
metadata:
  name: ci1-autounattend
  namespace: kubevirt-vms
# Answer file consumed by the VM's `sysprep` volume (see the VirtualMachine
# document that follows).
# NOTE(review): in the copy under review, everything before `</PlainText>` has
# lost its XML element markup (only the text values remain). The scalar is
# reproduced exactly as seen; verify it against the authoritative file before
# applying, because it is not well-formed XML in this form.
# NOTE(review/security): the Administrator password value is committed here.
# `PlainText` = false means it is encoded, not encrypted. KubeVirt's sysprep
# volume can also read from a Secret — consider moving it out of the ConfigMap.
# NOTE(review/security): FirstLogonCommands enable WinRM Basic auth over
# unencrypted transport and disable UAC; the descriptions defer hardening to
# Phase 2.
data:
  autounattend.xml: |
    en-US en-US en-US en-US en-US E:\amd64\2k25 0 true 1 260 EFI 2 128 MSR 3 true Primary 1 1 FAT32 2 2 3 3 NTFS 0 3 /IMAGE/INDEX 2 true FlowerCore CI Runner FlowerCore CI1 Central Standard Time false true true true true true 3 bAA3AGsANABOAHcAcgBMAG4AeQBTAHUAYgBBAHQAaQBzAFUAcAB6AEMAWQAhADkAYQBCAEEAZABtAGkAbgBpAHMAdAByAGEAdABvAHIAUABhAHMAcwB3AG8AcgBkAA== false</PlainText>
            </AdministratorPassword>
          </UserAccounts>
          <FirstLogonCommands>
            <SynchronousCommand wcm:action="add">
              <Order>1</Order>
              <CommandLine>powershell.exe -ExecutionPolicy Bypass -Command "Set-NetFirewallRule -DisplayGroup 'Remote Desktop' -Enabled True"</CommandLine>
              <Description>Enable RDP firewall rule</Description>
            </SynchronousCommand>
            <SynchronousCommand wcm:action="add">
              <Order>2</Order>
              <CommandLine>powershell.exe -ExecutionPolicy Bypass -Command "Enable-PSRemoting -Force; Set-Item WSMan:\localhost\Service\Auth\Basic $true; Set-Item WSMan:\localhost\Service\AllowUnencrypted $true"</CommandLine>
              <Description>Enable WinRM (Phase 2 will pivot to HTTPS via step-ca cert)</Description>
            </SynchronousCommand>
            <SynchronousCommand wcm:action="add">
              <Order>3</Order>
              <CommandLine>cmd.exe /c reg add "HKLM\SOFTWARE\Microsoft\Windows\CurrentVersion\Policies\System" /v EnableLUA /t REG_DWORD /d 0 /f</CommandLine>
              <Description>Disable UAC (Phase 2 Puppet will re-evaluate)</Description>
            </SynchronousCommand>
          </FirstLogonCommands>
        </component>
      </settings>
    </unattend>
---
# VirtualMachine — Windows Server 2025 CI runner.
# Boots from the install ISO first, installs onto the virtio root disk, and
# attaches the VirtIO driver image and the sysprep answer file as extra CD-ROMs.
apiVersion: kubevirt.io/v1
kind: VirtualMachine
metadata:
  name: ci1
  namespace: kubevirt-vms
  labels:
    app: ci-runner
    role: github-actions-runner
    flowercore.io/managed-by: bluejay-infra
spec:
  # `running: true` is deprecated in favor of `runStrategy`. They are mutually
  # exclusive — KubeVirt's validating webhook rejects any VM that sets both:
  #   admission webhook "virtualmachine-validator.kubevirt.io" denied the request:
  #   Running and RunStrategy are mutually exclusive.
  # `Always` keeps a VMI running and restarts it if it crashes/exits — same
  # semantics as the old `running: true`.
  runStrategy: Always  # LIVE — ISO uploaded 2026-05-08, password in 1P
  template:
    metadata:
      labels:
        app: ci-runner
        role: github-actions-runner
        kubevirt.io/vm: ci1
    spec:
      domain:
        # 8 vCPU presented as 1 socket x 8 cores x 1 thread.
        cpu:
          cores: 8
          sockets: 1
          threads: 1
        # Guest-visible RAM; the memory request and limit below are set to the
        # same 16Gi.
        memory:
          guest: 16Gi
        resources:
          requests:
            memory: 16Gi
          limits:
            memory: 16Gi
        # UTC clock with HPET disabled and the Hyper-V timer enabled.
        clock:
          utc: {}
          timer:
            hpet:
              present: false
            pit:
              tickPolicy: delay
            rtc:
              tickPolicy: catchup
            hyperv: {}
        features:
          acpi: {}
          apic: {}
          # Hyper-V enlightenments exposed to the Windows guest.
          hyperv:
            relaxed: {}
            vapic: {}
            spinlocks:
              spinlocks: 8191
          # NOTE(review): SMM is expected to be needed for the secureBoot
          # setting below — confirm against the KubeVirt documentation.
          smm: {}
        firmware:
          bootloader:
            efi:
              secureBoot: true
        devices:
          tpm: {}  # Non-persistent vTPM — sufficient for runner; no BitLocker
          disks:
            # bootOrder: ISO must be 1 for first-boot install (the rootdisk has no
            # EFI bootloader yet). After Windows installs, it writes its own UEFI
            # Boot#### entries pointing at the rootdisk's EFI partition; UEFI then
            # boots from rootdisk going forward.
            # NOTE(review): this comment used to add that "the ISO at bootOrder:2
            # acts as a fallback for re-install scenarios", but the manifest keeps
            # the ISO at bootOrder: 1 and the rootdisk at 2 — confirm whether the
            # order is meant to be swapped after installation.
            #
            # Original (broken) order had rootdisk=1, windows-iso=2 — UEFI tried
            # the empty virtio disk first, got nothing, fell back to the SATA
            # CDROM at Boot0001 with a short timeout, and timed out before the
            # CDROM enumerated. Console showed:
            #   BdsDxe: failed to start Boot0001 ... Time out
            #   BdsDxe: No bootable option or device was found.
            # Confirmed via debug pod: PVC content IS a real bootable ISO9660
            # (file: "ISO 9660 CD-ROM filesystem data ... (bootable)"), so the
            # only bug was boot priority.
            # NOTE(review): the ISO PVC comment earlier in this file records the
            # same "Time out" with the ISO already at bootOrder:1, so the
            # "only bug was boot priority" conclusion may be outdated — confirm.
            - name: windows-iso
              bootOrder: 1
              cdrom:
                bus: sata
            - name: rootdisk
              bootOrder: 2
              disk:
                bus: virtio
            - name: virtio-drivers
              cdrom:
                bus: sata
            - name: sysprep
              cdrom:
                bus: sata
          interfaces:
            # Pod-network fallback for Phase 1. Multus and the prod-vlan57 NAD are
            # already registered (see the Status list in the file header); to
            # switch to PROD VLAN once the Phase 1.5 host bridge work lands,
            # replace this block with:
            #   - name: prod-net
            #     bridge: {}
            #     model: virtio
            # and update the networks: stanza to use multus.networkName: kubevirt-vms/prod-vlan57
            - name: default
              masquerade: {}
              model: virtio
        machine:
          type: q35
      networks:
        - name: default
          pod: {}
      volumes:
        - name: rootdisk
          persistentVolumeClaim:
            claimName: ci1-rootdisk
        - name: windows-iso
          persistentVolumeClaim:
            claimName: windows-server-2025-iso
        - name: virtio-drivers
          containerDisk:
            # Pinned to v1.8.2 (latest stable as of 2026-05-08).
            # The :latest tag uses Docker manifest v1 schema which containerd
            # 2.1 (RKE2 v1.34.5) refuses to pull with:
            #   "media type application/vnd.docker.distribution.manifest.v1+prettyjws
            #   is no longer supported since containerd v2.1"
            # v1.8.2 is rebuilt with manifest v2/OCI and works on containerd 2.1.
            # Bump available: https://quay.io/repository/kubevirt/virtio-container-disk?tab=tags
            image: quay.io/kubevirt/virtio-container-disk:v1.8.2
        - name: sysprep
          sysprep:
            configMap:
              name: ci1-autounattend
      # Shutdown grace period for the guest, in seconds (3600 s = 1 hour).
      terminationGracePeriodSeconds: 3600