From 0bf47dfa33ab34b1f7e81feab4a3124c66df9be9 Mon Sep 17 00:00:00 2001 From: Codex Date: Fri, 8 May 2026 14:23:31 -0500 Subject: [PATCH] fix(ci1): switch ISO from filesystem PVC to Block-mode DataVolume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bootOrder swap alone didn't fix the install — even with `windows-iso` at bootOrder:1, OVMF UEFI still timed out reading the SATA CDROM: BdsDxe: starting Boot0001 "UEFI QEMU DVD-ROM QM00001 " from ... Sata(...) BdsDxe: failed to start Boot0001 ... : Time out BdsDxe: No bootable option or device was found. Diagnosis (debug pod mounting the live PVC): - /pvc/disk.img IS a valid bootable ISO9660 image — `file` reports "ISO 9660 CD-ROM filesystem data 'SSS_X64FRE_EN-US_DV9' (bootable)". - bytes 0..15: zeros (NOT QCOW2 magic 51 46 49 fb). - bytes 32769..32773: "CD001" — ISO9660 primary volume descriptor at the correct offset. So content was fine. The bug is in how KubeVirt + QEMU + Longhorn expose a Filesystem-mode PVC's `/disk.img` as a SATA CDROM. With Block-mode the underlying volume IS the raw ISO9660 sectors, OVMF reads them directly, no QEMU file-emulation layer. This is the recommended pattern for ISO install media on KubeVirt + Longhorn. Migration: - Replace `kind: PersistentVolumeClaim` with `kind: DataVolume` (CDI manages the underlying PVC + upload-target pod). - Set `pvc.volumeMode: Block`. - Annotate `cdi.kubevirt.io/storage.contentType: kubevirt` so CDI keeps raw bytes (no QCOW2 wrap). - VM volume reference changes from `persistentVolumeClaim.claimName` to `dataVolume.name`. KubeVirt's VMI controller blocks VM start until DV phase is Succeeded (upload completed). Operator step after this lands: 1. Wait for DV `phase: UploadReady` kubectl get dv -n kubevirt-vms windows-server-2025-iso -w 2. 
virtctl image-upload dv windows-server-2025-iso -n kubevirt-vms \ --image-path "...\en-us_windows_server_2025...iso" \ --uploadproxy-url https://localhost:8443 --insecure --no-create 3. Re-flip runStrategy to Always (was set to Halted live-side during migration; this commit keeps the manifest at Always). Co-Authored-By: Claude Opus 4.7 (1M context) --- apps/kubevirt-vms/ci1.yaml | 69 +++++++++++++++++++++++++++----------- 1 file changed, 49 insertions(+), 20 deletions(-) diff --git a/apps/kubevirt-vms/ci1.yaml b/apps/kubevirt-vms/ci1.yaml index 1a8daac..9fb54d4 100644 --- a/apps/kubevirt-vms/ci1.yaml +++ b/apps/kubevirt-vms/ci1.yaml @@ -49,32 +49,58 @@ metadata: pod-security.kubernetes.io/enforce: privileged --- -# ISO PVC — populated via CDI virtctl image-upload (CDI is now installed). -# Population workflow (LIVE 2026-05-08): -# 1. virtctl --kubeconfig $env:USERPROFILE\.kube\rke2.yaml image-upload pvc \ -# windows-server-2025-iso -n kubevirt-vms \ -# --image-path "$env:USERPROFILE\Downloads\en-us_windows_server_2025_updated_march_2026_x64_dvd_8e06425a.iso" \ -# --size 10Gi --storage-class longhorn --access-mode ReadWriteOnce \ -# --uploadproxy-url https://cdi-uploadproxy.cdi.svc:443 --insecure -# (--uploadproxy-url uses port-forward in practice: see plan doc Phase 1.5.) +# ISO DataVolume — CDI manages an underlying PVC of the same name and exposes +# the upload-target pod once it's ready. # -# Note: CDI's PVC creation hooks add cdi.kubevirt.io/storage.* annotations -# automatically. The ISO source file is 7.7GB → request 10Gi for headroom. -apiVersion: v1 -kind: PersistentVolumeClaim +# **Why DataVolume + Block volumeMode** (vs the original `kind: PersistentVolumeClaim` +# + virtctl image-upload pvc): a `volumeMode: Filesystem` PVC stores the upload +# as `/disk.img` on a mounted ext4. KubeVirt then exposes that file as a SATA +# CDROM via QEMU. 
On 2026-05-08 this caused the OVMF UEFI firmware to fail +# Boot0001 with "Time out" reading the SATA CDROM, even with the install ISO +# at bootOrder:1 — see docs/infrastructure/feedback notes below. The ISO +# content WAS valid (`file` reported "ISO 9660 CD-ROM filesystem data ... +# (bootable)"), but the QEMU SATA emulation over a Filesystem-PVC backing was +# too slow / mis-attached for OVMF's CDROM read window. +# +# `volumeMode: Block` gives us a raw block device directly — KubeVirt attaches +# it to the VM as `/dev/sdX` style storage, OVMF reads ISO9660 sectors directly +# from the underlying block volume, no QEMU virtual file emulation needed. +# This is the recommended pattern for ISO install media on KubeVirt + Longhorn. +# +# Population workflow: +# 1. After this DataVolume is applied, CDI creates the PVC and an +# upload-target pod. Wait for `phase: UploadReady`. +# 2. From BLUEJAY-WS (PowerShell 7+; a trailing backtick continues the line): +# kubectl --kubeconfig $env:USERPROFILE\.kube\rke2.yaml port-forward ` +# -n cdi service/cdi-uploadproxy 8443:443 & +# virtctl --kubeconfig $env:USERPROFILE\.kube\rke2.yaml image-upload dv ` +# windows-server-2025-iso -n kubevirt-vms ` +# --image-path "$env:USERPROFILE\Downloads\en-us_windows_server_2025_updated_march_2026_x64_dvd_8e06425a.iso" ` +# --uploadproxy-url https://localhost:8443 --insecure --no-create +# (`--no-create` — the DV/PVC already exist, virtctl just streams bytes.) +apiVersion: cdi.kubevirt.io/v1beta1 +kind: DataVolume metadata: name: windows-server-2025-iso namespace: kubevirt-vms labels: app: ci-runner flowercore.io/managed-by: bluejay-infra + annotations: + # contentType `kubevirt` (CDI's default) = treat the upload as a VM disk image: + # a raw ISO is written to the block device verbatim (nothing is wrapped in QCOW2). 
+ cdi.kubevirt.io/storage.contentType: kubevirt spec: - accessModes: - - ReadWriteOnce # Bump to ReadOnlyMany after population for multi-VM use - resources: - requests: - storage: 10Gi # Bumped from 6Gi (Server 2025 ISO is 7.7GB) - storageClassName: longhorn + source: + upload: {} + pvc: + accessModes: + - ReadWriteOnce # Bump to ReadOnlyMany after population for multi-VM use + resources: + requests: + storage: 10Gi # Server 2025 ISO is 7.7GB; 10Gi for headroom + volumeMode: Block # CRITICAL — see header comment above + storageClassName: longhorn --- # Root disk PVC — empty 200Gi volume that Windows installs into. @@ -384,8 +410,11 @@ spec: persistentVolumeClaim: claimName: ci1-rootdisk - name: windows-iso - persistentVolumeClaim: - claimName: windows-server-2025-iso + # Reference the DataVolume (defined above) — CDI creates the PVC of + # the same name with volumeMode: Block. The VMI controller blocks + # VM start until DV phase is Succeeded (i.e. upload completed). + dataVolume: + name: windows-server-2025-iso - name: virtio-drivers containerDisk: # Pinned to v1.8.2 (latest stable as of 2026-05-08).