# Selenium Grid 4 — RKE2 deployment
#
# Hub + chrome + firefox + edge browser nodes serving fleet-wide AAT runs from
# the GitHub Actions self-hosted runners. ArgoCD owns this namespace from
# 2026-05-25 (`infra-selenium` Application; previously these resources were
# orphan kubectl-applied since 2026-03-15).
#
# Endpoints:
#   - Internal cluster:           http://selenium-hub.selenium.svc.cluster.local:4444
#   - LAN LoadBalancer (MetalLB): http://10.0.56.208:4444
#   - Traefik public:             https://selenium.iamworkin.lan
#
# Browser maxSessions:
#   - chrome  2 (bumped from 1 on 2026-05-25 morning-routine — AAT-heavy
#               Print.Web help-screenshots was the global bottleneck;
#               see commit history for ops/runner-replica-rightsize)
#   - firefox 1
#   - edge    1
#
# Screenshots + video recording write to NFS via the chrome video sidecar.
# NOTE(review): in this manifest the sidecar's /videos mount is backed by an
# emptyDir (5Gi), not an NFS volume — confirm where the NFS hand-off happens.
# See: CLAUDE.md "Selenium Grid & Visual AAT Testing" + bluejay-infra ADR notes.
---
# In-cluster Service for the hub: WebDriver/UI port plus the two event-bus
# ports the browser nodes connect to (see SE_EVENT_BUS_* on the nodes).
apiVersion: v1
kind: Service
metadata:
  labels:
    app: selenium-hub
    app.kubernetes.io/name: selenium-hub
    app.kubernetes.io/part-of: selenium-grid
  name: selenium-hub
  namespace: selenium
spec:
  ports:
    - name: web
      port: 4444
      targetPort: 4444
    - name: publish
      port: 4442
      targetPort: 4442
    - name: subscribe
      port: 4443
      targetPort: 4443
  selector:
    app: selenium-hub
  type: ClusterIP
---
# LAN-facing LoadBalancer for the hub, pinned to 10.0.56.208 via MetalLB.
# NOTE(review): clusterIP/clusterIPs, healthCheckNodePort, the nodePort values
# and the ip-allocated-from-pool annotation look like values captured from the
# live object when it was adopted — confirm they are meant to stay pinned.
# NOTE(review): the event-bus ports (4442/4443) are also exposed on the LAN;
# confirm that out-of-cluster nodes actually need them.
apiVersion: v1
kind: Service
metadata:
  annotations:
    metallb.io/ip-allocated-from-pool: bluejay-pool
    metallb.universe.tf/loadBalancerIPs: 10.0.56.208
  labels:
    app: selenium-hub
    component: external-access
  name: selenium-hub-external
  namespace: selenium
spec:
  clusterIP: 10.43.90.147
  clusterIPs:
    - 10.43.90.147
  externalTrafficPolicy: Local
  healthCheckNodePort: 32213
  ports:
    - name: web
      nodePort: 32411
      port: 4444
      targetPort: 4444
    - name: publish
      nodePort: 32068
      port: 4442
      targetPort: 4442
    - name: subscribe
      nodePort: 31000
      port: 4443
      targetPort: 4443
  selector:
    app: selenium-hub
  type: LoadBalancer
---
# Selenium Grid hub (single replica).
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: selenium-hub
    app.kubernetes.io/name: selenium-hub
    app.kubernetes.io/part-of: selenium-grid
  name: selenium-hub
  namespace: selenium
spec:
  replicas: 1
  selector:
    matchLabels:
      app: selenium-hub
  template:
    metadata:
      labels:
        app: selenium-hub
        app.kubernetes.io/name: selenium-hub
        app.kubernetes.io/part-of: selenium-grid
    spec:
      containers:
        - env:
            - name: SE_NODE_SESSION_TIMEOUT
              value: '300'
            - name: SE_SESSION_REQUEST_TIMEOUT
              value: '300'
            - name: SE_SESSION_RETRY_INTERVAL
              value: '5'
            - name: JAVA_OPTS
              value: -Xmx512m
          image: selenium/hub:4.27.0
          livenessProbe:
            httpGet:
              path: /wd/hub/status
              port: 4444
            initialDelaySeconds: 30
            periodSeconds: 15
            timeoutSeconds: 5
          name: selenium-hub
          ports:
            - containerPort: 4444
              name: web
            - containerPort: 4442
              name: publish
            - containerPort: 4443
              name: subscribe
          readinessProbe:
            httpGet:
              path: /wd/hub/status
              port: 4444
            initialDelaySeconds: 10
            periodSeconds: 5
            timeoutSeconds: 5
          # Hub baseline working set ~766Mi on 2026-05-25 (75% of prior 1Gi
          # limit). Bump to 1.5Gi / 1Gi to keep ~50% headroom; matches the
          # stampede-buffer pattern documented for multus
          # (feedback_k8s_cni_multus_sizing). CPU left alone — observed 54m
          # against a 500m limit, no contention.
          resources:
            limits:
              cpu: 500m
              memory: 1536Mi
            requests:
              cpu: 250m
              memory: 1Gi
---
# Chrome browser node plus the ffmpeg video-recording sidecar.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: selenium-node-chrome
    app.kubernetes.io/name: selenium-node-chrome
    app.kubernetes.io/part-of: selenium-grid
  name: selenium-node-chrome
  namespace: selenium
spec:
  replicas: 1
  selector:
    matchLabels:
      app: selenium-node-chrome
  template:
    metadata:
      labels:
        app: selenium-node-chrome
        app.kubernetes.io/name: selenium-node-chrome
        app.kubernetes.io/part-of: selenium-grid
    spec:
      containers:
        - env:
            - name: SE_EVENT_BUS_HOST
              value: selenium-hub
            - name: SE_EVENT_BUS_PUBLISH_PORT
              value: '4442'
            - name: SE_EVENT_BUS_SUBSCRIBE_PORT
              value: '4443'
            - name: SE_NODE_MAX_SESSIONS
              value: '2'
            # Must be 'true' for maxSessions=2 to take effect: without the
            # override Selenium caps max-sessions at the processor count the
            # JVM reports, which is expected to be 1 under the 1-CPU limit
            # below. Was 'false'; firefox and edge already set 'true'.
            - name: SE_NODE_OVERRIDE_MAX_SESSIONS
              value: 'true'
            - name: SE_VNC_NO_PASSWORD
              value: '1'
            - name: SE_SCREEN_WIDTH
              value: '1920'
            - name: SE_SCREEN_HEIGHT
              value: '1080'
            - name: SE_NODE_SESSION_TIMEOUT
              value: '300'
          image: selenium/node-chrome:4.27.0
          # timeoutSeconds set explicitly to match the hub and firefox probes;
          # the Kubernetes default is 1s.
          livenessProbe:
            httpGet:
              path: /status
              port: 5555
            initialDelaySeconds: 30
            periodSeconds: 15
            timeoutSeconds: 5
          name: selenium-chrome
          ports:
            - containerPort: 5555
              name: node
          readinessProbe:
            httpGet:
              path: /status
              port: 5555
            initialDelaySeconds: 15
            periodSeconds: 5
            timeoutSeconds: 5
          # Chromium-based browser node. Bumped from 1Gi -> 2Gi (req 512Mi
          # -> 1Gi) on 2026-05-25 — Edge had 51 OOMKills in 5d on the
          # original 1Gi cap (~1 OOM every 2.4h), and Chrome at maxSessions=2
          # was running 684Mi idle on the same cap. Matches the Firefox node's
          # tested-stable 2Gi limit. CPU unchanged.
          resources:
            limits:
              cpu: '1'
              memory: 2Gi
            requests:
              cpu: 500m
              memory: 1Gi
          volumeMounts:
            - mountPath: /dev/shm
              name: dshm
        # Video sidecar; DISPLAY_CONTAINER_NAME=localhost points it at the
        # browser container in the same pod.
        - env:
            - name: DISPLAY_CONTAINER_NAME
              value: localhost
            - name: SE_SCREEN_WIDTH
              value: '1920'
            - name: SE_SCREEN_HEIGHT
              value: '1080'
            - name: SE_VIDEO_FILE_NAME
              value: auto
            - name: SE_VIDEO_UPLOAD_ENABLED
              value: 'false'
          image: selenium/video:ffmpeg-7.1-20250101
          name: video
          resources:
            limits:
              cpu: 500m
              memory: 768Mi
            requests:
              cpu: 250m
              memory: 384Mi
          volumeMounts:
            - mountPath: /videos
              name: selenium-videos
      volumes:
        - emptyDir:
            medium: Memory
            sizeLimit: 2Gi
          name: dshm
        # NOTE(review): emptyDir, so recordings do not outlive the pod unless
        # something else copies them out — confirm against the NFS note in the
        # file header.
        - emptyDir:
            sizeLimit: 5Gi
          name: selenium-videos
---
# Firefox browser node.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: selenium-node-firefox
    app.kubernetes.io/name: selenium-node-firefox
    app.kubernetes.io/part-of: selenium-grid
  name: selenium-node-firefox
  namespace: selenium
spec:
  replicas: 1
  selector:
    matchLabels:
      app: selenium-node-firefox
  template:
    metadata:
      labels:
        app: selenium-node-firefox
        app.kubernetes.io/name: selenium-node-firefox
        app.kubernetes.io/part-of: selenium-grid
    spec:
      containers:
        - env:
            - name: SE_EVENT_BUS_HOST
              value: selenium-hub
            - name: SE_EVENT_BUS_PUBLISH_PORT
              value: '4442'
            - name: SE_EVENT_BUS_SUBSCRIBE_PORT
              value: '4443'
            - name: SE_NODE_MAX_SESSIONS
              value: '1'
            - name: SE_NODE_OVERRIDE_MAX_SESSIONS
              value: 'true'
            - name: SE_VNC_NO_PASSWORD
              value: '1'
            - name: SE_START_VNC
              value: 'false'
            - name: SE_SCREEN_WIDTH
              value: '1920'
            - name: SE_SCREEN_HEIGHT
              value: '1080'
            - name: SE_NODE_SESSION_TIMEOUT
              value: '300'
          image: selenium/node-firefox:4.27.0
          livenessProbe:
            failureThreshold: 5
            httpGet:
              path: /status
              port: 5555
            initialDelaySeconds: 30
            periodSeconds: 15
            timeoutSeconds: 5
          name: selenium-firefox
          ports:
            - containerPort: 5555
              name: node
          readinessProbe:
            failureThreshold: 5
            httpGet:
              path: /status
              port: 5555
            initialDelaySeconds: 15
            periodSeconds: 5
            timeoutSeconds: 5
          resources:
            limits:
              cpu: '1'
              memory: 2Gi
            requests:
              cpu: 500m
              memory: 1Gi
          volumeMounts:
            - mountPath: /dev/shm
              name: dshm
      volumes:
        - emptyDir:
            medium: Memory
            sizeLimit: 2Gi
          name: dshm
---
# Edge browser node.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: selenium-node-edge
    app.kubernetes.io/name: selenium-node-edge
    app.kubernetes.io/part-of: selenium-grid
  name: selenium-node-edge
  namespace: selenium
spec:
  replicas: 1
  selector:
    matchLabels:
      app: selenium-node-edge
  template:
    metadata:
      labels:
        app: selenium-node-edge
        app.kubernetes.io/name: selenium-node-edge
        app.kubernetes.io/part-of: selenium-grid
    spec:
      containers:
        - env:
            - name: SE_EVENT_BUS_HOST
              value: selenium-hub
            - name: SE_EVENT_BUS_PUBLISH_PORT
              value: '4442'
            - name: SE_EVENT_BUS_SUBSCRIBE_PORT
              value: '4443'
            - name: SE_NODE_MAX_SESSIONS
              value: '1'
            - name: SE_NODE_OVERRIDE_MAX_SESSIONS
              value: 'true'
            - name: SE_VNC_NO_PASSWORD
              value: '1'
            - name: SE_SCREEN_WIDTH
              value: '1920'
            - name: SE_SCREEN_HEIGHT
              value: '1080'
            - name: SE_NODE_SESSION_TIMEOUT
              value: '300'
          image: selenium/node-edge:4.27.0
          # timeoutSeconds set explicitly to match the hub and firefox probes;
          # the Kubernetes default is 1s.
          livenessProbe:
            httpGet:
              path: /status
              port: 5555
            initialDelaySeconds: 30
            periodSeconds: 15
            timeoutSeconds: 5
          name: selenium-edge
          ports:
            - containerPort: 5555
              name: node
          readinessProbe:
            httpGet:
              path: /status
              port: 5555
            initialDelaySeconds: 15
            periodSeconds: 5
            timeoutSeconds: 5
          # Chromium-based browser node. Bumped from 1Gi -> 2Gi (req 512Mi
          # -> 1Gi) on 2026-05-25 — Edge had 51 OOMKills in 5d on the
          # original 1Gi cap (~1 OOM every 2.4h), and Chrome at maxSessions=2
          # was running 684Mi idle on the same cap. Matches the Firefox node's
          # tested-stable 2Gi limit. CPU unchanged.
          resources:
            limits:
              cpu: '1'
              memory: 2Gi
            requests:
              cpu: 500m
              memory: 1Gi
          volumeMounts:
            - mountPath: /dev/shm
              name: dshm
      volumes:
        - emptyDir:
            medium: Memory
            sizeLimit: 2Gi
          name: dshm
---
# Traefik route: https://selenium.iamworkin.lan -> hub Service port 4444,
# TLS served from the selenium-tls secret.
apiVersion: traefik.io/v1alpha1
kind: IngressRoute
metadata:
  name: selenium-hub
  namespace: selenium
spec:
  entryPoints:
    - websecure
  routes:
    - kind: Rule
      match: Host(`selenium.iamworkin.lan`)
      services:
        - name: selenium-hub
          port: 4444
  tls:
    secretName: selenium-tls