From 7310fb88c2ae6824bdf79a0c20ca4fcbfeccba3c Mon Sep 17 00:00:00 2001
From: Andrew Stoltz
Date: Mon, 25 May 2026 20:11:41 -0500
Subject: [PATCH] selenium: right-size hub + chrome + edge memory limits
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Edge node has been OOMKilled 51 times in 5 days (~1 every 2.4h) on a
1Gi memory limit. Chrome runs maxSessions=2 on the same 1Gi cap and was
idling at 684Mi — first concurrent session pushing the node to ~900Mi+
would be the next OOM. Hub was running at 766Mi against a 1Gi limit
(75%); no recent restarts but no headroom either.

Firefox node has been running at 2Gi memory limit for 9 days with zero
restarts — that is the right size for a Selenium 4.27 browser node under
our session profile (screen recording sidecar + 1080p rendering + page
captures). Match it.

Changes:
- Hub: limit 1Gi -> 1.5Gi, request 512Mi -> 1Gi
- Chrome: limit 1Gi -> 2Gi, request 512Mi -> 1Gi
- Edge: limit 1Gi -> 2Gi, request 512Mi -> 1Gi

CPU left alone on all three — observed utilization is well under the
existing limits (hub 54m / 500m, chrome 185m / 1, edge 11m / 1).

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 apps/selenium/selenium-grid.yaml | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/apps/selenium/selenium-grid.yaml b/apps/selenium/selenium-grid.yaml
index 7f207c0..62d84a1 100644
--- a/apps/selenium/selenium-grid.yaml
+++ b/apps/selenium/selenium-grid.yaml
@@ -132,13 +132,18 @@ spec:
           initialDelaySeconds: 10
           periodSeconds: 5
           timeoutSeconds: 5
+        # Hub baseline working set ~766Mi on 2026-05-25 (75% of prior 1Gi
+        # limit). Bump to 1.5Gi / 1Gi to keep ~50% headroom; matches the
+        # stampede-buffer pattern documented for multus
+        # (feedback_k8s_cni_multus_sizing). CPU left alone — observed 54m
+        # against a 500m limit, no contention.
         resources:
           limits:
             cpu: 500m
-            memory: 1Gi
+            memory: 1536Mi
           requests:
             cpu: 250m
-            memory: 512Mi
+            memory: 1Gi
 ---
 apiVersion: apps/v1
 kind: Deployment
@@ -198,13 +203,18 @@ spec:
             port: 5555
           initialDelaySeconds: 15
           periodSeconds: 5
+        # Chromium-based browser node. Bumped from 1Gi -> 2Gi (req 512Mi
+        # -> 1Gi) on 2026-05-25 — Edge had 51 OOMKills in 5d on the
+        # original 1Gi cap (~1 OOM every 2.4h), and Chrome at maxSessions=2
+        # was running 684Mi idle on the same cap. Matches the Firefox node's
+        # tested-stable 2Gi limit. CPU unchanged.
         resources:
           limits:
             cpu: '1'
-            memory: 1Gi
+            memory: 2Gi
           requests:
             cpu: 500m
-            memory: 512Mi
+            memory: 1Gi
         volumeMounts:
         - mountPath: /dev/shm
           name: dshm
@@ -378,13 +388,18 @@ spec:
             port: 5555
           initialDelaySeconds: 15
           periodSeconds: 5
+        # Chromium-based browser node. Bumped from 1Gi -> 2Gi (req 512Mi
+        # -> 1Gi) on 2026-05-25 — Edge had 51 OOMKills in 5d on the
+        # original 1Gi cap (~1 OOM every 2.4h), and Chrome at maxSessions=2
+        # was running 684Mi idle on the same cap. Matches the Firefox node's
+        # tested-stable 2Gi limit. CPU unchanged.
         resources:
           limits:
             cpu: '1'
-            memory: 1Gi
+            memory: 2Gi
           requests:
             cpu: 500m
-            memory: 512Mi
+            memory: 1Gi
         volumeMounts:
         - mountPath: /dev/shm
           name: dshm
-- 
2.49.1