diff --git a/apps/monitoring/noc-monitoring.yaml b/apps/monitoring/noc-monitoring.yaml
index 6c63a67..1698160 100644
--- a/apps/monitoring/noc-monitoring.yaml
+++ b/apps/monitoring/noc-monitoring.yaml
@@ -974,6 +974,39 @@ data:
               summary: "Deployment {{ $labels.namespace }}/{{ $labels.deployment }} replica mismatch"
               description: "Spec wants {{ $labels.spec_replicas }} but only {{ $value }} available. Likely a rollout stuck on probe failure, scheduling, or PVC."
 
+          # Q-MR-3 (2026-05-11): multus memory pressure — catches the next OOM
+          # cascade BEFORE multus is OOM-killed cluster-wide. In the 2026-05-10
+          # outage (21h) nothing fired on the rising multus working set — only
+          # downstream blackbox / Traefik / service alerts. At the 1Gi limit
+          # (bluejay-infra@eb8693e) 80% = ~800MiB vs ~150-250MiB steady state.
+          # `> 0` drops limit-less series (limit exported as 0 → +Inf ratio).
+          - alert: MultusMemoryPressure
+            expr: |
+              container_memory_working_set_bytes{container="kube-multus"}
+                / (container_spec_memory_limit_bytes{container="kube-multus"} > 0) > 0.8
+            for: 5m
+            labels:
+              severity: critical
+              alert_channel: thermal_print
+            annotations:
+              summary: "kube-multus memory >80% of limit on {{ $labels.node }} for 5m"
+              description: "kube-multus working set is {{ $value | humanizePercentage }} of its memory limit on node {{ $labels.node }}. If this keeps climbing, multus will OOM and all new pod networking will halt cluster-wide (precedent: 2026-05-10 outage)."
+
+          # Q-MR-3 (2026-05-11): namespace pending-pod backlog — catches the
+          # operator-leak avalanche pattern BEFORE it cascades into a multus
+          # CNI OOM. Any FC operator (RemoteDesktop / Distribution / WorldBuilder)
+          # that emits pods without ownerReferences will accumulate them when
+          # the operator crashes. The rule sums Pending pods per namespace; more
+          # than 25 sustained for 30m is the signal to investigate the reconciler.
+          - alert: NamespacePendingPodBacklog
+            expr: sum by (namespace) (kube_pod_status_phase{phase="Pending"}) > 25
+            for: 30m
+            labels:
+              severity: warning
+            annotations:
+              summary: "Namespace {{ $labels.namespace }} has {{ $value }} Pending pods for 30m"
+              description: "Pending pod count in {{ $labels.namespace }} exceeds 25 sustained for 30m. Likely operator-leak avalanche pattern — children emitted without ownerReferences. Risk of multus CNI OOM cascade."
+
       # Longhorn storage health alerts. Required: longhorn scrape job
       # (added 2026-04-26 — see scrape_configs above). The K8s events
       # for "snapshot becomes not ready to use" are transient lifecycle