From 37cbb4ddf6c19d398286d3d54813bfda4dbcbbb1 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 15 Apr 2026 11:35:17 +0200 Subject: [PATCH 1/4] feat(gpu): bump device plugin to v0.19.1 v0.19.1 includes WSL2 CDI spec compatibility fixes. See NVIDIA/k8s-device-plugin#1671. Signed-off-by: Evan Lezar --- deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml b/deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml index 1cb0ca70a..b5cd3fd2d 100644 --- a/deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml +++ b/deploy/kube/gpu-manifests/nvidia-device-plugin-helmchart.yaml @@ -27,7 +27,7 @@ metadata: spec: repo: https://nvidia.github.io/k8s-device-plugin chart: nvidia-device-plugin - version: "0.18.2" + version: "0.19.1" targetNamespace: nvidia-device-plugin createNamespace: true valuesContent: |- From 9706725b79c77302ef83ed2003b8086b97292939 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 15 Apr 2026 11:35:17 +0200 Subject: [PATCH 2/4] fix(sandbox): add WSL2 GPU device and library paths to Landlock baseline On WSL2, NVIDIA GPUs are exposed through the DXG kernel driver (/dev/dxg) rather than the native nvidia* devices. CDI injects /dev/dxg as the sole GPU device node, plus GPU libraries under /usr/lib/wsl/. has_gpu_devices() previously only checked for /dev/nvidiactl, which does not exist on WSL2, so GPU enrichment never ran. This meant /dev/dxg was never permitted by Landlock and /proc write access (required by CUDA for thread naming) was never granted. 
Fix by: - Extending has_gpu_devices() to also detect /dev/dxg - Adding /dev/dxg to GPU_BASELINE_READ_WRITE (device nodes need O_RDWR) - Adding /usr/lib/wsl to GPU_BASELINE_READ_ONLY for CDI-injected GPU library bind-mounts that may not be covered by the /usr parent rule across filesystem boundaries The existing path existence check in enrich_proto_baseline_paths() ensures all new entries are silently skipped on native Linux where these paths do not exist. Signed-off-by: Evan Lezar --- crates/openshell-sandbox/src/lib.rs | 66 ++++++++++++++++++++++++++--- 1 file changed, 61 insertions(+), 5 deletions(-) diff --git a/crates/openshell-sandbox/src/lib.rs b/crates/openshell-sandbox/src/lib.rs index 34ee80bb5..1f2a0efe5 100644 --- a/crates/openshell-sandbox/src/lib.rs +++ b/crates/openshell-sandbox/src/lib.rs @@ -1234,14 +1234,31 @@ const PROXY_BASELINE_READ_WRITE: &[&str] = &["/sandbox", "/tmp"]; /// socket at init time. If the directory exists but Landlock denies traversal /// (EACCES vs ECONNREFUSED), NVML returns `NVML_ERROR_INSUFFICIENT_PERMISSIONS` /// even though the daemon is optional. Only read/traversal access is needed. -const GPU_BASELINE_READ_ONLY: &[&str] = &["/run/nvidia-persistenced"]; +/// +/// `/usr/lib/wsl`: On WSL2, CDI bind-mounts GPU libraries (libdxcore.so, +/// libcuda.so.1.1, etc.) into paths under `/usr/lib/wsl/`. Although `/usr` +/// is already in `PROXY_BASELINE_READ_ONLY`, individual file bind-mounts may +/// not be covered by the parent-directory Landlock rule when the mount crosses +/// a filesystem boundary. Listing `/usr/lib/wsl` explicitly ensures traversal +/// is permitted regardless of Landlock's cross-mount behaviour. +const GPU_BASELINE_READ_ONLY: &[&str] = &[ + "/run/nvidia-persistenced", + "/usr/lib/wsl", // WSL2: CDI-injected GPU library directory +]; /// GPU read-write paths (static). /// /// `/dev/nvidiactl`, `/dev/nvidia-uvm`, `/dev/nvidia-uvm-tools`, -/// `/dev/nvidia-modeset`: control and UVM devices injected by CDI. 
-/// Landlock restricts `open(2)` on device files even when DAC allows it; -/// these need read-write because NVML/CUDA opens them with `O_RDWR`. +/// `/dev/nvidia-modeset`: control and UVM devices injected by CDI on native +/// Linux. Landlock restricts `open(2)` on device files even when DAC allows +/// it; these need read-write because NVML/CUDA opens them with `O_RDWR`. +/// These devices do not exist on WSL2 and will be skipped by the existence +/// check in `enrich_proto_baseline_paths()`. +/// +/// `/dev/dxg`: On WSL2, NVIDIA GPUs are exposed through the DXG kernel driver +/// (DirectX Graphics) rather than the native nvidia* devices. CDI injects +/// `/dev/dxg` as the sole GPU device node; it does not exist on native Linux +/// and will be skipped there by the existence check. /// /// `/proc`: CUDA writes to `/proc//task//comm` during `cuInit()` /// to set thread names. Without write access, `cuInit()` returns error 304. @@ -1255,12 +1272,17 @@ const GPU_BASELINE_READ_WRITE: &[&str] = &[ "/dev/nvidia-uvm", "/dev/nvidia-uvm-tools", "/dev/nvidia-modeset", + "/dev/dxg", // WSL2: DXG device (GPU via DirectX kernel driver, injected by CDI) "/proc", ]; /// Returns true if GPU devices are present in the container. +/// +/// Checks both the native Linux NVIDIA control device (`/dev/nvidiactl`) and +/// the WSL2 DXG device (`/dev/dxg`). CDI injects exactly one of these +/// depending on the host kernel; the other will not exist. fn has_gpu_devices() -> bool { - std::path::Path::new("/dev/nvidiactl").exists() + std::path::Path::new("/dev/nvidiactl").exists() || std::path::Path::new("/dev/dxg").exists() } /// Enumerate per-GPU device nodes (`/dev/nvidia0`, `/dev/nvidia1`, …). @@ -1531,6 +1553,17 @@ mod baseline_tests { ); } + #[test] + fn gpu_baseline_read_write_contains_dxg() { + // /dev/dxg must be present so WSL2 sandboxes get the Landlock + // read-write rule for the CDI-injected DXG device. 
The existence + // check in enrich_proto_baseline_paths() skips it on native Linux. + assert!( + GPU_BASELINE_READ_WRITE.contains(&"/dev/dxg"), + "/dev/dxg must be in GPU_BASELINE_READ_WRITE for WSL2 support" + ); + } + #[test] fn local_enrichment_preserves_explicit_read_only_for_baseline_read_write_paths() { let mut policy = SandboxPolicy { @@ -1565,6 +1598,29 @@ mod baseline_tests { "baseline enrichment must not promote explicit read_only /tmp to read_write" ); } + + #[test] + fn gpu_baseline_read_only_contains_usr_lib_wsl() { + // /usr/lib/wsl must be present so CDI-injected WSL2 GPU library + // bind-mounts are accessible under Landlock. Skipped on native Linux. + assert!( + GPU_BASELINE_READ_ONLY.contains(&"/usr/lib/wsl"), + "/usr/lib/wsl must be in GPU_BASELINE_READ_ONLY for WSL2 CDI library paths" + ); + } + + #[test] + fn has_gpu_devices_reflects_dxg_or_nvidiactl() { + // Verify the OR logic: result must match the manual disjunction of + // the two path checks. Passes in all environments. + let nvidiactl = std::path::Path::new("/dev/nvidiactl").exists(); + let dxg = std::path::Path::new("/dev/dxg").exists(); + assert_eq!( + has_gpu_devices(), + nvidiactl || dxg, + "has_gpu_devices() should be true iff /dev/nvidiactl or /dev/dxg exists" + ); + } } /// Load sandbox policy from local files or gRPC. From 35935e68e2e8573236ca1ad74c8a138266d06e4f Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Fri, 24 Apr 2026 06:24:23 -0700 Subject: [PATCH 3/4] fix(helm): grant node read access to service account for GPU capacity checks Add ClusterRole and ClusterRoleBinding so the openshell service account can list nodes at the cluster scope, which is required by the GPU node capacity check in the Kubernetes driver. 
Signed-off-by: Evan Lezar --- .../helm/openshell/templates/clusterrole.yaml | 18 ++++++++++++++++++ .../templates/clusterrolebinding.yaml | 17 +++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 deploy/helm/openshell/templates/clusterrole.yaml create mode 100644 deploy/helm/openshell/templates/clusterrolebinding.yaml diff --git a/deploy/helm/openshell/templates/clusterrole.yaml b/deploy/helm/openshell/templates/clusterrole.yaml new file mode 100644 index 000000000..a660aee75 --- /dev/null +++ b/deploy/helm/openshell/templates/clusterrole.yaml @@ -0,0 +1,18 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "openshell.fullname" . }}-node-reader + labels: + {{- include "openshell.labels" . | nindent 4 }} +rules: + - apiGroups: + - "" + resources: + - nodes + verbs: + - get + - list + - watch diff --git a/deploy/helm/openshell/templates/clusterrolebinding.yaml b/deploy/helm/openshell/templates/clusterrolebinding.yaml new file mode 100644 index 000000000..685a73bf9 --- /dev/null +++ b/deploy/helm/openshell/templates/clusterrolebinding.yaml @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "openshell.fullname" . }}-node-reader + labels: + {{- include "openshell.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ include "openshell.fullname" . }}-node-reader +subjects: + - kind: ServiceAccount + name: {{ include "openshell.serviceAccountName" . 
}} + namespace: {{ .Release.Namespace }} From 1415b6f96c94e9bc325cee84684af7ed69969292 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Fri, 24 Apr 2026 15:47:01 +0200 Subject: [PATCH 4/4] docs(sandbox): document GPU Landlock baseline paths and WSL2 detection Signed-off-by: Evan Lezar --- architecture/sandbox.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/architecture/sandbox.md b/architecture/sandbox.md index 5104a6dcc..1575257bd 100644 --- a/architecture/sandbox.md +++ b/architecture/sandbox.md @@ -454,6 +454,27 @@ Kernel-level error behavior (e.g., Landlock ABI unavailable) depends on `Landloc **Baseline path filtering**: System-injected baseline paths (e.g., `/app`) are pre-filtered by `enrich_proto_baseline_paths()` / `enrich_sandbox_baseline_paths()` using `Path::exists()` before they reach Landlock. If a baseline `read_write` path is already present in `read_only`, enrichment skips the promotion so explicit policy intent is preserved. User-specified paths are not pre-filtered -- they are evaluated at Landlock apply time so misconfigurations surface as warnings or errors. +#### GPU baseline paths + +When `has_gpu_devices()` returns true, the sandbox extends the Landlock baseline with GPU-specific paths defined in `crates/openshell-sandbox/src/lib.rs`. `has_gpu_devices()` checks for `/dev/nvidiactl` (native Linux) or `/dev/dxg` (WSL2) -- CDI injects exactly one depending on the host kernel. Per-GPU device files (`/dev/nvidia0`, `/dev/nvidia1`, ...) are enumerated separately at runtime by `enumerate_gpu_device_nodes()`. + +All GPU baseline entries pass through `enrich_proto_baseline_paths()`, which filters via `Path::exists()` before paths reach Landlock. Paths that do not exist on the current platform are silently skipped, so native Linux sandboxes are unaffected by WSL2 entries and vice versa. 
+ 
+**`GPU_BASELINE_READ_ONLY`** + +| Path | Purpose | +|------|---------| +| `/run/nvidia-persistenced` | NVML checks for the persistence daemon socket at init. If the directory exists but Landlock denies traversal (`EACCES` instead of `ECONNREFUSED`), NVML returns `NVML_ERROR_INSUFFICIENT_PERMISSIONS` even though the daemon is optional. Only traversal access is needed. | +| `/usr/lib/wsl` | WSL2: CDI bind-mounts GPU libraries (`libdxcore.so`, `libcuda.so.1.1`, etc.) under this path. Although `/usr` is in the proxy baseline, individual file bind-mounts may not be covered by the parent-directory Landlock rule when the mount crosses a filesystem boundary. Listed explicitly to guarantee traversal regardless of Landlock's cross-mount behaviour. | + +**`GPU_BASELINE_READ_WRITE`** + +| Path | Purpose | +|------|---------| +| `/dev/nvidiactl`, `/dev/nvidia-uvm`, `/dev/nvidia-uvm-tools`, `/dev/nvidia-modeset` | Native Linux CDI-injected control and UVM devices. NVML/CUDA opens them with `O_RDWR`. Do not exist on WSL2; skipped by the existence check. | +| `/dev/dxg` | WSL2: NVIDIA GPUs are exposed through the DXG kernel driver (DirectX Graphics) rather than native `nvidia*` devices. CDI injects `/dev/dxg` as the sole GPU device node. Does not exist on native Linux; skipped there. | +| `/proc` | CUDA writes to `/proc/<pid>/task/<tid>/comm` during `cuInit()` to set thread names. Without write access, `cuInit()` returns error 304. Uses `/proc` rather than `/proc/self/task` because Landlock rules bind to inodes and child processes have different procfs inodes than the parent. | + ### Seccomp syscall filtering **File:** `crates/openshell-sandbox/src/sandbox/linux/seccomp.rs`