NVIDIA · elezar · Jun 8, 2026 · Jun 8, 2026 · Jun 8, 2026 · Jun 8, 2026
@@ -82,7 +82,10 @@ users.
 Custom sandbox images must include the agent runtime and any system
 dependencies, but they should not need to include the gateway. GPU-capable
 images must include the user-space libraries required by the workload. The
-runtime still owns GPU device injection.
+runtime still owns GPU device injection. GPU requests can include a driver-native
+device identifier or a requested count; the gateway validates the request shape
+and each runtime enforces the GPU allocation modes it supports. Kubernetes uses
+counted `nvidia.com/gpu` resources and rejects driver-native device identifiers.
 
 ## Deployment Shape
 

@@ -1215,10 +1215,15 @@ enum SandboxCommands {
 
         /// Target a driver-specific GPU device. Docker and Podman use CDI device IDs
         /// (for example "nvidia.com/gpu=0"); VM uses a PCI BDF or index.
-        /// Only valid with --gpu. When omitted with --gpu, the driver uses its default GPU selection.
-        #[arg(long, requires = "gpu")]
+        /// Specifying --gpu-device also requests GPU resources.
+        /// When omitted with --gpu, the driver uses its default GPU selection.
+        #[arg(long, conflicts_with = "gpu_count")]
         gpu_device: Option<String>,
 
+        /// Request a specific number of GPUs. Mutually exclusive with --gpu-device.
+        #[arg(long, value_parser = clap::value_parser!(u32).range(1..), conflicts_with = "gpu_device")]
+        gpu_count: Option<u32>,
+
         /// CPU limit for the sandbox (for example: 500m, 1, 2.5).
         #[arg(long)]
         cpu: Option<String>,
@@ -2547,6 +2552,7 @@ async fn main() -> Result<()> {
                     editor,
                     gpu,
                     gpu_device,
+                    gpu_count,
                     cpu,
                     memory,
                     driver_config_json,
@@ -2628,6 +2634,7 @@ async fn main() -> Result<()> {
                         keep,
                         gpu,
                         gpu_device.as_deref(),
+                        gpu_count,
                         cpu.as_deref(),
                         memory.as_deref(),
                         driver_config_json.as_deref(),
@@ -4371,6 +4378,78 @@ mod tests {
         }
     }
 
+    #[test]
+    fn sandbox_create_gpu_device_parses_without_gpu_flag() {
+        let cli = Cli::try_parse_from([
+            "openshell",
+            "sandbox",
+            "create",
+            "--gpu-device",
+            "nvidia.com/gpu=0",
+        ])
+        .expect("sandbox create --gpu-device should parse without --gpu");
+
+        match cli.command {
+            Some(Commands::Sandbox {
+                command:
+                    Some(SandboxCommands::Create {
+                        gpu, gpu_device, ..
+                    }),
+                ..
+            }) => {
+                assert!(!gpu);
+                assert_eq!(gpu_device.as_deref(), Some("nvidia.com/gpu=0"));
+            }
+            other => panic!("expected SandboxCommands::Create, got: {other:?}"),
+        }
+    }
+
+    #[test]
+    fn sandbox_create_gpu_count_parses_without_gpu_flag() {
+        let cli = Cli::try_parse_from(["openshell", "sandbox", "create", "--gpu-count", "2"])
+            .expect("sandbox create --gpu-count should parse");
+
+        match cli.command {
+            Some(Commands::Sandbox {
+                command: Some(SandboxCommands::Create { gpu, gpu_count, .. }),
+                ..
+            }) => {
+                assert!(!gpu);
+                assert_eq!(gpu_count, Some(2));
+            }
+            other => panic!("expected SandboxCommands::Create, got: {other:?}"),
+        }
+    }
+
+    #[test]
+    fn sandbox_create_gpu_count_rejects_zero() {
+        let result = Cli::try_parse_from(["openshell", "sandbox", "create", "--gpu-count", "0"]);
+
+        assert!(
+            result.is_err(),
+            "sandbox create --gpu-count 0 should be rejected"
+        );
+    }
+
+    #[test]
+    fn sandbox_create_gpu_count_conflicts_with_gpu_device() {
+        let result = Cli::try_parse_from([
+            "openshell",
+            "sandbox",
+            "create",
+            "--gpu",
+            "--gpu-device",
+            "nvidia.com/gpu=0",
+            "--gpu-count",
+            "2",
+        ]);
+
+        assert!(
+            result.is_err(),
+            "sandbox create should reject --gpu-count with --gpu-device"
+        );
+    }
+
     #[test]
     fn service_expose_accepts_positional_target_port_and_service() {
         let cli = Cli::try_parse_from([

@@ -1745,6 +1745,7 @@ pub async fn sandbox_create(
     keep: bool,
     gpu: bool,
     gpu_device: Option<&str>,
+    gpu_count: Option<u32>,
     cpu: Option<&str>,
     memory: Option<&str>,
     driver_config_json: Option<&str>,
@@ -1799,7 +1800,10 @@ pub async fn sandbox_create(
         }
         None => None,
     };
-    let requested_gpu = gpu || image.as_deref().is_some_and(image_requests_gpu);
+    let requested_gpu = gpu
+        || gpu_device.is_some_and(|device_id| !device_id.is_empty())
+        || gpu_count.is_some()
+        || image.as_deref().is_some_and(image_requests_gpu);
 
     let providers_v2_enabled = gateway_providers_v2_enabled(&mut client).await?;
     let inferred_types: Vec<String> = if providers_v2_enabled {
@@ -1836,6 +1840,7 @@ pub async fn sandbox_create(
         spec: Some(SandboxSpec {
             gpu: requested_gpu,
             gpu_device: gpu_device.unwrap_or_default().to_string(),
+            gpu_count,
             policy,
             providers: configured_providers,
             template,

@@ -788,6 +788,7 @@ async fn sandbox_create_keeps_command_sessions_by_default() {
         None,
         None,
         None,
+        None,
         &[],
         None,
         None,
@@ -827,6 +828,7 @@ async fn sandbox_create_sends_cpu_and_memory_limits_only() {
         true,
         false,
         None,
+        None,
         Some("500m"),
         Some("2Gi"),
         None,
@@ -886,6 +888,99 @@ async fn sandbox_create_sends_cpu_and_memory_limits_only() {
     assert!(!resources.fields.contains_key("requests"));
 }
 
+#[tokio::test]
+async fn sandbox_create_sends_gpu_device_request_without_gpu_flag() {
+    let server = run_server().await;
+    let fake_ssh_dir = tempfile::tempdir().unwrap();
+    let xdg_dir = tempfile::tempdir().unwrap();
+    let _env = test_env(&fake_ssh_dir, &xdg_dir);
+    let tls = test_tls(&server);
+    install_fake_ssh(&fake_ssh_dir);
+
+    run::sandbox_create(
+        &server.endpoint,
+        Some("gpu-device"),
+        None,
+        "openshell",
+        &[],
+        true,
+        false,
+        Some("nvidia.com/gpu=0"),
+        None,
+        None,
+        None,
+        None,
+        None,
+        &[],
+        None,
+        None,
+        &["echo".to_string(), "OK".to_string()],
+        Some(false),
+        Some(false),
+        &HashMap::new(),
+        "manual",
+        &tls,
+    )
+    .await
+    .expect("sandbox create should succeed");
+
+    let requests = create_requests(&server).await;
+    let spec = requests[0]
+        .spec
+        .as_ref()
+        .expect("sandbox spec should be sent");
+
+    assert!(spec.gpu);
+    assert_eq!(spec.gpu_device, "nvidia.com/gpu=0");
+}
+
+#[tokio::test]
+async fn sandbox_create_sends_gpu_count_request_without_gpu_flag() {
+    let server = run_server().await;
+    let fake_ssh_dir = tempfile::tempdir().unwrap();
+    let xdg_dir = tempfile::tempdir().unwrap();
+    let _env = test_env(&fake_ssh_dir, &xdg_dir);
+    let tls = test_tls(&server);
+    install_fake_ssh(&fake_ssh_dir);
+
+    run::sandbox_create(
+        &server.endpoint,
+        Some("gpu-count"),
+        None,
+        "openshell",
+        &[],
+        true,
+        false,
+        None,
+        Some(2),
+        None,
+        None,
+        None,
+        None,
+        &[],
+        None,
+        None,
+        &["echo".to_string(), "OK".to_string()],
+        Some(false),
+        Some(false),
+        &HashMap::new(),
+        "manual",
+        &tls,
+    )
+    .await
+    .expect("sandbox create should succeed");
+
+    let requests = create_requests(&server).await;
+    let spec = requests[0]
+        .spec
+        .as_ref()
+        .expect("sandbox spec should be sent");
+
+    assert!(spec.gpu);
+    assert_eq!(spec.gpu_count, Some(2));
+    assert!(spec.gpu_device.is_empty());
+}
+
 #[tokio::test]
 async fn sandbox_create_sends_driver_config_json() {
     let server = run_server().await;
@@ -906,6 +1001,7 @@ async fn sandbox_create_sends_driver_config_json() {
         None,
         None,
         None,
+        None,
         Some(r#"{"kubernetes":{"pod":{"priority_class_name":"batch-low"}}}"#),
         None,
         &[],
@@ -982,6 +1078,7 @@ async fn sandbox_create_does_not_infer_command_providers_when_v2_enabled() {
         None,
         None,
         None,
+        None,
         &[],
         None,
         None,
@@ -1040,6 +1137,7 @@ async fn sandbox_create_returns_vm_error_without_waiting_for_timeout() {
         None,
         None,
         None,
+        None,
         &[],
         None,
         None,
@@ -1094,6 +1192,7 @@ async fn sandbox_create_keeps_waiting_while_vm_progress_arrives() {
         None,
         None,
         None,
+        None,
         &[],
         None,
         None,
@@ -1140,6 +1239,7 @@ async fn sandbox_create_times_out_when_only_logs_arrive() {
         None,
         None,
         None,
+        None,
         &[],
         None,
         None,
@@ -1182,6 +1282,7 @@ async fn sandbox_create_deletes_command_sessions_with_no_keep() {
         None,
         None,
         None,
+        None,
         &[],
         None,
         None,
@@ -1228,6 +1329,7 @@ async fn sandbox_create_deletes_shell_sessions_with_no_keep() {
         None,
         None,
         None,
+        None,
         &[],
         None,
         None,
@@ -1274,6 +1376,7 @@ async fn sandbox_create_keeps_sandbox_with_hidden_keep_flag() {
         None,
         None,
         None,
+        None,
         &[],
         None,
         None,
@@ -1320,6 +1423,7 @@ async fn sandbox_create_keeps_sandbox_with_forwarding() {
         None,
         None,
         None,
+        None,
         &[],
         None,
         Some(openshell_core::forward::ForwardSpec::new(forward_port)),

@@ -32,7 +32,7 @@ contract:
 | `apparmor=unconfined` | Avoids Docker's default profile blocking required mount operations. |
 | `restart_policy = unless-stopped` | Keeps managed sandboxes resumable across daemon or gateway restarts. |
 | `PidsLimit` | Enforces the sandbox PID budget at the Docker cgroup layer. Set `[openshell.drivers.docker].sandbox_pids_limit = 0` to inherit the Docker/runtime default. |
-| CDI GPU request | Uses the sandbox `gpu_device` value when set; otherwise requests all NVIDIA GPUs when the sandbox spec asks for GPU support and daemon CDI support is detected. |
+| CDI GPU request | Uses the sandbox `gpu_device` value when set; otherwise requests all NVIDIA GPUs when the sandbox spec asks for GPU support and daemon CDI support is detected. Count-based GPU requests are rejected. |
 
 The agent child process does not retain these supervisor privileges.
 

@@ -375,7 +375,7 @@ impl DockerComputeDriver {
                 "docker sandboxes require a template image",
             ));
         }
-        Self::validate_gpu_request(spec.gpu, config.supports_gpu)?;
+        Self::validate_gpu_request(spec.gpu, spec.gpu_count, config.supports_gpu)?;
         if !template.agent_socket_path.trim().is_empty() {
             return Err(Status::failed_precondition(
                 "docker compute driver does not support template.agent_socket_path",
@@ -409,7 +409,17 @@ impl DockerComputeDriver {
         ))
     }
 
-    fn validate_gpu_request(gpu: bool, supports_gpu: bool) -> Result<(), Status> {
+    fn validate_gpu_request(
+        gpu: bool,
+        gpu_count: Option<u32>,
+        supports_gpu: bool,
+    ) -> Result<(), Status> {
+        if gpu_count.is_some() {
+            return Err(Status::invalid_argument(
+                "docker GPU count requests are not supported; use --gpu or --gpu-device",
+            ));
+        }
+
         if gpu && !supports_gpu {
             return Err(Status::failed_precondition(
                 "docker GPU sandboxes require Docker CDI support. Enable CDI on the Docker daemon, then restart the OpenShell gateway/server so GPU capability is detected.",