Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion TESTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,9 @@ defaults to the image used by the `gateway` stage in
`deploy/docker/Dockerfile.images`; set `OPENSHELL_E2E_GPU_PROBE_IMAGE` to
override it. Per-device checks run only for NVIDIA CDI device IDs reported by
the runtime's discovered devices list, so WSL2 hosts that expose only
`nvidia.com/gpu=all` skip the index-based cases.
`nvidia.com/gpu=all` skip the index-based cases. Exact CDI device selection is
passed through `--driver-config-json` with the active Docker or Podman driver
key.

Run the Docker-backed Rust CLI e2e suite:

Expand Down
8 changes: 0 additions & 8 deletions crates/openshell-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1211,12 +1211,6 @@ enum SandboxCommands {
#[arg(long)]
gpu: bool,

/// Target a driver-specific GPU device. Docker and Podman use CDI device IDs
/// (for example "nvidia.com/gpu=0"); VM uses a PCI BDF or index.
/// Only valid with --gpu. When omitted with --gpu, the driver uses its default GPU selection.
#[arg(long, requires = "gpu")]
gpu_device: Option<String>,

/// CPU limit for the sandbox (for example: 500m, 1, 2.5).
#[arg(long)]
cpu: Option<String>,
Expand Down Expand Up @@ -2552,7 +2546,6 @@ async fn main() -> Result<()> {
no_keep,
editor,
gpu,
gpu_device,
cpu,
memory,
driver_config_json,
Expand Down Expand Up @@ -2637,7 +2630,6 @@ async fn main() -> Result<()> {
&upload_specs,
keep,
gpu,
gpu_device.as_deref(),
cpu.as_deref(),
memory.as_deref(),
driver_config_json.as_deref(),
Expand Down
2 changes: 0 additions & 2 deletions crates/openshell-cli/src/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1725,7 +1725,6 @@ pub async fn sandbox_create(
uploads: &[(String, Option<String>, bool)],
keep: bool,
gpu: bool,
gpu_device: Option<&str>,
cpu: Option<&str>,
memory: Option<&str>,
driver_config_json: Option<&str>,
Expand Down Expand Up @@ -1817,7 +1816,6 @@ pub async fn sandbox_create(
let request = CreateSandboxRequest {
spec: Some(SandboxSpec {
gpu: requested_gpu,
gpu_device: gpu_device.unwrap_or_default().to_string(),
environment: environment.clone(),
policy,
providers: configured_providers,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -787,7 +787,6 @@ async fn sandbox_create_keeps_command_sessions_by_default() {
None,
None,
None,
None,
&[],
None,
None,
Expand Down Expand Up @@ -827,7 +826,6 @@ async fn sandbox_create_sends_cpu_and_memory_limits_only() {
&[],
true,
false,
None,
Some("500m"),
Some("2Gi"),
None,
Expand Down Expand Up @@ -907,7 +905,6 @@ async fn sandbox_create_sends_driver_config_json() {
false,
None,
None,
None,
Some(r#"{"kubernetes":{"pod":{"priority_class_name":"batch-low"}}}"#),
None,
&[],
Expand Down Expand Up @@ -984,7 +981,6 @@ async fn sandbox_create_does_not_infer_command_providers_when_v2_enabled() {
None,
None,
None,
None,
&[],
None,
None,
Expand Down Expand Up @@ -1043,7 +1039,6 @@ async fn sandbox_create_returns_vm_error_without_waiting_for_timeout() {
None,
None,
None,
None,
&[],
None,
None,
Expand Down Expand Up @@ -1098,7 +1093,6 @@ async fn sandbox_create_keeps_waiting_while_vm_progress_arrives() {
None,
None,
None,
None,
&[],
None,
None,
Expand Down Expand Up @@ -1145,7 +1139,6 @@ async fn sandbox_create_times_out_when_only_logs_arrive() {
None,
None,
None,
None,
&[],
None,
None,
Expand Down Expand Up @@ -1188,7 +1181,6 @@ async fn sandbox_create_deletes_command_sessions_with_no_keep() {
None,
None,
None,
None,
&[],
None,
None,
Expand Down Expand Up @@ -1235,7 +1227,6 @@ async fn sandbox_create_deletes_shell_sessions_with_no_keep() {
None,
None,
None,
None,
&[],
None,
None,
Expand Down Expand Up @@ -1282,7 +1273,6 @@ async fn sandbox_create_keeps_sandbox_with_hidden_keep_flag() {
None,
None,
None,
None,
&[],
None,
None,
Expand Down Expand Up @@ -1329,7 +1319,6 @@ async fn sandbox_create_keeps_sandbox_with_forwarding() {
None,
None,
None,
None,
&[],
None,
Some(openshell_core::forward::ForwardSpec::new(forward_port)),
Expand Down Expand Up @@ -1372,7 +1361,6 @@ async fn sandbox_create_sends_environment_variables() {
None,
None,
None,
None,
&[],
None,
None,
Expand Down
4 changes: 4 additions & 0 deletions crates/openshell-core/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,9 @@ pub enum ComputeDriverError {
/// The requested sandbox already exists.
#[error("sandbox already exists")]
AlreadyExists,
/// The request contains an invalid argument.
#[error("{0}")]
InvalidArgument(String),
/// A precondition for the operation was not met.
#[error("{0}")]
Precondition(String),
Expand All @@ -125,6 +128,7 @@ impl From<ComputeDriverError> for tonic::Status {
fn from(err: ComputeDriverError) -> Self {
match err {
ComputeDriverError::AlreadyExists => Self::already_exists("sandbox already exists"),
ComputeDriverError::InvalidArgument(m) => Self::invalid_argument(m),
ComputeDriverError::Precondition(m) => Self::failed_precondition(m),
ComputeDriverError::Message(m) => Self::internal(m),
}
Expand Down
33 changes: 21 additions & 12 deletions crates/openshell-core/src/gpu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,18 @@
use crate::config::CDI_GPU_DEVICE_ALL;

/// Resolve the existing GPU request fields into CDI device identifiers.
/// Resolve a GPU request into CDI device identifiers.
///
/// `None` means no GPU was requested. A GPU request with no explicit device
/// ID uses the CDI all-GPU request; otherwise the driver-native ID passes
/// through unchanged.
/// `None` means no GPU was requested. A GPU request with no explicit CDI
/// devices uses the CDI all-GPU request; otherwise the driver-configured CDI
/// devices pass through unchanged.
#[must_use]
pub fn cdi_gpu_device_ids(gpu: bool, gpu_device: &str) -> Option<Vec<String>> {
pub fn cdi_gpu_device_ids(gpu: bool, cdi_devices: &[String]) -> Option<Vec<String>> {
gpu.then(|| {
if gpu_device.is_empty() {
if cdi_devices.is_empty() {
vec![CDI_GPU_DEVICE_ALL.to_string()]
} else {
vec![gpu_device.to_string()]
cdi_devices.to_vec()
}
})
}
Expand All @@ -27,22 +27,31 @@ mod tests {

#[test]
fn cdi_gpu_device_ids_returns_none_when_absent() {
assert_eq!(cdi_gpu_device_ids(false, ""), None);
assert_eq!(cdi_gpu_device_ids(false, &[]), None);
}

#[test]
fn cdi_gpu_device_ids_defaults_empty_request_to_all_gpus() {
assert_eq!(
cdi_gpu_device_ids(true, ""),
cdi_gpu_device_ids(true, &[]),
Some(vec![CDI_GPU_DEVICE_ALL.to_string()])
);
}

#[test]
fn cdi_gpu_device_ids_passes_explicit_device_id_through() {
fn cdi_gpu_device_ids_passes_explicit_device_ids_through() {
assert_eq!(
cdi_gpu_device_ids(true, "nvidia.com/gpu=0"),
Some(vec!["nvidia.com/gpu=0".to_string()])
cdi_gpu_device_ids(
true,
&[
"nvidia.com/gpu=0".to_string(),
"nvidia.com/gpu=1".to_string()
]
),
Some(vec![
"nvidia.com/gpu=0".to_string(),
"nvidia.com/gpu=1".to_string()
])
);
}
}
1 change: 1 addition & 0 deletions crates/openshell-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ pub mod net;
pub mod paths;
pub mod progress;
pub mod proto;
pub mod proto_struct;
pub mod sandbox_env;
pub mod settings;
pub mod telemetry;
Expand Down
120 changes: 120 additions & 0 deletions crates/openshell-core/src/proto_struct.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

//! Helpers for decoding `google.protobuf.Struct` values.

use serde::{Deserialize, Deserializer, de::Error as _};

/// Convert a protobuf Struct into a JSON object for typed serde decoding.
#[must_use]
pub fn struct_to_json_object(
config: &prost_types::Struct,
) -> serde_json::Map<String, serde_json::Value> {
config
.fields
.iter()
.map(|(key, value)| (key.clone(), value_to_json(value)))
.collect()
}

/// Convert a protobuf Struct into a JSON value for typed serde decoding.
#[must_use]
pub fn struct_to_json_value(config: &prost_types::Struct) -> serde_json::Value {
serde_json::Value::Object(struct_to_json_object(config))
}

/// Convert a protobuf Value into a JSON value for typed serde decoding.
#[must_use]
pub fn value_to_json(value: &prost_types::Value) -> serde_json::Value {
match value.kind.as_ref() {
Some(prost_types::value::Kind::NumberValue(num)) => serde_json::Number::from_f64(*num)
.map_or(serde_json::Value::Null, serde_json::Value::Number),
Some(prost_types::value::Kind::StringValue(val)) => serde_json::Value::String(val.clone()),
Some(prost_types::value::Kind::BoolValue(val)) => serde_json::Value::Bool(*val),
Some(prost_types::value::Kind::StructValue(val)) => {
let mut map = serde_json::Map::new();
for (key, value) in &val.fields {
map.insert(key.clone(), value_to_json(value));
}
serde_json::Value::Object(map)
}
Some(prost_types::value::Kind::ListValue(list)) => {
let values = list.values.iter().map(value_to_json).collect();
serde_json::Value::Array(values)
}
Some(prost_types::value::Kind::NullValue(_)) | None => serde_json::Value::Null,
}
}

/// Deserialize a present field as a non-empty list of non-empty strings.
///
/// Use with `#[serde(default, deserialize_with = "...")]` on
/// `Option<Vec<String>>` fields. Missing fields use the option default; present
/// fields must be arrays and cannot be empty.
pub fn deserialize_optional_non_empty_string_list<'de, D>(
deserializer: D,
) -> Result<Option<Vec<String>>, D::Error>
where
D: Deserializer<'de>,
{
let values = Vec::<String>::deserialize(deserializer)?;
if values.is_empty() {
return Err(D::Error::custom("must be a non-empty list of strings"));
}

for (idx, value) in values.iter().enumerate() {
if value.trim().is_empty() {
return Err(D::Error::custom(format!(
"[{idx}] must be a non-empty string"
)));
}
}

Ok(Some(values))
}

#[cfg(test)]
mod tests {
use super::*;

#[derive(Debug, Default, Deserialize)]
#[serde(default)]
struct TestConfig {
#[serde(
default,
deserialize_with = "deserialize_optional_non_empty_string_list"
)]
devices: Option<Vec<String>>,
}

#[test]
fn optional_non_empty_string_list_defaults_when_absent() {
let config: TestConfig = serde_json::from_value(serde_json::json!({})).unwrap();

assert_eq!(config.devices, None);
}

#[test]
fn optional_non_empty_string_list_parses_present_list() {
let config: TestConfig =
serde_json::from_value(serde_json::json!({"devices": ["nvidia.com/gpu=0"]})).unwrap();

assert_eq!(config.devices, Some(vec!["nvidia.com/gpu=0".to_string()]));
}

#[test]
fn optional_non_empty_string_list_rejects_empty_list() {
let err =
serde_json::from_value::<TestConfig>(serde_json::json!({"devices": []})).unwrap_err();

assert!(err.to_string().contains("non-empty list"));
}

#[test]
fn optional_non_empty_string_list_rejects_empty_string() {
let err =
serde_json::from_value::<TestConfig>(serde_json::json!({"devices": [""]})).unwrap_err();

assert!(err.to_string().contains("non-empty string"));
}
}
1 change: 1 addition & 0 deletions crates/openshell-driver-docker/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ tempfile = "3"
url = { workspace = true }

[dev-dependencies]
prost-types = { workspace = true }
temp-env = "0.3"

[lints]
Expand Down
2 changes: 1 addition & 1 deletion crates/openshell-driver-docker/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ contract:
| `apparmor=unconfined` | Avoids Docker's default profile blocking required mount operations. |
| `restart_policy = unless-stopped` | Keeps managed sandboxes resumable across daemon or gateway restarts. |
| `PidsLimit` | Enforces the sandbox PID budget at the Docker cgroup layer. Set `[openshell.drivers.docker].sandbox_pids_limit = 0` to inherit the Docker/runtime default. |
| CDI GPU request | Uses the sandbox `gpu_device` value when set; otherwise requests all NVIDIA GPUs when the sandbox spec asks for GPU support and daemon CDI support is detected. |
| CDI GPU request | Uses `driver_config.cdi_devices` when set; otherwise requests all NVIDIA GPUs when the sandbox spec asks for GPU support and daemon CDI support is detected. |

The agent child process does not retain these supervisor privileges.

Expand Down
Loading
Loading