From e3c3d3643501872b60bc0f18beb8331ec68f04af Mon Sep 17 00:00:00 2001 From: Sagi Shnaidman Date: Tue, 9 Jun 2026 11:31:16 +0300 Subject: [PATCH 1/2] feat(podman): make container health check interval configurable The Podman driver hardcoded a 3-second health check interval, which spawns a conmon subprocess on every tick. On systems running multiple sandboxes this creates sustained process churn and unnecessary CPU overhead. Add a `health_check_interval_secs` field to `PodmanComputeConfig` (default: 10s) and wire it into the container health check spec. Operators can tune it further via `[openshell.drivers.podman]` in gateway.toml. Signed-off-by: Sagi Shnaidman --- crates/openshell-driver-podman/src/config.rs | 17 +++++++++++++++++ crates/openshell-driver-podman/src/container.rs | 15 ++++++++++++++- crates/openshell-driver-podman/src/main.rs | 1 + 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/crates/openshell-driver-podman/src/config.rs b/crates/openshell-driver-podman/src/config.rs index 79a463b23..67f7b3fee 100644 --- a/crates/openshell-driver-podman/src/config.rs +++ b/crates/openshell-driver-podman/src/config.rs @@ -122,8 +122,17 @@ pub struct PodmanComputeConfig { /// /// Set to `0` to leave Podman's runtime/default PID limit unchanged. pub sandbox_pids_limit: i64, + /// Health check interval in seconds for sandbox containers. + /// + /// Podman runs the health check command at this interval to determine + /// container readiness. Lower values detect readiness faster but + /// increase process churn (each check spawns a conmon subprocess). + /// Defaults to [`DEFAULT_HEALTH_CHECK_INTERVAL_SECS`]. + pub health_check_interval_secs: u64, } +pub const DEFAULT_HEALTH_CHECK_INTERVAL_SECS: u64 = 10; + impl PodmanComputeConfig { /// Returns `true` when all three TLS paths are configured. #[must_use] @@ -246,6 +255,7 @@ impl Default for PodmanComputeConfig { guest_tls_cert: None, guest_tls_key: None, sandbox_pids_limit: DEFAULT_SANDBOX_PIDS_LIMIT, + health_check_interval_secs: DEFAULT_HEALTH_CHECK_INTERVAL_SECS, } } } @@ -267,6 +277,7 @@ impl std::fmt::Debug for PodmanComputeConfig { .field("guest_tls_cert", &self.guest_tls_cert) .field("guest_tls_key", &self.guest_tls_key) .field("sandbox_pids_limit", &self.sandbox_pids_limit) + .field("health_check_interval_secs", &self.health_check_interval_secs) .finish() } } @@ -308,6 +319,12 @@ mod tests { }); } + #[test] + fn default_config_sets_health_check_interval() { + let cfg = PodmanComputeConfig::default(); + assert_eq!(cfg.health_check_interval_secs, DEFAULT_HEALTH_CHECK_INTERVAL_SECS); + } + #[test] fn default_config_sets_driver_owned_pids_limit() { let cfg = PodmanComputeConfig::default(); diff --git a/crates/openshell-driver-podman/src/container.rs b/crates/openshell-driver-podman/src/container.rs index 13f053e93..e37d5f214 100644 --- a/crates/openshell-driver-podman/src/container.rs +++ b/crates/openshell-driver-podman/src/container.rs @@ -530,7 +530,7 @@ pub fn build_container_spec_with_token( openshell_core::config::DEFAULT_SSH_PORT ), ], - interval: 3_000_000_000, + interval: config.health_check_interval_secs * 1_000_000_000, timeout: 2_000_000_000, retries: 10, start_period: 5_000_000_000, @@ -940,6 +940,19 @@ mod tests { ); } + #[test] + fn container_spec_healthcheck_interval_from_config() { + let sandbox = test_sandbox("test-id", "test-name"); + let mut config = test_config(); + config.health_check_interval_secs = 30; + let spec = build_container_spec(&sandbox, &config); + + let interval = spec["healthconfig"]["Interval"] + .as_u64() + .expect("healthcheck interval should be a u64"); + assert_eq!(interval, 30_000_000_000); + } + #[test] fn container_spec_required_vars_cannot_be_overridden() { use openshell_core::proto::compute::v1::{DriverSandboxSpec, DriverSandboxTemplate}; diff --git a/crates/openshell-driver-podman/src/main.rs b/crates/openshell-driver-podman/src/main.rs index 53af4e190..e6ba7b9ff 100644 --- a/crates/openshell-driver-podman/src/main.rs +++ b/crates/openshell-driver-podman/src/main.rs @@ -135,6 +135,7 @@ async fn main() -> Result<()> { guest_tls_cert: args.podman_tls_cert, guest_tls_key: args.podman_tls_key, sandbox_pids_limit: args.sandbox_pids_limit, + ..PodmanComputeConfig::default() }) .await .into_diagnostic()?; From 1d4d7aeaaa119cd516bb54eadafe34783adef904 Mon Sep 17 00:00:00 2001 From: Sagi Shnaidman Date: Tue, 9 Jun 2026 11:59:41 +0300 Subject: [PATCH 2/2] docs(podman): document health_check_interval_secs config field Signed-off-by: Sagi Shnaidman --- crates/openshell-driver-podman/src/config.rs | 10 ++++++++-- docs/reference/gateway-config.mdx | 4 ++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/crates/openshell-driver-podman/src/config.rs b/crates/openshell-driver-podman/src/config.rs index 67f7b3fee..970c71a18 100644 --- a/crates/openshell-driver-podman/src/config.rs +++ b/crates/openshell-driver-podman/src/config.rs @@ -277,7 +277,10 @@ impl std::fmt::Debug for PodmanComputeConfig { .field("guest_tls_cert", &self.guest_tls_cert) .field("guest_tls_key", &self.guest_tls_key) .field("sandbox_pids_limit", &self.sandbox_pids_limit) - .field("health_check_interval_secs", &self.health_check_interval_secs) + .field( + "health_check_interval_secs", + &self.health_check_interval_secs, + ) .finish() } } @@ -322,7 +325,10 @@ mod tests { #[test] fn default_config_sets_health_check_interval() { let cfg = PodmanComputeConfig::default(); - assert_eq!(cfg.health_check_interval_secs, DEFAULT_HEALTH_CHECK_INTERVAL_SECS); + assert_eq!( + cfg.health_check_interval_secs, + DEFAULT_HEALTH_CHECK_INTERVAL_SECS + ); } #[test] diff --git a/docs/reference/gateway-config.mdx b/docs/reference/gateway-config.mdx index c70d8acbd..75d0c05f0 100644 --- a/docs/reference/gateway-config.mdx +++ b/docs/reference/gateway-config.mdx @@ -252,6 +252,10 @@ guest_tls_cert = "/etc/openshell/certs/client.pem" guest_tls_key = "/etc/openshell/certs/client-key.pem" # Set to 0 to leave Podman's runtime default unchanged. sandbox_pids_limit = 2048 +# Health check interval in seconds. Lower values detect readiness faster +# but increase process churn (each check spawns a conmon subprocess). +# Default: 10. +health_check_interval_secs = 10 ``` ### MicroVM