From 57110885030ff6a7b43866cc00a740903bc2b4a9 Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Tue, 7 Apr 2026 12:13:05 -0700 Subject: [PATCH 01/15] Refactor SandboxMemoryLayout Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- src/hyperlight_common/src/mem.rs | 42 ++ .../src/hypervisor/hyperlight_vm/x86_64.rs | 2 +- src/hyperlight_host/src/mem/layout.rs | 398 +++++------------- src/hyperlight_host/src/mem/mgr.rs | 14 +- .../src/sandbox/uninitialized_evolve.rs | 2 +- 5 files changed, 161 insertions(+), 297 deletions(-) diff --git a/src/hyperlight_common/src/mem.rs b/src/hyperlight_common/src/mem.rs index fb850acc8..fea8413bd 100644 --- a/src/hyperlight_common/src/mem.rs +++ b/src/hyperlight_common/src/mem.rs @@ -28,6 +28,23 @@ pub struct GuestMemoryRegion { pub ptr: u64, } +impl GuestMemoryRegion { + /// Size of a serialized `GuestMemoryRegion` in bytes. + pub const SERIALIZED_SIZE: usize = core::mem::size_of::(); + + /// Write this region's fields in native-endian byte order to `buf`. + /// Returns `Ok(())` on success, or `Err` if `buf` is too small. + pub fn write_to(&self, buf: &mut [u8]) -> Result<(), &'static str> { + if buf.len() < Self::SERIALIZED_SIZE { + return Err("buffer too small for GuestMemoryRegion"); + } + let s = core::mem::size_of::(); + buf[..s].copy_from_slice(&self.size.to_ne_bytes()); + buf[s..s * 2].copy_from_slice(&self.ptr.to_ne_bytes()); + Ok(()) + } +} + /// Maximum length of a file mapping label (excluding null terminator). pub const FILE_MAPPING_LABEL_MAX_LEN: usize = 63; @@ -80,3 +97,28 @@ pub struct HyperlightPEB { #[cfg(feature = "nanvix-unstable")] pub file_mappings: GuestMemoryRegion, } + +impl HyperlightPEB { + /// Write the PEB fields in native-endian byte order to `buf`. + /// The buffer must be at least `size_of::()` bytes. + /// Returns `Err` if the buffer is too small. 
+ pub fn write_to(&self, buf: &mut [u8]) -> Result<(), &'static str> { + if buf.len() < core::mem::size_of::() { + return Err("buffer too small for HyperlightPEB"); + } + let regions = [ + &self.input_stack, + &self.output_stack, + &self.init_data, + &self.guest_heap, + #[cfg(feature = "nanvix-unstable")] + &self.file_mappings, + ]; + let mut offset = 0; + for region in regions { + region.write_to(&mut buf[offset..])?; + offset += GuestMemoryRegion::SERIALIZED_SIZE; + } + Ok(()) + } +} diff --git a/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs b/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs index f06c94964..857490600 100644 --- a/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs @@ -1489,7 +1489,7 @@ mod tests { let (mut hshm, gshm) = mem_mgr.build().unwrap(); - let peb_address = gshm.layout.peb_address; + let peb_address = gshm.layout.peb_address(); let stack_top_gva = hyperlight_common::layout::MAX_GVA as u64 - hyperlight_common::layout::SCRATCH_TOP_EXN_STACK_OFFSET + 1; diff --git a/src/hyperlight_host/src/mem/layout.rs b/src/hyperlight_host/src/mem/layout.rs index 26615d579..039fdb21c 100644 --- a/src/hyperlight_host/src/mem/layout.rs +++ b/src/hyperlight_host/src/mem/layout.rs @@ -60,8 +60,9 @@ limitations under the License. //! | Input Data | //! +-------------------------------------------+ (scratch size) -use std::fmt::Debug; -use std::mem::{offset_of, size_of}; +#[cfg(feature = "nanvix-unstable")] +use std::mem::offset_of; +use std::mem::size_of; use hyperlight_common::mem::{HyperlightPEB, PAGE_SIZE_USIZE}; use tracing::{Span, instrument}; @@ -213,100 +214,27 @@ impl ResolvedGpa { } } -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Debug)] pub(crate) struct SandboxMemoryLayout { - pub(super) sandbox_memory_config: SandboxConfiguration, + /// Input data buffer size (from SandboxConfiguration). 
+ pub(crate) input_data_size: usize, + /// Output data buffer size (from SandboxConfiguration). + pub(crate) output_data_size: usize, /// The heap size of this sandbox. - pub(super) heap_size: usize, + pub(crate) heap_size: usize, + /// The size of the guest code section. + pub(crate) code_size: usize, + /// The size of the init data section (guest blob). init_data_size: usize, - - /// The following fields are offsets to the actual PEB struct fields. - /// They are used when writing the PEB struct itself - peb_offset: usize, - peb_input_data_offset: usize, - peb_output_data_offset: usize, - peb_init_data_offset: usize, - peb_heap_data_offset: usize, - #[cfg(feature = "nanvix-unstable")] - peb_file_mappings_offset: usize, - - guest_heap_buffer_offset: usize, - init_data_offset: usize, - pt_size: Option, - - // other - pub(crate) peb_address: usize, - code_size: usize, - // The offset in the sandbox memory where the code starts - guest_code_offset: usize, + /// Permission flags for the init data region. #[cfg_attr(feature = "i686-guest", allow(unused))] - pub(crate) init_data_permissions: Option, - - // The size of the scratch region in physical memory; note that - // this will appear under the top of physical memory. + init_data_permissions: Option, + /// The size of the scratch region in physical memory. scratch_size: usize, - // The guest-visible size of the snapshot region in physical - // memory. After compaction this may be smaller than the full - // snapshot blob (which also contains a PT tail that is only - // host-accessible). + /// The size of the snapshot region in physical memory. 
snapshot_size: usize, -} - -impl Debug for SandboxMemoryLayout { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let mut ff = f.debug_struct("SandboxMemoryLayout"); - ff.field( - "Total Memory Size", - &format_args!("{:#x}", self.get_memory_size().unwrap_or(0)), - ) - .field("Heap Size", &format_args!("{:#x}", self.heap_size)) - .field( - "Init Data Size", - &format_args!("{:#x}", self.init_data_size), - ) - .field("PEB Address", &format_args!("{:#x}", self.peb_address)) - .field("PEB Offset", &format_args!("{:#x}", self.peb_offset)) - .field("Code Size", &format_args!("{:#x}", self.code_size)) - .field( - "Input Data Offset", - &format_args!("{:#x}", self.peb_input_data_offset), - ) - .field( - "Output Data Offset", - &format_args!("{:#x}", self.peb_output_data_offset), - ) - .field( - "Init Data Offset", - &format_args!("{:#x}", self.peb_init_data_offset), - ) - .field( - "Guest Heap Offset", - &format_args!("{:#x}", self.peb_heap_data_offset), - ); - #[cfg(feature = "nanvix-unstable")] - ff.field( - "File Mappings Offset", - &format_args!("{:#x}", self.peb_file_mappings_offset), - ); - ff.field( - "Guest Heap Buffer Offset", - &format_args!("{:#x}", self.guest_heap_buffer_offset), - ) - .field( - "Init Data Offset", - &format_args!("{:#x}", self.init_data_offset), - ) - .field("PT Size", &format_args!("{:#x}", self.pt_size.unwrap_or(0))) - .field( - "Guest Code Offset", - &format_args!("{:#x}", self.guest_code_offset), - ) - .field( - "Scratch region size", - &format_args!("{:#x}", self.scratch_size), - ) - .finish() - } + /// The size of the page tables (None if not yet set). 
+ pt_size: Option, } impl SandboxMemoryLayout { @@ -337,65 +265,19 @@ impl SandboxMemoryLayout { if scratch_size > Self::MAX_MEMORY_SIZE { return Err(MemoryRequestTooBig(scratch_size, Self::MAX_MEMORY_SIZE)); } - let min_scratch_size = hyperlight_common::layout::min_scratch_size( - cfg.get_input_data_size(), - cfg.get_output_data_size(), - ); + let input_data_size = cfg.get_input_data_size(); + let output_data_size = cfg.get_output_data_size(); + let min_scratch_size = + hyperlight_common::layout::min_scratch_size(input_data_size, output_data_size); if scratch_size < min_scratch_size { return Err(MemoryRequestTooSmall(scratch_size, min_scratch_size)); } - let guest_code_offset = 0; - // The following offsets are to the fields of the PEB struct itself! - let peb_offset = code_size.next_multiple_of(PAGE_SIZE_USIZE); - let peb_input_data_offset = peb_offset + offset_of!(HyperlightPEB, input_stack); - let peb_output_data_offset = peb_offset + offset_of!(HyperlightPEB, output_stack); - let peb_init_data_offset = peb_offset + offset_of!(HyperlightPEB, init_data); - let peb_heap_data_offset = peb_offset + offset_of!(HyperlightPEB, guest_heap); - #[cfg(feature = "nanvix-unstable")] - let peb_file_mappings_offset = peb_offset + offset_of!(HyperlightPEB, file_mappings); - - // The following offsets are the actual values that relate to memory layout, - // which are written to PEB struct - let peb_address = Self::BASE_ADDRESS + peb_offset; - // make sure heap buffer starts at 4K boundary. - // The FileMappingInfo array is stored immediately after the PEB struct. - // We statically reserve space for MAX_FILE_MAPPINGS entries so that - // the heap never overlaps the array, even when all slots are used. - // The host writes file mapping metadata here via write_file_mapping_entry; - // the guest only reads the entries. We don't know at layout time how - // many file mappings the host will register, so we reserve space for - // the maximum number. 
- // The heap starts at the next page boundary after this reserved area. - #[cfg(feature = "nanvix-unstable")] - let file_mappings_array_end = peb_offset - + size_of::() - + hyperlight_common::mem::MAX_FILE_MAPPINGS - * size_of::(); - #[cfg(feature = "nanvix-unstable")] - let guest_heap_buffer_offset = file_mappings_array_end.next_multiple_of(PAGE_SIZE_USIZE); - #[cfg(not(feature = "nanvix-unstable"))] - let guest_heap_buffer_offset = - (peb_offset + size_of::()).next_multiple_of(PAGE_SIZE_USIZE); - - // make sure init data starts at 4K boundary - let init_data_offset = - (guest_heap_buffer_offset + heap_size).next_multiple_of(PAGE_SIZE_USIZE); let mut ret = Self { - peb_offset, + input_data_size, + output_data_size, heap_size, - peb_input_data_offset, - peb_output_data_offset, - peb_init_data_offset, - peb_heap_data_offset, - #[cfg(feature = "nanvix-unstable")] - peb_file_mappings_offset, - sandbox_memory_config: cfg, code_size, - guest_heap_buffer_offset, - peb_address, - guest_code_offset, - init_data_offset, init_data_size, init_data_permissions, pt_size: None, @@ -406,68 +288,64 @@ impl SandboxMemoryLayout { Ok(ret) } - /// Get the offset in guest memory to the output data size - #[instrument(skip_all, parent = Span::current(), level= "Trace")] - pub(super) fn get_output_data_size_offset(&self) -> usize { - // The size field is the first field in the `OutputData` struct - self.peb_output_data_offset + /// Offset of the PEB struct within the snapshot region. + pub(crate) fn peb_offset(&self) -> usize { + self.code_size.next_multiple_of(PAGE_SIZE_USIZE) } - /// Get the offset in guest memory to the init data size - #[instrument(skip_all, parent = Span::current(), level= "Trace")] - pub(super) fn get_init_data_size_offset(&self) -> usize { - // The init data size is the first field in the `GuestMemoryRegion` struct - self.peb_init_data_offset + /// Offset of the PEB file_mappings field. 
+ #[cfg(feature = "nanvix-unstable")] + fn peb_file_mappings_offset(&self) -> usize { + self.peb_offset() + offset_of!(HyperlightPEB, file_mappings) } - #[instrument(skip_all, parent = Span::current(), level= "Trace")] - pub(crate) fn get_scratch_size(&self) -> usize { - self.scratch_size + /// Guest physical address of the PEB. + pub(crate) fn peb_address(&self) -> usize { + Self::BASE_ADDRESS + self.peb_offset() } - /// Get the offset in guest memory to the output data pointer. - #[instrument(skip_all, parent = Span::current(), level= "Trace")] - fn get_output_data_pointer_offset(&self) -> usize { - // This field is immediately after the output data size field, - // which is a `u64`. - self.get_output_data_size_offset() + size_of::() + /// Offset of the guest heap buffer within the snapshot region. + pub(crate) fn guest_heap_buffer_offset(&self) -> usize { + #[cfg(feature = "nanvix-unstable")] + { + let file_mappings_array_end = self.peb_offset() + + size_of::() + + hyperlight_common::mem::MAX_FILE_MAPPINGS + * size_of::(); + file_mappings_array_end.next_multiple_of(PAGE_SIZE_USIZE) + } + #[cfg(not(feature = "nanvix-unstable"))] + { + (self.peb_offset() + size_of::()).next_multiple_of(PAGE_SIZE_USIZE) + } + } + + /// Offset of the init data section within the snapshot region. + pub(crate) fn init_data_offset(&self) -> usize { + (self.guest_heap_buffer_offset() + self.heap_size).next_multiple_of(PAGE_SIZE_USIZE) + } + + /// The code offset is always 0. + pub(crate) fn guest_code_offset(&self) -> usize { + 0 } - /// Get the offset in guest memory to the init data pointer. #[instrument(skip_all, parent = Span::current(), level= "Trace")] - pub(super) fn get_init_data_pointer_offset(&self) -> usize { - // The init data pointer is immediately after the init data size field, - // which is a `u64`. 
- self.get_init_data_size_offset() + size_of::() + pub(crate) fn get_scratch_size(&self) -> usize { + self.scratch_size } /// Get the guest virtual address of the start of output data. #[instrument(skip_all, parent = Span::current(), level= "Trace")] pub(crate) fn get_output_data_buffer_gva(&self) -> u64 { - hyperlight_common::layout::scratch_base_gva(self.scratch_size) - + self.sandbox_memory_config.get_input_data_size() as u64 + hyperlight_common::layout::scratch_base_gva(self.scratch_size) + self.input_data_size as u64 } /// Get the offset into the host scratch buffer of the start of /// the output data. #[instrument(skip_all, parent = Span::current(), level= "Trace")] pub(crate) fn get_output_data_buffer_scratch_host_offset(&self) -> usize { - self.sandbox_memory_config.get_input_data_size() - } - - /// Get the offset in guest memory to the input data size. - #[instrument(skip_all, parent = Span::current(), level= "Trace")] - pub(super) fn get_input_data_size_offset(&self) -> usize { - // The input data size is the first field in the input stack's `GuestMemoryRegion` struct - self.peb_input_data_offset - } - - /// Get the offset in guest memory to the input data pointer. - #[instrument(skip_all, parent = Span::current(), level= "Trace")] - fn get_input_data_pointer_offset(&self) -> usize { - // The input data pointer is immediately after the input - // data size field in the input data `GuestMemoryRegion` struct which is a `u64`. 
- self.get_input_data_size_offset() + size_of::() + self.input_data_size } /// Get the guest virtual address of the start of input data @@ -487,9 +365,8 @@ impl SandboxMemoryLayout { /// location where page tables will be eagerly copied on restore #[instrument(skip_all, parent = Span::current(), level= "Trace")] pub(crate) fn get_pt_base_scratch_offset(&self) -> usize { - (self.sandbox_memory_config.get_input_data_size() - + self.sandbox_memory_config.get_output_data_size()) - .next_multiple_of(hyperlight_common::vmem::PAGE_SIZE) + (self.input_data_size + self.output_data_size) + .next_multiple_of(hyperlight_common::vmem::PAGE_SIZE) } /// Get the base GPA to which the page tables will be eagerly @@ -507,17 +384,11 @@ impl SandboxMemoryLayout { self.get_pt_base_gpa() + self.pt_size.unwrap_or(0) as u64 } - /// Get the offset in guest memory to the heap size - #[instrument(skip_all, parent = Span::current(), level= "Trace")] - fn get_heap_size_offset(&self) -> usize { - self.peb_heap_data_offset - } - /// Get the offset in guest memory to the file_mappings count field /// (the `size` field of the `GuestMemoryRegion` in the PEB). #[cfg(feature = "nanvix-unstable")] pub(crate) fn get_file_mappings_size_offset(&self) -> usize { - self.peb_file_mappings_offset + self.peb_file_mappings_offset() } /// Get the offset in guest memory to the file_mappings pointer field. @@ -530,7 +401,7 @@ impl SandboxMemoryLayout { /// (immediately after the PEB struct, within the same page). #[cfg(feature = "nanvix-unstable")] pub(crate) fn get_file_mappings_array_offset(&self) -> usize { - self.peb_offset + size_of::() + self.peb_offset() + size_of::() } /// Get the guest address of the FileMappingInfo array. 
@@ -539,32 +410,24 @@ impl SandboxMemoryLayout { (Self::BASE_ADDRESS + self.get_file_mappings_array_offset()) as u64 } - /// Get the offset of the heap pointer in guest memory, - #[instrument(skip_all, parent = Span::current(), level= "Trace")] - fn get_heap_pointer_offset(&self) -> usize { - // The heap pointer is immediately after the - // heap size field in the guest heap's `GuestMemoryRegion` struct which is a `u64`. - self.get_heap_size_offset() + size_of::() - } - /// Get the total size of guest memory in `self`'s memory /// layout. #[instrument(skip_all, parent = Span::current(), level= "Trace")] fn get_unaligned_memory_size(&self) -> usize { - self.init_data_offset + self.init_data_size + self.init_data_offset() + self.init_data_size } /// get the code offset /// This is the offset in the sandbox memory where the code starts #[instrument(skip_all, parent = Span::current(), level= "Trace")] pub(crate) fn get_guest_code_offset(&self) -> usize { - self.guest_code_offset + self.guest_code_offset() } /// Get the guest address of the code section in the sandbox #[instrument(skip_all, parent = Span::current(), level= "Trace")] pub(crate) fn get_guest_code_address(&self) -> usize { - Self::BASE_ADDRESS + self.guest_code_offset + Self::BASE_ADDRESS + self.guest_code_offset() } /// Get the total size of guest memory in `self`'s memory @@ -592,8 +455,8 @@ impl SandboxMemoryLayout { #[instrument(skip_all, parent = Span::current(), level= "Trace")] pub(crate) fn set_pt_size(&mut self, size: usize) -> Result<()> { let min_fixed_scratch = hyperlight_common::layout::min_scratch_size( - self.sandbox_memory_config.get_input_data_size(), - self.sandbox_memory_config.get_output_data_size(), + self.input_data_size, + self.output_data_size, ); let min_scratch = min_fixed_scratch + size; if self.scratch_size < min_scratch { @@ -632,7 +495,7 @@ impl SandboxMemoryLayout { Code, ); - let expected_peb_offset = TryInto::::try_into(self.peb_offset)?; + let expected_peb_offset = 
TryInto::::try_into(self.peb_offset())?; if peb_offset != expected_peb_offset { return Err(new_error!( @@ -658,7 +521,7 @@ impl SandboxMemoryLayout { let heap_offset = builder.push_page_aligned(size_of::(), MemoryRegionFlags::READ, Peb); - let expected_heap_offset = TryInto::::try_into(self.guest_heap_buffer_offset)?; + let expected_heap_offset = TryInto::::try_into(self.guest_heap_buffer_offset())?; if heap_offset != expected_heap_offset { return Err(new_error!( @@ -682,7 +545,7 @@ impl SandboxMemoryLayout { Heap, ); - let expected_init_data_offset = TryInto::::try_into(self.init_data_offset)?; + let expected_init_data_offset = TryInto::::try_into(self.init_data_offset())?; if init_data_offset != expected_init_data_offset { return Err(new_error!( @@ -719,7 +582,7 @@ impl SandboxMemoryLayout { #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] pub(crate) fn write_init_data(&self, out: &mut [u8], bytes: &[u8]) -> Result<()> { - out[self.init_data_offset..self.init_data_offset + self.init_data_size] + out[self.init_data_offset()..self.init_data_offset() + self.init_data_size] .copy_from_slice(bytes); Ok(()) } @@ -731,84 +594,43 @@ impl SandboxMemoryLayout { /// from this function. #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] pub(crate) fn write_peb(&self, mem: &mut [u8]) -> Result<()> { - let guest_offset = SandboxMemoryLayout::BASE_ADDRESS; - - fn write_u64(mem: &mut [u8], offset: usize, value: u64) -> Result<()> { - if offset + 8 > mem.len() { - return Err(new_error!( - "Cannot write to offset {} in slice of len {}", - offset, - mem.len() - )); - } - mem[offset..offset + 8].copy_from_slice(&u64::to_ne_bytes(value)); - Ok(()) - } + use hyperlight_common::mem::GuestMemoryRegion; - macro_rules! get_address { - ($something:ident) => { - u64::try_from(guest_offset + self.$something)? - }; - } + let guest_base = Self::BASE_ADDRESS as u64; - // Start of setting up the PEB. 
The following are in the order of the PEB fields - - // Set up input buffer pointer - write_u64( - mem, - self.get_input_data_size_offset(), - self.sandbox_memory_config - .get_input_data_size() - .try_into()?, - )?; - write_u64( - mem, - self.get_input_data_pointer_offset(), - self.get_input_data_buffer_gva(), - )?; - - // Set up output buffer pointer - write_u64( - mem, - self.get_output_data_size_offset(), - self.sandbox_memory_config - .get_output_data_size() - .try_into()?, - )?; - write_u64( - mem, - self.get_output_data_pointer_offset(), - self.get_output_data_buffer_gva(), - )?; - - // Set up init data pointer - write_u64( - mem, - self.get_init_data_size_offset(), - (self.get_unaligned_memory_size() - self.init_data_offset).try_into()?, - )?; - let addr = get_address!(init_data_offset); - write_u64(mem, self.get_init_data_pointer_offset(), addr)?; - - // Set up heap buffer pointer - let addr = get_address!(guest_heap_buffer_offset); - write_u64(mem, self.get_heap_size_offset(), self.heap_size.try_into()?)?; - write_u64(mem, self.get_heap_pointer_offset(), addr)?; - - // Set up the file_mappings descriptor in the PEB. - // - The `size` field holds the number of valid FileMappingInfo - // entries currently written (initially 0 — entries are added - // later by map_file_cow / evolve). 
- // - The `ptr` field holds the guest address of the preallocated - // FileMappingInfo array - #[cfg(feature = "nanvix-unstable")] - write_u64(mem, self.get_file_mappings_size_offset(), 0)?; - #[cfg(feature = "nanvix-unstable")] - write_u64( - mem, - self.get_file_mappings_pointer_offset(), - self.get_file_mappings_array_gva(), - )?; + let peb = HyperlightPEB { + input_stack: GuestMemoryRegion { + size: self.input_data_size as u64, + ptr: self.get_input_data_buffer_gva(), + }, + output_stack: GuestMemoryRegion { + size: self.output_data_size as u64, + ptr: self.get_output_data_buffer_gva(), + }, + init_data: GuestMemoryRegion { + size: (self.get_unaligned_memory_size() - self.init_data_offset()) as u64, + ptr: guest_base + self.init_data_offset() as u64, + }, + guest_heap: GuestMemoryRegion { + size: self.heap_size as u64, + ptr: guest_base + self.guest_heap_buffer_offset() as u64, + }, + // Set up the file_mappings descriptor in the PEB. + // - The `size` field holds the number of valid FileMappingInfo + // entries currently written (initially 0 — entries are added + // later by map_file_cow / evolve). 
+ // - The `ptr` field holds the guest address of the preallocated + // FileMappingInfo array + #[cfg(feature = "nanvix-unstable")] + file_mappings: GuestMemoryRegion { + size: 0, // entry count, populated later by map_file_cow + ptr: self.get_file_mappings_array_gva(), + }, + }; + + let offset = self.peb_offset(); + peb.write_to(&mut mem[offset..offset + size_of::()]) + .map_err(|e| new_error!("failed to write PEB: {}", e))?; // End of setting up the PEB diff --git a/src/hyperlight_host/src/mem/mgr.rs b/src/hyperlight_host/src/mem/mgr.rs index 68f35ff7d..9e5d843d1 100644 --- a/src/hyperlight_host/src/mem/mgr.rs +++ b/src/hyperlight_host/src/mem/mgr.rs @@ -441,7 +441,7 @@ impl SandboxMemoryManager { pub(crate) fn get_host_function_call(&mut self) -> Result { self.scratch_mem.try_pop_buffer_into::( self.layout.get_output_data_buffer_scratch_host_offset(), - self.layout.sandbox_memory_config.get_output_data_size(), + self.layout.output_data_size, ) } @@ -456,7 +456,7 @@ impl SandboxMemoryManager { self.scratch_mem.push_buffer( self.layout.get_input_data_buffer_scratch_host_offset(), - self.layout.sandbox_memory_config.get_input_data_size(), + self.layout.input_data_size, data, ) } @@ -473,7 +473,7 @@ impl SandboxMemoryManager { self.scratch_mem.push_buffer( self.layout.get_input_data_buffer_scratch_host_offset(), - self.layout.sandbox_memory_config.get_input_data_size(), + self.layout.input_data_size, buffer, )?; Ok(()) @@ -485,7 +485,7 @@ impl SandboxMemoryManager { pub(crate) fn get_guest_function_call_result(&mut self) -> Result { self.scratch_mem.try_pop_buffer_into::( self.layout.get_output_data_buffer_scratch_host_offset(), - self.layout.sandbox_memory_config.get_output_data_size(), + self.layout.output_data_size, ) } @@ -494,7 +494,7 @@ impl SandboxMemoryManager { pub(crate) fn read_guest_log_data(&mut self) -> Result { self.scratch_mem.try_pop_buffer_into::( self.layout.get_output_data_buffer_scratch_host_offset(), - 
self.layout.sandbox_memory_config.get_output_data_size(), + self.layout.output_data_size, ) } @@ -503,7 +503,7 @@ impl SandboxMemoryManager { loop { let Ok(_) = self.scratch_mem.try_pop_buffer_into::>( self.layout.get_output_data_buffer_scratch_host_offset(), - self.layout.sandbox_memory_config.get_output_data_size(), + self.layout.output_data_size, ) else { break; }; @@ -512,7 +512,7 @@ impl SandboxMemoryManager { loop { let Ok(_) = self.scratch_mem.try_pop_buffer_into::>( self.layout.get_input_data_buffer_scratch_host_offset(), - self.layout.sandbox_memory_config.get_input_data_size(), + self.layout.input_data_size, ) else { break; }; diff --git a/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs b/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs index 7f0cc1c0d..c037af06e 100644 --- a/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs +++ b/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs @@ -73,7 +73,7 @@ pub(super) fn evolve_impl_multi_use(u_sbox: UninitializedSandbox) -> Result() }; let peb_addr = { - let peb_u64 = u64::try_from(hshm.layout.peb_address)?; + let peb_u64 = u64::try_from(hshm.layout.peb_address())?; RawPtr::from(peb_u64) }; From 5c916ffa3224b61ecdefa448a7c50c2e656c5a20 Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Thu, 30 Apr 2026 18:48:32 -0700 Subject: [PATCH 02/15] Move sandbox snapshot.rs into snapshot module dir Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- src/hyperlight_host/src/sandbox/{snapshot.rs => snapshot/mod.rs} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/hyperlight_host/src/sandbox/{snapshot.rs => snapshot/mod.rs} (100%) diff --git a/src/hyperlight_host/src/sandbox/snapshot.rs b/src/hyperlight_host/src/sandbox/snapshot/mod.rs similarity index 100% rename from src/hyperlight_host/src/sandbox/snapshot.rs rename to src/hyperlight_host/src/sandbox/snapshot/mod.rs From 
df713391b730084dc3ed3835218962e5dd0d2a71 Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Thu, 30 Apr 2026 18:48:59 -0700 Subject: [PATCH 03/15] Remove unused get_file_mappings_pointer_offset Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- src/hyperlight_host/src/mem/layout.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/hyperlight_host/src/mem/layout.rs b/src/hyperlight_host/src/mem/layout.rs index 039fdb21c..df8a86be5 100644 --- a/src/hyperlight_host/src/mem/layout.rs +++ b/src/hyperlight_host/src/mem/layout.rs @@ -391,12 +391,6 @@ impl SandboxMemoryLayout { self.peb_file_mappings_offset() } - /// Get the offset in guest memory to the file_mappings pointer field. - #[cfg(feature = "nanvix-unstable")] - fn get_file_mappings_pointer_offset(&self) -> usize { - self.get_file_mappings_size_offset() + size_of::() - } - /// Get the offset in snapshot memory where the FileMappingInfo array starts /// (immediately after the PEB struct, within the same page). #[cfg(feature = "nanvix-unstable")] From 5ca285a0872497de41bda2421da1e840a9203e43 Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Thu, 30 Apr 2026 18:50:22 -0700 Subject: [PATCH 04/15] Expose SandboxMemoryLayout fields to crate Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- src/hyperlight_host/src/mem/layout.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/hyperlight_host/src/mem/layout.rs b/src/hyperlight_host/src/mem/layout.rs index df8a86be5..a5bca8727 100644 --- a/src/hyperlight_host/src/mem/layout.rs +++ b/src/hyperlight_host/src/mem/layout.rs @@ -225,16 +225,16 @@ pub(crate) struct SandboxMemoryLayout { /// The size of the guest code section. pub(crate) code_size: usize, /// The size of the init data section (guest blob). 
- init_data_size: usize, + pub(crate) init_data_size: usize, /// Permission flags for the init data region. #[cfg_attr(feature = "i686-guest", allow(unused))] - init_data_permissions: Option, + pub(crate) init_data_permissions: Option, /// The size of the scratch region in physical memory. - scratch_size: usize, + pub(crate) scratch_size: usize, /// The size of the snapshot region in physical memory. - snapshot_size: usize, + pub(crate) snapshot_size: usize, /// The size of the page tables (None if not yet set). - pt_size: Option, + pub(crate) pt_size: Option, } impl SandboxMemoryLayout { From 5efe038818d8556f9d0711da942f30d3a7e71bbc Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Thu, 30 Apr 2026 18:50:33 -0700 Subject: [PATCH 05/15] Extract HyperlightVm apply_sregs helper Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- .../src/hypervisor/hyperlight_vm/x86_64.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs b/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs index 857490600..16ac55ad3 100644 --- a/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs @@ -346,6 +346,15 @@ impl HyperlightVm { self.vm.set_debug_regs(&CommonDebugRegs::default())?; self.vm.reset_xsave()?; + self.apply_sregs(cr3, sregs) + } + + /// Apply special registers and mark TLB for flush. 
+ pub(crate) fn apply_sregs( + &mut self, + cr3: u64, + sregs: &CommonSpecialRegisters, + ) -> std::result::Result<(), RegisterError> { // Restore the full special registers from snapshot, but update CR3 // to point to the new (relocated) page tables let mut sregs = *sregs; From 2d131e0a44aa2edda8aa4e299b9e7b99f0b0b637 Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Thu, 30 Apr 2026 18:50:43 -0700 Subject: [PATCH 06/15] Introduce HostFunctions newtype for sandbox construction Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- .../src/func/host_functions.rs | 26 ++++- src/hyperlight_host/src/lib.rs | 3 + src/hyperlight_host/src/sandbox/host_funcs.rs | 99 +++++++++++++++++-- .../src/sandbox/uninitialized.rs | 9 +- 4 files changed, 118 insertions(+), 19 deletions(-) diff --git a/src/hyperlight_host/src/func/host_functions.rs b/src/hyperlight_host/src/func/host_functions.rs index e87fa70b0..9ccb98f05 100644 --- a/src/hyperlight_host/src/func/host_functions.rs +++ b/src/hyperlight_host/src/func/host_functions.rs @@ -52,7 +52,8 @@ impl Registerable for UninitializedSandbox { return_type: Output::TYPE, }; - (*hfs).register_host_function(name.to_string(), entry) + (*hfs).register_host_function(name.to_string(), entry); + Ok(()) } } @@ -92,7 +93,26 @@ impl Registerable for crate::MultiUseSandbox { return_type: Output::TYPE, }; - (*hfs).register_host_function(name.to_string(), entry) + (*hfs).register_host_function(name.to_string(), entry); + Ok(()) + } +} + +impl Registerable for crate::HostFunctions { + fn register_host_function( + &mut self, + name: &str, + hf: impl Into>, + ) -> Result<()> { + let entry = FunctionEntry { + function: hf.into().into(), + parameter_types: Args::TYPE, + return_type: Output::TYPE, + }; + + self.inner_mut() + .register_host_function(name.to_string(), entry); + Ok(()) } } @@ -236,7 +256,7 @@ pub(crate) fn register_host_function, } -impl From<&mut FunctionRegistry> 
for HostFunctionDetails { - fn from(registry: &mut FunctionRegistry) -> Self { +/// A collection of host functions that can be supplied to a sandbox +/// constructor (e.g. [`crate::MultiUseSandbox::from_snapshot`]) to +/// expose host-side functionality to the guest. +/// +/// Use [`HostFunctions::default`] to start with the standard +/// `HostPrint` function pre-registered (matches the registry that the +/// regular `UninitializedSandbox` → `evolve()` path constructs), or +/// [`HostFunctions::new`] to start with an empty registry. +/// +/// Add additional host functions via the +/// [`crate::func::Registerable`] trait, just as you would on an +/// `UninitializedSandbox`. +/// +/// ```no_run +/// # use hyperlight_host::{HostFunctions, Result}; +/// # use hyperlight_host::func::Registerable; +/// # fn example() -> Result<()> { +/// // Default: HostPrint already registered. +/// let mut funcs = HostFunctions::default(); +/// funcs.register_host_function("Add", |a: i32, b: i32| Ok(a + b))?; +/// # Ok(()) +/// # } +/// ``` +pub struct HostFunctions(FunctionRegistry); + +impl HostFunctions { + /// Create an empty `HostFunctions` with no host functions + /// registered. + /// + /// Most callers want [`HostFunctions::default`] instead, which + /// pre-registers the standard `HostPrint` function. + pub fn new() -> Self { + Self(FunctionRegistry::default()) + } + + /// Consume this `HostFunctions` and return the inner registry. + pub(crate) fn into_inner(self) -> FunctionRegistry { + self.0 + } + + /// Borrow the inner registry mutably. + pub(crate) fn inner_mut(&mut self) -> &mut FunctionRegistry { + &mut self.0 + } + + /// Borrow the inner registry immutably. + pub(crate) fn inner(&self) -> &FunctionRegistry { + &self.0 + } +} + +impl Default for HostFunctions { + /// Create a `HostFunctions` pre-populated with the standard + /// `HostPrint` function (writes UTF-8 strings to the host's + /// stdout in green). 
+ /// + /// This matches the default registry installed by + /// `UninitializedSandbox::new()`, so a snapshot taken from a + /// regular sandbox can be loaded with + /// `MultiUseSandbox::from_snapshot(snap, HostFunctions::default(), None)` + /// without registering anything else. + /// + /// Use [`HostFunctions::new`] for an empty registry. + fn default() -> Self { + Self(FunctionRegistry::with_default_host_print()) + } +} + +impl From<&FunctionRegistry> for HostFunctionDetails { + fn from(registry: &FunctionRegistry) -> Self { let host_functions = registry .functions_map .iter() @@ -61,15 +129,26 @@ pub struct FunctionEntry { impl FunctionRegistry { /// Register a host function with the sandbox. - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - pub(crate) fn register_host_function( - &mut self, - name: String, - func: FunctionEntry, - ) -> Result<()> { + #[instrument(skip_all, parent = Span::current(), level = "Trace")] + pub(crate) fn register_host_function(&mut self, name: String, func: FunctionEntry) { self.functions_map.insert(name, func); + } - Ok(()) + /// Create a `FunctionRegistry` pre-populated with the default + /// `HostPrint` function (writes to stdout with green text). + pub(crate) fn with_default_host_print() -> Self { + use crate::func::host_functions::HostFunction; + use crate::func::{ParameterTuple, SupportedReturnType}; + + let mut registry = Self::default(); + let hf: HostFunction = default_writer_func.into(); + let entry = FunctionEntry { + function: hf.into(), + parameter_types: <(String,)>::TYPE, + return_type: ::TYPE, + }; + registry.register_host_function("HostPrint".to_string(), entry); + registry } /// Assuming a host function called `"HostPrint"` exists, and takes a @@ -118,7 +197,7 @@ impl FunctionRegistry { /// The default writer function is to write to stdout with green text. 
#[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] -pub(super) fn default_writer_func(s: String) -> Result { +fn default_writer_func(s: String) -> Result { match std::io::stdout().is_terminal() { false => { print!("{}", s); diff --git a/src/hyperlight_host/src/sandbox/uninitialized.rs b/src/hyperlight_host/src/sandbox/uninitialized.rs index 23c01be28..b65b1e655 100644 --- a/src/hyperlight_host/src/sandbox/uninitialized.rs +++ b/src/hyperlight_host/src/sandbox/uninitialized.rs @@ -22,7 +22,7 @@ use std::sync::{Arc, Mutex}; use tracing::{Span, instrument}; use tracing_core::LevelFilter; -use super::host_funcs::{FunctionRegistry, default_writer_func}; +use super::host_funcs::FunctionRegistry; use super::snapshot::Snapshot; use super::uninitialized_evolve::evolve_impl_multi_use; use crate::func::host_functions::{HostFunction, register_host_function}; @@ -365,9 +365,9 @@ impl UninitializedSandbox { let mem_mgr_wrapper = SandboxMemoryManager::::from_snapshot(snapshot.as_ref())?; - let host_funcs = Arc::new(Mutex::new(FunctionRegistry::default())); + let host_funcs = Arc::new(Mutex::new(FunctionRegistry::with_default_host_print())); - let mut sandbox = Self { + let sandbox = Self { host_funcs, mgr: mem_mgr_wrapper, max_guest_log_level: None, @@ -383,9 +383,6 @@ impl UninitializedSandbox { pending_file_mappings: Vec::new(), }; - // If we were passed a writer for host print register it otherwise use the default. 
- sandbox.register_print(default_writer_func)?; - crate::debug!("Sandbox created: {:#?}", sandbox); Ok(sandbox) From 1df46d9e325bade81952f1c154f5829aa379cc29 Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Thu, 30 Apr 2026 18:51:09 -0700 Subject: [PATCH 07/15] Add file backed ReadonlySharedMemory Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- src/hyperlight_host/src/mem/memory_region.rs | 4 +- src/hyperlight_host/src/mem/shared_mem.rs | 274 +++++++++++++++++++ 2 files changed, 277 insertions(+), 1 deletion(-) diff --git a/src/hyperlight_host/src/mem/memory_region.rs b/src/hyperlight_host/src/mem/memory_region.rs index 615fe9cac..de3c83a20 100644 --- a/src/hyperlight_host/src/mem/memory_region.rs +++ b/src/hyperlight_host/src/mem/memory_region.rs @@ -158,7 +158,9 @@ impl MemoryRegionType { /// shared memory mapping with guard pages. pub fn surrogate_mapping(&self) -> SurrogateMapping { match self { - MemoryRegionType::MappedFile => SurrogateMapping::ReadOnlyFile, + MemoryRegionType::MappedFile | MemoryRegionType::Snapshot => { + SurrogateMapping::ReadOnlyFile + } _ => SurrogateMapping::SandboxMemory, } } diff --git a/src/hyperlight_host/src/mem/shared_mem.rs b/src/hyperlight_host/src/mem/shared_mem.rs index 5f975f605..7fb7c9eb4 100644 --- a/src/hyperlight_host/src/mem/shared_mem.rs +++ b/src/hyperlight_host/src/mem/shared_mem.rs @@ -2012,6 +2012,14 @@ pub struct ReadonlySharedMemory { /// by `mapping_at`. If `None`, the full `mem_size()` is mapped. #[cfg_attr(unshared_snapshot_mem, allow(dead_code))] guest_mapped_size: Option, + /// Size of the leading guard region (the bytes between + /// `region.ptr` and the start of the usable memory). For most + /// constructors this is exactly `PAGE_SIZE_USIZE`. 
The Windows + /// `from_file` path can use a larger leading guard when the + /// snapshot file's `memory_offset` exceeds one page (which + /// happens whenever the file carries host-function metadata + /// before the memory blob). + leading_guard_size: usize, } // Safety: HostMapping is only non-Send/Sync (causing // ReadonlySharedMemory to not be automatically Send/Sync) because raw @@ -2033,6 +2041,7 @@ impl ReadonlySharedMemory { Ok(ReadonlySharedMemory { region: anon.region, guest_mapped_size: None, + leading_guard_size: PAGE_SIZE_USIZE, }) } @@ -2045,6 +2054,7 @@ impl ReadonlySharedMemory { Ok(ReadonlySharedMemory { region: anon.region, guest_mapped_size: Some(guest_mapped_size), + leading_guard_size: PAGE_SIZE_USIZE, }) } @@ -2055,6 +2065,244 @@ impl ReadonlySharedMemory { self.guest_mapped_size.unwrap_or_else(|| self.mem_size()) } + /// Create a `ReadonlySharedMemory` backed by a file on disk. + /// + /// Only the `len` bytes at `[offset..offset+len)` (the memory + /// blob) are exposed via `base_ptr()` and `mem_size()`. + /// + /// `[offset..offset+len)` is surrounded by guard regions on the + /// host. + /// + /// `offset` and `len` must both be non-zero multiples of + /// `PAGE_SIZE`. If `guest_mapped_size` is set, it must also be + /// a non-zero multiple of `PAGE_SIZE` no greater than `len`. 
+ pub(crate) fn from_file( + file: &std::fs::File, + offset: usize, + len: usize, + guest_mapped_size: Option, + ) -> Result { + if len == 0 { + return Err(new_error!( + "Cannot create file-backed shared memory with size 0" + )); + } + + if offset == 0 || offset % PAGE_SIZE_USIZE != 0 { + return Err(new_error!( + "snapshot file offset {} must be a non-zero multiple of PAGE_SIZE", + offset + )); + } + + if !len.is_multiple_of(PAGE_SIZE_USIZE) { + return Err(new_error!( + "snapshot mapping length {} must be a multiple of PAGE_SIZE", + len + )); + } + + if let Some(gms) = guest_mapped_size + && (gms == 0 || gms > len || !gms.is_multiple_of(PAGE_SIZE_USIZE)) + { + return Err(new_error!( + "snapshot guest_mapped_size {} must be a non-zero multiple of PAGE_SIZE no greater than len {}", + gms, + len + )); + } + + #[cfg(target_os = "linux")] + { + Self::from_file_linux(file, offset, len, guest_mapped_size) + } + #[cfg(target_os = "windows")] + { + Self::from_file_windows(file, offset, len, guest_mapped_size) + } + } + + #[cfg(target_os = "linux")] + fn from_file_linux( + file: &std::fs::File, + offset: usize, + len: usize, + guest_mapped_size: Option, + ) -> Result { + use std::ffi::c_void; + use std::os::unix::io::AsRawFd; + + use libc::{ + MAP_ANONYMOUS, MAP_FAILED, MAP_FIXED, MAP_NORESERVE, MAP_PRIVATE, PROT_NONE, PROT_READ, + PROT_WRITE, mmap, off_t, size_t, + }; + + let total_size = len.checked_add(2 * PAGE_SIZE_USIZE).ok_or_else(|| { + new_error!("Memory required for file-backed snapshot exceeded usize::MAX") + })?; + + let fd = file.as_raw_fd(); + let offset: off_t = offset + .try_into() + .map_err(|_| new_error!("snapshot file offset {} exceeds off_t range", offset))?; + + // Allocate the full region (guard + usable + guard) as anonymous + let base = unsafe { + mmap( + null_mut(), + total_size as size_t, + PROT_NONE, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, + -1, + 0 as off_t, + ) + }; + if base == MAP_FAILED { + return Err(HyperlightError::MmapFailed( + 
std::io::Error::last_os_error().raw_os_error(), + )); + } + + // Map the file content over the usable portion (between guard pages). + // PROT_READ | PROT_WRITE: KVM/MSHV require writable host mappings + // to handle copy-on-write page faults from the guest. + // MAP_PRIVATE: writes go to private copies, not the file. + let usable_ptr = unsafe { (base as *mut u8).add(PAGE_SIZE_USIZE) }; + let mapped = unsafe { + mmap( + usable_ptr as *mut c_void, + len as size_t, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, + fd, + offset, + ) + }; + if mapped == MAP_FAILED { + unsafe { libc::munmap(base, total_size as size_t) }; + return Err(HyperlightError::MmapFailed( + std::io::Error::last_os_error().raw_os_error(), + )); + } + + // Guard pages at base and base+total_size-PAGE_SIZE are already + // PROT_NONE from the anonymous mapping; MAP_FIXED only replaced + // the middle portion. + + #[allow(clippy::arc_with_non_send_sync)] + Ok(ReadonlySharedMemory { + region: Arc::new(HostMapping { + ptr: base as *mut u8, + size: total_size, + }), + guest_mapped_size, + leading_guard_size: PAGE_SIZE_USIZE, + }) + } + + /// Windows file mappings must start at file offset 0 and cannot + /// extend beyond the file's size, so the view covers + /// `[0 .. offset + len + PAGE_SIZE)`. The leading `offset` bytes + /// (header plus any host function metadata) become the leading + /// guard, recorded in `leading_guard_size`. The trailing + /// `PAGE_SIZE` bytes (written explicitly by `to_file`) become + /// the trailing guard. Both ends are protected with + /// `VirtualProtect(PAGE_NOACCESS)`. 
+ #[cfg(target_os = "windows")] + fn from_file_windows( + file: &std::fs::File, + offset: usize, + len: usize, + guest_mapped_size: Option, + ) -> Result { + use std::os::windows::io::AsRawHandle; + + use windows::Win32::Foundation::HANDLE; + use windows::Win32::System::Memory::{ + CreateFileMappingA, FILE_MAP_READ, MapViewOfFile, PAGE_NOACCESS, PAGE_PROTECTION_FLAGS, + PAGE_READONLY, VirtualProtect, + }; + use windows::core::PCSTR; + + let leading_guard_size = offset; + let total_size = leading_guard_size + .checked_add(len) + .and_then(|n| n.checked_add(PAGE_SIZE_USIZE)) + .ok_or_else(|| { + new_error!("Memory required for file-backed snapshot exceeded usize::MAX") + })?; + debug_assert!(leading_guard_size >= PAGE_SIZE_USIZE); + debug_assert!(leading_guard_size % PAGE_SIZE_USIZE == 0); + + let file_handle = HANDLE(file.as_raw_handle()); + + // Create a read-only file mapping at the exact file size (pass 0,0). + // The file includes trailing PAGE_SIZE padding written by to_file(), + // so the file is at least leading_guard_size + len + PAGE_SIZE bytes. + let handle = + unsafe { CreateFileMappingA(file_handle, None, PAGE_READONLY, 0, 0, PCSTR::null())? }; + + if handle.is_invalid() { + log_then_return!(HyperlightError::MemoryAllocationFailed( + Error::last_os_error().raw_os_error() + )); + } + + // Map exactly total_size (leading region + blob + trailing padding) bytes. + let addr = unsafe { MapViewOfFile(handle, FILE_MAP_READ, 0, 0, total_size) }; + if addr.Value.is_null() { + unsafe { + let _ = windows::Win32::Foundation::CloseHandle(handle); + } + log_then_return!(HyperlightError::MemoryAllocationFailed( + Error::last_os_error().raw_os_error() + )); + } + + #[allow(clippy::arc_with_non_send_sync)] + let region = Arc::new(HostMapping { + ptr: addr.Value as *mut u8, + size: total_size, + handle, + }); + + // Set guard pages on both ends. 
+ let mut unused_old_prot = PAGE_PROTECTION_FLAGS(0); + + // Leading guard: covers the fixed header and any host-function + // metadata + let first_guard = addr.Value; + if let Err(e) = unsafe { + VirtualProtect( + first_guard, + leading_guard_size, + PAGE_NOACCESS, + &mut unused_old_prot, + ) + } { + log_then_return!(WindowsAPIError(e.clone())); + } + + // Trailing guard: the explicit PAGE_SIZE padding at the end of the file. + let last_guard = unsafe { first_guard.add(total_size - PAGE_SIZE_USIZE) }; + if let Err(e) = unsafe { + VirtualProtect( + last_guard, + PAGE_SIZE_USIZE, + PAGE_NOACCESS, + &mut unused_old_prot, + ) + } { + log_then_return!(WindowsAPIError(e.clone())); + } + + Ok(ReadonlySharedMemory { + region, + guest_mapped_size, + leading_guard_size, + }) + } + pub(crate) fn as_slice(&self) -> &[u8] { unsafe { std::slice::from_raw_parts(self.base_ptr(), self.mem_size()) } } @@ -2098,6 +2346,32 @@ impl SharedMemory for ReadonlySharedMemory { fn region(&self) -> &HostMapping { &self.region } + // Override the default trait accessors to use the variable-sized + // leading guard. The trailing guard is always `PAGE_SIZE_USIZE`. + fn base_addr(&self) -> usize { + self.region().ptr as usize + self.leading_guard_size + } + fn base_ptr(&self) -> *mut u8 { + self.region().ptr.wrapping_add(self.leading_guard_size) + } + fn mem_size(&self) -> usize { + self.region().size - self.leading_guard_size - PAGE_SIZE_USIZE + } + fn host_region_base(&self) -> ::HostBaseType { + #[cfg(not(windows))] + { + self.base_addr() + } + #[cfg(windows)] + { + super::memory_region::HostRegionBase { + from_handle: self.region().handle.into(), + handle_base: self.region().ptr as usize, + handle_size: self.region().size, + offset: self.leading_guard_size, + } + } + } // There's no way to get exclusive (and therefore writable) access // to a ReadonlySharedMemory. 
fn with_exclusivity T>( From 9d4d5a642c88c363d4b6b455f0a8bfe1c731e55c Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Thu, 30 Apr 2026 18:52:41 -0700 Subject: [PATCH 08/15] Persist host function metadata in Snapshot Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- src/hyperlight_host/src/mem/mgr.rs | 4 + .../src/sandbox/initialized_multi_use.rs | 6 ++ .../src/sandbox/snapshot/mod.rs | 78 +++++++++++++++++++ 3 files changed, 88 insertions(+) diff --git a/src/hyperlight_host/src/mem/mgr.rs b/src/hyperlight_host/src/mem/mgr.rs index 9e5d843d1..9dede7083 100644 --- a/src/hyperlight_host/src/mem/mgr.rs +++ b/src/hyperlight_host/src/mem/mgr.rs @@ -22,6 +22,7 @@ use hyperlight_common::flatbuffer_wrappers::function_call::{ }; use hyperlight_common::flatbuffer_wrappers::function_types::FunctionCallResult; use hyperlight_common::flatbuffer_wrappers::guest_log_data::GuestLogData; +use hyperlight_common::flatbuffer_wrappers::host_function_details::HostFunctionDetails; use hyperlight_common::vmem::{self, PAGE_TABLE_SIZE}; #[cfg(all(feature = "crashdump", not(feature = "i686-guest")))] use hyperlight_common::vmem::{BasicMapping, MappingKind}; @@ -298,6 +299,7 @@ where } /// Create a snapshot with the given mapped regions + #[allow(clippy::too_many_arguments)] pub(crate) fn snapshot( &mut self, sandbox_id: u64, @@ -306,6 +308,7 @@ where rsp_gva: u64, sregs: CommonSpecialRegisters, entrypoint: NextAction, + host_functions: HostFunctionDetails, ) -> Result { self.snapshot_count += 1; Snapshot::new( @@ -320,6 +323,7 @@ where sregs, entrypoint, self.snapshot_count, + host_functions, ) } } diff --git a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs index 241622cab..e6672f874 100644 --- a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs +++ b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs @@ -207,6 +207,11 @@ impl 
MultiUseSandbox { .get_snapshot_sregs() .map_err(|e| HyperlightError::HyperlightVmError(e.into()))?; let entrypoint = self.vm.get_entrypoint(); + let host_functions = (&*self.host_funcs.try_lock().map_err(|e| { + crate::new_error!("Error locking host_funcs at {}:{}: {}", file!(), line!(), e) + })?) + .into(); + let memory_snapshot = self.mem_mgr.snapshot( self.id, mapped_regions_vec, @@ -214,6 +219,7 @@ impl MultiUseSandbox { stack_top_gpa, sregs, entrypoint, + host_functions, )?; let snapshot = Arc::new(memory_snapshot); self.snapshot = Some(snapshot.clone()); diff --git a/src/hyperlight_host/src/sandbox/snapshot/mod.rs b/src/hyperlight_host/src/sandbox/snapshot/mod.rs index e4c7b1133..b37b7f361 100644 --- a/src/hyperlight_host/src/sandbox/snapshot/mod.rs +++ b/src/hyperlight_host/src/sandbox/snapshot/mod.rs @@ -17,6 +17,7 @@ limitations under the License. use std::collections::{BTreeMap, HashMap}; use std::sync::atomic::{AtomicU64, Ordering}; +use hyperlight_common::flatbuffer_wrappers::host_function_details::HostFunctionDetails; use hyperlight_common::layout::{scratch_base_gpa, scratch_base_gva}; use hyperlight_common::vmem; use hyperlight_common::vmem::{ @@ -115,6 +116,16 @@ pub struct Snapshot { /// restored sandbox's guest-visible counter so the guest can tell /// which snapshot it is currently a clone of. snapshot_generation: u64, + + /// Names and signatures of host functions registered on the + /// sandbox at the time this snapshot was taken. Persisted to disk + /// so that [`crate::MultiUseSandbox::from_snapshot`] can reject + /// a `HostFunctions` set that is missing required functions or + /// has mismatched signatures. + /// + /// Empty for snapshots created via test-only constructors that + /// bypass the normal sandbox path. 
+ host_functions: HostFunctionDetails, } impl core::convert::AsRef for Snapshot { fn as_ref(&self) -> &Self { @@ -423,6 +434,9 @@ impl Snapshot { sregs: None, entrypoint: NextAction::Initialise(load_addr + entrypoint_va - base_va), snapshot_generation: 0, + host_functions: HostFunctionDetails { + host_functions: None, + }, }) } @@ -447,6 +461,7 @@ impl Snapshot { sregs: CommonSpecialRegisters, entrypoint: NextAction, snapshot_generation: u64, + host_functions: HostFunctionDetails, ) -> Result { let mut phys_seen = HashMap::::new(); let scratch_gva = scratch_base_gva(layout.get_scratch_size()); @@ -610,6 +625,7 @@ impl Snapshot { sregs: Some(sregs), entrypoint, snapshot_generation, + host_functions, }) } @@ -663,6 +679,65 @@ impl Snapshot { pub(crate) fn entrypoint(&self) -> NextAction { self.entrypoint } + + /// Validate that `provided` is a superset of the host functions + /// recorded in this snapshot: every function that was registered + /// at snapshot time must also be present in `provided` with a + /// matching signature. Extras in `provided` are allowed. + /// + /// A snapshot with no recorded host functions (e.g. one + /// produced by a test-only constructor) accepts any `provided` + /// set. + pub(crate) fn validate_host_functions(&self, provided: &crate::HostFunctions) -> Result<()> { + let required = match &self.host_functions.host_functions { + Some(v) => v, + None => return Ok(()), + }; + if required.is_empty() { + return Ok(()); + } + + // Build a HostFunctionDetails view of the provided registry + // using the existing `From<&FunctionRegistry>` impl. 
+ let provided_details: HostFunctionDetails = provided.inner().into(); + let provided_funcs = provided_details.host_functions.unwrap_or_default(); + + let mut missing: Vec = Vec::new(); + let mut signature_mismatches: Vec = Vec::new(); + + for req in required { + match provided_funcs + .iter() + .find(|f| f.function_name == req.function_name) + { + None => missing.push(req.function_name.clone()), + Some(found) + if found.parameter_types != req.parameter_types + || found.return_type != req.return_type => + { + signature_mismatches.push(format!( + "{}: snapshot has {:?} -> {:?}, registered {:?} -> {:?}", + req.function_name, + req.parameter_types, + req.return_type, + found.parameter_types, + found.return_type, + )); + } + Some(_) => {} + } + } + + if missing.is_empty() && signature_mismatches.is_empty() { + return Ok(()); + } + + Err(crate::new_error!( + "snapshot host function mismatch: missing={:?}, signature_mismatches={:?}", + missing, + signature_mismatches + )) + } } impl PartialEq for Snapshot { @@ -674,6 +749,7 @@ impl PartialEq for Snapshot { #[cfg(test)] #[cfg(not(feature = "i686-guest"))] mod tests { + use hyperlight_common::flatbuffer_wrappers::host_function_details::HostFunctionDetails; use hyperlight_common::vmem::{self, BasicMapping, Mapping, MappingKind, PAGE_SIZE}; use crate::hypervisor::regs::CommonSpecialRegisters; @@ -747,6 +823,7 @@ mod tests { default_sregs(), super::NextAction::None, 1, + HostFunctionDetails::default(), ) .unwrap(); @@ -764,6 +841,7 @@ mod tests { default_sregs(), super::NextAction::None, 2, + HostFunctionDetails::default(), ) .unwrap(); From 26bef034b587428b6b3b6cacf78cc20e6726df04 Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Fri, 1 May 2026 13:24:48 -0700 Subject: [PATCH 09/15] Make gdb work for already initialised snapshots Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- .../src/hypervisor/gdb/arch.rs | 7 ----- 
.../src/hypervisor/gdb/event_loop.rs | 1 - src/hyperlight_host/src/hypervisor/gdb/mod.rs | 4 --- .../src/hypervisor/hyperlight_vm/mod.rs | 29 +++++++++++++++---- .../src/hypervisor/hyperlight_vm/x86_64.rs | 17 +++++++++-- 5 files changed, 37 insertions(+), 21 deletions(-) diff --git a/src/hyperlight_host/src/hypervisor/gdb/arch.rs b/src/hyperlight_host/src/hypervisor/gdb/arch.rs index b2ebb82a3..b7c32b9c0 100644 --- a/src/hyperlight_host/src/hypervisor/gdb/arch.rs +++ b/src/hyperlight_host/src/hypervisor/gdb/arch.rs @@ -63,7 +63,6 @@ pub(crate) const DR6_HW_BP_FLAGS_MASK: u64 = 0x0F << DR6_HW_BP_FLAGS_POS; pub(crate) fn vcpu_stop_reason( vm: &mut dyn DebuggableVm, dr6: u64, - entrypoint: u64, exception: u32, ) -> std::result::Result { let CommonRegisters { rip, .. } = vm.regs()?; @@ -81,10 +80,6 @@ pub(crate) fn vcpu_stop_reason( // Check page 19-4 Vol. 3B of Intel 64 and IA-32 // Architectures Software Developer's Manual if DR6_HW_BP_FLAGS_MASK & dr6 != 0 { - if rip == entrypoint { - vm.remove_hw_breakpoint(entrypoint)?; - return Ok(VcpuStopReason::EntryPointBp); - } return Ok(VcpuStopReason::HwBp); } } @@ -98,12 +93,10 @@ pub(crate) fn vcpu_stop_reason( r"The vCPU exited because of an unknown reason: rip: {:?} dr6: {:?} - entrypoint: {:?} exception: {:?} ", rip, dr6, - entrypoint, exception, ); diff --git a/src/hyperlight_host/src/hypervisor/gdb/event_loop.rs b/src/hyperlight_host/src/hypervisor/gdb/event_loop.rs index bc7c9fd14..5edd81b50 100644 --- a/src/hyperlight_host/src/hypervisor/gdb/event_loop.rs +++ b/src/hyperlight_host/src/hypervisor/gdb/event_loop.rs @@ -59,7 +59,6 @@ impl run_blocking::BlockingEventLoop for GdbBlockingEventLoop { // Resume execution if unknown reason for stop let stop_response = match stop_reason { VcpuStopReason::DoneStep => BaseStopReason::DoneStep, - VcpuStopReason::EntryPointBp => BaseStopReason::HwBreak(()), VcpuStopReason::SwBp => BaseStopReason::SwBreak(()), VcpuStopReason::HwBp => BaseStopReason::HwBreak(()), // This is a 
consequence of the GDB client sending an interrupt signal diff --git a/src/hyperlight_host/src/hypervisor/gdb/mod.rs b/src/hyperlight_host/src/hypervisor/gdb/mod.rs index 94396e5ae..0a0685f71 100644 --- a/src/hyperlight_host/src/hypervisor/gdb/mod.rs +++ b/src/hyperlight_host/src/hypervisor/gdb/mod.rs @@ -171,10 +171,6 @@ impl DebugMemoryAccess { pub enum VcpuStopReason { Crash, DoneStep, - /// Hardware breakpoint inserted by the hypervisor so the guest can be stopped - /// at the entry point. This is used to avoid the guest from executing - /// the entry point code before the debugger is connected - EntryPointBp, HwBp, SwBp, Interrupt, diff --git a/src/hyperlight_host/src/hypervisor/hyperlight_vm/mod.rs b/src/hyperlight_host/src/hypervisor/hyperlight_vm/mod.rs index 830b856c0..221552230 100644 --- a/src/hyperlight_host/src/hypervisor/hyperlight_vm/mod.rs +++ b/src/hyperlight_host/src/hypervisor/hyperlight_vm/mod.rs @@ -389,6 +389,12 @@ pub(crate) struct HyperlightVm { pub(super) gdb_conn: Option>, #[cfg(gdb)] pub(super) sw_breakpoints: HashMap, // addr -> original instruction + /// One-shot hw breakpoint installed at the entry address when gdb is + /// enabled, so the gdb stub gets a `VcpuStopped` to enter its event + /// loop on the first vCPU run after construction. Cleared on first + /// hit by `handle_debug`. + #[cfg(gdb)] + pub(super) one_shot_entry_bp: Option, #[cfg(feature = "mem_profile")] pub(super) trace_info: MemTraceInfo, #[cfg(crashdump)] @@ -598,17 +604,28 @@ impl HyperlightVm { match exit_reason { #[cfg(gdb)] Ok(VmExit::Debug { dr6, exception }) => { - let initialise = match self.entrypoint { - NextAction::Initialise(initialise) => initialise, - _ => 0, - }; - // Handle debug event (breakpoints) + // Classify the debug exit. `vcpu_stop_reason` is a + // pure classifier and has no side effects on the VM. 
let stop_reason = crate::hypervisor::gdb::arch::vcpu_stop_reason( self.vm.as_mut(), dr6, - initialise, exception, )?; + // Remove the one-shot entry breakpoint installed by + // `HyperlightVm::new` the first time it fires so it + // does not interfere with later user-installed + // breakpoints at the same address. + if matches!(stop_reason, VcpuStopReason::HwBp) + && let Some(entry_addr) = self.one_shot_entry_bp + { + let rip = self.vm.regs().map_err(VcpuStopReasonError::GetRegs)?.rip; + if rip == entry_addr { + self.vm + .remove_hw_breakpoint(entry_addr) + .map_err(VcpuStopReasonError::RemoveHwBreakpoint)?; + self.one_shot_entry_bp = None; + } + } if let Err(e) = self.handle_debug(dbg_mem_access_fn.clone(), stop_reason) { break Err(e.into()); } diff --git a/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs b/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs index 16ac55ad3..6feff13f1 100644 --- a/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs @@ -168,6 +168,8 @@ impl HyperlightVm { gdb_conn, #[cfg(gdb)] sw_breakpoints: HashMap::new(), + #[cfg(gdb)] + one_shot_entry_bp: None, #[cfg(feature = "mem_profile")] trace_info, #[cfg(crashdump)] @@ -182,12 +184,21 @@ impl HyperlightVm { #[cfg(gdb)] if ret.gdb_conn.is_some() { ret.send_dbg_msg(DebugResponse::InterruptHandle(ret.interrupt_handle.clone()))?; - // Add breakpoint to the entry point address, if we are going to initialise + // Add breakpoint at the entry point address. The breakpoint + // is removed on first hit by the run loop. Tracked via + // `one_shot_entry_bp` so it does not interfere with later + // user-installed breakpoints at the same address. 
ret.vm.set_debug(true).map_err(VmError::Debug)?; - if let NextAction::Initialise(initialise) = entrypoint { + let entry_addr = match entrypoint { + NextAction::Initialise(addr) | NextAction::Call(addr) => Some(addr), + #[cfg(test)] + NextAction::None => None, + }; + if let Some(addr) = entry_addr { ret.vm - .add_hw_breakpoint(initialise) + .add_hw_breakpoint(addr) .map_err(CreateHyperlightVmError::AddHwBreakpoint)?; + ret.one_shot_entry_bp = Some(addr); } } From 5b58758899701b44a007381693f6535dfc11cccd Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Thu, 30 Apr 2026 18:53:01 -0700 Subject: [PATCH 10/15] Add MultiUseSandbox from_snapshot Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- .../src/sandbox/initialized_multi_use.rs | 231 ++++++++++++++++++ 1 file changed, 231 insertions(+) diff --git a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs index e6672f874..233f498df 100644 --- a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs +++ b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs @@ -145,6 +145,195 @@ impl MultiUseSandbox { self.pt_root_finder = Some(finder); } + /// Create a `MultiUseSandbox` directly from a [`Snapshot`], + /// bypassing [`UninitializedSandbox`](crate::UninitializedSandbox) + /// and [`evolve()`](crate::UninitializedSandbox::evolve). + /// + /// This is useful for fast sandbox creation when a snapshot of + /// an already-initialized guest is available, either saved to disk + /// or captured in memory from another sandbox. + /// + /// The provided [`HostFunctions`] must include every host function + /// that was registered on the sandbox at the time the snapshot was + /// taken (matched by name and signature). Additional host functions + /// not present in the snapshot are allowed. 
+ /// + /// An optional [`SandboxConfiguration`](crate::sandbox::SandboxConfiguration) + /// can be supplied to override runtime settings such as timeouts and + /// interrupt behavior. Memory layout fields + /// (`input_data_size`, `output_data_size`, `heap_size`, `scratch_size`) + /// are always taken from the snapshot. Any values supplied in + /// `config` for those fields are ignored. + /// + /// # Examples + /// + /// From a snapshot taken on another sandbox: + /// + /// ```no_run + /// # use std::sync::Arc; + /// # use hyperlight_host::{HostFunctions, MultiUseSandbox, UninitializedSandbox, GuestBinary}; + /// # fn example() -> Result<(), Box> { + /// // Create and initialize a sandbox the normal way + /// let mut sandbox: MultiUseSandbox = UninitializedSandbox::new( + /// GuestBinary::FilePath("guest.bin".into()), + /// None, + /// )?.evolve()?; + /// + /// // Capture a snapshot of the initialized state + /// let snapshot = sandbox.snapshot()?; + /// + /// // Create a new sandbox directly from the snapshot + /// let mut sandbox2 = MultiUseSandbox::from_snapshot(snapshot, HostFunctions::default(), None)?; + /// let result: i32 = sandbox2.call("GetValue", ())?; + /// # Ok(()) + /// # } + /// ``` + /// + /// From a snapshot loaded from disk: + /// + /// ```no_run + /// # use std::sync::Arc; + /// # use hyperlight_host::{HostFunctions, MultiUseSandbox}; + /// # use hyperlight_host::sandbox::snapshot::Snapshot; + /// # fn example() -> Result<(), Box> { + /// let snapshot = Arc::new(Snapshot::from_file("guest_snapshot.hls")?); + /// let mut sandbox = MultiUseSandbox::from_snapshot(snapshot, HostFunctions::default(), None)?; + /// let result: String = sandbox.call("Echo", "hello".to_string())?; + /// # Ok(()) + /// # } + /// ``` + #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] + pub fn from_snapshot( + snapshot: Arc, + host_funcs: crate::HostFunctions, + config: Option, + ) -> Result { + use rand::RngExt; + + use 
crate::mem::ptr::RawPtr; + use crate::sandbox::uninitialized_evolve::set_up_hypervisor_partition; + + // Validate that the provided host functions are a superset of + // those required by the snapshot. + snapshot.validate_host_functions(&host_funcs)?; + + let host_funcs = Arc::new(Mutex::new(host_funcs.into_inner())); + + let stack_top_gva = snapshot.stack_top_gva(); + // Start from the caller's config (if any) so runtime fields + // such as timeouts and interrupt knobs are honored, then + // overwrite the layout fields from the snapshot. The on-disk + // layout is fixed, so any layout values supplied by the + // caller are silently ignored. Warn if the caller passed a + // config whose layout fields disagree with the snapshot, so + // the override is at least visible. + let caller_supplied_config = config.is_some(); + let mut config = config.unwrap_or_default(); + if caller_supplied_config { + warn_on_layout_override(&config, snapshot.layout()); + } + config.set_input_data_size(snapshot.layout().input_data_size); + config.set_output_data_size(snapshot.layout().output_data_size); + config.set_heap_size(snapshot.layout().heap_size as u64); + config.set_scratch_size(snapshot.layout().get_scratch_size()); + let load_info = snapshot.load_info(); + + let mgr = crate::mem::mgr::SandboxMemoryManager::from_snapshot(&snapshot)?; + let (mut hshm, gshm) = mgr.build()?; + + let page_size = u32::try_from(page_size::get())? as usize; + + #[cfg(target_os = "linux")] + crate::signal_handlers::setup_signal_handlers(&config)?; + + // Build the runtime config from the caller's `SandboxConfiguration` + // so that `guest_core_dump` (crashdump) and `guest_debug_info` (gdb) + // take effect just like they do in the normal evolve path. + // `binary_path` and `entry_point` are not available from a snapshot + // and are left unset. This only affects metadata in core dumps. 
+ #[cfg(any(crashdump, gdb))] + let rt_cfg = crate::sandbox::uninitialized::SandboxRuntimeConfig { + #[cfg(crashdump)] + binary_path: None, + #[cfg(gdb)] + debug_info: config.get_guest_debug_info(), + #[cfg(crashdump)] + guest_core_dump: config.get_guest_core_dump(), + #[cfg(crashdump)] + entry_point: None, + }; + + let mut vm = set_up_hypervisor_partition( + gshm, + &config, + stack_top_gva, + page_size, + #[cfg(any(crashdump, gdb))] + rt_cfg, + load_info, + )?; + + let seed = { + let mut rng = rand::rng(); + rng.random::() + }; + let peb_addr = RawPtr::from(u64::try_from(hshm.layout.peb_address())?); + + #[cfg(gdb)] + let dbg_mem_access_hdl = Arc::new(Mutex::new(hshm.clone())); + + vm.initialise( + peb_addr, + seed, + page_size as u32, + &mut hshm, + &host_funcs, + None, + #[cfg(gdb)] + dbg_mem_access_hdl, + ) + .map_err(crate::hypervisor::hyperlight_vm::HyperlightVmError::Initialize)?; + + // If the snapshot was taken from an already-initialized guest + // (NextAction::Call), apply the captured special registers so + // the guest resumes in the correct CPU state. + #[cfg(not(feature = "i686-guest"))] + if matches!(snapshot.entrypoint(), super::snapshot::NextAction::Call(_)) { + let sregs = snapshot.sregs().cloned().unwrap_or_else(|| { + crate::hypervisor::regs::CommonSpecialRegisters::standard_64bit_defaults( + hshm.layout.get_pt_base_gpa(), + ) + }); + vm.apply_sregs(hshm.layout.get_pt_base_gpa(), &sregs) + .map_err(|e| { + crate::HyperlightError::HyperlightVmError( + crate::hypervisor::hyperlight_vm::HyperlightVmError::Restore(e), + ) + })?; + } + + #[cfg(gdb)] + let dbg_mem_wrapper = Arc::new(Mutex::new(hshm.clone())); + + let mut sbox = MultiUseSandbox::from_uninit( + host_funcs, + hshm, + vm, + #[cfg(gdb)] + dbg_mem_wrapper, + ); + // Use the snapshot's sandbox_id so that restore() back to this + // snapshot is permitted. 
The id is process-local and never + // persisted to disk: `Snapshot::from_file` assigns a fresh id + // on every load, so two `from_file` calls of the same path + // yield restore-incompatible sandboxes (which is the intended + // safer default). Sandboxes built from clones of the same + // in-memory `Arc` share the id and are mutually + // restore-compatible. + sbox.id = snapshot.sandbox_id(); + Ok(sbox) + } + /// Creates a snapshot of the sandbox's current memory state. /// /// The snapshot is tied to this specific sandbox instance and can only be @@ -949,6 +1138,48 @@ impl std::fmt::Debug for MultiUseSandbox { } } +/// Emit a warning for each memory-layout field in `caller` that +/// disagrees with `snapshot`. Used by [`MultiUseSandbox::from_snapshot`] +/// to surface ignored caller-supplied layout values, since those +/// fields are always taken from the snapshot. +fn warn_on_layout_override( + caller: &crate::sandbox::SandboxConfiguration, + snapshot: &crate::mem::layout::SandboxMemoryLayout, +) { + let mismatches: &[(&str, u64, u64)] = &[ + ( + "input_data_size", + caller.get_input_data_size() as u64, + snapshot.input_data_size as u64, + ), + ( + "output_data_size", + caller.get_output_data_size() as u64, + snapshot.output_data_size as u64, + ), + ( + "heap_size", + caller.get_heap_size(), + snapshot.heap_size as u64, + ), + ( + "scratch_size", + caller.get_scratch_size() as u64, + snapshot.get_scratch_size() as u64, + ), + ]; + for (name, supplied, snap) in mismatches { + if supplied != snap { + tracing::warn!( + "from_snapshot ignoring caller-supplied {} ({}); using snapshot value ({})", + name, + supplied, + snap + ); + } + } +} + #[cfg(test)] mod tests { use std::sync::{Arc, Barrier}; From 34d1d4561a4bf9c446b879d4523ebab0bb8a5b12 Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Fri, 1 May 2026 16:01:13 -0700 Subject: [PATCH 11/15] Add snapshot file format with persistence and load Signed-off-by: Ludvig 
Liljenberg <4257730+ludfjig@users.noreply.github.com> --- Cargo.lock | 21 + src/hyperlight_host/Cargo.toml | 1 + .../src/sandbox/snapshot/file.rs | 1031 +++++++++++++++++ .../src/sandbox/snapshot/mod.rs | 3 + 4 files changed, 1056 insertions(+) create mode 100644 src/hyperlight_host/src/sandbox/snapshot/file.rs diff --git a/Cargo.lock b/Cargo.lock index 4b5b9a13a..cd8a9c387 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -328,6 +328,26 @@ version = "3.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" +[[package]] +name = "bytemuck" +version = "1.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" +dependencies = [ + "bytemuck_derive", +] + +[[package]] +name = "bytemuck_derive" +version = "1.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9abbd1bc6865053c427f7198e6af43bfdedc55ab791faed4fbd361d789575ff" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "bytes" version = "1.11.1" @@ -1528,6 +1548,7 @@ dependencies = [ "bitflags 2.11.1", "blake3", "built", + "bytemuck", "cfg-if", "cfg_aliases", "chrono", diff --git a/src/hyperlight_host/Cargo.toml b/src/hyperlight_host/Cargo.toml index abc24fab8..e8343a7ef 100644 --- a/src/hyperlight_host/Cargo.toml +++ b/src/hyperlight_host/Cargo.toml @@ -51,6 +51,7 @@ metrics = "0.24.3" serde_json = "1.0" elfcore = { version = "2.0", optional = true } uuid = { version = "1.23.1", features = ["v4"] } +bytemuck = { version = "1.16", features = ["derive"] } [target.'cfg(windows)'.dependencies] windows = { version = "0.62", features = [ diff --git a/src/hyperlight_host/src/sandbox/snapshot/file.rs b/src/hyperlight_host/src/sandbox/snapshot/file.rs new file mode 100644 index 000000000..8c06decdc --- /dev/null +++ b/src/hyperlight_host/src/sandbox/snapshot/file.rs @@ -0,0 +1,1031 @@ 
+/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +//! Snapshot file format: serialization, deserialization, and the +//! associated `Snapshot::to_file` / `Snapshot::from_file` methods. +//! +//! The on-disk byte layout is whatever the four `#[repr(C)]` POD structs +//! below declare: `RawPreamble`, `RawHeaderV1`, `RawSregs`, `RawHashes`. +//! Each derives `bytemuck::Pod` and `bytemuck::Zeroable`, which the +//! derive macro proves at compile time means there is no padding and +//! every bit pattern is a valid value of the struct's fields. There +//! are no separate numeric offsets to keep in sync with the code. +//! +//! All multi-byte integers are little-endian (gated by a +//! `compile_error!` on big-endian targets below). +//! +//! The fixed-position prefix is followed by an optional host-function +//! flatbuffer of length `host_funcs_size`, then zero padding to the +//! next PAGE_SIZE boundary, then the memory blob (the mmap target). +//! The memory blob's file offset is recorded in `memory_offset` and +//! is always page-aligned. A PAGE_SIZE trailing zero region follows +//! the blob (Windows guard-page backing). +//! +//! ```text +//! +----------------------+ +//! | RawPreamble | magic "HLS\0" + format_version +//! +----------------------+ +//! | RawHeaderV1 | arch, abi_version, stack_top_gva, +//! | | entrypoint tag+addr, layout fields, +//! | | memory_size, memory_offset, has_sregs, +//! | | hypervisor, host_funcs_size +//!
+----------------------+ +//! | RawSregs | segments, tables, control regs, bitmap. +//! | | Always written; ignored on load if has_sregs == 0. +//! +----------------------+ +//! | RawHashes | header_hash, blob_hash (32 bytes each) +//! +----------------------+ +//! | host_funcs blob | HostFunctionDetails flatbuffer, host_funcs_size bytes (0 if absent) +//! +----------------------+ +//! | zero padding | pads to next PAGE_SIZE boundary +//! +----------------------+ <- memory_offset +//! | memory blob | memory_size bytes (mmap target) +//! +----------------------+ +//! | trailing PAGE_SIZE | Windows guard-page backing; ignored on Linux +//! +----------------------+ +//! ``` +//! +//! `memory_offset == align_up(FIXED_PREFIX_SIZE + host_funcs_size, +//! PAGE_SIZE)`, where `FIXED_PREFIX_SIZE = sizeof(RawPreamble) + +//! sizeof(RawHeaderV1) + sizeof(RawSregs) + sizeof(RawHashes)`. With +//! no host functions this lands at exactly PAGE_SIZE. + +use hyperlight_common::flatbuffer_wrappers::host_function_details::HostFunctionDetails; +use hyperlight_common::vmem::PAGE_SIZE; + +use super::{NextAction, SANDBOX_CONFIGURATION_COUNTER, Snapshot}; +use crate::hypervisor::regs::CommonSpecialRegisters; +use crate::mem::layout::SandboxMemoryLayout; +use crate::mem::shared_mem::{ReadonlySharedMemory, SharedMemory}; + +const SNAPSHOT_MAGIC: &[u8; 4] = b"HLS\0"; + +/// ABI version for the snapshot memory blob. This must be bumped +/// whenever a change affects the contents or interpretation of the +/// memory blob - i.e., the contract between the host runtime and +/// the guest binary that determines how snapshot memory is produced +/// and consumed.
+/// +/// Examples of changes that require a bump: +/// +/// - Memory layout: `SandboxMemoryLayout` offset computation, memory +/// region definitions, page table format +/// - Host-guest interface: PEB struct layout, calling convention, +/// dispatch mechanism, input/output buffer format +/// - Guest init state: entry point setup, GDT/IDT/TSS initialization, +/// or any startup code in `hyperlight_guest_bin` whose results are +/// captured in the snapshot (e.g. sregs) +/// +/// Unlike `FormatVersion` (which covers the file header byte layout +/// and may allow conversion between versions), an ABI mismatch means +/// the memory blob is incompatible and the snapshot must be +/// regenerated from the guest binary. +const SNAPSHOT_ABI_VERSION: u32 = 1; + +/// Maximum size of the host-functions flatbuffer blob in a snapshot +/// file. Bounds the allocation done at load time before the +/// flatbuffer is parsed. The legitimate size for hundreds of host +/// functions is well under this cap. +const MAX_HOST_FUNCS_SIZE: u64 = 1024 * 1024; + +/// Snapshot file format version. +#[derive(Copy, Clone, Debug, PartialEq)] +enum FormatVersion { + V1 = 1, +} + +impl FormatVersion { + fn from_u32(v: u32) -> crate::Result { + match v { + 1 => Ok(Self::V1), + _ => Err(crate::new_error!( + "unsupported snapshot format version {} (this build supports V1). \ + The file header layout may be convertible to the current format", + v + )), + } + } +} + +/// Architecture tag for snapshot files. 
+#[derive(Copy, Clone, Debug, PartialEq)] +enum ArchTag { + X86_64 = 1, + Aarch64 = 2, + I686 = 3, +} + +impl ArchTag { + fn current() -> Self { + #[cfg(feature = "i686-guest")] + { + Self::I686 + } + #[cfg(all(not(feature = "i686-guest"), target_arch = "x86_64"))] + { + Self::X86_64 + } + #[cfg(all(not(feature = "i686-guest"), target_arch = "aarch64"))] + { + Self::Aarch64 + } + } + + fn from_u32(v: u32) -> crate::Result { + match v { + 1 => Ok(Self::X86_64), + 2 => Ok(Self::Aarch64), + 3 => Ok(Self::I686), + _ => Err(crate::new_error!("unknown architecture tag: {}", v)), + } + } +} + +/// Hypervisor tag for snapshot files. +/// +/// Segment register hidden-cache fields (unusable, type_, granularity, +/// db) differ between hypervisors for the same architectural state. +/// Restoring sregs captured on one hypervisor into another may be +/// rejected or produce subtly wrong behavior. The tag ensures +/// snapshots are only loaded on the same hypervisor that created them. +#[derive(Copy, Clone, Debug, PartialEq)] +pub(super) enum HypervisorTag { + Kvm = 1, + Mshv = 2, + Whp = 3, +} + +impl HypervisorTag { + pub(super) fn current() -> Option { + #[allow(unused_imports)] + use crate::hypervisor::virtual_machine::HypervisorType; + use crate::hypervisor::virtual_machine::get_available_hypervisor; + + match get_available_hypervisor() { + #[cfg(kvm)] + Some(HypervisorType::Kvm) => Some(Self::Kvm), + #[cfg(mshv3)] + Some(HypervisorType::Mshv) => Some(Self::Mshv), + #[cfg(target_os = "windows")] + Some(HypervisorType::Whp) => Some(Self::Whp), + None => None, + } + } + + fn from_u64(v: u64) -> crate::Result { + match v { + 1 => Ok(Self::Kvm), + 2 => Ok(Self::Mshv), + 3 => Ok(Self::Whp), + _ => Err(crate::new_error!("unknown hypervisor tag: {}", v)), + } + } + + fn name(&self) -> &'static str { + match self { + Self::Kvm => "KVM", + Self::Mshv => "MSHV", + Self::Whp => "WHP", + } + } +} + +// All raw header structs use little-endian on-disk encoding. 
Both +// supported architectures (x86_64, aarch64) are little-endian, so we +// just refuse to compile on a hypothetical big-endian target rather +// than byte-swap on every field access. +#[cfg(not(target_endian = "little"))] +compile_error!("snapshot file format requires a little-endian target"); + +/// Memory layout fields stored in the snapshot file (validated form). +/// These are the primary inputs needed to reconstruct a `SandboxMemoryLayout`. +struct LayoutFields { + input_data_size: usize, + output_data_size: usize, + heap_size: usize, + code_size: usize, + init_data_size: usize, + init_data_permissions: Option, + scratch_size: usize, + snapshot_size: usize, + pt_size: Option, +} + +/// Fixed preamble at the start of every snapshot file (validated form). +/// Never changes across format versions so it can always be read to +/// determine which version-specific header follows. +struct SnapshotPreamble { + magic: [u8; 4], + format_version: FormatVersion, +} + +/// V1 snapshot header (validated form). +struct SnapshotHeaderV1 { + arch: ArchTag, + abi_version: u32, + stack_top_gva: u64, + entrypoint: NextAction, + layout: LayoutFields, + memory_size: usize, + memory_offset: u64, + has_sregs: bool, + hypervisor: HypervisorTag, + /// Byte length of the host-function-details flatbuffer that + /// follows the fixed header. `0` means no host functions are + /// stored. + host_funcs_size: u64, +} + +// --- Raw POD on-disk structs --- +// +// These mirror the bytes on disk one-for-one. Reading and writing +// goes through `bytemuck`; field-level validation lives in `From` / +// `TryFrom` impls below. 
+ +#[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)] +#[repr(C)] +pub(super) struct RawPreamble { + pub(super) magic: [u8; 4], + pub(super) format_version: u32, +} + +#[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)] +#[repr(C)] +pub(super) struct RawHeaderV1 { + pub(super) arch: u32, + pub(super) abi_version: u32, + pub(super) stack_top_gva: u64, + pub(super) entrypoint_tag: u64, + pub(super) entrypoint_addr: u64, + pub(super) input_data_size: u64, + pub(super) output_data_size: u64, + pub(super) heap_size: u64, + pub(super) code_size: u64, + pub(super) init_data_size: u64, + pub(super) init_data_permissions: u64, + pub(super) scratch_size: u64, + pub(super) snapshot_size: u64, + pub(super) pt_size: u64, + pub(super) memory_size: u64, + pub(super) memory_offset: u64, + pub(super) has_sregs: u64, + pub(super) hypervisor: u64, + pub(super) host_funcs_size: u64, +} + +#[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)] +#[repr(C)] +struct RawSegmentRegister { + base: u64, + limit: u64, + selector: u64, + type_: u64, + present: u64, + dpl: u64, + db: u64, + s: u64, + l: u64, + g: u64, + avl: u64, + unusable: u64, + padding: u64, +} + +#[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)] +#[repr(C)] +struct RawTableRegister { + base: u64, + limit: u64, +} + +#[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)] +#[repr(C)] +struct RawSregs { + cs: RawSegmentRegister, + ds: RawSegmentRegister, + es: RawSegmentRegister, + fs: RawSegmentRegister, + gs: RawSegmentRegister, + ss: RawSegmentRegister, + tr: RawSegmentRegister, + ldt: RawSegmentRegister, + gdt: RawTableRegister, + idt: RawTableRegister, + cr0: u64, + cr2: u64, + cr3: u64, + cr4: u64, + cr8: u64, + efer: u64, + apic_base: u64, + interrupt_bitmap: [u64; 4], +} + +/// Integrity hashes. Sits between the fixed structural prefix +/// (preamble + header + sregs) and the variable-length +/// host-functions blob, on the fixed/variable boundary. 
+/// +/// `header_hash` covers `preamble || header || sregs || +/// host_funcs_blob` (everything except the hashes themselves and +/// the memory blob). Always verified. +/// +/// `blob_hash` covers the memory blob. Skipped by +/// `from_file_unchecked`. `blob_hash` is not among the bytes covered +/// by `header_hash`, so when that check is skipped nothing ties the +/// memory blob to the always-checked header hash. +#[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)] +#[repr(C)] +pub(super) struct RawHashes { + pub(super) header_hash: [u8; 32], + pub(super) blob_hash: [u8; 32], +} + +/// Total byte length of the fixed-position prefix of a V1 snapshot +/// file (preamble + header + sregs + hashes). The `bytemuck::Pod` +/// derives on the raw structs already guarantee they have no +/// padding, so this is exactly the on-disk byte count. +pub(super) const FIXED_PREFIX_SIZE: usize = std::mem::size_of::() + + std::mem::size_of::() + + std::mem::size_of::() + + std::mem::size_of::(); + +// --- Raw <-> rich conversions --- + +impl From<&SnapshotPreamble> for RawPreamble { + fn from(p: &SnapshotPreamble) -> Self { + Self { + magic: p.magic, + format_version: p.format_version as u32, + } + } +} + +impl TryFrom for SnapshotPreamble { + type Error = crate::HyperlightError; + fn try_from(raw: RawPreamble) -> crate::Result { + if &raw.magic != SNAPSHOT_MAGIC { + return Err(crate::new_error!( + "invalid snapshot file: bad magic bytes (expected {:?}, got {:?})", + SNAPSHOT_MAGIC, + raw.magic + )); + } + Ok(Self { + magic: raw.magic, + format_version: FormatVersion::from_u32(raw.format_version)?, + }) + } +} + +impl From<&SnapshotHeaderV1> for RawHeaderV1 { + fn from(h: &SnapshotHeaderV1) -> Self { + let (entrypoint_tag, entrypoint_addr) = match h.entrypoint { + NextAction::Initialise(a) => (0u64, a), + NextAction::Call(a) => (1u64, a), + #[cfg(test)] + NextAction::None => (u64::MAX, 0), + }; + let l = &h.layout; + Self { + arch: h.arch as u32, + abi_version: h.abi_version, +
stack_top_gva: h.stack_top_gva, + entrypoint_tag, + entrypoint_addr, + input_data_size: l.input_data_size as u64, + output_data_size: l.output_data_size as u64, + heap_size: l.heap_size as u64, + code_size: l.code_size as u64, + init_data_size: l.init_data_size as u64, + init_data_permissions: l.init_data_permissions.map_or(0, |f| f.bits() as u64), + scratch_size: l.scratch_size as u64, + snapshot_size: l.snapshot_size as u64, + pt_size: l.pt_size.map_or(0, |v| v as u64), + memory_size: h.memory_size as u64, + memory_offset: h.memory_offset, + has_sregs: if h.has_sregs { 1 } else { 0 }, + hypervisor: h.hypervisor as u64, + host_funcs_size: h.host_funcs_size, + } + } +} + +impl TryFrom for SnapshotHeaderV1 { + type Error = crate::HyperlightError; + fn try_from(raw: RawHeaderV1) -> crate::Result { + use crate::mem::memory_region::MemoryRegionFlags; + + let arch = ArchTag::from_u32(raw.arch)?; + let entrypoint = match raw.entrypoint_tag { + 0 => NextAction::Initialise(raw.entrypoint_addr), + 1 => NextAction::Call(raw.entrypoint_addr), + _ => { + return Err(crate::new_error!( + "invalid entrypoint tag in snapshot: {}", + raw.entrypoint_tag + )); + } + }; + let init_data_permissions = if raw.init_data_permissions == 0 { + None + } else { + // Field is `u64` on disk for layout uniformity but the + // flag set is `u32`. Reject any high bits before + // narrowing so we don't silently truncate them. + let bits = u32::try_from(raw.init_data_permissions).map_err(|_| { + crate::new_error!( + "snapshot init_data_permissions ({:#x}) exceeds u32 range", + raw.init_data_permissions + ) + })?; + Some(MemoryRegionFlags::from_bits(bits).ok_or_else(|| { + crate::new_error!("snapshot contains unknown memory region flags: {:#x}", bits) + })?) 
+ }; + let pt_size = if raw.pt_size == 0 { + None + } else { + Some(raw.pt_size as usize) + }; + let has_sregs = match raw.has_sregs { + 0 => false, + 1 => true, + other => { + return Err(crate::new_error!( + "snapshot has_sregs must be 0 or 1, got {}", + other + )); + } + }; + let hypervisor = HypervisorTag::from_u64(raw.hypervisor)?; + Ok(Self { + arch, + abi_version: raw.abi_version, + stack_top_gva: raw.stack_top_gva, + entrypoint, + layout: LayoutFields { + input_data_size: raw.input_data_size as usize, + output_data_size: raw.output_data_size as usize, + heap_size: raw.heap_size as usize, + code_size: raw.code_size as usize, + init_data_size: raw.init_data_size as usize, + init_data_permissions, + scratch_size: raw.scratch_size as usize, + snapshot_size: raw.snapshot_size as usize, + pt_size, + }, + memory_size: raw.memory_size as usize, + memory_offset: raw.memory_offset, + has_sregs, + hypervisor, + host_funcs_size: raw.host_funcs_size, + }) + } +} + +impl SnapshotHeaderV1 { + /// File-bound and environment validation: checks that a + /// well-formed header (already produced by `TryFrom`) is also + /// consistent with the actual file size and the runtime + /// environment (architecture, hypervisor, ABI version). + fn validate_against_file(&self, file_len: u64) -> crate::Result<()> { + if self.arch != ArchTag::current() { + return Err(crate::new_error!( + "snapshot architecture mismatch: expected {:?}, got {:?}", + ArchTag::current(), + self.arch + )); + } + if self.abi_version != SNAPSHOT_ABI_VERSION { + return Err(crate::new_error!( + "snapshot ABI version mismatch: file has ABI version {}, \ + but this build expects {}. 
The snapshot must be regenerated \ + from the guest binary.", + self.abi_version, + SNAPSHOT_ABI_VERSION + )); + } + let current_hv = HypervisorTag::current() + .ok_or_else(|| crate::new_error!("no hypervisor available to load snapshot"))?; + if self.hypervisor != current_hv { + return Err(crate::new_error!( + "snapshot hypervisor mismatch: file was created on {} but the current hypervisor is {}.", + self.hypervisor.name(), + current_hv.name() + )); + } + + if self.memory_offset == 0 || self.memory_offset % PAGE_SIZE as u64 != 0 { + return Err(crate::new_error!( + "invalid snapshot memory_offset {} (must be a non-zero multiple of PAGE_SIZE)", + self.memory_offset + )); + } + + // host_funcs region must fit between the fixed prefix and + // the page-aligned memory_offset. + if self.host_funcs_size > MAX_HOST_FUNCS_SIZE { + return Err(crate::new_error!( + "snapshot host_funcs_size ({}) exceeds maximum ({})", + self.host_funcs_size, + MAX_HOST_FUNCS_SIZE + )); + } + let after_hf = (FIXED_PREFIX_SIZE as u64) + .checked_add(self.host_funcs_size) + .ok_or_else(|| { + crate::new_error!( + "snapshot host_funcs_size ({}) overflows file offset", + self.host_funcs_size + ) + })?; + if after_hf > self.memory_offset { + return Err(crate::new_error!( + "snapshot host_funcs_size ({}) does not fit between fixed prefix and memory_offset ({})", + self.host_funcs_size, + self.memory_offset + )); + } + // host_funcs region must fit in the file too (defends + // against memory_offset being extended past the file end by + // a malicious header). + let max_host_funcs = file_len.saturating_sub(FIXED_PREFIX_SIZE as u64); + if self.host_funcs_size > max_host_funcs { + return Err(crate::new_error!( + "snapshot host_funcs_size ({}) exceeds remaining file bytes ({})", + self.host_funcs_size, + max_host_funcs + )); + } + + // Memory blob plus the trailing PAGE_SIZE guard must fit in + // the file. 
+ let blob_end = self + .memory_offset + .checked_add(self.memory_size as u64) + .and_then(|n| n.checked_add(PAGE_SIZE as u64)) + .ok_or_else(|| crate::new_error!("snapshot memory blob bounds overflow"))?; + if blob_end > file_len { + return Err(crate::new_error!( + "snapshot memory blob extends past the end of the file (need {} bytes, file has {})", + blob_end, + file_len + )); + } + + // `entrypoint` and `has_sregs` must agree: a `Call` snapshot + // is mid-execution and therefore has a captured sregs state, + // while an `Initialise` snapshot has not yet run on the vCPU + // and has none. Anything else is a malformed header. + let call_entry = matches!(self.entrypoint, NextAction::Call(_)); + if call_entry != self.has_sregs { + return Err(crate::new_error!( + "snapshot entrypoint and has_sregs disagree: entrypoint requires sregs={}, has_sregs={}", + call_entry, + self.has_sregs + )); + } + + Ok(()) + } +} + +impl From<&CommonSpecialRegisters> for RawSregs { + fn from(s: &CommonSpecialRegisters) -> Self { + let seg = |r: &crate::hypervisor::regs::CommonSegmentRegister| RawSegmentRegister { + base: r.base, + limit: r.limit as u64, + selector: r.selector as u64, + type_: r.type_ as u64, + present: r.present as u64, + dpl: r.dpl as u64, + db: r.db as u64, + s: r.s as u64, + l: r.l as u64, + g: r.g as u64, + avl: r.avl as u64, + unusable: r.unusable as u64, + padding: r.padding as u64, + }; + let tab = |r: &crate::hypervisor::regs::CommonTableRegister| RawTableRegister { + base: r.base, + limit: r.limit as u64, + }; + Self { + cs: seg(&s.cs), + ds: seg(&s.ds), + es: seg(&s.es), + fs: seg(&s.fs), + gs: seg(&s.gs), + ss: seg(&s.ss), + tr: seg(&s.tr), + ldt: seg(&s.ldt), + gdt: tab(&s.gdt), + idt: tab(&s.idt), + cr0: s.cr0, + cr2: s.cr2, + cr3: s.cr3, + cr4: s.cr4, + cr8: s.cr8, + efer: s.efer, + apic_base: s.apic_base, + interrupt_bitmap: s.interrupt_bitmap, + } + } +} + +impl From for CommonSpecialRegisters { + fn from(r: RawSregs) -> Self { + use 
crate::hypervisor::regs::{CommonSegmentRegister, CommonTableRegister}; + // Truncating casts are intentional and lossless on + // well-formed input: the original fields have those widths + // and were widened to u64 only for on-disk uniformity. + let seg = |s: RawSegmentRegister| CommonSegmentRegister { + base: s.base, + limit: s.limit as u32, + selector: s.selector as u16, + type_: s.type_ as u8, + present: s.present as u8, + dpl: s.dpl as u8, + db: s.db as u8, + s: s.s as u8, + l: s.l as u8, + g: s.g as u8, + avl: s.avl as u8, + unusable: s.unusable as u8, + padding: s.padding as u8, + }; + let tab = |t: RawTableRegister| CommonTableRegister { + base: t.base, + limit: t.limit as u16, + }; + Self { + cs: seg(r.cs), + ds: seg(r.ds), + es: seg(r.es), + fs: seg(r.fs), + gs: seg(r.gs), + ss: seg(r.ss), + tr: seg(r.tr), + ldt: seg(r.ldt), + gdt: tab(r.gdt), + idt: tab(r.idt), + cr0: r.cr0, + cr2: r.cr2, + cr3: r.cr3, + cr4: r.cr4, + cr8: r.cr8, + efer: r.efer, + apic_base: r.apic_base, + interrupt_bitmap: r.interrupt_bitmap, + } + } +} + +impl Snapshot { + /// Save this snapshot to a file on disk. + /// + /// The file format uses a page-aligned memory blob that can be + /// mmapped directly on load for zero-copy instantiation. + /// + /// If a file already exists at `path`, it is truncated and + /// overwritten. + /// + /// # Portability + /// + /// Snapshot files are **not portable** across CPU architectures, + /// hypervisors, or operating systems. All three are checked at + /// load time and a mismatch produces an error. + pub fn to_file(&self, path: impl AsRef) -> crate::Result<()> { + use std::io::{BufWriter, Write}; + + let file = std::fs::File::create(path.as_ref()) + .map_err(|e| crate::new_error!("failed to create snapshot file: {}", e))?; + let mut w = BufWriter::new(file); + + let layout = &self.layout; + + // Serialize host-function metadata up-front so we can compute + // `memory_offset` (which depends on `host_funcs_size`) before + // writing the header. 
+ let host_funcs_bytes: Vec = if self + .host_functions + .host_functions + .as_ref() + .is_some_and(|v| !v.is_empty()) + { + (&self.host_functions).try_into().map_err(|e| { + crate::new_error!("failed to serialize host function details: {:?}", e) + })? + } else { + Vec::new() + }; + + // The memory blob sits immediately after the host-function + // blob, page-aligned. With no host functions this lands at + // exactly PAGE_SIZE. + let memory_offset = + (FIXED_PREFIX_SIZE + host_funcs_bytes.len()).next_multiple_of(PAGE_SIZE) as u64; + + let preamble = SnapshotPreamble { + magic: *SNAPSHOT_MAGIC, + format_version: FormatVersion::V1, + }; + let v1 = SnapshotHeaderV1 { + arch: ArchTag::current(), + abi_version: SNAPSHOT_ABI_VERSION, + stack_top_gva: self.stack_top_gva, + entrypoint: self.entrypoint, + layout: LayoutFields { + input_data_size: layout.input_data_size, + output_data_size: layout.output_data_size, + heap_size: layout.heap_size, + code_size: layout.code_size, + init_data_size: layout.init_data_size, + init_data_permissions: layout.init_data_permissions, + scratch_size: layout.get_scratch_size(), + snapshot_size: layout.snapshot_size, + pt_size: layout.pt_size, + }, + memory_size: self.memory.mem_size(), + memory_offset, + has_sregs: self.sregs.is_some(), + hypervisor: HypervisorTag::current() + .ok_or_else(|| crate::new_error!("no hypervisor available to tag snapshot"))?, + host_funcs_size: host_funcs_bytes.len() as u64, + }; + let sregs = self.sregs.unwrap_or_default(); + + let raw_preamble = RawPreamble::from(&preamble); + let raw_header = RawHeaderV1::from(&v1); + let raw_sregs = RawSregs::from(&sregs); + + // `blob_hash` covers the memory blob. `header_hash` covers + // everything else of integrity interest: preamble, header, + // sregs, and the host_funcs blob. Note that `blob_hash` is + // not fed into the `header_hash` hasher below, so the + // always-checked header hash does not bind the header to a + // particular memory blob.
+ let blob_hash: [u8; 32] = blake3::hash(self.memory.as_slice()).into(); + let mut hasher = blake3::Hasher::new(); + hasher.update(bytemuck::bytes_of(&raw_preamble)); + hasher.update(bytemuck::bytes_of(&raw_header)); + hasher.update(bytemuck::bytes_of(&raw_sregs)); + hasher.update(&host_funcs_bytes); + let header_hash: [u8; 32] = hasher.finalize().into(); + let raw_hashes = RawHashes { + header_hash, + blob_hash, + }; + + w.write_all(bytemuck::bytes_of(&raw_preamble)) + .map_err(|e| crate::new_error!("snapshot write error: {}", e))?; + w.write_all(bytemuck::bytes_of(&raw_header)) + .map_err(|e| crate::new_error!("snapshot write error: {}", e))?; + w.write_all(bytemuck::bytes_of(&raw_sregs)) + .map_err(|e| crate::new_error!("snapshot write error: {}", e))?; + w.write_all(bytemuck::bytes_of(&raw_hashes)) + .map_err(|e| crate::new_error!("snapshot write error: {}", e))?; + + // Host function metadata (variable length, not mmapped). + if !host_funcs_bytes.is_empty() { + w.write_all(&host_funcs_bytes) + .map_err(|e| crate::new_error!("snapshot write error: {}", e))?; + } + + // Zero-pad up to the page-aligned memory_offset so the blob + // is mmap-aligned in the file. + let pre_blob_pos = FIXED_PREFIX_SIZE + host_funcs_bytes.len(); + debug_assert!(pre_blob_pos <= memory_offset as usize); + w.write_all(&vec![0u8; memory_offset as usize - pre_blob_pos]) + .map_err(|e| crate::new_error!("snapshot write error: {}", e))?; + + w.write_all(self.memory.as_slice()) + .map_err(|e| crate::new_error!("snapshot write error: {}", e))?; + + // Trailing PAGE_SIZE padding: Windows read-only file mappings + // cannot extend beyond the file's actual size, so the file must + // contain backing bytes for the trailing guard page used by + // ReadonlySharedMemory::from_file_windows. Linux ignores this + // padding (its guard pages come from an anonymous mmap reservation). 
+ w.write_all(&[0u8; PAGE_SIZE]) + .map_err(|e| crate::new_error!("snapshot write error: {}", e))?; + + w.flush() + .map_err(|e| crate::new_error!("snapshot write error: {}", e))?; + + Ok(()) + } + + /// Load a snapshot from a file on disk. + /// + /// The memory blob is mapped directly from the file for zero-copy + /// loading using platform-specific CoW mechanisms. + /// + /// Returns an error if the file is from a different CPU + /// architecture, hypervisor, or OS than this host. See + /// [`Snapshot::to_file`] for the full portability story. + /// + /// Note: ELF unwind info (`LoadInfo`) is not persisted in the + /// snapshot file, so the `mem_profile` feature will not have + /// accurate profiling data for sandboxes created from disk + /// snapshots. + /// + /// # File-mutation hazard + /// + /// The file at `path` must not be modified, truncated, renamed + /// over, or deleted while the returned `Snapshot` (or any + /// [`MultiUseSandbox`](crate::MultiUseSandbox) constructed from + /// it) is still alive. + pub fn from_file(path: impl AsRef) -> crate::Result { + Self::from_file_impl(path, true) + } + + /// Load a snapshot from a file on disk without verifying the + /// memory blob's content hash. The fixed-prefix integrity check + /// (preamble + header + sregs + host_funcs) is still performed. + /// + /// This is faster for large snapshots in trusted environments + /// where blob integrity is guaranteed by other means. All other + /// portability checks (architecture, hypervisor, OS) still + /// apply. See [`Snapshot::to_file`] for details. + /// + /// # File-mutation hazard + /// + /// The file at `path` must not be modified, truncated, renamed + /// over, or deleted while the returned `Snapshot` (or any + /// [`MultiUseSandbox`](crate::MultiUseSandbox) constructed from + /// it) is still alive. 
+ pub fn from_file_unchecked(path: impl AsRef) -> crate::Result { + Self::from_file_impl(path, false) + } + + fn from_file_impl( + path: impl AsRef, + verify_blob_hash: bool, + ) -> crate::Result { + use std::io::BufReader; + + let file = std::fs::File::open(path.as_ref()) + .map_err(|e| crate::new_error!("failed to open snapshot file: {}", e))?; + let file_len = file + .metadata() + .map_err(|e| crate::new_error!("snapshot stat error: {}", e))? + .len(); + let mut r = BufReader::new(&file); + + // Phase 1: read raw bytes into POD structs. + use std::io::Read; + let mut preamble_buf = [0u8; std::mem::size_of::()]; + r.read_exact(&mut preamble_buf) + .map_err(|e| crate::new_error!("snapshot read error: {}", e))?; + let raw_preamble: RawPreamble = bytemuck::pod_read_unaligned(&preamble_buf); + // Validate magic + format version. Future format versions + // would dispatch here on `preamble.format_version`. + let preamble = SnapshotPreamble::try_from(raw_preamble)?; + let mut header_buf = [0u8; std::mem::size_of::()]; + let raw_v1: RawHeaderV1 = match preamble.format_version { + FormatVersion::V1 => { + r.read_exact(&mut header_buf) + .map_err(|e| crate::new_error!("snapshot read error: {}", e))?; + bytemuck::pod_read_unaligned(&header_buf) + } + }; + let mut sregs_buf = [0u8; std::mem::size_of::()]; + r.read_exact(&mut sregs_buf) + .map_err(|e| crate::new_error!("snapshot read error: {}", e))?; + let raw_sregs: RawSregs = bytemuck::pod_read_unaligned(&sregs_buf); + + let mut hashes_buf = [0u8; std::mem::size_of::()]; + r.read_exact(&mut hashes_buf) + .map_err(|e| crate::new_error!("snapshot read error: {}", e))?; + let hashes: RawHashes = bytemuck::pod_read_unaligned(&hashes_buf); + + // Phase 2: parse + validate against the file size and the + // current runtime environment. + let hdr = SnapshotHeaderV1::try_from(raw_v1)?; + hdr.validate_against_file(file_len)?; + + // Read the optional host-function-details blob into a + // buffer. 
It is needed both for `header_hash` verification + // and for the flatbuffer parse below. + let mut host_funcs_buf = vec![0u8; hdr.host_funcs_size as usize]; + if !host_funcs_buf.is_empty() { + r.read_exact(&mut host_funcs_buf) + .map_err(|e| crate::new_error!("snapshot read error: {}", e))?; + } + + // Phase 3: verify `header_hash` over (preamble || header || + // sregs || host_funcs). Always performed, regardless of + // `verify_blob_hash`. Cheap and closes the malicious-header + // surface. The hashes themselves sit on disk between sregs + // and host_funcs and are not part of what is hashed. + { + let mut hasher = blake3::Hasher::new(); + hasher.update(&preamble_buf); + hasher.update(&header_buf); + hasher.update(&sregs_buf); + hasher.update(&host_funcs_buf); + let computed: [u8; 32] = hasher.finalize().into(); + if computed != hashes.header_hash { + return Err(crate::new_error!( + "snapshot header_hash mismatch: file may be corrupted or tampered" + )); + } + } + + // Reconstruct layout + let l = &hdr.layout; + let mut cfg = crate::sandbox::SandboxConfiguration::default(); + cfg.set_input_data_size(l.input_data_size); + cfg.set_output_data_size(l.output_data_size); + cfg.set_heap_size(l.heap_size as u64); + cfg.set_scratch_size(l.scratch_size); + let mut layout = + SandboxMemoryLayout::new(cfg, l.code_size, l.init_data_size, l.init_data_permissions)?; + // Order matters: `set_pt_size` mutates `snapshot_size` + // internally, so call it before `set_snapshot_size`. + if let Some(pt) = l.pt_size { + layout.set_pt_size(pt)?; + } + layout.set_snapshot_size(l.snapshot_size); + + let sregs = if hdr.has_sregs { + Some(CommonSpecialRegisters::from(raw_sregs)) + } else { + None + }; + + let host_functions = if !host_funcs_buf.is_empty() { + HostFunctionDetails::try_from(host_funcs_buf.as_slice()) + .map_err(|e| crate::new_error!("failed to parse host function details: {:?}", e))? 
+ } else { + HostFunctionDetails { + host_functions: None, + } + }; + + // Map the memory blob directly from the file (zero-copy CoW). + // When the blob contains a PT tail (memory_size > snapshot_size), + // only snapshot_size bytes should be mapped into guest PA space. + let guest_mapped_size = if hdr.memory_size > layout.snapshot_size { + Some(layout.snapshot_size) + } else { + None + }; + let memory = ReadonlySharedMemory::from_file( + &file, + hdr.memory_offset.try_into().map_err(|_| { + crate::new_error!( + "snapshot memory_offset {} exceeds usize range", + hdr.memory_offset + ) + })?, + hdr.memory_size, + guest_mapped_size, + )?; + + // Phase 4: verify the memory blob's hash. Skipped by + // `from_file_unchecked` since this is the expensive check + // (proportional to blob size). + if verify_blob_hash { + let computed: [u8; 32] = blake3::hash(memory.as_slice()).into(); + if computed != hashes.blob_hash { + return Err(crate::new_error!( + "snapshot hash mismatch: file may be corrupted" + )); + } + } + + Ok(Snapshot { + sandbox_id: SANDBOX_CONFIGURATION_COUNTER + .fetch_add(1, std::sync::atomic::Ordering::Relaxed), + layout, + memory, + regions: Vec::new(), + load_info: crate::mem::exe::LoadInfo::dummy(), + // In-memory `Snapshot::hash` is `blake3(memory)` (matches + // `Snapshot::new`/`Snapshot::from_env`), used as the + // `PartialEq` key. This is the on-disk `blob_hash`. + hash: hashes.blob_hash, + stack_top_gva: hdr.stack_top_gva, + sregs, + entrypoint: hdr.entrypoint, + snapshot_generation: 0, + host_functions, + }) + } +} diff --git a/src/hyperlight_host/src/sandbox/snapshot/mod.rs b/src/hyperlight_host/src/sandbox/snapshot/mod.rs index b37b7f361..1bb183ea8 100644 --- a/src/hyperlight_host/src/sandbox/snapshot/mod.rs +++ b/src/hyperlight_host/src/sandbox/snapshot/mod.rs @@ -14,6 +14,9 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ +mod file; +mod file_tests; + use std::collections::{BTreeMap, HashMap}; use std::sync::atomic::{AtomicU64, Ordering}; From a7e8dea140bcdfa24d3bdf5f301438e52c5d7862 Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Fri, 1 May 2026 16:01:13 -0700 Subject: [PATCH 12/15] Add tests for snapshot file format Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- .../src/sandbox/snapshot/file_tests.rs | 1803 +++++++++++++++++ 1 file changed, 1803 insertions(+) create mode 100644 src/hyperlight_host/src/sandbox/snapshot/file_tests.rs diff --git a/src/hyperlight_host/src/sandbox/snapshot/file_tests.rs b/src/hyperlight_host/src/sandbox/snapshot/file_tests.rs new file mode 100644 index 000000000..f100a0dfa --- /dev/null +++ b/src/hyperlight_host/src/sandbox/snapshot/file_tests.rs @@ -0,0 +1,1803 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +//! Tests for the snapshot file format (`super::file`). + +#![cfg(test)] + +use std::sync::Arc; + +use hyperlight_testing::simple_guest_as_string; + +use super::file::{FIXED_PREFIX_SIZE, HypervisorTag, RawHeaderV1, RawPreamble}; +use crate::sandbox::snapshot::Snapshot; +use crate::{GuestBinary, HostFunctions, MultiUseSandbox, UninitializedSandbox}; + +/// Absolute file offset of a `RawHeaderV1` field. Computed from +/// the struct definition so it stays correct if the field order +/// changes. +macro_rules! 
v1_offset { + ($field:ident) => { + std::mem::size_of::() + std::mem::offset_of!(RawHeaderV1, $field) + }; +} + +fn create_test_sandbox() -> MultiUseSandbox { + let path = simple_guest_as_string().unwrap(); + UninitializedSandbox::new(GuestBinary::FilePath(path), None) + .unwrap() + .evolve() + .unwrap() +} + +fn create_snapshot_from_binary() -> Snapshot { + let path = simple_guest_as_string().unwrap(); + Snapshot::from_env( + GuestBinary::FilePath(path), + crate::sandbox::SandboxConfiguration::default(), + ) + .unwrap() +} + +#[test] +fn from_snapshot_already_initialized_in_memory() { + // Test from_snapshot with a snapshot taken from an already-initialized + // sandbox (NextAction::Call), directly from memory without file I/O + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + + let new_snap = Snapshot { + sandbox_id: super::SANDBOX_CONFIGURATION_COUNTER + .fetch_add(1, std::sync::atomic::Ordering::Relaxed), + layout: *snapshot.layout(), + memory: snapshot.memory().clone(), + regions: snapshot.regions().to_vec(), + load_info: snapshot.load_info(), + hash: snapshot.hash, + stack_top_gva: snapshot.stack_top_gva(), + sregs: snapshot.sregs().cloned(), + entrypoint: snapshot.entrypoint(), + snapshot_generation: snapshot.snapshot_generation(), + host_functions: snapshot.host_functions.clone(), + }; + + let mut sbox2 = + MultiUseSandbox::from_snapshot(Arc::new(new_snap), HostFunctions::default(), None).unwrap(); + let result: i32 = sbox2.call("GetStatic", ()).unwrap(); + assert_eq!(result, 0); +} + +#[test] +fn from_snapshot_in_memory() { + // Test from_snapshot pathway using the existing Snapshot::from_env + let path = simple_guest_as_string().unwrap(); + let snap = Snapshot::from_env( + GuestBinary::FilePath(path), + crate::sandbox::SandboxConfiguration::default(), + ) + .unwrap(); + + let mut sbox = + MultiUseSandbox::from_snapshot(Arc::new(snap), HostFunctions::default(), None).unwrap(); + + // from_env creates a snapshot with 
NextAction::Initialise, + // so from_snapshot will run the init code via vm.initialise() + let result: i32 = sbox.call("GetStatic", ()).unwrap(); + assert_eq!(result, 0); +} + +#[test] +fn round_trip_save_load_call() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("test.hls"); + snapshot.to_file(&snap_path).unwrap(); + + let loaded = Snapshot::from_file(&snap_path).unwrap(); + let mut sbox2 = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None).unwrap(); + + let result: String = sbox2.call("Echo", "hello\n".to_string()).unwrap(); + assert_eq!(result, "hello\n"); +} + +#[test] +fn snapshot_and_pt_size_round_trip() { + // Running-sandbox snapshot. + let mut sbox = create_test_sandbox(); + let snap = sbox.snapshot().unwrap(); + let original_snapshot_size = snap.layout().snapshot_size; + let original_pt_size = snap.layout().pt_size; + + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("running.hls"); + snap.to_file(&path).unwrap(); + + let loaded = Snapshot::from_file(&path).unwrap(); + assert_eq!(loaded.layout().snapshot_size, original_snapshot_size); + assert_eq!(loaded.layout().pt_size, original_pt_size); + + // Pre-init snapshot. 
+ let preinit = create_snapshot_from_binary(); + let preinit_snapshot_size = preinit.layout().snapshot_size; + let preinit_pt_size = preinit.layout().pt_size; + + let path = dir.path().join("preinit.hls"); + preinit.to_file(&path).unwrap(); + + let loaded = Snapshot::from_file(&path).unwrap(); + assert_eq!(loaded.layout().snapshot_size, preinit_snapshot_size); + assert_eq!(loaded.layout().pt_size, preinit_pt_size); +} + +#[test] +fn hash_verification_detects_corruption() { + let snapshot = create_snapshot_from_binary(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("corrupted.hls"); + snapshot.to_file(&snap_path).unwrap(); + + // Corrupt a byte in the memory blob (after the 4096-byte header) + { + use std::io::{Read, Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .read(true) + .write(true) + .open(&snap_path) + .unwrap(); + file.seek(SeekFrom::Start(4096 + 100)).unwrap(); + let mut byte = [0u8; 1]; + file.read_exact(&mut byte).unwrap(); + byte[0] ^= 0xFF; + file.seek(SeekFrom::Start(4096 + 100)).unwrap(); + file.write_all(&byte).unwrap(); + } + + let result = Snapshot::from_file(&snap_path); + let err_msg = match result { + Err(e) => format!("{}", e), + Ok(_) => panic!("expected load to fail with hash mismatch"), + }; + assert!( + err_msg.contains("hash mismatch"), + "expected hash mismatch error, got: {}", + err_msg + ); +} + +#[test] +fn arch_mismatch_rejected() { + let snapshot = create_snapshot_from_binary(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("wrong_arch.hls"); + snapshot.to_file(&snap_path).unwrap(); + + // Overwrite the architecture tag + { + use std::io::{Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .write(true) + .open(&snap_path) + .unwrap(); + file.seek(SeekFrom::Start(v1_offset!(arch) as u64)).unwrap(); + file.write_all(&99u32.to_le_bytes()).unwrap(); + } + + let result = Snapshot::from_file(&snap_path); + let err_msg = match 
result { + Err(e) => format!("{}", e), + Ok(_) => panic!("expected load to fail with arch mismatch"), + }; + assert!( + err_msg.contains("architecture"), + "expected arch-related error, got: {}", + err_msg + ); +} + +#[test] +fn format_version_mismatch_rejected() { + let snapshot = create_snapshot_from_binary(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("wrong_version.hls"); + snapshot.to_file(&snap_path).unwrap(); + + // Overwrite the format version + { + use std::io::{Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .write(true) + .open(&snap_path) + .unwrap(); + file.seek(SeekFrom::Start( + std::mem::offset_of!(RawPreamble, format_version) as u64, + )) + .unwrap(); + file.write_all(&999u32.to_le_bytes()).unwrap(); + } + + let result = Snapshot::from_file(&snap_path); + let err_msg = match result { + Err(e) => format!("{}", e), + Ok(_) => panic!("expected load to fail with version mismatch"), + }; + assert!( + err_msg.contains("format version"), + "expected version mismatch error, got: {}", + err_msg + ); + assert!( + err_msg.contains("convertible"), + "expected hint about convertibility, got: {}", + err_msg + ); +} + +#[test] +fn abi_version_mismatch_rejected() { + let snapshot = create_snapshot_from_binary(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("wrong_abi.hls"); + snapshot.to_file(&snap_path).unwrap(); + + // Overwrite the ABI version + { + use std::io::{Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .write(true) + .open(&snap_path) + .unwrap(); + file.seek(SeekFrom::Start(v1_offset!(abi_version) as u64)) + .unwrap(); + file.write_all(&999u32.to_le_bytes()).unwrap(); + } + + let result = Snapshot::from_file(&snap_path); + let err_msg = match result { + Err(e) => format!("{}", e), + Ok(_) => panic!("expected load to fail with ABI version mismatch"), + }; + assert!( + err_msg.contains("ABI version mismatch"), + "expected ABI version mismatch 
error, got: {}", + err_msg + ); + assert!( + err_msg.contains("regenerated"), + "expected hint about regeneration, got: {}", + err_msg + ); +} + +#[test] +fn hypervisor_mismatch_rejected() { + let snapshot = create_snapshot_from_binary(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("wrong_hv.hls"); + snapshot.to_file(&snap_path).unwrap(); + + // Overwrite the hypervisor tag with a valid but wrong tag. + let current = HypervisorTag::current().unwrap(); + let wrong_tag = match current { + HypervisorTag::Whp => HypervisorTag::Kvm, + _ => HypervisorTag::Whp, + }; + { + use std::io::{Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .write(true) + .open(&snap_path) + .unwrap(); + file.seek(SeekFrom::Start(v1_offset!(hypervisor) as u64)) + .unwrap(); + file.write_all(&(wrong_tag as u64).to_le_bytes()).unwrap(); + } + + let result = Snapshot::from_file(&snap_path); + let err_msg = match result { + Err(e) => format!("{}", e), + Ok(_) => panic!("expected load to fail with hypervisor mismatch"), + }; + assert!( + err_msg.contains("hypervisor mismatch"), + "expected hypervisor mismatch error, got: {}", + err_msg + ); +} + +#[test] +fn restore_from_loaded_snapshot() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("restore.hls"); + snapshot.to_file(&snap_path).unwrap(); + + let loaded = Snapshot::from_file(&snap_path).unwrap(); + let mut sbox = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None).unwrap(); + + // Mutate state + sbox.call::("AddToStatic", 42i32).unwrap(); + let val: i32 = sbox.call("GetStatic", ()).unwrap(); + assert_eq!(val, 42); + + // Take a new snapshot and restore to it + let snap2 = sbox.snapshot().unwrap(); + sbox.call::("AddToStatic", 10i32).unwrap(); + let val: i32 = sbox.call("GetStatic", ()).unwrap(); + assert_eq!(val, 52); + + 
sbox.restore(snap2).unwrap(); + let val: i32 = sbox.call("GetStatic", ()).unwrap(); + assert_eq!(val, 42); +} + +#[test] +fn restore_to_original_file_snapshot() { + let mut sbox = create_test_sandbox(); + sbox.call::("AddToStatic", 10i32).unwrap(); + let snapshot = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("original.hls"); + snapshot.to_file(&snap_path).unwrap(); + + let loaded = Arc::new(Snapshot::from_file(&snap_path).unwrap()); + let mut sbox = + MultiUseSandbox::from_snapshot(loaded.clone(), HostFunctions::default(), None).unwrap(); + + sbox.call::("AddToStatic", 42i32).unwrap(); + let val: i32 = sbox.call("GetStatic", ()).unwrap(); + assert_eq!(val, 52); + + sbox.restore(loaded).unwrap(); + let val: i32 = sbox.call("GetStatic", ()).unwrap(); + assert_eq!(val, 10); +} + +/// Sandboxes built from clones of the same `Arc` must +/// be mutually `restore`-compatible (they share the same +/// `sandbox_id`). Conversely, two `Snapshot::from_file` calls of +/// the same path return distinct snapshots; that property is +/// exercised by `restore_to_different_file_loaded_snapshot_rejected`. +#[test] +fn sandboxes_from_shared_arc_snapshot_can_restore_to_each_other() { + let mut producer = create_test_sandbox(); + let snapshot = producer.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("shared_id.hls"); + snapshot.to_file(&snap_path).unwrap(); + + let loaded = Arc::new(Snapshot::from_file(&snap_path).unwrap()); + let mut sbox1 = + MultiUseSandbox::from_snapshot(loaded.clone(), HostFunctions::default(), None).unwrap(); + + // Take an in-process snapshot from one sibling. That snapshot + // inherits its sandbox's id, which must match every other sandbox + // built from the same `Arc`. 
+ sbox1.call::("AddToStatic", 7i32).unwrap(); + let mid_snap = sbox1.snapshot().unwrap(); + + let mut sbox2 = MultiUseSandbox::from_snapshot(loaded, HostFunctions::default(), None).unwrap(); + // Restoring `sbox2` to a snapshot taken from `sbox1` must + // succeed because they share the same id. + sbox2.restore(mid_snap).unwrap(); + assert_eq!(sbox2.call::("GetStatic", ()).unwrap(), 7); +} + +/// A single `Arc` loaded from disk must be safely shared +/// across many `from_snapshot` calls. Each resulting sandbox gets +/// its own CoW view and must be independent of the others. +#[test] +fn many_sandboxes_share_single_arc_snapshot() { + const N: usize = 8; + + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("shared_arc.hls"); + snapshot.to_file(&snap_path).unwrap(); + + let loaded = Arc::new(Snapshot::from_file(&snap_path).unwrap()); + + let mut sandboxes: Vec = (0..N) + .map(|_| { + MultiUseSandbox::from_snapshot(loaded.clone(), HostFunctions::default(), None).unwrap() + }) + .collect(); + + // Each sandbox writes a unique value and must observe its own write. + for (i, sbox) in sandboxes.iter_mut().enumerate() { + sbox.call::("AddToStatic", (i as i32 + 1) * 10) + .unwrap(); + } + for (i, sbox) in sandboxes.iter_mut().enumerate() { + let val: i32 = sbox.call("GetStatic", ()).unwrap(); + assert_eq!( + val, + (i as i32 + 1) * 10, + "sandbox {i} must observe its own write", + ); + } + + // Dropping the original Arc while sandboxes are still + // alive must not invalidate their CoW mappings. 
+ drop(loaded); + for (i, sbox) in sandboxes.iter_mut().enumerate() { + let val: i32 = sbox.call("GetStatic", ()).unwrap(); + assert_eq!( + val, + (i as i32 + 1) * 10, + "sandbox {i} must still work after the source Arc is dropped", + ); + } +} + +/// Multiple sandboxes built from the same on-disk snapshot must +/// behave correctly under concurrent use from multiple threads. +#[test] +fn concurrent_sandboxes_from_same_file() { + use std::thread; + + const N: usize = 8; + + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("concurrent.hls"); + snapshot.to_file(&snap_path).unwrap(); + + let loaded = Arc::new(Snapshot::from_file(&snap_path).unwrap()); + + let handles: Vec<_> = (0..N) + .map(|i| { + let loaded = loaded.clone(); + thread::spawn(move || { + let mut sbox = + MultiUseSandbox::from_snapshot(loaded, HostFunctions::default(), None).unwrap(); + let increment = (i as i32 + 1) * 7; + for _ in 0..5 { + sbox.call::("AddToStatic", increment).unwrap(); + } + let final_val: i32 = sbox.call("GetStatic", ()).unwrap(); + assert_eq!( + final_val, + increment * 5, + "thread {i} must see its own writes" + ); + }) + }) + .collect(); + + for h in handles { + h.join().expect("thread panicked"); + } +} + +/// Snapshots loaded from the same file must be restorable +/// independently from each other after concurrent mutations. 
+#[test] +fn restore_works_per_sandbox_with_shared_file() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("restore_shared.hls"); + snapshot.to_file(&snap_path).unwrap(); + + let loaded = Arc::new(Snapshot::from_file(&snap_path).unwrap()); + + let mut sbox1 = + MultiUseSandbox::from_snapshot(loaded.clone(), HostFunctions::default(), None).unwrap(); + let mut sbox2 = + MultiUseSandbox::from_snapshot(loaded.clone(), HostFunctions::default(), None).unwrap(); + + sbox1.call::("AddToStatic", 100i32).unwrap(); + sbox2.call::("AddToStatic", 200i32).unwrap(); + + sbox1.restore(loaded.clone()).unwrap(); + assert_eq!(sbox1.call::("GetStatic", ()).unwrap(), 0); + // sbox2 must be unaffected by sbox1's restore. + assert_eq!(sbox2.call::("GetStatic", ()).unwrap(), 200); + + sbox2.restore(loaded).unwrap(); + assert_eq!(sbox2.call::("GetStatic", ()).unwrap(), 0); +} + +/// Pre-init snapshots (NextAction::Initialise) round-tripped through +/// a file must be usable concurrently by multiple sandboxes. This is +/// distinct from already-initialised (`Call`) snapshots because each +/// sandbox runs the guest init code under `vm.initialise()`. 
+#[test] +fn multiple_sandboxes_from_pre_init_file() { + let snapshot = create_snapshot_from_binary(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("preinit_shared.hls"); + snapshot.to_file(&snap_path).unwrap(); + + let loaded = Arc::new(Snapshot::from_file(&snap_path).unwrap()); + + let mut sbox1 = + MultiUseSandbox::from_snapshot(loaded.clone(), HostFunctions::default(), None).unwrap(); + let mut sbox2 = MultiUseSandbox::from_snapshot(loaded, HostFunctions::default(), None).unwrap(); + + sbox1.call::("AddToStatic", 11i32).unwrap(); + assert_eq!(sbox1.call::("GetStatic", ()).unwrap(), 11); + assert_eq!(sbox2.call::("GetStatic", ()).unwrap(), 0); + + sbox2.call::("AddToStatic", 22i32).unwrap(); + assert_eq!(sbox2.call::("GetStatic", ()).unwrap(), 22); + assert_eq!(sbox1.call::("GetStatic", ()).unwrap(), 11); +} + +#[test] +fn snapshot_then_save_round_trip() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path1 = dir.path().join("first.hls"); + snapshot.to_file(&snap_path1).unwrap(); + + // Load, create sandbox, mutate, take snapshot, save again + let loaded = Snapshot::from_file(&snap_path1).unwrap(); + let mut sbox2 = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None).unwrap(); + + sbox2.call::("AddToStatic", 77i32).unwrap(); + let snap2 = sbox2.snapshot().unwrap(); + + let snap_path2 = dir.path().join("second.hls"); + snap2.to_file(&snap_path2).unwrap(); + + // Load the second snapshot and verify mutated state + let loaded2 = Snapshot::from_file(&snap_path2).unwrap(); + let mut sbox3 = + MultiUseSandbox::from_snapshot(Arc::new(loaded2), HostFunctions::default(), None).unwrap(); + + let val: i32 = sbox3.call("GetStatic", ()).unwrap(); + assert_eq!(val, 77); +} + +/// `MultiUseSandbox::from_snapshot` should register the default +/// `HostPrint` host function, just like the regular codepath. 
+#[test] +fn from_snapshot_has_default_host_print() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("test.hls"); + snapshot.to_file(&snap_path).unwrap(); + + let loaded = Snapshot::from_file(&snap_path).unwrap(); + let mut sbox2 = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None).unwrap(); + + let result = sbox2.call::("PrintOutput", "hello from snapshot".to_string()); + assert!( + result.is_ok(), + "PrintOutput should succeed because HostPrint is registered by from_snapshot: {:?}", + result.unwrap_err() + ); +} + +#[test] +fn from_file_unchecked_skips_hash_verification() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("unchecked.hls"); + snapshot.to_file(&snap_path).unwrap(); + + // Corrupt a byte in the memory blob (past the header) + { + use std::io::{Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .write(true) + .open(&snap_path) + .unwrap(); + // Write garbage into the memory blob region + file.seek(SeekFrom::Start(4096 + 64)).unwrap(); + file.write_all(&[0xFF; 16]).unwrap(); + } + + // from_file (with hash check) should fail + let result = Snapshot::from_file(&snap_path); + assert!(result.is_err(), "from_file should detect corruption"); + + // from_file_unchecked should succeed despite corruption + let loaded = Snapshot::from_file_unchecked(&snap_path); + assert!(loaded.is_ok(), "from_file_unchecked should skip hash check"); +} + +/// Sandbox built with a custom host function — a snapshot taken +/// from it must persist the function's signature, and loading +/// requires the same function to be registered. 
+fn create_sandbox_with_custom_host_funcs() -> MultiUseSandbox { + use crate::func::Registerable; + let path = simple_guest_as_string().unwrap(); + let mut u = UninitializedSandbox::new(GuestBinary::FilePath(path), None).unwrap(); + u.register_host_function("Add", |a: i32, b: i32| Ok(a + b)) + .unwrap(); + u.evolve().unwrap() +} + +#[test] +fn from_snapshot_accepts_matching_host_functions() { + use crate::func::Registerable; + + let mut sbox = create_sandbox_with_custom_host_funcs(); + let snapshot = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("with_funcs.hls"); + snapshot.to_file(&snap_path).unwrap(); + + let loaded = Snapshot::from_file(&snap_path).unwrap(); + let mut funcs = HostFunctions::default(); + funcs + .register_host_function("Add", |a: i32, b: i32| Ok(a + b)) + .unwrap(); + let result = MultiUseSandbox::from_snapshot(Arc::new(loaded), funcs, None); + assert!( + result.is_ok(), + "from_snapshot should accept matching host fns: {:?}", + result.err() + ); +} + +#[test] +fn from_snapshot_rejects_missing_host_function() { + let mut sbox = create_sandbox_with_custom_host_funcs(); + let snapshot = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("missing_fn.hls"); + snapshot.to_file(&snap_path).unwrap(); + + let loaded = Snapshot::from_file(&snap_path).unwrap(); + // Don't register "Add" — only the default HostPrint. 
+ let result = MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None); + let err = result.expect_err("expected missing-fn rejection"); + let msg = format!("{}", err); + assert!( + msg.contains("missing") && msg.contains("Add"), + "unexpected error message: {}", + msg + ); +} + +#[test] +fn from_snapshot_rejects_signature_mismatch() { + use crate::func::Registerable; + + let mut sbox = create_sandbox_with_custom_host_funcs(); + let snapshot = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("sig_mismatch.hls"); + snapshot.to_file(&snap_path).unwrap(); + + let loaded = Snapshot::from_file(&snap_path).unwrap(); + let mut funcs = HostFunctions::default(); + // Wrong signature: snapshot has (i32, i32) -> i32, register (String) -> i32. + funcs + .register_host_function("Add", |_s: String| Ok(0i32)) + .unwrap(); + let result = MultiUseSandbox::from_snapshot(Arc::new(loaded), funcs, None); + let err = result.expect_err("expected signature mismatch"); + let msg = format!("{}", err); + assert!( + msg.contains("signature_mismatches") && msg.contains("Add"), + "unexpected error message: {}", + msg + ); +} + +#[test] +fn from_snapshot_allows_extra_host_functions() { + use crate::func::Registerable; + + let mut sbox = create_sandbox_with_custom_host_funcs(); + let snapshot = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("extra_funcs.hls"); + snapshot.to_file(&snap_path).unwrap(); + + let loaded = Snapshot::from_file(&snap_path).unwrap(); + let mut funcs = HostFunctions::default(); + funcs + .register_host_function("Add", |a: i32, b: i32| Ok(a + b)) + .unwrap(); + // Extra functions not in the snapshot — superset is allowed. 
+ funcs + .register_host_function("Extra", |x: i64| Ok(x * 2)) + .unwrap(); + let result = MultiUseSandbox::from_snapshot(Arc::new(loaded), funcs, None); + assert!( + result.is_ok(), + "extras should be allowed (superset semantics): {:?}", + result.err() + ); +} + +/// Register enough host functions on the sandbox that the +/// serialized `HostFunctionDetails` flatbuffer exceeds a single +/// page, exercising the variable-`memory_offset` path. Verifies +/// that the saved file round-trips cleanly and that +/// `from_snapshot` correctly accepts a matching set. +#[test] +fn from_snapshot_with_many_host_functions_round_trip() { + use hyperlight_common::vmem::PAGE_SIZE; + + use crate::func::Registerable; + + let path = simple_guest_as_string().unwrap(); + let mut u = UninitializedSandbox::new(GuestBinary::FilePath(path), None).unwrap(); + // Register many host functions with long names so the + // serialized flatbuffer comfortably exceeds PAGE_SIZE. + const N: usize = 200; + for i in 0..N { + let name = format!("HostFunc_with_a_reasonably_long_name_{:04}", i); + u.register_host_function(&name, |a: i32, b: i32| Ok(a + b)) + .unwrap(); + } + let mut sbox = u.evolve().unwrap(); + let snapshot = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("many_funcs.hls"); + snapshot.to_file(&snap_path).unwrap(); + + // Sanity-check that the file's recorded memory_offset is + // larger than a single page (the host-function blob spilled + // beyond the fixed header). 
+ { + use std::io::{Read, Seek, SeekFrom}; + let mut f = std::fs::File::open(&snap_path).unwrap(); + f.seek(SeekFrom::Start(v1_offset!(memory_offset) as u64)) + .unwrap(); + let mut buf = [0u8; 8]; + f.read_exact(&mut buf).unwrap(); + let memory_offset = u64::from_le_bytes(buf) as usize; + assert!( + memory_offset > PAGE_SIZE, + "expected memory_offset > PAGE_SIZE for large host_funcs (got {})", + memory_offset + ); + } + + let loaded = Snapshot::from_file(&snap_path).unwrap(); + let mut funcs = HostFunctions::default(); + for i in 0..N { + let name = format!("HostFunc_with_a_reasonably_long_name_{:04}", i); + funcs + .register_host_function(&name, |a: i32, b: i32| Ok(a + b)) + .unwrap(); + } + let mut sbox2 = MultiUseSandbox::from_snapshot(Arc::new(loaded), funcs, None).unwrap(); + let result: String = sbox2.call("Echo", "hello\n".to_string()).unwrap(); + assert_eq!(result, "hello\n"); +} + +/// A file with the wrong magic bytes should be rejected with a +/// descriptive error. +#[test] +fn bad_magic_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("bad_magic.hls"); + snapshot.to_file(&snap_path).unwrap(); + + // Overwrite the 4-byte magic at offset 0. + { + use std::io::{Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .write(true) + .open(&snap_path) + .unwrap(); + file.seek(SeekFrom::Start(0)).unwrap(); + file.write_all(b"XXXX").unwrap(); + } + + let err = match Snapshot::from_file(&snap_path) { + Err(e) => e, + Ok(_) => panic!("expected magic mismatch"), + }; + let msg = format!("{}", err); + assert!( + msg.contains("magic"), + "expected magic-related error, got: {}", + msg + ); +} + +/// A file truncated to less than the fixed header should be +/// rejected at header read time, not panic. 
+#[test] +fn truncated_file_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("truncated.hls"); + snapshot.to_file(&snap_path).unwrap(); + + // Truncate to 100 bytes (well below the fixed header). + std::fs::OpenOptions::new() + .write(true) + .open(&snap_path) + .unwrap() + .set_len(100) + .unwrap(); + + let err = match Snapshot::from_file(&snap_path) { + Err(e) => e, + Ok(_) => panic!("expected truncation error"), + }; + let msg = format!("{}", err); + // Either "truncated" (read_bytes) or "snapshot read error" (read_u64) — + // both are acceptable; just assert no panic and an error came back. + assert!( + msg.contains("truncated") || msg.contains("read error"), + "expected truncation/read error, got: {}", + msg + ); +} + +/// A file whose `host_funcs_size` claims more bytes than the +/// host-funcs region actually contains should be rejected +/// without panic. +#[test] +fn corrupt_host_funcs_size_rejected() { + // Use a sandbox with at least one custom host function so the + // host-funcs region exists in the file. + let mut sbox = create_sandbox_with_custom_host_funcs(); + let snapshot = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("bad_hf_size.hls"); + snapshot.to_file(&snap_path).unwrap(); + + // Overwrite host_funcs_size with a huge value that exceeds + // the file. 
+ { + use std::io::{Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .write(true) + .open(&snap_path) + .unwrap(); + file.seek(SeekFrom::Start(v1_offset!(host_funcs_size) as u64)) + .unwrap(); + file.write_all(&u64::MAX.to_le_bytes()).unwrap(); + } + + let err = match Snapshot::from_file(&snap_path) { + Err(e) => e, + Ok(_) => panic!("expected host_funcs_size error"), + }; + let msg = format!("{}", err); + assert!( + msg.contains("host_funcs_size"), + "expected host_funcs_size error, got: {}", + msg + ); +} + +/// A `host_funcs_size` that fits within the file but exceeds the +/// fixed cap must be rejected before the loader tries to allocate +/// a buffer of that size. +#[test] +fn oversized_host_funcs_size_rejected() { + let mut sbox = create_sandbox_with_custom_host_funcs(); + let snapshot = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("oversized_hf.hls"); + snapshot.to_file(&snap_path).unwrap(); + + // Pad the file so a value that's well above the cap still + // fits within `file_len` (otherwise the existing + // "exceeds remaining file bytes" check would catch it first). 
+ let bloated = 64 * 1024 * 1024_u64; + { + use std::io::{Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .write(true) + .open(&snap_path) + .unwrap(); + let cur_len = file.metadata().unwrap().len(); + file.seek(SeekFrom::Start(cur_len)).unwrap(); + file.write_all(&vec![0u8; bloated as usize]).unwrap(); + file.seek(SeekFrom::Start(v1_offset!(host_funcs_size) as u64)) + .unwrap(); + file.write_all(&bloated.to_le_bytes()).unwrap(); + } + + let err = match Snapshot::from_file(&snap_path) { + Err(e) => e, + Ok(_) => panic!("expected oversized host_funcs_size error"), + }; + let msg = format!("{}", err); + assert!( + msg.contains("exceeds maximum"), + "expected cap error, got: {}", + msg + ); +} + +/// `memory_offset` of 0 is structurally invalid because the memory +/// blob would overlap the fixed prefix. +#[test] +fn memory_offset_zero_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("memory_offset_zero.hls"); + snapshot.to_file(&snap_path).unwrap(); + + { + use std::io::{Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .write(true) + .open(&snap_path) + .unwrap(); + file.seek(SeekFrom::Start(v1_offset!(memory_offset) as u64)) + .unwrap(); + file.write_all(&0u64.to_le_bytes()).unwrap(); + } + + let err = match Snapshot::from_file(&snap_path) { + Err(e) => e, + Ok(_) => panic!("expected memory_offset=0 to be rejected"), + }; + let msg = format!("{}", err); + assert!( + msg.contains("memory_offset"), + "expected memory_offset error, got: {}", + msg + ); +} + +/// `memory_offset` must be a multiple of `PAGE_SIZE` so the memory +/// blob can be mmapped directly. A non-aligned offset must be +/// rejected. 
+#[test] +fn memory_offset_unaligned_rejected() { + use hyperlight_common::vmem::PAGE_SIZE; + + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("memory_offset_unaligned.hls"); + snapshot.to_file(&snap_path).unwrap(); + + { + use std::io::{Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .write(true) + .open(&snap_path) + .unwrap(); + file.seek(SeekFrom::Start(v1_offset!(memory_offset) as u64)) + .unwrap(); + let bad = (PAGE_SIZE as u64) + 1; + file.write_all(&bad.to_le_bytes()).unwrap(); + } + + let err = match Snapshot::from_file(&snap_path) { + Err(e) => e, + Ok(_) => panic!("expected unaligned memory_offset to be rejected"), + }; + let msg = format!("{}", err); + assert!( + msg.contains("memory_offset") && msg.contains("PAGE_SIZE"), + "expected page-alignment error, got: {}", + msg + ); +} + +/// `memory_size` that would push the memory blob past the end of +/// the file must be rejected. +#[test] +fn memory_blob_extends_past_eof_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("memory_size_overflow.hls"); + snapshot.to_file(&snap_path).unwrap(); + + { + use std::io::{Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .write(true) + .open(&snap_path) + .unwrap(); + let file_len = file.metadata().unwrap().len(); + file.seek(SeekFrom::Start(v1_offset!(memory_size) as u64)) + .unwrap(); + // A value that fits in u64 but is much larger than the + // file, so the blob bound check trips before any add + // overflows. 
+ file.write_all(&(file_len * 2).to_le_bytes()).unwrap(); + } + + let err = match Snapshot::from_file(&snap_path) { + Err(e) => e, + Ok(_) => panic!("expected oversized memory blob to be rejected"), + }; + let msg = format!("{}", err); + assert!( + msg.contains("memory blob") && msg.contains("end of the file"), + "expected blob-end error, got: {}", + msg + ); +} + +/// `entrypoint_tag` is a u64 discriminant for `NextAction`. Only +/// values 0 (Initialise) and 1 (Call) are defined. Anything else +/// must be rejected when parsing the raw header. +#[test] +fn invalid_entrypoint_tag_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("bad_entrypoint_tag.hls"); + snapshot.to_file(&snap_path).unwrap(); + + { + use std::io::{Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .write(true) + .open(&snap_path) + .unwrap(); + file.seek(SeekFrom::Start(v1_offset!(entrypoint_tag) as u64)) + .unwrap(); + file.write_all(&0xDEADu64.to_le_bytes()).unwrap(); + } + + let err = match Snapshot::from_file(&snap_path) { + Err(e) => e, + Ok(_) => panic!("expected invalid entrypoint tag to be rejected"), + }; + let msg = format!("{}", err); + assert!( + msg.contains("entrypoint tag"), + "expected entrypoint-tag error, got: {}", + msg + ); +} + +/// `init_data_permissions` is stored as `u64` on disk but the in +/// memory flag set is `u32`. Any value with bits beyond the u32 +/// range must be rejected before narrowing so that high bits do not +/// silently disappear. 
+#[test] +fn init_data_permissions_oversized_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("oversized_perms.hls"); + snapshot.to_file(&snap_path).unwrap(); + + { + use std::io::{Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .write(true) + .open(&snap_path) + .unwrap(); + file.seek(SeekFrom::Start(v1_offset!(init_data_permissions) as u64)) + .unwrap(); + // High 32 bits set so `u32::try_from` fails. + file.write_all(&(1u64 << 33).to_le_bytes()).unwrap(); + } + + let err = match Snapshot::from_file(&snap_path) { + Err(e) => e, + Ok(_) => panic!("expected oversized init_data_permissions to be rejected"), + }; + let msg = format!("{}", err); + assert!( + msg.contains("init_data_permissions") && msg.contains("u32"), + "expected u32-range error, got: {}", + msg + ); +} + +/// `has_sregs` is serialized as `u64` for on-disk uniformity but is +/// semantically a boolean. Any value other than 0 or 1 must be +/// rejected at parse time. +#[test] +fn invalid_has_sregs_value_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("bad_has_sregs.hls"); + snapshot.to_file(&snap_path).unwrap(); + + { + use std::io::{Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .write(true) + .open(&snap_path) + .unwrap(); + file.seek(SeekFrom::Start(v1_offset!(has_sregs) as u64)) + .unwrap(); + file.write_all(&2u64.to_le_bytes()).unwrap(); + } + + let err = match Snapshot::from_file(&snap_path) { + Err(e) => e, + Ok(_) => panic!("expected has_sregs validation error"), + }; + let msg = format!("{}", err); + assert!( + msg.contains("has_sregs"), + "expected has_sregs error, got: {}", + msg + ); +} + +/// A `Call` snapshot is mid-execution and must carry sregs. +/// Flipping `has_sregs` to 0 on such a snapshot must be rejected. 
+#[test] +fn call_snapshot_without_sregs_rejected() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("call_no_sregs.hls"); + snapshot.to_file(&snap_path).unwrap(); + + { + use std::io::{Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .write(true) + .open(&snap_path) + .unwrap(); + file.seek(SeekFrom::Start(v1_offset!(has_sregs) as u64)) + .unwrap(); + file.write_all(&0u64.to_le_bytes()).unwrap(); + } + + let err = match Snapshot::from_file(&snap_path) { + Err(e) => e, + Ok(_) => panic!("expected entrypoint/has_sregs mismatch"), + }; + let msg = format!("{}", err); + assert!( + msg.contains("has_sregs"), + "expected has_sregs error, got: {}", + msg + ); +} + +/// An `Initialise` snapshot has not yet run on the vCPU and must +/// not carry sregs. Flipping `has_sregs` to 1 must be rejected. +#[test] +fn initialise_snapshot_with_sregs_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("init_with_sregs.hls"); + snapshot.to_file(&snap_path).unwrap(); + + { + use std::io::{Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .write(true) + .open(&snap_path) + .unwrap(); + file.seek(SeekFrom::Start(v1_offset!(has_sregs) as u64)) + .unwrap(); + file.write_all(&1u64.to_le_bytes()).unwrap(); + } + + let err = match Snapshot::from_file(&snap_path) { + Err(e) => e, + Ok(_) => panic!("expected entrypoint/has_sregs mismatch"), + }; + let msg = format!("{}", err); + assert!( + msg.contains("has_sregs"), + "expected has_sregs error, got: {}", + msg + ); +} + +/// `header_hash` covers the preamble, header, sregs, and host_funcs +/// blob. Any mutation of those regions must trip verification, even +/// via `from_file_unchecked`. 
+#[test] +fn header_mutation_caught_by_hash() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("hdr_mut.hls"); + snapshot.to_file(&snap_path).unwrap(); + + // Flip a byte in `stack_top_gva` to mutate the header in place. + { + use std::io::{Read, Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .read(true) + .write(true) + .open(&snap_path) + .unwrap(); + file.seek(SeekFrom::Start(v1_offset!(stack_top_gva) as u64)) + .unwrap(); + let mut buf = [0u8; 8]; + file.read_exact(&mut buf).unwrap(); + buf[0] ^= 0xFF; + file.seek(SeekFrom::Start(v1_offset!(stack_top_gva) as u64)) + .unwrap(); + file.write_all(&buf).unwrap(); + } + + let err = match Snapshot::from_file(&snap_path) { + Err(e) => e, + Ok(_) => panic!("header mutation must be detected"), + }; + assert!( + format!("{}", err).contains("header_hash"), + "expected header_hash error, got: {}", + err + ); + + // `from_file_unchecked` skips the blob hash but still verifies + // the header hash, so it must also reject this. + let err = match Snapshot::from_file_unchecked(&snap_path) { + Err(e) => e, + Ok(_) => panic!("header mutation must be detected even by from_file_unchecked"), + }; + assert!( + format!("{}", err).contains("header_hash"), + "expected header_hash error, got: {}", + err + ); +} + +/// Sregs sit between the header and the host_funcs blob. Mutating +/// any sregs byte must trip `header_hash` verification. +#[test] +fn sregs_mutation_caught_by_hash() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("sregs_mut.hls"); + snapshot.to_file(&snap_path).unwrap(); + + // Flip the first byte of the sregs region (just after the + // RawHeaderV1 ends). 
+ let sregs_offset = std::mem::size_of::() + std::mem::size_of::(); + { + use std::io::{Read, Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .read(true) + .write(true) + .open(&snap_path) + .unwrap(); + file.seek(SeekFrom::Start(sregs_offset as u64)).unwrap(); + let mut byte = [0u8; 1]; + file.read_exact(&mut byte).unwrap(); + byte[0] ^= 0xFF; + file.seek(SeekFrom::Start(sregs_offset as u64)).unwrap(); + file.write_all(&byte).unwrap(); + } + + let err = match Snapshot::from_file(&snap_path) { + Err(e) => e, + Ok(_) => panic!("sregs mutation must be detected"), + }; + assert!( + format!("{}", err).contains("header_hash"), + "expected header_hash error, got: {}", + err + ); +} + +/// The host-functions flatbuffer blob is part of `header_hash`. +/// Mutating its bytes must trip verification. +#[test] +fn host_funcs_mutation_caught_by_hash() { + let mut sbox = create_sandbox_with_custom_host_funcs(); + let snapshot = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("hf_mut.hls"); + snapshot.to_file(&snap_path).unwrap(); + + let hf_offset = FIXED_PREFIX_SIZE; + { + use std::io::{Read, Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .read(true) + .write(true) + .open(&snap_path) + .unwrap(); + file.seek(SeekFrom::Start(hf_offset as u64)).unwrap(); + let mut byte = [0u8; 1]; + file.read_exact(&mut byte).unwrap(); + byte[0] ^= 0xFF; + file.seek(SeekFrom::Start(hf_offset as u64)).unwrap(); + file.write_all(&byte).unwrap(); + } + + let err = match Snapshot::from_file(&snap_path) { + Err(e) => e, + Ok(_) => panic!("host_funcs mutation must be detected"), + }; + assert!( + format!("{}", err).contains("header_hash"), + "expected header_hash error, got: {}", + err + ); +} + +/// `MAP_PRIVATE` / `FILE_MAP_COPY` invariant: guest writes +/// through a file-backed snapshot must NOT modify the on-disk +/// file. 
Verifies this by hashing the raw bytes before and after +/// running guest functions that mutate state. +#[test] +fn cow_does_not_mutate_backing_file() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("cow.hls"); + snapshot.to_file(&snap_path).unwrap(); + + let hash_before: [u8; 32] = blake3::hash(&std::fs::read(&snap_path).unwrap()).into(); + + // Load the snapshot and have the guest write into mapped memory. + let loaded = Snapshot::from_file(&snap_path).unwrap(); + let mut sbox = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None).unwrap(); + sbox.call::("AddToStatic", 999i32).unwrap(); + + // Drop the sandbox to ensure mappings are released before re-reading. + drop(sbox); + + let hash_after: [u8; 32] = blake3::hash(&std::fs::read(&snap_path).unwrap()).into(); + assert_eq!( + hash_before, hash_after, + "guest writes must not propagate to the backing snapshot file" + ); +} + +/// Pre-init snapshot (`from_env`) round-tripped through a file +/// must still complete guest initialisation on load. +#[test] +fn pre_init_snapshot_save_load() { + use super::NextAction; + + let snapshot = create_snapshot_from_binary(); + // Guard: this constructor produces a `NextAction::Initialise` + // snapshot. If that ever changes, this test loses its purpose. 
+ assert!( + matches!(snapshot.entrypoint(), NextAction::Initialise(_)), + "expected pre-init snapshot from from_env" + ); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("preinit.hls"); + snapshot.to_file(&snap_path).unwrap(); + + let loaded = Snapshot::from_file(&snap_path).unwrap(); + assert!( + matches!(loaded.entrypoint(), NextAction::Initialise(_)), + "pre-init entrypoint should round-trip" + ); + + let mut sbox = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None).unwrap(); + // Guest init must run via vm.initialise() before the call works. + let result: i32 = sbox.call("GetStatic", ()).unwrap(); + assert_eq!(result, 0); +} + +/// `from_file_unchecked` skips the blake3 hash check but must +/// still validate the rest of the header (magic, format version, +/// architecture, ABI version, hypervisor tag). +#[test] +fn from_file_unchecked_still_validates_header() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("unchecked_bad_arch.hls"); + snapshot.to_file(&snap_path).unwrap(); + + // Corrupt the architecture tag to a bogus value. + { + use std::io::{Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .write(true) + .open(&snap_path) + .unwrap(); + file.seek(SeekFrom::Start(v1_offset!(arch) as u64)).unwrap(); + file.write_all(&99u32.to_le_bytes()).unwrap(); + } + + let err = match Snapshot::from_file_unchecked(&snap_path) { + Err(e) => e, + Ok(_) => panic!("expected arch validation to fail even without hash check"), + }; + let msg = format!("{}", err); + assert!( + msg.contains("architecture"), + "expected arch error from from_file_unchecked, got: {}", + msg + ); +} + +// Tests for `MultiUseSandbox::from_snapshot` `SandboxConfiguration` +// plumbing. Layout fields must be silently overridden by the snapshot. +// Runtime fields (interrupt knobs, gdb, crashdump) must take effect. 
+// `interrupt_*` are covered by `interrupt_custom_signal_no_and_retry_delay` +// in `tests/integration_test.rs`. `guest_debug_info` (gdb) needs an +// in-test gdb stub and is not exercised here. + +/// Layout fields supplied via `SandboxConfiguration` must be silently +/// overridden. The snapshot's own layout is authoritative because the +/// on-disk memory blob already encodes those sizes. +#[test] +fn from_snapshot_silently_ignores_layout_overrides() { + use crate::sandbox::SandboxConfiguration; + + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + let original_input = snapshot.layout().input_data_size; + let original_output = snapshot.layout().output_data_size; + let original_heap = snapshot.layout().heap_size; + let original_scratch = snapshot.layout().scratch_size; + + // Build a config whose every layout field is different from the + // snapshot's layout. `from_snapshot` must ignore all of them. + let mut config = SandboxConfiguration::default(); + config.set_input_data_size(original_input * 2); + config.set_output_data_size(original_output * 2); + config.set_heap_size((original_heap as u64) * 2); + config.set_scratch_size(original_scratch * 2); + + let mut sbox2 = + MultiUseSandbox::from_snapshot(snapshot.clone(), HostFunctions::default(), Some(config)) + .unwrap(); + + // The new sandbox must be fully usable. + sbox2.call::("GetStatic", ()).unwrap(); + + // The new sandbox's layout must match the snapshot's, not the + // override config. + let new_snap = sbox2.snapshot().unwrap(); + assert_eq!(new_snap.layout().input_data_size, original_input); + assert_eq!(new_snap.layout().output_data_size, original_output); + assert_eq!(new_snap.layout().heap_size, original_heap); + assert_eq!(new_snap.layout().scratch_size, original_scratch); +} + +/// `from_snapshot` must honor `guest_core_dump=true` from the supplied +/// config so that `generate_crashdump_to_dir` actually writes a file. 
+#[test] +#[cfg(crashdump)] +fn from_snapshot_honors_guest_core_dump_enabled() { + use crate::sandbox::SandboxConfiguration; + + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + + let mut config = SandboxConfiguration::default(); + config.set_guest_core_dump(true); + + let mut sbox2 = + MultiUseSandbox::from_snapshot(snapshot, HostFunctions::default(), Some(config)).unwrap(); + + let dir = tempfile::tempdir().unwrap(); + sbox2 + .generate_crashdump_to_dir(dir.path().to_str().unwrap()) + .unwrap(); + + let entries: Vec<_> = std::fs::read_dir(dir.path()) + .unwrap() + .filter_map(Result::ok) + .collect(); + assert!( + !entries.is_empty(), + "expected core dump file to be created when guest_core_dump=true" + ); +} + +/// `from_snapshot` must honor `guest_core_dump=false` from the supplied +/// config so that `generate_crashdump_to_dir` produces no file. +#[test] +#[cfg(crashdump)] +fn from_snapshot_honors_guest_core_dump_disabled() { + use crate::sandbox::SandboxConfiguration; + + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + + let mut config = SandboxConfiguration::default(); + config.set_guest_core_dump(false); + + let mut sbox2 = + MultiUseSandbox::from_snapshot(snapshot, HostFunctions::default(), Some(config)).unwrap(); + + let dir = tempfile::tempdir().unwrap(); + sbox2 + .generate_crashdump_to_dir(dir.path().to_str().unwrap()) + .unwrap(); + + let entries: Vec<_> = std::fs::read_dir(dir.path()) + .unwrap() + .filter_map(Result::ok) + .collect(); + assert!( + entries.is_empty(), + "expected no core dump file when guest_core_dump=false, found {:?}", + entries.iter().map(|e| e.path()).collect::>() + ); +} + +/// `from_file` on a non-existent path must return an error rather +/// than panicking. 
+#[test] +fn from_file_nonexistent_path_returns_error() { + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("does_not_exist.hls"); + let err = match Snapshot::from_file(&snap_path) { + Err(e) => e, + Ok(_) => panic!("expected I/O error for missing file"), + }; + let msg = format!("{}", err); + assert!( + msg.contains("failed to open snapshot file"), + "expected open-failure message, got: {}", + msg + ); +} + +/// `to_file` must succeed when overwriting an existing file, and +/// the resulting file must be loadable. +#[test] +fn to_file_overwrites_existing() { + let mut sbox = create_test_sandbox(); + let snap1 = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("overwrite.hls"); + + snap1.to_file(&snap_path).unwrap(); + let first_size = std::fs::metadata(&snap_path).unwrap().len(); + assert!(first_size > 0); + + // Mutate sandbox state and snapshot again, overwriting the same file. + sbox.call::("AddToStatic", 314i32).unwrap(); + let snap2 = sbox.snapshot().unwrap(); + snap2.to_file(&snap_path).unwrap(); + + // Load the overwritten file and verify it observes the second + // snapshot's state. + let loaded = Snapshot::from_file(&snap_path).unwrap(); + let mut sbox2 = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None).unwrap(); + assert_eq!(sbox2.call::("GetStatic", ()).unwrap(), 314); +} + +/// `from_snapshot`-built sandbox must support `map_file_cow` of a +/// host file and the guest must read back the file contents. +#[test] +fn map_file_cow_after_from_snapshot() { + use std::io::Write; + + // Build a snapshot from disk. + let mut producer = create_test_sandbox(); + let snap = producer.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("for_map.hls"); + snap.to_file(&snap_path).unwrap(); + + // Build a host file with known contents to map into the sandbox. 
+ let page_size = page_size::get(); + let payload = b"hello from map_file_cow after from_snapshot"; + let mut padded = vec![0u8; page_size]; + padded[..payload.len()].copy_from_slice(payload); + let file_path = dir.path().join("mapped_payload.bin"); + std::fs::File::create(&file_path) + .unwrap() + .write_all(&padded) + .unwrap(); + + // Construct a sandbox from the on-disk snapshot and map the file. + let loaded = Snapshot::from_file(&snap_path).unwrap(); + let mut sbox = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None).unwrap(); + + let guest_base: u64 = 0x1_0000_0000; + let mapped_size = sbox.map_file_cow(&file_path, guest_base, None).unwrap(); + assert!(mapped_size as usize >= payload.len()); + + // Read back from the guest and verify byte-for-byte equality. + let actual: Vec = sbox + .call("ReadMappedBuffer", (guest_base, payload.len() as u64, true)) + .unwrap(); + assert_eq!(actual, payload); +} + +/// A sandbox restored from a file-loaded snapshot must still be +/// snapshottable, and the new snapshot must save and reload +/// correctly. +#[test] +fn snapshot_after_restore_to_file_loaded_baseline() { + let mut producer = create_test_sandbox(); + let baseline = producer.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let baseline_path = dir.path().join("baseline.hls"); + baseline.to_file(&baseline_path).unwrap(); + + let loaded = Arc::new(Snapshot::from_file(&baseline_path).unwrap()); + let mut sbox = + MultiUseSandbox::from_snapshot(loaded.clone(), HostFunctions::default(), None).unwrap(); + + // Mutate, restore to the file baseline, mutate to a new value, + // then snapshot the post-restore sandbox. 
+ sbox.call::("AddToStatic", 7i32).unwrap(); + sbox.restore(loaded).unwrap(); + assert_eq!(sbox.call::("GetStatic", ()).unwrap(), 0); + sbox.call::("AddToStatic", 99i32).unwrap(); + + let new_snap = sbox.snapshot().unwrap(); + let new_path = dir.path().join("after_restore.hls"); + new_snap.to_file(&new_path).unwrap(); + + // Load the new snapshot in a fresh sandbox and verify state. + let reloaded = Snapshot::from_file(&new_path).unwrap(); + let mut sbox2 = + MultiUseSandbox::from_snapshot(Arc::new(reloaded), HostFunctions::default(), None).unwrap(); + assert_eq!(sbox2.call::("GetStatic", ()).unwrap(), 99); +} + +/// `from_file` on an empty file must return an error rather than +/// panicking. +#[test] +fn from_file_empty_file_returns_error() { + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("empty.hls"); + std::fs::File::create(&snap_path).unwrap(); + let err = match Snapshot::from_file(&snap_path) { + Err(e) => e, + Ok(_) => panic!("expected error from zero-byte file"), + }; + let msg = format!("{}", err); + assert!( + msg.contains("snapshot read error") || msg.contains("truncated"), + "expected truncation/read error, got: {}", + msg + ); +} + +/// `to_file` to a path inside a non-existent directory must return +/// an I/O error rather than panicking. +#[test] +fn to_file_nonexistent_directory_returns_error() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("does_not_exist").join("snap.hls"); + let err = match snapshot.to_file(&snap_path) { + Err(e) => e, + Ok(_) => panic!("expected error writing to nonexistent directory"), + }; + let msg = format!("{}", err); + assert!( + msg.contains("failed to create snapshot file"), + "expected create-failure message, got: {}", + msg + ); +} + +/// Restore is currently rejected when the target snapshot was +/// loaded from a different file than the sandbox was built from. 
+/// `Snapshot::sandbox_id` is a process-local atomic counter assigned +/// fresh on every `from_file`, so the ids never match. Documented as +/// a known limitation in `MultiUseSandbox::from_snapshot` and tracked +/// by the `TODO` to replace ids with a `SandboxMemoryLayout`-equality +/// check. +#[test] +fn restore_to_different_file_loaded_snapshot_rejected() { + let mut producer = create_test_sandbox(); + + let dir = tempfile::tempdir().unwrap(); + + // Snapshot A: zero state. + let snap_a_path = dir.path().join("a.hls"); + producer.snapshot().unwrap().to_file(&snap_a_path).unwrap(); + + // Snapshot B: state with AddToStatic(50). + producer.call::("AddToStatic", 50i32).unwrap(); + let snap_b_path = dir.path().join("b.hls"); + producer.snapshot().unwrap().to_file(&snap_b_path).unwrap(); + + let loaded_a = Arc::new(Snapshot::from_file(&snap_a_path).unwrap()); + let mut sbox = + MultiUseSandbox::from_snapshot(loaded_a, HostFunctions::default(), None).unwrap(); + + let loaded_b = Arc::new(Snapshot::from_file(&snap_b_path).unwrap()); + let err = match sbox.restore(loaded_b) { + Err(e) => e, + Ok(_) => panic!("expected SnapshotSandboxMismatch from cross-file restore"), + }; + let msg = format!("{:?}", err); + assert!( + msg.contains("SnapshotSandboxMismatch"), + "expected SnapshotSandboxMismatch, got: {}", + msg + ); +} + +/// Two independent `Snapshot::from_file` calls of the same path +/// must each yield a usable snapshot. Sandboxes built from each +/// must work independently and produce isolated CoW state. 
+#[test] +fn multiple_from_file_calls_of_same_path() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("multi_load.hls"); + snapshot.to_file(&snap_path).unwrap(); + + let loaded_a = Arc::new(Snapshot::from_file(&snap_path).unwrap()); + let loaded_b = Arc::new(Snapshot::from_file(&snap_path).unwrap()); + + let mut sbox_a = + MultiUseSandbox::from_snapshot(loaded_a, HostFunctions::default(), None).unwrap(); + let mut sbox_b = + MultiUseSandbox::from_snapshot(loaded_b, HostFunctions::default(), None).unwrap(); + + sbox_a.call::("AddToStatic", 11i32).unwrap(); + sbox_b.call::("AddToStatic", 22i32).unwrap(); + + assert_eq!(sbox_a.call::("GetStatic", ()).unwrap(), 11); + assert_eq!(sbox_b.call::("GetStatic", ()).unwrap(), 22); +} + +/// Loading a file via `Snapshot::from_file` after the file has been +/// rewritten with a different snapshot must observe the new contents. +/// Documents the load-once / no-cache semantic. +#[test] +fn from_file_after_overwrite_observes_new_contents() { + let mut sbox = create_test_sandbox(); + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("evolving.hls"); + + // Write a snapshot at state X. + sbox.snapshot().unwrap().to_file(&snap_path).unwrap(); + // Load and immediately drop. On Windows, an overwriting `to_file` + // call on a path with an active mapped view fails with + // `ERROR_USER_MAPPED_FILE` (1224), so the loaded snapshot must be + // released before re-writing the same path. + { + let _loaded_x = Snapshot::from_file(&snap_path).unwrap(); + } + + // Mutate and overwrite with a snapshot at state Y. + sbox.call::("AddToStatic", 55i32).unwrap(); + sbox.snapshot().unwrap().to_file(&snap_path).unwrap(); + + // A subsequent `from_file` of the same path must reflect Y. 
+ let loaded_y = Snapshot::from_file(&snap_path).unwrap(); + let mut sbox_y = + MultiUseSandbox::from_snapshot(Arc::new(loaded_y), HostFunctions::default(), None).unwrap(); + assert_eq!(sbox_y.call::("GetStatic", ()).unwrap(), 55); +} From dcd58a674bb2ad289bc48d3217852df020396fd1 Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Thu, 30 Apr 2026 18:54:14 -0700 Subject: [PATCH 13/15] Add snapshot file benchmarks Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- src/hyperlight_host/benches/benchmarks.rs | 100 +++++++++++++++++++++- 1 file changed, 99 insertions(+), 1 deletion(-) diff --git a/src/hyperlight_host/benches/benchmarks.rs b/src/hyperlight_host/benches/benchmarks.rs index 462e8908d..e35d2c175 100644 --- a/src/hyperlight_host/benches/benchmarks.rs +++ b/src/hyperlight_host/benches/benchmarks.rs @@ -551,6 +551,103 @@ fn shared_memory_benchmark(c: &mut Criterion) { group.finish(); } +// ============================================================================ +// Benchmark Category: Snapshot Files +// ============================================================================ + +fn snapshot_file_benchmark(c: &mut Criterion) { + use hyperlight_host::HostFunctions; + use hyperlight_host::sandbox::snapshot::Snapshot; + + let mut group = c.benchmark_group("snapshot_files"); + + // Pre-create snapshot files for all sizes + let dirs: Vec<_> = SandboxSize::all() + .iter() + .map(|size| { + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join(format!("{}.hls", size.name())); + let snapshot = { + let mut sbox = create_multiuse_sandbox_with_size(*size); + sbox.snapshot().unwrap() + }; + snapshot.to_file(&snap_path).unwrap(); + (dir, snapshot) + }) + .collect(); + + // Benchmark: save_snapshot + for (i, size) in SandboxSize::all().iter().enumerate() { + let snap_dir = tempfile::tempdir().unwrap(); + let path = snap_dir.path().join("bench.hls"); + let snapshot = 
&dirs[i].1; + group.bench_function(format!("save_snapshot/{}", size.name()), |b| { + b.iter(|| { + snapshot.to_file(&path).unwrap(); + }); + }); + } + + // Benchmark: load_snapshot (mmap + header parse + hash verify) + for (i, size) in SandboxSize::all().iter().enumerate() { + let snap_path = dirs[i].0.path().join(format!("{}.hls", size.name())); + group.bench_function(format!("load_snapshot/{}", size.name()), |b| { + b.iter(|| { + let _ = Snapshot::from_file(&snap_path).unwrap(); + }); + }); + } + + // Benchmark: cold_start_via_evolve (new + evolve + call) + for size in SandboxSize::all() { + group.bench_function(format!("cold_start_via_evolve/{}", size.name()), |b| { + b.iter(|| { + let mut sbox = create_multiuse_sandbox_with_size(size); + sbox.call::<String>("Echo", "hello\n".to_string()).unwrap(); + }); + }); + } + + // Benchmark: cold_start_via_snapshot (load + from_snapshot + call) + for (i, size) in SandboxSize::all().iter().enumerate() { + let snap_path = dirs[i].0.path().join(format!("{}.hls", size.name())); + group.bench_function(format!("cold_start_via_snapshot/{}", size.name()), |b| { + b.iter(|| { + let loaded = Snapshot::from_file(&snap_path).unwrap(); + let mut sbox = MultiUseSandbox::from_snapshot( + std::sync::Arc::new(loaded), + HostFunctions::default(), + None, + ) + .unwrap(); + sbox.call::<String>("Echo", "hello\n".to_string()).unwrap(); + }); + }); + } + + // Benchmark: cold_start_via_snapshot_unchecked (no hash verify) + for (i, size) in SandboxSize::all().iter().enumerate() { + let snap_path = dirs[i].0.path().join(format!("{}.hls", size.name())); + group.bench_function( + format!("cold_start_via_snapshot_unchecked/{}", size.name()), + |b| { + b.iter(|| { + let loaded = Snapshot::from_file_unchecked(&snap_path).unwrap(); + let mut sbox = MultiUseSandbox::from_snapshot( + std::sync::Arc::new(loaded), + HostFunctions::default(), + None, + ) + .unwrap(); + sbox.call::<String>("Echo", "hello\n".to_string()).unwrap(); + }); + }, + ); + } + + group.finish(); +} + 
criterion_group! { name = benches; config = Criterion::default(); @@ -561,6 +658,7 @@ criterion_group! { guest_call_benchmark_large_param, function_call_serialization_benchmark, sample_workloads_benchmark, - shared_memory_benchmark + shared_memory_benchmark, + snapshot_file_benchmark } criterion_main!(benches); From a8be758e3b62f48921f432afab41f946f4e134ad Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Fri, 1 May 2026 13:26:42 -0700 Subject: [PATCH 14/15] Add gdb test for MultiUseSandbox from_snapshot Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- .../examples/guest-debugging/main.rs | 193 ++++++++++++++---- 1 file changed, 148 insertions(+), 45 deletions(-) diff --git a/src/hyperlight_host/examples/guest-debugging/main.rs b/src/hyperlight_host/examples/guest-debugging/main.rs index 3555eaeff..331b226a6 100644 --- a/src/hyperlight_host/examples/guest-debugging/main.rs +++ b/src/hyperlight_host/examples/guest-debugging/main.rs @@ -115,6 +115,67 @@ mod tests { #[cfg(windows)] const GDB_COMMAND: &str = "gdb"; + /// Construct the (out_file_path, cmd_file_path, manifest_dir) + /// triple every gdb test needs. + fn gdb_test_paths(name: &str) -> (String, String, String) { + let out_dir = std::env::var("OUT_DIR").expect("Failed to get out dir"); + let manifest_dir = std::env::var("CARGO_MANIFEST_DIR") + .expect("Failed to get manifest dir") + .replace('\\', "/"); + let out_file_path = format!("{out_dir}/{name}.output"); + let cmd_file_path = format!("{out_dir}/{name}-commands.txt"); + (out_file_path, cmd_file_path, manifest_dir) + } + + /// Build a gdb script that connects to `port`, sets a single + /// breakpoint at `breakpoint`, prints `echo_msg` when hit, and + /// continues to completion. 
+ fn single_breakpoint_script( + manifest_dir: &str, + port: u16, + out_file_path: &str, + breakpoint: &str, + echo_msg: &str, + ) -> String { + let cmd = format!( + "file {manifest_dir}/../tests/rust_guests/bin/debug/simpleguest + target remote :{port} + + set pagination off + set logging file {out_file_path} + set logging enabled on + + break {breakpoint} + commands + echo \"{echo_msg}\\n\" + backtrace + + continue + end + + continue + + set logging enabled off + quit + " + ); + #[cfg(windows)] + let cmd = format!("set osabi none\n{cmd}"); + cmd + } + + /// Spawn the gdb client to execute the script in `cmd_file_path`. + fn spawn_gdb_client(cmd_file_path: &str) -> std::process::Child { + Command::new(GDB_COMMAND) + .arg("-nx") + .arg("--nw") + .arg("--batch") + .arg("-x") + .arg(cmd_file_path) + .spawn() + .expect("Failed to start gdb") + } + fn write_cmds_file(cmd_file_path: &str, cmd: &str) -> io::Result<()> { let file = File::create(cmd_file_path)?; let mut writer = BufWriter::new(file); @@ -163,14 +224,7 @@ mod tests { // wait 3 seconds for the gdb to connect thread::sleep(Duration::from_secs(3)); - let mut gdb = Command::new(GDB_COMMAND) - .arg("-nx") // Don't load any .gdbinit files - .arg("--nw") - .arg("--batch") - .arg("-x") - .arg(cmd_file_path) - .spawn() - .map_err(|e| new_error!("Failed to start gdb process: {}", e))?; + let mut gdb = spawn_gdb_client(cmd_file_path); // wait 3 seconds for the gdb to connect thread::sleep(Duration::from_secs(10)); @@ -245,39 +299,16 @@ mod tests { #[test] #[serial] fn test_gdb_end_to_end() { - let out_dir = std::env::var("OUT_DIR").expect("Failed to get out dir"); - let manifest_dir = std::env::var("CARGO_MANIFEST_DIR") - .expect("Failed to get manifest dir") - .replace('\\', "/"); - let out_file_path = format!("{out_dir}/gdb.output"); - let cmd_file_path = format!("{out_dir}/gdb-commands.txt"); - - let cmd = format!( - "file {manifest_dir}/../tests/rust_guests/bin/debug/simpleguest - target remote :8080 - - set 
pagination off - set logging file {out_file_path} - set logging enabled on - - break hyperlight_main - commands - echo \"Stopped at hyperlight_main breakpoint\\n\" - backtrace - - continue - end - - continue - - set logging enabled off - quit - " + let (out_file_path, cmd_file_path, manifest_dir) = gdb_test_paths("gdb"); + + let cmd = single_breakpoint_script( + &manifest_dir, + 8080, + &out_file_path, + "hyperlight_main", + "Stopped at hyperlight_main breakpoint", ); - #[cfg(windows)] - let cmd = format!("set osabi none\n{}", cmd); - let checker = |contents: String| contents.contains("Stopped at hyperlight_main breakpoint"); let result = run_guest_and_gdb(&cmd_file_path, &out_file_path, &cmd, checker); @@ -289,13 +320,8 @@ mod tests { #[test] #[serial] fn test_gdb_sse_check() { - let out_dir = std::env::var("OUT_DIR").expect("Failed to get out dir"); - let manifest_dir = std::env::var("CARGO_MANIFEST_DIR") - .expect("Failed to get manifest dir") - .replace('\\', "/"); + let (out_file_path, cmd_file_path, manifest_dir) = gdb_test_paths("gdb-sse"); println!("manifest dir {manifest_dir}"); - let out_file_path = format!("{out_dir}/gdb-sse.output"); - let cmd_file_path = format!("{out_dir}/gdb-sse--commands.txt"); let cmd = format!( "file {manifest_dir}/../tests/rust_guests/bin/debug/simpleguest @@ -333,4 +359,81 @@ mod tests { cleanup(&out_file_path, &cmd_file_path); assert!(result.is_ok(), "{}", result.unwrap_err()); } + + #[test] + #[serial] + fn test_gdb_from_snapshot() { + use std::sync::Arc; + + use hyperlight_host::HostFunctions; + use hyperlight_host::sandbox::snapshot::Snapshot; + + const PORT: u16 = 8081; + + let (out_file_path, cmd_file_path, manifest_dir) = gdb_test_paths("gdb-from-snapshot"); + let out_dir = std::env::var("OUT_DIR").unwrap(); + let snap_path = format!("{out_dir}/from-snapshot-debug.hls"); + + // Build a sandbox the normal way and persist its snapshot. 
+ let mut producer: MultiUseSandbox = UninitializedSandbox::new( + hyperlight_host::GuestBinary::FilePath( + hyperlight_testing::simple_guest_as_string().unwrap(), + ), + None, + ) + .unwrap() + .evolve() + .unwrap(); + producer.snapshot().unwrap().to_file(&snap_path).unwrap(); + + // Order matters. The gdb stub event loop must enter (i.e. + // `VcpuStopped` must be sent on the channel) before the gdb + // client connects, otherwise the wire protocol desyncs. The + // evolve case gets this for free because `evolve()` runs + // `vm.initialise()` which trips the entry breakpoint + // immediately. For a `Call` snapshot `vm.initialise` is a + // no-op, so we trigger the breakpoint by running `sbox.call` + // here before the client is launched below. + let snap_path_thread = snap_path.clone(); + let sandbox_thread = thread::spawn(move || -> Result<()> { + let mut cfg = SandboxConfiguration::default(); + cfg.set_guest_debug_info(DebugInfo { port: PORT }); + + let loaded = Arc::new(Snapshot::from_file(&snap_path_thread)?); + let mut sbox = + MultiUseSandbox::from_snapshot(loaded, HostFunctions::default(), Some(cfg))?; + sbox.call::<i32>( + "PrintOutput", + "Hello from a from_snapshot sandbox\n".to_string(), + )?; + Ok(()) + }); + + // Wait for the sandbox thread to bind the listener, install + // the one-shot breakpoint, and trip it. 
+ thread::sleep(Duration::from_secs(3)); + + let cmd = single_breakpoint_script( + &manifest_dir, + PORT, + &out_file_path, + "main.rs:simpleguest::print_output", + "Stopped at print_output breakpoint", + ); + write_cmds_file(&cmd_file_path, &cmd).expect("Failed to write gdb commands"); + + let mut gdb = spawn_gdb_client(&cmd_file_path); + let _ = gdb.wait(); + let sandbox_result = sandbox_thread + .join() + .expect("from_snapshot sandbox thread panicked"); + let _ = std::fs::remove_file(&snap_path); + + let checker = |contents: String| contents.contains("Stopped at print_output breakpoint"); + let result = check_output(&out_file_path, checker); + + cleanup(&out_file_path, &cmd_file_path); + sandbox_result.expect("from_snapshot sandbox returned error"); + result.expect("gdb output missing expected breakpoint hit"); + } } From 941aa33d778c39c042e14a8330c937e4c9326c20 Mon Sep 17 00:00:00 2001 From: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> Date: Fri, 1 May 2026 22:58:33 -0700 Subject: [PATCH 15/15] Fix flaky gdb tests by detaching from inside the bp commands Replace the racy 'inner continue, outer continue, quit' pattern with 'detach, quit' inside the breakpoint commands. After the previous inner continue, the inferior could exit and the gdb stub could close the remote before gdb dispatched the outer continue, producing 'Remote connection closed' and a non-zero exit. The new shape lets the host run the guest call to completion on its own after detach, with no pending remote work in gdb. 
Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com> --- .../examples/guest-debugging/main.rs | 27 ++++++++++++------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/src/hyperlight_host/examples/guest-debugging/main.rs b/src/hyperlight_host/examples/guest-debugging/main.rs index 331b226a6..af1ec0548 100644 --- a/src/hyperlight_host/examples/guest-debugging/main.rs +++ b/src/hyperlight_host/examples/guest-debugging/main.rs @@ -129,7 +129,17 @@ mod tests { /// Build a gdb script that connects to `port`, sets a single /// breakpoint at `breakpoint`, prints `echo_msg` when hit, and - /// continues to completion. + /// detaches before quitting. + /// + /// The breakpoint commands end with `detach` + `quit` instead of + /// `continue`. The previous "inner continue, outer continue, quit" + /// shape races with the inferior exit. After the breakpoint hits + /// and the inner `continue` resumes the guest, the guest may run + /// to completion and the gdb stub may close the remote before gdb + /// has dispatched the outer `continue`, producing a non-zero exit + /// with `Remote connection closed`. Detaching from the breakpoint + /// commands removes that window. The host process keeps running + /// the guest call to completion on its own after detach. fn single_breakpoint_script( manifest_dir: &str, port: u16, @@ -150,13 +160,12 @@ mod tests { echo \"{echo_msg}\\n\" backtrace - continue + set logging enabled off + detach + quit end continue - - set logging enabled off - quit " ); #[cfg(windows)] @@ -337,16 +346,14 @@ mod tests { break +2 commands 2 print $xmm1.v4_float - continue + set logging enabled off + detach + quit end continue end - continue - - set logging enabled off - quit " );