diff --git a/Cargo.lock b/Cargo.lock index 4b5b9a13a..cd8a9c387 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -328,6 +328,26 @@ version = "3.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" +[[package]] +name = "bytemuck" +version = "1.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" +dependencies = [ + "bytemuck_derive", +] + +[[package]] +name = "bytemuck_derive" +version = "1.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9abbd1bc6865053c427f7198e6af43bfdedc55ab791faed4fbd361d789575ff" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "bytes" version = "1.11.1" @@ -1528,6 +1548,7 @@ dependencies = [ "bitflags 2.11.1", "blake3", "built", + "bytemuck", "cfg-if", "cfg_aliases", "chrono", diff --git a/src/hyperlight_common/src/mem.rs b/src/hyperlight_common/src/mem.rs index fb850acc8..fea8413bd 100644 --- a/src/hyperlight_common/src/mem.rs +++ b/src/hyperlight_common/src/mem.rs @@ -28,6 +28,23 @@ pub struct GuestMemoryRegion { pub ptr: u64, } +impl GuestMemoryRegion { + /// Size of a serialized `GuestMemoryRegion` in bytes. + pub const SERIALIZED_SIZE: usize = core::mem::size_of::(); + + /// Write this region's fields in native-endian byte order to `buf`. + /// Returns `Ok(())` on success, or `Err` if `buf` is too small. + pub fn write_to(&self, buf: &mut [u8]) -> Result<(), &'static str> { + if buf.len() < Self::SERIALIZED_SIZE { + return Err("buffer too small for GuestMemoryRegion"); + } + let s = core::mem::size_of::(); + buf[..s].copy_from_slice(&self.size.to_ne_bytes()); + buf[s..s * 2].copy_from_slice(&self.ptr.to_ne_bytes()); + Ok(()) + } +} + /// Maximum length of a file mapping label (excluding null terminator). 
pub const FILE_MAPPING_LABEL_MAX_LEN: usize = 63; @@ -80,3 +97,28 @@ pub struct HyperlightPEB { #[cfg(feature = "nanvix-unstable")] pub file_mappings: GuestMemoryRegion, } + +impl HyperlightPEB { + /// Write the PEB fields in native-endian byte order to `buf`. + /// The buffer must be at least `size_of::()` bytes. + /// Returns `Err` if the buffer is too small. + pub fn write_to(&self, buf: &mut [u8]) -> Result<(), &'static str> { + if buf.len() < core::mem::size_of::() { + return Err("buffer too small for HyperlightPEB"); + } + let regions = [ + &self.input_stack, + &self.output_stack, + &self.init_data, + &self.guest_heap, + #[cfg(feature = "nanvix-unstable")] + &self.file_mappings, + ]; + let mut offset = 0; + for region in regions { + region.write_to(&mut buf[offset..])?; + offset += GuestMemoryRegion::SERIALIZED_SIZE; + } + Ok(()) + } +} diff --git a/src/hyperlight_host/Cargo.toml b/src/hyperlight_host/Cargo.toml index abc24fab8..e8343a7ef 100644 --- a/src/hyperlight_host/Cargo.toml +++ b/src/hyperlight_host/Cargo.toml @@ -51,6 +51,7 @@ metrics = "0.24.3" serde_json = "1.0" elfcore = { version = "2.0", optional = true } uuid = { version = "1.23.1", features = ["v4"] } +bytemuck = { version = "1.16", features = ["derive"] } [target.'cfg(windows)'.dependencies] windows = { version = "0.62", features = [ diff --git a/src/hyperlight_host/benches/benchmarks.rs b/src/hyperlight_host/benches/benchmarks.rs index 462e8908d..e35d2c175 100644 --- a/src/hyperlight_host/benches/benchmarks.rs +++ b/src/hyperlight_host/benches/benchmarks.rs @@ -551,6 +551,103 @@ fn shared_memory_benchmark(c: &mut Criterion) { group.finish(); } +// ============================================================================ +// Benchmark Category: Snapshot Files +// ============================================================================ + +fn snapshot_file_benchmark(c: &mut Criterion) { + use hyperlight_host::HostFunctions; + use hyperlight_host::sandbox::snapshot::Snapshot; + + let mut 
group = c.benchmark_group("snapshot_files"); + + // Pre-create snapshot files for all sizes + let dirs: Vec<_> = SandboxSize::all() + .iter() + .map(|size| { + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join(format!("{}.hls", size.name())); + let snapshot = { + let mut sbox = create_multiuse_sandbox_with_size(*size); + sbox.snapshot().unwrap() + }; + snapshot.to_file(&snap_path).unwrap(); + (dir, snapshot) + }) + .collect(); + + // Benchmark: save_snapshot + for (i, size) in SandboxSize::all().iter().enumerate() { + let snap_dir = tempfile::tempdir().unwrap(); + let path = snap_dir.path().join("bench.hls"); + let snapshot = &dirs[i].1; + group.bench_function(format!("save_snapshot/{}", size.name()), |b| { + b.iter(|| { + snapshot.to_file(&path).unwrap(); + }); + }); + } + + // Benchmark: load_snapshot (mmap + header parse + hash verify) + for (i, size) in SandboxSize::all().iter().enumerate() { + let snap_path = dirs[i].0.path().join(format!("{}.hls", size.name())); + group.bench_function(format!("load_snapshot/{}", size.name()), |b| { + b.iter(|| { + let _ = Snapshot::from_file(&snap_path).unwrap(); + }); + }); + } + + // Benchmark: cold_start_via_evolve (new + evolve + call) + for size in SandboxSize::all() { + group.bench_function(format!("cold_start_via_evolve/{}", size.name()), |b| { + b.iter(|| { + let mut sbox = create_multiuse_sandbox_with_size(size); + sbox.call::("Echo", "hello\n".to_string()).unwrap(); + }); + }); + } + + // Benchmark: cold_start_via_snapshot (load + from_snapshot + call) + for (i, size) in SandboxSize::all().iter().enumerate() { + let snap_path = dirs[i].0.path().join(format!("{}.hls", size.name())); + group.bench_function(format!("cold_start_via_snapshot/{}", size.name()), |b| { + b.iter(|| { + let loaded = Snapshot::from_file(&snap_path).unwrap(); + let mut sbox = MultiUseSandbox::from_snapshot( + std::sync::Arc::new(loaded), + HostFunctions::default(), + None, + ) + .unwrap(); + sbox.call::("Echo", 
"hello\n".to_string()).unwrap(); + }); + }); + } + + // Benchmark: cold_start_via_snapshot_unchecked (no hash verify) + for (i, size) in SandboxSize::all().iter().enumerate() { + let snap_path = dirs[i].0.path().join(format!("{}.hls", size.name())); + group.bench_function( + format!("cold_start_via_snapshot_unchecked/{}", size.name()), + |b| { + b.iter(|| { + let loaded = Snapshot::from_file_unchecked(&snap_path).unwrap(); + let mut sbox = MultiUseSandbox::from_snapshot( + std::sync::Arc::new(loaded), + HostFunctions::default(), + None, + ) + .unwrap(); + sbox.call::("Echo", "hello\n".to_string()).unwrap(); + }); + }, + ); + } + + group.finish(); +} + criterion_group! { name = benches; config = Criterion::default(); @@ -561,6 +658,7 @@ criterion_group! { guest_call_benchmark_large_param, function_call_serialization_benchmark, sample_workloads_benchmark, - shared_memory_benchmark + shared_memory_benchmark, + snapshot_file_benchmark } criterion_main!(benches); diff --git a/src/hyperlight_host/examples/guest-debugging/main.rs b/src/hyperlight_host/examples/guest-debugging/main.rs index 3555eaeff..af1ec0548 100644 --- a/src/hyperlight_host/examples/guest-debugging/main.rs +++ b/src/hyperlight_host/examples/guest-debugging/main.rs @@ -115,6 +115,76 @@ mod tests { #[cfg(windows)] const GDB_COMMAND: &str = "gdb"; + /// Construct the (out_file_path, cmd_file_path, manifest_dir) + /// triple every gdb test needs. 
+ fn gdb_test_paths(name: &str) -> (String, String, String) { + let out_dir = std::env::var("OUT_DIR").expect("Failed to get out dir"); + let manifest_dir = std::env::var("CARGO_MANIFEST_DIR") + .expect("Failed to get manifest dir") + .replace('\\', "/"); + let out_file_path = format!("{out_dir}/{name}.output"); + let cmd_file_path = format!("{out_dir}/{name}-commands.txt"); + (out_file_path, cmd_file_path, manifest_dir) + } + + /// Build a gdb script that connects to `port`, sets a single + /// breakpoint at `breakpoint`, prints `echo_msg` when hit, and + /// detaches before quitting. + /// + /// The breakpoint commands end with `detach` + `quit` instead of + /// `continue`. The previous "inner continue, outer continue, quit" + /// shape races with the inferior exit. After the breakpoint hits + /// and the inner `continue` resumes the guest, the guest may run + /// to completion and the gdb stub may close the remote before gdb + /// has dispatched the outer `continue`, producing a non-zero exit + /// with `Remote connection closed`. Detaching from the breakpoint + /// commands removes that window. The host process keeps running + /// the guest call to completion on its own after detach. + fn single_breakpoint_script( + manifest_dir: &str, + port: u16, + out_file_path: &str, + breakpoint: &str, + echo_msg: &str, + ) -> String { + let cmd = format!( + "file {manifest_dir}/../tests/rust_guests/bin/debug/simpleguest + target remote :{port} + + set pagination off + set logging file {out_file_path} + set logging enabled on + + break {breakpoint} + commands + echo \"{echo_msg}\\n\" + backtrace + + set logging enabled off + detach + quit + end + + continue + " + ); + #[cfg(windows)] + let cmd = format!("set osabi none\n{cmd}"); + cmd + } + + /// Spawn the gdb client to execute the script in `cmd_file_path`. 
+ fn spawn_gdb_client(cmd_file_path: &str) -> std::process::Child { + Command::new(GDB_COMMAND) + .arg("-nx") + .arg("--nw") + .arg("--batch") + .arg("-x") + .arg(cmd_file_path) + .spawn() + .expect("Failed to start gdb") + } + fn write_cmds_file(cmd_file_path: &str, cmd: &str) -> io::Result<()> { let file = File::create(cmd_file_path)?; let mut writer = BufWriter::new(file); @@ -163,14 +233,7 @@ mod tests { // wait 3 seconds for the gdb to connect thread::sleep(Duration::from_secs(3)); - let mut gdb = Command::new(GDB_COMMAND) - .arg("-nx") // Don't load any .gdbinit files - .arg("--nw") - .arg("--batch") - .arg("-x") - .arg(cmd_file_path) - .spawn() - .map_err(|e| new_error!("Failed to start gdb process: {}", e))?; + let mut gdb = spawn_gdb_client(cmd_file_path); // wait 3 seconds for the gdb to connect thread::sleep(Duration::from_secs(10)); @@ -245,39 +308,16 @@ mod tests { #[test] #[serial] fn test_gdb_end_to_end() { - let out_dir = std::env::var("OUT_DIR").expect("Failed to get out dir"); - let manifest_dir = std::env::var("CARGO_MANIFEST_DIR") - .expect("Failed to get manifest dir") - .replace('\\', "/"); - let out_file_path = format!("{out_dir}/gdb.output"); - let cmd_file_path = format!("{out_dir}/gdb-commands.txt"); - - let cmd = format!( - "file {manifest_dir}/../tests/rust_guests/bin/debug/simpleguest - target remote :8080 - - set pagination off - set logging file {out_file_path} - set logging enabled on - - break hyperlight_main - commands - echo \"Stopped at hyperlight_main breakpoint\\n\" - backtrace - - continue - end - - continue - - set logging enabled off - quit - " + let (out_file_path, cmd_file_path, manifest_dir) = gdb_test_paths("gdb"); + + let cmd = single_breakpoint_script( + &manifest_dir, + 8080, + &out_file_path, + "hyperlight_main", + "Stopped at hyperlight_main breakpoint", ); - #[cfg(windows)] - let cmd = format!("set osabi none\n{}", cmd); - let checker = |contents: String| contents.contains("Stopped at hyperlight_main breakpoint"); 
let result = run_guest_and_gdb(&cmd_file_path, &out_file_path, &cmd, checker); @@ -289,13 +329,8 @@ mod tests { #[test] #[serial] fn test_gdb_sse_check() { - let out_dir = std::env::var("OUT_DIR").expect("Failed to get out dir"); - let manifest_dir = std::env::var("CARGO_MANIFEST_DIR") - .expect("Failed to get manifest dir") - .replace('\\', "/"); + let (out_file_path, cmd_file_path, manifest_dir) = gdb_test_paths("gdb-sse"); println!("manifest dir {manifest_dir}"); - let out_file_path = format!("{out_dir}/gdb-sse.output"); - let cmd_file_path = format!("{out_dir}/gdb-sse--commands.txt"); let cmd = format!( "file {manifest_dir}/../tests/rust_guests/bin/debug/simpleguest @@ -311,16 +346,14 @@ mod tests { break +2 commands 2 print $xmm1.v4_float - continue + set logging enabled off + detach + quit end continue end - continue - - set logging enabled off - quit " ); @@ -333,4 +366,81 @@ mod tests { cleanup(&out_file_path, &cmd_file_path); assert!(result.is_ok(), "{}", result.unwrap_err()); } + + #[test] + #[serial] + fn test_gdb_from_snapshot() { + use std::sync::Arc; + + use hyperlight_host::HostFunctions; + use hyperlight_host::sandbox::snapshot::Snapshot; + + const PORT: u16 = 8081; + + let (out_file_path, cmd_file_path, manifest_dir) = gdb_test_paths("gdb-from-snapshot"); + let out_dir = std::env::var("OUT_DIR").unwrap(); + let snap_path = format!("{out_dir}/from-snapshot-debug.hls"); + + // Build a sandbox the normal way and persist its snapshot. + let mut producer: MultiUseSandbox = UninitializedSandbox::new( + hyperlight_host::GuestBinary::FilePath( + hyperlight_testing::simple_guest_as_string().unwrap(), + ), + None, + ) + .unwrap() + .evolve() + .unwrap(); + producer.snapshot().unwrap().to_file(&snap_path).unwrap(); + + // Order matters. The gdb stub event loop must enter (i.e. + // `VcpuStopped` must be sent on the channel) before the gdb + // client connects, otherwise the wire protocol desyncs. 
The + // evolve case gets this for free because `evolve()` runs + // `vm.initialise()` which trips the entry breakpoint + // immediately. For a `Call` snapshot `vm.initialise` is a + // no-op, so we trigger the breakpoint by running `sbox.call` + // here before the client is launched below. + let snap_path_thread = snap_path.clone(); + let sandbox_thread = thread::spawn(move || -> Result<()> { + let mut cfg = SandboxConfiguration::default(); + cfg.set_guest_debug_info(DebugInfo { port: PORT }); + + let loaded = Arc::new(Snapshot::from_file(&snap_path_thread)?); + let mut sbox = + MultiUseSandbox::from_snapshot(loaded, HostFunctions::default(), Some(cfg))?; + sbox.call::( + "PrintOutput", + "Hello from a from_snapshot sandbox\n".to_string(), + )?; + Ok(()) + }); + + // Wait for the sandbox thread to bind the listener, install + // the one-shot breakpoint, and trip it. + thread::sleep(Duration::from_secs(3)); + + let cmd = single_breakpoint_script( + &manifest_dir, + PORT, + &out_file_path, + "main.rs:simpleguest::print_output", + "Stopped at print_output breakpoint", + ); + write_cmds_file(&cmd_file_path, &cmd).expect("Failed to write gdb commands"); + + let mut gdb = spawn_gdb_client(&cmd_file_path); + let _ = gdb.wait(); + let sandbox_result = sandbox_thread + .join() + .expect("from_snapshot sandbox thread panicked"); + let _ = std::fs::remove_file(&snap_path); + + let checker = |contents: String| contents.contains("Stopped at print_output breakpoint"); + let result = check_output(&out_file_path, checker); + + cleanup(&out_file_path, &cmd_file_path); + sandbox_result.expect("from_snapshot sandbox returned error"); + result.expect("gdb output missing expected breakpoint hit"); + } } diff --git a/src/hyperlight_host/src/func/host_functions.rs b/src/hyperlight_host/src/func/host_functions.rs index e87fa70b0..9ccb98f05 100644 --- a/src/hyperlight_host/src/func/host_functions.rs +++ b/src/hyperlight_host/src/func/host_functions.rs @@ -52,7 +52,8 @@ impl Registerable 
for UninitializedSandbox { return_type: Output::TYPE, }; - (*hfs).register_host_function(name.to_string(), entry) + (*hfs).register_host_function(name.to_string(), entry); + Ok(()) } } @@ -92,7 +93,26 @@ impl Registerable for crate::MultiUseSandbox { return_type: Output::TYPE, }; - (*hfs).register_host_function(name.to_string(), entry) + (*hfs).register_host_function(name.to_string(), entry); + Ok(()) + } +} + +impl Registerable for crate::HostFunctions { + fn register_host_function( + &mut self, + name: &str, + hf: impl Into>, + ) -> Result<()> { + let entry = FunctionEntry { + function: hf.into().into(), + parameter_types: Args::TYPE, + return_type: Output::TYPE, + }; + + self.inner_mut() + .register_host_function(name.to_string(), entry); + Ok(()) } } @@ -236,7 +256,7 @@ pub(crate) fn register_host_function std::result::Result { let CommonRegisters { rip, .. } = vm.regs()?; @@ -81,10 +80,6 @@ pub(crate) fn vcpu_stop_reason( // Check page 19-4 Vol. 3B of Intel 64 and IA-32 // Architectures Software Developer's Manual if DR6_HW_BP_FLAGS_MASK & dr6 != 0 { - if rip == entrypoint { - vm.remove_hw_breakpoint(entrypoint)?; - return Ok(VcpuStopReason::EntryPointBp); - } return Ok(VcpuStopReason::HwBp); } } @@ -98,12 +93,10 @@ pub(crate) fn vcpu_stop_reason( r"The vCPU exited because of an unknown reason: rip: {:?} dr6: {:?} - entrypoint: {:?} exception: {:?} ", rip, dr6, - entrypoint, exception, ); diff --git a/src/hyperlight_host/src/hypervisor/gdb/event_loop.rs b/src/hyperlight_host/src/hypervisor/gdb/event_loop.rs index bc7c9fd14..5edd81b50 100644 --- a/src/hyperlight_host/src/hypervisor/gdb/event_loop.rs +++ b/src/hyperlight_host/src/hypervisor/gdb/event_loop.rs @@ -59,7 +59,6 @@ impl run_blocking::BlockingEventLoop for GdbBlockingEventLoop { // Resume execution if unknown reason for stop let stop_response = match stop_reason { VcpuStopReason::DoneStep => BaseStopReason::DoneStep, - VcpuStopReason::EntryPointBp => BaseStopReason::HwBreak(()), VcpuStopReason::SwBp => 
BaseStopReason::SwBreak(()), VcpuStopReason::HwBp => BaseStopReason::HwBreak(()), // This is a consequence of the GDB client sending an interrupt signal diff --git a/src/hyperlight_host/src/hypervisor/gdb/mod.rs b/src/hyperlight_host/src/hypervisor/gdb/mod.rs index 94396e5ae..0a0685f71 100644 --- a/src/hyperlight_host/src/hypervisor/gdb/mod.rs +++ b/src/hyperlight_host/src/hypervisor/gdb/mod.rs @@ -171,10 +171,6 @@ impl DebugMemoryAccess { pub enum VcpuStopReason { Crash, DoneStep, - /// Hardware breakpoint inserted by the hypervisor so the guest can be stopped - /// at the entry point. This is used to avoid the guest from executing - /// the entry point code before the debugger is connected - EntryPointBp, HwBp, SwBp, Interrupt, diff --git a/src/hyperlight_host/src/hypervisor/hyperlight_vm/mod.rs b/src/hyperlight_host/src/hypervisor/hyperlight_vm/mod.rs index 830b856c0..221552230 100644 --- a/src/hyperlight_host/src/hypervisor/hyperlight_vm/mod.rs +++ b/src/hyperlight_host/src/hypervisor/hyperlight_vm/mod.rs @@ -389,6 +389,12 @@ pub(crate) struct HyperlightVm { pub(super) gdb_conn: Option>, #[cfg(gdb)] pub(super) sw_breakpoints: HashMap, // addr -> original instruction + /// One-shot hw breakpoint installed at the entry address when gdb is + /// enabled, so the gdb stub gets a `VcpuStopped` to enter its event + /// loop on the first vCPU run after construction. Cleared on first + /// hit by `handle_debug`. + #[cfg(gdb)] + pub(super) one_shot_entry_bp: Option, #[cfg(feature = "mem_profile")] pub(super) trace_info: MemTraceInfo, #[cfg(crashdump)] @@ -598,17 +604,28 @@ impl HyperlightVm { match exit_reason { #[cfg(gdb)] Ok(VmExit::Debug { dr6, exception }) => { - let initialise = match self.entrypoint { - NextAction::Initialise(initialise) => initialise, - _ => 0, - }; - // Handle debug event (breakpoints) + // Classify the debug exit. `vcpu_stop_reason` is a + // pure classifier and has no side effects on the VM. 
let stop_reason = crate::hypervisor::gdb::arch::vcpu_stop_reason( self.vm.as_mut(), dr6, - initialise, exception, )?; + // Remove the one-shot entry breakpoint installed by + // `HyperlightVm::new` the first time it fires so it + // does not interfere with later user-installed + // breakpoints at the same address. + if matches!(stop_reason, VcpuStopReason::HwBp) + && let Some(entry_addr) = self.one_shot_entry_bp + { + let rip = self.vm.regs().map_err(VcpuStopReasonError::GetRegs)?.rip; + if rip == entry_addr { + self.vm + .remove_hw_breakpoint(entry_addr) + .map_err(VcpuStopReasonError::RemoveHwBreakpoint)?; + self.one_shot_entry_bp = None; + } + } if let Err(e) = self.handle_debug(dbg_mem_access_fn.clone(), stop_reason) { break Err(e.into()); } diff --git a/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs b/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs index f06c94964..6feff13f1 100644 --- a/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs +++ b/src/hyperlight_host/src/hypervisor/hyperlight_vm/x86_64.rs @@ -168,6 +168,8 @@ impl HyperlightVm { gdb_conn, #[cfg(gdb)] sw_breakpoints: HashMap::new(), + #[cfg(gdb)] + one_shot_entry_bp: None, #[cfg(feature = "mem_profile")] trace_info, #[cfg(crashdump)] @@ -182,12 +184,21 @@ impl HyperlightVm { #[cfg(gdb)] if ret.gdb_conn.is_some() { ret.send_dbg_msg(DebugResponse::InterruptHandle(ret.interrupt_handle.clone()))?; - // Add breakpoint to the entry point address, if we are going to initialise + // Add breakpoint at the entry point address. The breakpoint + // is removed on first hit by the run loop. Tracked via + // `one_shot_entry_bp` so it does not interfere with later + // user-installed breakpoints at the same address. 
ret.vm.set_debug(true).map_err(VmError::Debug)?; - if let NextAction::Initialise(initialise) = entrypoint { + let entry_addr = match entrypoint { + NextAction::Initialise(addr) | NextAction::Call(addr) => Some(addr), + #[cfg(test)] + NextAction::None => None, + }; + if let Some(addr) = entry_addr { ret.vm - .add_hw_breakpoint(initialise) + .add_hw_breakpoint(addr) .map_err(CreateHyperlightVmError::AddHwBreakpoint)?; + ret.one_shot_entry_bp = Some(addr); } } @@ -346,6 +357,15 @@ impl HyperlightVm { self.vm.set_debug_regs(&CommonDebugRegs::default())?; self.vm.reset_xsave()?; + self.apply_sregs(cr3, sregs) + } + + /// Apply special registers and mark TLB for flush. + pub(crate) fn apply_sregs( + &mut self, + cr3: u64, + sregs: &CommonSpecialRegisters, + ) -> std::result::Result<(), RegisterError> { // Restore the full special registers from snapshot, but update CR3 // to point to the new (relocated) page tables let mut sregs = *sregs; @@ -1489,7 +1509,7 @@ mod tests { let (mut hshm, gshm) = mem_mgr.build().unwrap(); - let peb_address = gshm.layout.peb_address; + let peb_address = gshm.layout.peb_address(); let stack_top_gva = hyperlight_common::layout::MAX_GVA as u64 - hyperlight_common::layout::SCRATCH_TOP_EXN_STACK_OFFSET + 1; diff --git a/src/hyperlight_host/src/lib.rs b/src/hyperlight_host/src/lib.rs index b21f413e3..c1d51cd5b 100644 --- a/src/hyperlight_host/src/lib.rs +++ b/src/hyperlight_host/src/lib.rs @@ -91,6 +91,9 @@ pub use hypervisor::virtual_machine::is_hypervisor_present; pub use sandbox::MultiUseSandbox; /// The re-export for the `UninitializedSandbox` type pub use sandbox::UninitializedSandbox; +/// A collection of host functions that can be supplied to a sandbox +/// constructor (e.g. [`MultiUseSandbox::from_snapshot`]). 
+pub use sandbox::host_funcs::HostFunctions; /// The re-export for the `GuestBinary` type pub use sandbox::uninitialized::GuestBinary; /// The re-export for the `GuestCounter` type diff --git a/src/hyperlight_host/src/mem/layout.rs b/src/hyperlight_host/src/mem/layout.rs index 26615d579..a5bca8727 100644 --- a/src/hyperlight_host/src/mem/layout.rs +++ b/src/hyperlight_host/src/mem/layout.rs @@ -60,8 +60,9 @@ limitations under the License. //! | Input Data | //! +-------------------------------------------+ (scratch size) -use std::fmt::Debug; -use std::mem::{offset_of, size_of}; +#[cfg(feature = "nanvix-unstable")] +use std::mem::offset_of; +use std::mem::size_of; use hyperlight_common::mem::{HyperlightPEB, PAGE_SIZE_USIZE}; use tracing::{Span, instrument}; @@ -213,100 +214,27 @@ impl ResolvedGpa { } } -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Debug)] pub(crate) struct SandboxMemoryLayout { - pub(super) sandbox_memory_config: SandboxConfiguration, + /// Input data buffer size (from SandboxConfiguration). + pub(crate) input_data_size: usize, + /// Output data buffer size (from SandboxConfiguration). + pub(crate) output_data_size: usize, /// The heap size of this sandbox. - pub(super) heap_size: usize, - init_data_size: usize, - - /// The following fields are offsets to the actual PEB struct fields. - /// They are used when writing the PEB struct itself - peb_offset: usize, - peb_input_data_offset: usize, - peb_output_data_offset: usize, - peb_init_data_offset: usize, - peb_heap_data_offset: usize, - #[cfg(feature = "nanvix-unstable")] - peb_file_mappings_offset: usize, - - guest_heap_buffer_offset: usize, - init_data_offset: usize, - pt_size: Option, - - // other - pub(crate) peb_address: usize, - code_size: usize, - // The offset in the sandbox memory where the code starts - guest_code_offset: usize, + pub(crate) heap_size: usize, + /// The size of the guest code section. + pub(crate) code_size: usize, + /// The size of the init data section (guest blob). 
+ pub(crate) init_data_size: usize, + /// Permission flags for the init data region. #[cfg_attr(feature = "i686-guest", allow(unused))] pub(crate) init_data_permissions: Option, - - // The size of the scratch region in physical memory; note that - // this will appear under the top of physical memory. - scratch_size: usize, - // The guest-visible size of the snapshot region in physical - // memory. After compaction this may be smaller than the full - // snapshot blob (which also contains a PT tail that is only - // host-accessible). - snapshot_size: usize, -} - -impl Debug for SandboxMemoryLayout { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let mut ff = f.debug_struct("SandboxMemoryLayout"); - ff.field( - "Total Memory Size", - &format_args!("{:#x}", self.get_memory_size().unwrap_or(0)), - ) - .field("Heap Size", &format_args!("{:#x}", self.heap_size)) - .field( - "Init Data Size", - &format_args!("{:#x}", self.init_data_size), - ) - .field("PEB Address", &format_args!("{:#x}", self.peb_address)) - .field("PEB Offset", &format_args!("{:#x}", self.peb_offset)) - .field("Code Size", &format_args!("{:#x}", self.code_size)) - .field( - "Input Data Offset", - &format_args!("{:#x}", self.peb_input_data_offset), - ) - .field( - "Output Data Offset", - &format_args!("{:#x}", self.peb_output_data_offset), - ) - .field( - "Init Data Offset", - &format_args!("{:#x}", self.peb_init_data_offset), - ) - .field( - "Guest Heap Offset", - &format_args!("{:#x}", self.peb_heap_data_offset), - ); - #[cfg(feature = "nanvix-unstable")] - ff.field( - "File Mappings Offset", - &format_args!("{:#x}", self.peb_file_mappings_offset), - ); - ff.field( - "Guest Heap Buffer Offset", - &format_args!("{:#x}", self.guest_heap_buffer_offset), - ) - .field( - "Init Data Offset", - &format_args!("{:#x}", self.init_data_offset), - ) - .field("PT Size", &format_args!("{:#x}", self.pt_size.unwrap_or(0))) - .field( - "Guest Code Offset", - &format_args!("{:#x}", 
self.guest_code_offset), - ) - .field( - "Scratch region size", - &format_args!("{:#x}", self.scratch_size), - ) - .finish() - } + /// The size of the scratch region in physical memory. + pub(crate) scratch_size: usize, + /// The size of the snapshot region in physical memory. + pub(crate) snapshot_size: usize, + /// The size of the page tables (None if not yet set). + pub(crate) pt_size: Option, } impl SandboxMemoryLayout { @@ -337,65 +265,19 @@ impl SandboxMemoryLayout { if scratch_size > Self::MAX_MEMORY_SIZE { return Err(MemoryRequestTooBig(scratch_size, Self::MAX_MEMORY_SIZE)); } - let min_scratch_size = hyperlight_common::layout::min_scratch_size( - cfg.get_input_data_size(), - cfg.get_output_data_size(), - ); + let input_data_size = cfg.get_input_data_size(); + let output_data_size = cfg.get_output_data_size(); + let min_scratch_size = + hyperlight_common::layout::min_scratch_size(input_data_size, output_data_size); if scratch_size < min_scratch_size { return Err(MemoryRequestTooSmall(scratch_size, min_scratch_size)); } - let guest_code_offset = 0; - // The following offsets are to the fields of the PEB struct itself! - let peb_offset = code_size.next_multiple_of(PAGE_SIZE_USIZE); - let peb_input_data_offset = peb_offset + offset_of!(HyperlightPEB, input_stack); - let peb_output_data_offset = peb_offset + offset_of!(HyperlightPEB, output_stack); - let peb_init_data_offset = peb_offset + offset_of!(HyperlightPEB, init_data); - let peb_heap_data_offset = peb_offset + offset_of!(HyperlightPEB, guest_heap); - #[cfg(feature = "nanvix-unstable")] - let peb_file_mappings_offset = peb_offset + offset_of!(HyperlightPEB, file_mappings); - - // The following offsets are the actual values that relate to memory layout, - // which are written to PEB struct - let peb_address = Self::BASE_ADDRESS + peb_offset; - // make sure heap buffer starts at 4K boundary. - // The FileMappingInfo array is stored immediately after the PEB struct. 
- // We statically reserve space for MAX_FILE_MAPPINGS entries so that - // the heap never overlaps the array, even when all slots are used. - // The host writes file mapping metadata here via write_file_mapping_entry; - // the guest only reads the entries. We don't know at layout time how - // many file mappings the host will register, so we reserve space for - // the maximum number. - // The heap starts at the next page boundary after this reserved area. - #[cfg(feature = "nanvix-unstable")] - let file_mappings_array_end = peb_offset - + size_of::() - + hyperlight_common::mem::MAX_FILE_MAPPINGS - * size_of::(); - #[cfg(feature = "nanvix-unstable")] - let guest_heap_buffer_offset = file_mappings_array_end.next_multiple_of(PAGE_SIZE_USIZE); - #[cfg(not(feature = "nanvix-unstable"))] - let guest_heap_buffer_offset = - (peb_offset + size_of::()).next_multiple_of(PAGE_SIZE_USIZE); - - // make sure init data starts at 4K boundary - let init_data_offset = - (guest_heap_buffer_offset + heap_size).next_multiple_of(PAGE_SIZE_USIZE); let mut ret = Self { - peb_offset, + input_data_size, + output_data_size, heap_size, - peb_input_data_offset, - peb_output_data_offset, - peb_init_data_offset, - peb_heap_data_offset, - #[cfg(feature = "nanvix-unstable")] - peb_file_mappings_offset, - sandbox_memory_config: cfg, code_size, - guest_heap_buffer_offset, - peb_address, - guest_code_offset, - init_data_offset, init_data_size, init_data_permissions, pt_size: None, @@ -406,68 +288,64 @@ impl SandboxMemoryLayout { Ok(ret) } - /// Get the offset in guest memory to the output data size - #[instrument(skip_all, parent = Span::current(), level= "Trace")] - pub(super) fn get_output_data_size_offset(&self) -> usize { - // The size field is the first field in the `OutputData` struct - self.peb_output_data_offset + /// Offset of the PEB struct within the snapshot region. 
+ pub(crate) fn peb_offset(&self) -> usize { + self.code_size.next_multiple_of(PAGE_SIZE_USIZE) } - /// Get the offset in guest memory to the init data size - #[instrument(skip_all, parent = Span::current(), level= "Trace")] - pub(super) fn get_init_data_size_offset(&self) -> usize { - // The init data size is the first field in the `GuestMemoryRegion` struct - self.peb_init_data_offset + /// Offset of the PEB file_mappings field. + #[cfg(feature = "nanvix-unstable")] + fn peb_file_mappings_offset(&self) -> usize { + self.peb_offset() + offset_of!(HyperlightPEB, file_mappings) } - #[instrument(skip_all, parent = Span::current(), level= "Trace")] - pub(crate) fn get_scratch_size(&self) -> usize { - self.scratch_size + /// Guest physical address of the PEB. + pub(crate) fn peb_address(&self) -> usize { + Self::BASE_ADDRESS + self.peb_offset() } - /// Get the offset in guest memory to the output data pointer. - #[instrument(skip_all, parent = Span::current(), level= "Trace")] - fn get_output_data_pointer_offset(&self) -> usize { - // This field is immediately after the output data size field, - // which is a `u64`. - self.get_output_data_size_offset() + size_of::() + /// Offset of the guest heap buffer within the snapshot region. + pub(crate) fn guest_heap_buffer_offset(&self) -> usize { + #[cfg(feature = "nanvix-unstable")] + { + let file_mappings_array_end = self.peb_offset() + + size_of::() + + hyperlight_common::mem::MAX_FILE_MAPPINGS + * size_of::(); + file_mappings_array_end.next_multiple_of(PAGE_SIZE_USIZE) + } + #[cfg(not(feature = "nanvix-unstable"))] + { + (self.peb_offset() + size_of::()).next_multiple_of(PAGE_SIZE_USIZE) + } + } + + /// Offset of the init data section within the snapshot region. + pub(crate) fn init_data_offset(&self) -> usize { + (self.guest_heap_buffer_offset() + self.heap_size).next_multiple_of(PAGE_SIZE_USIZE) + } + + /// The code offset is always 0. 
+ pub(crate) fn guest_code_offset(&self) -> usize { + 0 } - /// Get the offset in guest memory to the init data pointer. #[instrument(skip_all, parent = Span::current(), level= "Trace")] - pub(super) fn get_init_data_pointer_offset(&self) -> usize { - // The init data pointer is immediately after the init data size field, - // which is a `u64`. - self.get_init_data_size_offset() + size_of::() + pub(crate) fn get_scratch_size(&self) -> usize { + self.scratch_size } /// Get the guest virtual address of the start of output data. #[instrument(skip_all, parent = Span::current(), level= "Trace")] pub(crate) fn get_output_data_buffer_gva(&self) -> u64 { - hyperlight_common::layout::scratch_base_gva(self.scratch_size) - + self.sandbox_memory_config.get_input_data_size() as u64 + hyperlight_common::layout::scratch_base_gva(self.scratch_size) + self.input_data_size as u64 } /// Get the offset into the host scratch buffer of the start of /// the output data. #[instrument(skip_all, parent = Span::current(), level= "Trace")] pub(crate) fn get_output_data_buffer_scratch_host_offset(&self) -> usize { - self.sandbox_memory_config.get_input_data_size() - } - - /// Get the offset in guest memory to the input data size. - #[instrument(skip_all, parent = Span::current(), level= "Trace")] - pub(super) fn get_input_data_size_offset(&self) -> usize { - // The input data size is the first field in the input stack's `GuestMemoryRegion` struct - self.peb_input_data_offset - } - - /// Get the offset in guest memory to the input data pointer. - #[instrument(skip_all, parent = Span::current(), level= "Trace")] - fn get_input_data_pointer_offset(&self) -> usize { - // The input data pointer is immediately after the input - // data size field in the input data `GuestMemoryRegion` struct which is a `u64`. 
- self.get_input_data_size_offset() + size_of::() + self.input_data_size } /// Get the guest virtual address of the start of input data @@ -487,9 +365,8 @@ impl SandboxMemoryLayout { /// location where page tables will be eagerly copied on restore #[instrument(skip_all, parent = Span::current(), level= "Trace")] pub(crate) fn get_pt_base_scratch_offset(&self) -> usize { - (self.sandbox_memory_config.get_input_data_size() - + self.sandbox_memory_config.get_output_data_size()) - .next_multiple_of(hyperlight_common::vmem::PAGE_SIZE) + (self.input_data_size + self.output_data_size) + .next_multiple_of(hyperlight_common::vmem::PAGE_SIZE) } /// Get the base GPA to which the page tables will be eagerly @@ -507,30 +384,18 @@ impl SandboxMemoryLayout { self.get_pt_base_gpa() + self.pt_size.unwrap_or(0) as u64 } - /// Get the offset in guest memory to the heap size - #[instrument(skip_all, parent = Span::current(), level= "Trace")] - fn get_heap_size_offset(&self) -> usize { - self.peb_heap_data_offset - } - /// Get the offset in guest memory to the file_mappings count field /// (the `size` field of the `GuestMemoryRegion` in the PEB). #[cfg(feature = "nanvix-unstable")] pub(crate) fn get_file_mappings_size_offset(&self) -> usize { - self.peb_file_mappings_offset - } - - /// Get the offset in guest memory to the file_mappings pointer field. - #[cfg(feature = "nanvix-unstable")] - fn get_file_mappings_pointer_offset(&self) -> usize { - self.get_file_mappings_size_offset() + size_of::() + self.peb_file_mappings_offset() } /// Get the offset in snapshot memory where the FileMappingInfo array starts /// (immediately after the PEB struct, within the same page). #[cfg(feature = "nanvix-unstable")] pub(crate) fn get_file_mappings_array_offset(&self) -> usize { - self.peb_offset + size_of::() + self.peb_offset() + size_of::() } /// Get the guest address of the FileMappingInfo array. 
@@ -539,32 +404,24 @@ impl SandboxMemoryLayout { (Self::BASE_ADDRESS + self.get_file_mappings_array_offset()) as u64 } - /// Get the offset of the heap pointer in guest memory, - #[instrument(skip_all, parent = Span::current(), level= "Trace")] - fn get_heap_pointer_offset(&self) -> usize { - // The heap pointer is immediately after the - // heap size field in the guest heap's `GuestMemoryRegion` struct which is a `u64`. - self.get_heap_size_offset() + size_of::() - } - /// Get the total size of guest memory in `self`'s memory /// layout. #[instrument(skip_all, parent = Span::current(), level= "Trace")] fn get_unaligned_memory_size(&self) -> usize { - self.init_data_offset + self.init_data_size + self.init_data_offset() + self.init_data_size } /// get the code offset /// This is the offset in the sandbox memory where the code starts #[instrument(skip_all, parent = Span::current(), level= "Trace")] pub(crate) fn get_guest_code_offset(&self) -> usize { - self.guest_code_offset + self.guest_code_offset() } /// Get the guest address of the code section in the sandbox #[instrument(skip_all, parent = Span::current(), level= "Trace")] pub(crate) fn get_guest_code_address(&self) -> usize { - Self::BASE_ADDRESS + self.guest_code_offset + Self::BASE_ADDRESS + self.guest_code_offset() } /// Get the total size of guest memory in `self`'s memory @@ -592,8 +449,8 @@ impl SandboxMemoryLayout { #[instrument(skip_all, parent = Span::current(), level= "Trace")] pub(crate) fn set_pt_size(&mut self, size: usize) -> Result<()> { let min_fixed_scratch = hyperlight_common::layout::min_scratch_size( - self.sandbox_memory_config.get_input_data_size(), - self.sandbox_memory_config.get_output_data_size(), + self.input_data_size, + self.output_data_size, ); let min_scratch = min_fixed_scratch + size; if self.scratch_size < min_scratch { @@ -632,7 +489,7 @@ impl SandboxMemoryLayout { Code, ); - let expected_peb_offset = TryInto::::try_into(self.peb_offset)?; + let expected_peb_offset = 
TryInto::::try_into(self.peb_offset())?; if peb_offset != expected_peb_offset { return Err(new_error!( @@ -658,7 +515,7 @@ impl SandboxMemoryLayout { let heap_offset = builder.push_page_aligned(size_of::(), MemoryRegionFlags::READ, Peb); - let expected_heap_offset = TryInto::::try_into(self.guest_heap_buffer_offset)?; + let expected_heap_offset = TryInto::::try_into(self.guest_heap_buffer_offset())?; if heap_offset != expected_heap_offset { return Err(new_error!( @@ -682,7 +539,7 @@ impl SandboxMemoryLayout { Heap, ); - let expected_init_data_offset = TryInto::::try_into(self.init_data_offset)?; + let expected_init_data_offset = TryInto::::try_into(self.init_data_offset())?; if init_data_offset != expected_init_data_offset { return Err(new_error!( @@ -719,7 +576,7 @@ impl SandboxMemoryLayout { #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] pub(crate) fn write_init_data(&self, out: &mut [u8], bytes: &[u8]) -> Result<()> { - out[self.init_data_offset..self.init_data_offset + self.init_data_size] + out[self.init_data_offset()..self.init_data_offset() + self.init_data_size] .copy_from_slice(bytes); Ok(()) } @@ -731,84 +588,43 @@ impl SandboxMemoryLayout { /// from this function. #[instrument(err(Debug), skip_all, parent = Span::current(), level= "Trace")] pub(crate) fn write_peb(&self, mem: &mut [u8]) -> Result<()> { - let guest_offset = SandboxMemoryLayout::BASE_ADDRESS; - - fn write_u64(mem: &mut [u8], offset: usize, value: u64) -> Result<()> { - if offset + 8 > mem.len() { - return Err(new_error!( - "Cannot write to offset {} in slice of len {}", - offset, - mem.len() - )); - } - mem[offset..offset + 8].copy_from_slice(&u64::to_ne_bytes(value)); - Ok(()) - } + use hyperlight_common::mem::GuestMemoryRegion; - macro_rules! get_address { - ($something:ident) => { - u64::try_from(guest_offset + self.$something)? - }; - } + let guest_base = Self::BASE_ADDRESS as u64; - // Start of setting up the PEB. 
The following are in the order of the PEB fields - - // Set up input buffer pointer - write_u64( - mem, - self.get_input_data_size_offset(), - self.sandbox_memory_config - .get_input_data_size() - .try_into()?, - )?; - write_u64( - mem, - self.get_input_data_pointer_offset(), - self.get_input_data_buffer_gva(), - )?; - - // Set up output buffer pointer - write_u64( - mem, - self.get_output_data_size_offset(), - self.sandbox_memory_config - .get_output_data_size() - .try_into()?, - )?; - write_u64( - mem, - self.get_output_data_pointer_offset(), - self.get_output_data_buffer_gva(), - )?; - - // Set up init data pointer - write_u64( - mem, - self.get_init_data_size_offset(), - (self.get_unaligned_memory_size() - self.init_data_offset).try_into()?, - )?; - let addr = get_address!(init_data_offset); - write_u64(mem, self.get_init_data_pointer_offset(), addr)?; - - // Set up heap buffer pointer - let addr = get_address!(guest_heap_buffer_offset); - write_u64(mem, self.get_heap_size_offset(), self.heap_size.try_into()?)?; - write_u64(mem, self.get_heap_pointer_offset(), addr)?; - - // Set up the file_mappings descriptor in the PEB. - // - The `size` field holds the number of valid FileMappingInfo - // entries currently written (initially 0 — entries are added - // later by map_file_cow / evolve). 
- // - The `ptr` field holds the guest address of the preallocated - // FileMappingInfo array - #[cfg(feature = "nanvix-unstable")] - write_u64(mem, self.get_file_mappings_size_offset(), 0)?; - #[cfg(feature = "nanvix-unstable")] - write_u64( - mem, - self.get_file_mappings_pointer_offset(), - self.get_file_mappings_array_gva(), - )?; + let peb = HyperlightPEB { + input_stack: GuestMemoryRegion { + size: self.input_data_size as u64, + ptr: self.get_input_data_buffer_gva(), + }, + output_stack: GuestMemoryRegion { + size: self.output_data_size as u64, + ptr: self.get_output_data_buffer_gva(), + }, + init_data: GuestMemoryRegion { + size: (self.get_unaligned_memory_size() - self.init_data_offset()) as u64, + ptr: guest_base + self.init_data_offset() as u64, + }, + guest_heap: GuestMemoryRegion { + size: self.heap_size as u64, + ptr: guest_base + self.guest_heap_buffer_offset() as u64, + }, + // Set up the file_mappings descriptor in the PEB. + // - The `size` field holds the number of valid FileMappingInfo + // entries currently written (initially 0 — entries are added + // later by map_file_cow / evolve). + // - The `ptr` field holds the guest address of the preallocated + // FileMappingInfo array + #[cfg(feature = "nanvix-unstable")] + file_mappings: GuestMemoryRegion { + size: 0, // entry count, populated later by map_file_cow + ptr: self.get_file_mappings_array_gva(), + }, + }; + + let offset = self.peb_offset(); + peb.write_to(&mut mem[offset..offset + size_of::()]) + .map_err(|e| new_error!("failed to write PEB: {}", e))?; // End of setting up the PEB diff --git a/src/hyperlight_host/src/mem/memory_region.rs b/src/hyperlight_host/src/mem/memory_region.rs index 615fe9cac..de3c83a20 100644 --- a/src/hyperlight_host/src/mem/memory_region.rs +++ b/src/hyperlight_host/src/mem/memory_region.rs @@ -158,7 +158,9 @@ impl MemoryRegionType { /// shared memory mapping with guard pages. 
pub fn surrogate_mapping(&self) -> SurrogateMapping { match self { - MemoryRegionType::MappedFile => SurrogateMapping::ReadOnlyFile, + MemoryRegionType::MappedFile | MemoryRegionType::Snapshot => { + SurrogateMapping::ReadOnlyFile + } _ => SurrogateMapping::SandboxMemory, } } diff --git a/src/hyperlight_host/src/mem/mgr.rs b/src/hyperlight_host/src/mem/mgr.rs index 68f35ff7d..9dede7083 100644 --- a/src/hyperlight_host/src/mem/mgr.rs +++ b/src/hyperlight_host/src/mem/mgr.rs @@ -22,6 +22,7 @@ use hyperlight_common::flatbuffer_wrappers::function_call::{ }; use hyperlight_common::flatbuffer_wrappers::function_types::FunctionCallResult; use hyperlight_common::flatbuffer_wrappers::guest_log_data::GuestLogData; +use hyperlight_common::flatbuffer_wrappers::host_function_details::HostFunctionDetails; use hyperlight_common::vmem::{self, PAGE_TABLE_SIZE}; #[cfg(all(feature = "crashdump", not(feature = "i686-guest")))] use hyperlight_common::vmem::{BasicMapping, MappingKind}; @@ -298,6 +299,7 @@ where } /// Create a snapshot with the given mapped regions + #[allow(clippy::too_many_arguments)] pub(crate) fn snapshot( &mut self, sandbox_id: u64, @@ -306,6 +308,7 @@ where rsp_gva: u64, sregs: CommonSpecialRegisters, entrypoint: NextAction, + host_functions: HostFunctionDetails, ) -> Result { self.snapshot_count += 1; Snapshot::new( @@ -320,6 +323,7 @@ where sregs, entrypoint, self.snapshot_count, + host_functions, ) } } @@ -441,7 +445,7 @@ impl SandboxMemoryManager { pub(crate) fn get_host_function_call(&mut self) -> Result { self.scratch_mem.try_pop_buffer_into::( self.layout.get_output_data_buffer_scratch_host_offset(), - self.layout.sandbox_memory_config.get_output_data_size(), + self.layout.output_data_size, ) } @@ -456,7 +460,7 @@ impl SandboxMemoryManager { self.scratch_mem.push_buffer( self.layout.get_input_data_buffer_scratch_host_offset(), - self.layout.sandbox_memory_config.get_input_data_size(), + self.layout.input_data_size, data, ) } @@ -473,7 +477,7 @@ impl 
SandboxMemoryManager { self.scratch_mem.push_buffer( self.layout.get_input_data_buffer_scratch_host_offset(), - self.layout.sandbox_memory_config.get_input_data_size(), + self.layout.input_data_size, buffer, )?; Ok(()) @@ -485,7 +489,7 @@ impl SandboxMemoryManager { pub(crate) fn get_guest_function_call_result(&mut self) -> Result { self.scratch_mem.try_pop_buffer_into::( self.layout.get_output_data_buffer_scratch_host_offset(), - self.layout.sandbox_memory_config.get_output_data_size(), + self.layout.output_data_size, ) } @@ -494,7 +498,7 @@ impl SandboxMemoryManager { pub(crate) fn read_guest_log_data(&mut self) -> Result { self.scratch_mem.try_pop_buffer_into::( self.layout.get_output_data_buffer_scratch_host_offset(), - self.layout.sandbox_memory_config.get_output_data_size(), + self.layout.output_data_size, ) } @@ -503,7 +507,7 @@ impl SandboxMemoryManager { loop { let Ok(_) = self.scratch_mem.try_pop_buffer_into::>( self.layout.get_output_data_buffer_scratch_host_offset(), - self.layout.sandbox_memory_config.get_output_data_size(), + self.layout.output_data_size, ) else { break; }; @@ -512,7 +516,7 @@ impl SandboxMemoryManager { loop { let Ok(_) = self.scratch_mem.try_pop_buffer_into::>( self.layout.get_input_data_buffer_scratch_host_offset(), - self.layout.sandbox_memory_config.get_input_data_size(), + self.layout.input_data_size, ) else { break; }; diff --git a/src/hyperlight_host/src/mem/shared_mem.rs b/src/hyperlight_host/src/mem/shared_mem.rs index 5f975f605..7fb7c9eb4 100644 --- a/src/hyperlight_host/src/mem/shared_mem.rs +++ b/src/hyperlight_host/src/mem/shared_mem.rs @@ -2012,6 +2012,14 @@ pub struct ReadonlySharedMemory { /// by `mapping_at`. If `None`, the full `mem_size()` is mapped. #[cfg_attr(unshared_snapshot_mem, allow(dead_code))] guest_mapped_size: Option, + /// Size of the leading guard region (the bytes between + /// `region.ptr` and the start of the usable memory). For most + /// constructors this is exactly `PAGE_SIZE_USIZE`. 
The Windows + /// `from_file` path can use a larger leading guard when the + /// snapshot file's `memory_offset` exceeds one page (which + /// happens whenever the file carries host-function metadata + /// before the memory blob). + leading_guard_size: usize, } // Safety: HostMapping is only non-Send/Sync (causing // ReadonlySharedMemory to not be automatically Send/Sync) because raw @@ -2033,6 +2041,7 @@ impl ReadonlySharedMemory { Ok(ReadonlySharedMemory { region: anon.region, guest_mapped_size: None, + leading_guard_size: PAGE_SIZE_USIZE, }) } @@ -2045,6 +2054,7 @@ impl ReadonlySharedMemory { Ok(ReadonlySharedMemory { region: anon.region, guest_mapped_size: Some(guest_mapped_size), + leading_guard_size: PAGE_SIZE_USIZE, }) } @@ -2055,6 +2065,244 @@ impl ReadonlySharedMemory { self.guest_mapped_size.unwrap_or_else(|| self.mem_size()) } + /// Create a `ReadonlySharedMemory` backed by a file on disk. + /// + /// Only the `len` bytes at `[offset..offset+len)` (the memory + /// blob) are exposed via `base_ptr()` and `mem_size()`. + /// + /// `[offset..offset+len)` is surrounded by guard regions on the + /// host. + /// + /// `offset` and `len` must both be non-zero multiples of + /// `PAGE_SIZE`. If `guest_mapped_size` is set, it must also be + /// a non-zero multiple of `PAGE_SIZE` no greater than `len`. 
+ pub(crate) fn from_file( + file: &std::fs::File, + offset: usize, + len: usize, + guest_mapped_size: Option, + ) -> Result { + if len == 0 { + return Err(new_error!( + "Cannot create file-backed shared memory with size 0" + )); + } + + if offset == 0 || offset % PAGE_SIZE_USIZE != 0 { + return Err(new_error!( + "snapshot file offset {} must be a non-zero multiple of PAGE_SIZE", + offset + )); + } + + if !len.is_multiple_of(PAGE_SIZE_USIZE) { + return Err(new_error!( + "snapshot mapping length {} must be a multiple of PAGE_SIZE", + len + )); + } + + if let Some(gms) = guest_mapped_size + && (gms == 0 || gms > len || !gms.is_multiple_of(PAGE_SIZE_USIZE)) + { + return Err(new_error!( + "snapshot guest_mapped_size {} must be a non-zero multiple of PAGE_SIZE no greater than len {}", + gms, + len + )); + } + + #[cfg(target_os = "linux")] + { + Self::from_file_linux(file, offset, len, guest_mapped_size) + } + #[cfg(target_os = "windows")] + { + Self::from_file_windows(file, offset, len, guest_mapped_size) + } + } + + #[cfg(target_os = "linux")] + fn from_file_linux( + file: &std::fs::File, + offset: usize, + len: usize, + guest_mapped_size: Option, + ) -> Result { + use std::ffi::c_void; + use std::os::unix::io::AsRawFd; + + use libc::{ + MAP_ANONYMOUS, MAP_FAILED, MAP_FIXED, MAP_NORESERVE, MAP_PRIVATE, PROT_NONE, PROT_READ, + PROT_WRITE, mmap, off_t, size_t, + }; + + let total_size = len.checked_add(2 * PAGE_SIZE_USIZE).ok_or_else(|| { + new_error!("Memory required for file-backed snapshot exceeded usize::MAX") + })?; + + let fd = file.as_raw_fd(); + let offset: off_t = offset + .try_into() + .map_err(|_| new_error!("snapshot file offset {} exceeds off_t range", offset))?; + + // Allocate the full region (guard + usable + guard) as anonymous + let base = unsafe { + mmap( + null_mut(), + total_size as size_t, + PROT_NONE, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, + -1, + 0 as off_t, + ) + }; + if base == MAP_FAILED { + return Err(HyperlightError::MmapFailed( + 
std::io::Error::last_os_error().raw_os_error(), + )); + } + + // Map the file content over the usable portion (between guard pages). + // PROT_READ | PROT_WRITE: KVM/MSHV require writable host mappings + // to handle copy-on-write page faults from the guest. + // MAP_PRIVATE: writes go to private copies, not the file. + let usable_ptr = unsafe { (base as *mut u8).add(PAGE_SIZE_USIZE) }; + let mapped = unsafe { + mmap( + usable_ptr as *mut c_void, + len as size_t, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, + fd, + offset, + ) + }; + if mapped == MAP_FAILED { + unsafe { libc::munmap(base, total_size as size_t) }; + return Err(HyperlightError::MmapFailed( + std::io::Error::last_os_error().raw_os_error(), + )); + } + + // Guard pages at base and base+total_size-PAGE_SIZE are already + // PROT_NONE from the anonymous mapping; MAP_FIXED only replaced + // the middle portion. + + #[allow(clippy::arc_with_non_send_sync)] + Ok(ReadonlySharedMemory { + region: Arc::new(HostMapping { + ptr: base as *mut u8, + size: total_size, + }), + guest_mapped_size, + leading_guard_size: PAGE_SIZE_USIZE, + }) + } + + /// Windows file mappings must start at file offset 0 and cannot + /// extend beyond the file's size, so the view covers + /// `[0 .. offset + len + PAGE_SIZE)`. The leading `offset` bytes + /// (header plus any host function metadata) become the leading + /// guard, recorded in `leading_guard_size`. The trailing + /// `PAGE_SIZE` bytes (written explicitly by `to_file`) become + /// the trailing guard. Both ends are protected with + /// `VirtualProtect(PAGE_NOACCESS)`. 
+ #[cfg(target_os = "windows")] + fn from_file_windows( + file: &std::fs::File, + offset: usize, + len: usize, + guest_mapped_size: Option, + ) -> Result { + use std::os::windows::io::AsRawHandle; + + use windows::Win32::Foundation::HANDLE; + use windows::Win32::System::Memory::{ + CreateFileMappingA, FILE_MAP_READ, MapViewOfFile, PAGE_NOACCESS, PAGE_PROTECTION_FLAGS, + PAGE_READONLY, VirtualProtect, + }; + use windows::core::PCSTR; + + let leading_guard_size = offset; + let total_size = leading_guard_size + .checked_add(len) + .and_then(|n| n.checked_add(PAGE_SIZE_USIZE)) + .ok_or_else(|| { + new_error!("Memory required for file-backed snapshot exceeded usize::MAX") + })?; + debug_assert!(leading_guard_size >= PAGE_SIZE_USIZE); + debug_assert!(leading_guard_size % PAGE_SIZE_USIZE == 0); + + let file_handle = HANDLE(file.as_raw_handle()); + + // Create a read-only file mapping at the exact file size (pass 0,0). + // The file includes trailing PAGE_SIZE padding written by to_file(), + // so the file is at least leading_guard_size + len + PAGE_SIZE bytes. + let handle = + unsafe { CreateFileMappingA(file_handle, None, PAGE_READONLY, 0, 0, PCSTR::null())? }; + + if handle.is_invalid() { + log_then_return!(HyperlightError::MemoryAllocationFailed( + Error::last_os_error().raw_os_error() + )); + } + + // Map exactly total_size (leading region + blob + trailing padding) bytes. + let addr = unsafe { MapViewOfFile(handle, FILE_MAP_READ, 0, 0, total_size) }; + if addr.Value.is_null() { + unsafe { + let _ = windows::Win32::Foundation::CloseHandle(handle); + } + log_then_return!(HyperlightError::MemoryAllocationFailed( + Error::last_os_error().raw_os_error() + )); + } + + #[allow(clippy::arc_with_non_send_sync)] + let region = Arc::new(HostMapping { + ptr: addr.Value as *mut u8, + size: total_size, + handle, + }); + + // Set guard pages on both ends. 
+ let mut unused_old_prot = PAGE_PROTECTION_FLAGS(0); + + // Leading guard: covers the fixed header and any host-function + // metadata + let first_guard = addr.Value; + if let Err(e) = unsafe { + VirtualProtect( + first_guard, + leading_guard_size, + PAGE_NOACCESS, + &mut unused_old_prot, + ) + } { + log_then_return!(WindowsAPIError(e.clone())); + } + + // Trailing guard: the explicit PAGE_SIZE padding at the end of the file. + let last_guard = unsafe { first_guard.add(total_size - PAGE_SIZE_USIZE) }; + if let Err(e) = unsafe { + VirtualProtect( + last_guard, + PAGE_SIZE_USIZE, + PAGE_NOACCESS, + &mut unused_old_prot, + ) + } { + log_then_return!(WindowsAPIError(e.clone())); + } + + Ok(ReadonlySharedMemory { + region, + guest_mapped_size, + leading_guard_size, + }) + } + pub(crate) fn as_slice(&self) -> &[u8] { unsafe { std::slice::from_raw_parts(self.base_ptr(), self.mem_size()) } } @@ -2098,6 +2346,32 @@ impl SharedMemory for ReadonlySharedMemory { fn region(&self) -> &HostMapping { &self.region } + // Override the default trait accessors to use the variable-sized + // leading guard. The trailing guard is always `PAGE_SIZE_USIZE`. + fn base_addr(&self) -> usize { + self.region().ptr as usize + self.leading_guard_size + } + fn base_ptr(&self) -> *mut u8 { + self.region().ptr.wrapping_add(self.leading_guard_size) + } + fn mem_size(&self) -> usize { + self.region().size - self.leading_guard_size - PAGE_SIZE_USIZE + } + fn host_region_base(&self) -> ::HostBaseType { + #[cfg(not(windows))] + { + self.base_addr() + } + #[cfg(windows)] + { + super::memory_region::HostRegionBase { + from_handle: self.region().handle.into(), + handle_base: self.region().ptr as usize, + handle_size: self.region().size, + offset: self.leading_guard_size, + } + } + } // There's no way to get exclusive (and therefore writable) access // to a ReadonlySharedMemory. 
fn with_exclusivity T>( diff --git a/src/hyperlight_host/src/sandbox/host_funcs.rs b/src/hyperlight_host/src/sandbox/host_funcs.rs index a1430338b..a5c2244a6 100644 --- a/src/hyperlight_host/src/sandbox/host_funcs.rs +++ b/src/hyperlight_host/src/sandbox/host_funcs.rs @@ -35,8 +35,76 @@ pub struct FunctionRegistry { functions_map: HashMap, } -impl From<&mut FunctionRegistry> for HostFunctionDetails { - fn from(registry: &mut FunctionRegistry) -> Self { +/// A collection of host functions that can be supplied to a sandbox +/// constructor (e.g. [`crate::MultiUseSandbox::from_snapshot`]) to +/// expose host-side functionality to the guest. +/// +/// Use [`HostFunctions::default`] to start with the standard +/// `HostPrint` function pre-registered (matches the registry that the +/// regular `UninitializedSandbox` → `evolve()` path constructs), or +/// [`HostFunctions::new`] to start with an empty registry. +/// +/// Add additional host functions via the +/// [`crate::func::Registerable`] trait, just as you would on an +/// `UninitializedSandbox`. +/// +/// ```no_run +/// # use hyperlight_host::{HostFunctions, Result}; +/// # use hyperlight_host::func::Registerable; +/// # fn example() -> Result<()> { +/// // Default: HostPrint already registered. +/// let mut funcs = HostFunctions::default(); +/// funcs.register_host_function("Add", |a: i32, b: i32| Ok(a + b))?; +/// # Ok(()) +/// # } +/// ``` +pub struct HostFunctions(FunctionRegistry); + +impl HostFunctions { + /// Create an empty `HostFunctions` with no host functions + /// registered. + /// + /// Most callers want [`HostFunctions::default`] instead, which + /// pre-registers the standard `HostPrint` function. + pub fn new() -> Self { + Self(FunctionRegistry::default()) + } + + /// Consume this `HostFunctions` and return the inner registry. + pub(crate) fn into_inner(self) -> FunctionRegistry { + self.0 + } + + /// Borrow the inner registry mutably. 
+ pub(crate) fn inner_mut(&mut self) -> &mut FunctionRegistry { + &mut self.0 + } + + /// Borrow the inner registry immutably. + pub(crate) fn inner(&self) -> &FunctionRegistry { + &self.0 + } +} + +impl Default for HostFunctions { + /// Create a `HostFunctions` pre-populated with the standard + /// `HostPrint` function (writes UTF-8 strings to the host's + /// stdout in green). + /// + /// This matches the default registry installed by + /// `UninitializedSandbox::new()`, so a snapshot taken from a + /// regular sandbox can be loaded with + /// `MultiUseSandbox::from_snapshot(snap, HostFunctions::default(), None)` + /// without registering anything else. + /// + /// Use [`HostFunctions::new`] for an empty registry. + fn default() -> Self { + Self(FunctionRegistry::with_default_host_print()) + } +} + +impl From<&FunctionRegistry> for HostFunctionDetails { + fn from(registry: &FunctionRegistry) -> Self { let host_functions = registry .functions_map .iter() @@ -61,15 +129,26 @@ pub struct FunctionEntry { impl FunctionRegistry { /// Register a host function with the sandbox. - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - pub(crate) fn register_host_function( - &mut self, - name: String, - func: FunctionEntry, - ) -> Result<()> { + #[instrument(skip_all, parent = Span::current(), level = "Trace")] + pub(crate) fn register_host_function(&mut self, name: String, func: FunctionEntry) { self.functions_map.insert(name, func); + } - Ok(()) + /// Create a `FunctionRegistry` pre-populated with the default + /// `HostPrint` function (writes to stdout with green text). 
+ pub(crate) fn with_default_host_print() -> Self { + use crate::func::host_functions::HostFunction; + use crate::func::{ParameterTuple, SupportedReturnType}; + + let mut registry = Self::default(); + let hf: HostFunction = default_writer_func.into(); + let entry = FunctionEntry { + function: hf.into(), + parameter_types: <(String,)>::TYPE, + return_type: ::TYPE, + }; + registry.register_host_function("HostPrint".to_string(), entry); + registry } /// Assuming a host function called `"HostPrint"` exists, and takes a @@ -118,7 +197,7 @@ impl FunctionRegistry { /// The default writer function is to write to stdout with green text. #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] -pub(super) fn default_writer_func(s: String) -> Result { +fn default_writer_func(s: String) -> Result { match std::io::stdout().is_terminal() { false => { print!("{}", s); diff --git a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs index 241622cab..233f498df 100644 --- a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs +++ b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs @@ -145,6 +145,195 @@ impl MultiUseSandbox { self.pt_root_finder = Some(finder); } + /// Create a `MultiUseSandbox` directly from a [`Snapshot`], + /// bypassing [`UninitializedSandbox`](crate::UninitializedSandbox) + /// and [`evolve()`](crate::UninitializedSandbox::evolve). + /// + /// This is useful for fast sandbox creation when a snapshot of + /// an already-initialized guest is available, either saved to disk + /// or captured in memory from another sandbox. + /// + /// The provided [`HostFunctions`] must include every host function + /// that was registered on the sandbox at the time the snapshot was + /// taken (matched by name and signature). Additional host functions + /// not present in the snapshot are allowed. 
+ /// + /// An optional [`SandboxConfiguration`](crate::sandbox::SandboxConfiguration) + /// can be supplied to override runtime settings such as timeouts and + /// interrupt behavior. Memory layout fields + /// (`input_data_size`, `output_data_size`, `heap_size`, `scratch_size`) + /// are always taken from the snapshot. Any values supplied in + /// `config` for those fields are ignored. + /// + /// # Examples + /// + /// From a snapshot taken on another sandbox: + /// + /// ```no_run + /// # use std::sync::Arc; + /// # use hyperlight_host::{HostFunctions, MultiUseSandbox, UninitializedSandbox, GuestBinary}; + /// # fn example() -> Result<(), Box> { + /// // Create and initialize a sandbox the normal way + /// let mut sandbox: MultiUseSandbox = UninitializedSandbox::new( + /// GuestBinary::FilePath("guest.bin".into()), + /// None, + /// )?.evolve()?; + /// + /// // Capture a snapshot of the initialized state + /// let snapshot = sandbox.snapshot()?; + /// + /// // Create a new sandbox directly from the snapshot + /// let mut sandbox2 = MultiUseSandbox::from_snapshot(snapshot, HostFunctions::default(), None)?; + /// let result: i32 = sandbox2.call("GetValue", ())?; + /// # Ok(()) + /// # } + /// ``` + /// + /// From a snapshot loaded from disk: + /// + /// ```no_run + /// # use std::sync::Arc; + /// # use hyperlight_host::{HostFunctions, MultiUseSandbox}; + /// # use hyperlight_host::sandbox::snapshot::Snapshot; + /// # fn example() -> Result<(), Box> { + /// let snapshot = Arc::new(Snapshot::from_file("guest_snapshot.hls")?); + /// let mut sandbox = MultiUseSandbox::from_snapshot(snapshot, HostFunctions::default(), None)?; + /// let result: String = sandbox.call("Echo", "hello".to_string())?; + /// # Ok(()) + /// # } + /// ``` + #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] + pub fn from_snapshot( + snapshot: Arc, + host_funcs: crate::HostFunctions, + config: Option, + ) -> Result { + use rand::RngExt; + + use 
crate::mem::ptr::RawPtr; + use crate::sandbox::uninitialized_evolve::set_up_hypervisor_partition; + + // Validate that the provided host functions are a superset of + // those required by the snapshot. + snapshot.validate_host_functions(&host_funcs)?; + + let host_funcs = Arc::new(Mutex::new(host_funcs.into_inner())); + + let stack_top_gva = snapshot.stack_top_gva(); + // Start from the caller's config (if any) so runtime fields + // such as timeouts and interrupt knobs are honored, then + // overwrite the layout fields from the snapshot. The on-disk + // layout is fixed, so any layout values supplied by the + // caller are silently ignored. Warn if the caller passed a + // config whose layout fields disagree with the snapshot, so + // the override is at least visible. + let caller_supplied_config = config.is_some(); + let mut config = config.unwrap_or_default(); + if caller_supplied_config { + warn_on_layout_override(&config, snapshot.layout()); + } + config.set_input_data_size(snapshot.layout().input_data_size); + config.set_output_data_size(snapshot.layout().output_data_size); + config.set_heap_size(snapshot.layout().heap_size as u64); + config.set_scratch_size(snapshot.layout().get_scratch_size()); + let load_info = snapshot.load_info(); + + let mgr = crate::mem::mgr::SandboxMemoryManager::from_snapshot(&snapshot)?; + let (mut hshm, gshm) = mgr.build()?; + + let page_size = u32::try_from(page_size::get())? as usize; + + #[cfg(target_os = "linux")] + crate::signal_handlers::setup_signal_handlers(&config)?; + + // Build the runtime config from the caller's `SandboxConfiguration` + // so that `guest_core_dump` (crashdump) and `guest_debug_info` (gdb) + // take effect just like they do in the normal evolve path. + // `binary_path` and `entry_point` are not available from a snapshot + // and are left unset. This only affects metadata in core dumps. 
+ #[cfg(any(crashdump, gdb))] + let rt_cfg = crate::sandbox::uninitialized::SandboxRuntimeConfig { + #[cfg(crashdump)] + binary_path: None, + #[cfg(gdb)] + debug_info: config.get_guest_debug_info(), + #[cfg(crashdump)] + guest_core_dump: config.get_guest_core_dump(), + #[cfg(crashdump)] + entry_point: None, + }; + + let mut vm = set_up_hypervisor_partition( + gshm, + &config, + stack_top_gva, + page_size, + #[cfg(any(crashdump, gdb))] + rt_cfg, + load_info, + )?; + + let seed = { + let mut rng = rand::rng(); + rng.random::() + }; + let peb_addr = RawPtr::from(u64::try_from(hshm.layout.peb_address())?); + + #[cfg(gdb)] + let dbg_mem_access_hdl = Arc::new(Mutex::new(hshm.clone())); + + vm.initialise( + peb_addr, + seed, + page_size as u32, + &mut hshm, + &host_funcs, + None, + #[cfg(gdb)] + dbg_mem_access_hdl, + ) + .map_err(crate::hypervisor::hyperlight_vm::HyperlightVmError::Initialize)?; + + // If the snapshot was taken from an already-initialized guest + // (NextAction::Call), apply the captured special registers so + // the guest resumes in the correct CPU state. + #[cfg(not(feature = "i686-guest"))] + if matches!(snapshot.entrypoint(), super::snapshot::NextAction::Call(_)) { + let sregs = snapshot.sregs().cloned().unwrap_or_else(|| { + crate::hypervisor::regs::CommonSpecialRegisters::standard_64bit_defaults( + hshm.layout.get_pt_base_gpa(), + ) + }); + vm.apply_sregs(hshm.layout.get_pt_base_gpa(), &sregs) + .map_err(|e| { + crate::HyperlightError::HyperlightVmError( + crate::hypervisor::hyperlight_vm::HyperlightVmError::Restore(e), + ) + })?; + } + + #[cfg(gdb)] + let dbg_mem_wrapper = Arc::new(Mutex::new(hshm.clone())); + + let mut sbox = MultiUseSandbox::from_uninit( + host_funcs, + hshm, + vm, + #[cfg(gdb)] + dbg_mem_wrapper, + ); + // Use the snapshot's sandbox_id so that restore() back to this + // snapshot is permitted. 
The id is process-local and never + // persisted to disk: `Snapshot::from_file` assigns a fresh id + // on every load, so two `from_file` calls of the same path + // yield restore-incompatible sandboxes (which is the intended + // safer default). Sandboxes built from clones of the same + // in-memory `Arc` share the id and are mutually + // restore-compatible. + sbox.id = snapshot.sandbox_id(); + Ok(sbox) + } + /// Creates a snapshot of the sandbox's current memory state. /// /// The snapshot is tied to this specific sandbox instance and can only be @@ -207,6 +396,11 @@ impl MultiUseSandbox { .get_snapshot_sregs() .map_err(|e| HyperlightError::HyperlightVmError(e.into()))?; let entrypoint = self.vm.get_entrypoint(); + let host_functions = (&*self.host_funcs.try_lock().map_err(|e| { + crate::new_error!("Error locking host_funcs at {}:{}: {}", file!(), line!(), e) + })?) + .into(); + let memory_snapshot = self.mem_mgr.snapshot( self.id, mapped_regions_vec, @@ -214,6 +408,7 @@ impl MultiUseSandbox { stack_top_gpa, sregs, entrypoint, + host_functions, )?; let snapshot = Arc::new(memory_snapshot); self.snapshot = Some(snapshot.clone()); @@ -943,6 +1138,48 @@ impl std::fmt::Debug for MultiUseSandbox { } } +/// Emit a warning for each memory-layout field in `caller` that +/// disagrees with `snapshot`. Used by [`MultiUseSandbox::from_snapshot`] +/// to surface ignored caller-supplied layout values, since those +/// fields are always taken from the snapshot. 
+fn warn_on_layout_override( + caller: &crate::sandbox::SandboxConfiguration, + snapshot: &crate::mem::layout::SandboxMemoryLayout, +) { + let mismatches: &[(&str, u64, u64)] = &[ + ( + "input_data_size", + caller.get_input_data_size() as u64, + snapshot.input_data_size as u64, + ), + ( + "output_data_size", + caller.get_output_data_size() as u64, + snapshot.output_data_size as u64, + ), + ( + "heap_size", + caller.get_heap_size(), + snapshot.heap_size as u64, + ), + ( + "scratch_size", + caller.get_scratch_size() as u64, + snapshot.get_scratch_size() as u64, + ), + ]; + for (name, supplied, snap) in mismatches { + if supplied != snap { + tracing::warn!( + "from_snapshot ignoring caller-supplied {} ({}); using snapshot value ({})", + name, + supplied, + snap + ); + } + } +} + #[cfg(test)] mod tests { use std::sync::{Arc, Barrier}; diff --git a/src/hyperlight_host/src/sandbox/snapshot/file.rs b/src/hyperlight_host/src/sandbox/snapshot/file.rs new file mode 100644 index 000000000..8c06decdc --- /dev/null +++ b/src/hyperlight_host/src/sandbox/snapshot/file.rs @@ -0,0 +1,1031 @@ +/* +Copyright 2025 The Hyperlight Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +//! Snapshot file format: serialization, deserialization, and the +//! associated `Snapshot::to_file` / `Snapshot::from_file` methods. +//! +//! The on-disk byte layout is whatever the three `#[repr(C)]` POD +//! structs below declare: `RawPreamble`, `RawHeaderV1`, `RawSregs`. +//! 
Each derives `bytemuck::Pod` and `bytemuck::Zeroable`, which the
+//! derive macro proves at compile time means there is no padding and
+//! every bit pattern is a valid value of the struct's fields. There
+//! are no separate numeric offsets to keep in sync with the code.
+//!
+//! All multi-byte integers are little-endian (gated by a
+//! `compile_error!` on big-endian targets below).
+//!
+//! The fixed-position prefix (preamble, header, sregs, hashes) is
+//! followed by an optional host-function flatbuffer of length
+//! `host_funcs_size`, then zero padding to the next PAGE_SIZE
+//! boundary, then the memory blob (the mmap target). The memory
+//! blob's file offset is recorded in `memory_offset` and is always
+//! page-aligned. A PAGE_SIZE trailing zero region follows the blob
+//! (Windows guard-page backing).
+//!
+//! ```text
+//! +----------------------+
+//! | RawPreamble          | magic "HLS\0" + format_version
+//! +----------------------+
+//! | RawHeaderV1          | arch, abi_version, stack_top_gva,
+//! |                      | entrypoint tag+addr, layout fields,
+//! |                      | memory_size, memory_offset, has_sregs,
+//! |                      | hypervisor, host_funcs_size
+//! +----------------------+
+//! | RawSregs             | segments, tables, control regs, bitmap.
+//! |                      | Always written; ignored on load if
+//! |                      | has_sregs == 0.
+//! +----------------------+
+//! | RawHashes            | header_hash (always verified) and
+//! |                      | blob_hash (skipped by from_file_unchecked)
+//! +----------------------+
+//! | host_funcs blob      | host_funcs_size bytes (0 if absent),
+//! |                      | serialized HostFunctionDetails flatbuffer
+//! +----------------------+
+//! | zero padding         | pads to next PAGE_SIZE boundary
+//! +----------------------+ <- memory_offset
+//! | memory blob          | memory_size bytes (mmap target)
+//! +----------------------+
+//! | trailing PAGE_SIZE   | Windows guard-page backing; ignored on Linux
+//! +----------------------+
+//! ```
+//!
+//! `memory_offset == align_up(FIXED_PREFIX_SIZE + host_funcs_size,
+//! PAGE_SIZE)`, where `FIXED_PREFIX_SIZE = sizeof(RawPreamble) +
+//! sizeof(RawHeaderV1) + sizeof(RawSregs) + sizeof(RawHashes)`. With
+//! no host functions this lands at exactly PAGE_SIZE.
+ +use hyperlight_common::flatbuffer_wrappers::host_function_details::HostFunctionDetails; +use hyperlight_common::vmem::PAGE_SIZE; + +use super::{NextAction, SANDBOX_CONFIGURATION_COUNTER, Snapshot}; +use crate::hypervisor::regs::CommonSpecialRegisters; +use crate::mem::layout::SandboxMemoryLayout; +use crate::mem::shared_mem::{ReadonlySharedMemory, SharedMemory}; + +const SNAPSHOT_MAGIC: &[u8; 4] = b"HLS\0"; + +/// ABI version for the snapshot memory blob. This must be bumped +/// whenever a change affects the contents or interpretation of the +/// memory blob - i.e., the contract between the host runtime and +/// the guest binary that determines how snapshot memory is produced +/// and consumed. +/// +/// Examples of changes that require a bump: +/// +/// - Memory layout: `SandboxMemoryLayout` offset computation, memory +/// region definitions, page table format +/// - Host-guest interface: PEB struct layout, calling convention, +/// dispatch mechanism, input/output buffer format +/// - Guest init state: entry point setup, GDT/IDT/TSS initialization, +/// or any startup code in `hyperlight_guest_bin` whose results are +/// captured in the snapshot (e.g. sregs) +/// +/// Unlike `FormatVersion` (which covers the file header byte layout +/// and may allow conversion between versions), an ABI mismatch means +/// the memory blob is incompatible and the snapshot must be +/// regenerated from the guest binary. +const SNAPSHOT_ABI_VERSION: u32 = 1; + +/// Maximum size of the host-functions flatbuffer blob in a snapshot +/// file. Bounds the allocation done at load time before the +/// flatbuffer is parsed. The legitimate size for hundreds of host +/// functions is well under this cap. +const MAX_HOST_FUNCS_SIZE: u64 = 1024 * 1024; + +/// Snapshot file format version. 
+#[derive(Copy, Clone, Debug, PartialEq)] +enum FormatVersion { + V1 = 1, +} + +impl FormatVersion { + fn from_u32(v: u32) -> crate::Result { + match v { + 1 => Ok(Self::V1), + _ => Err(crate::new_error!( + "unsupported snapshot format version {} (this build supports V1). \ + The file header layout may be convertible to the current format", + v + )), + } + } +} + +/// Architecture tag for snapshot files. +#[derive(Copy, Clone, Debug, PartialEq)] +enum ArchTag { + X86_64 = 1, + Aarch64 = 2, + I686 = 3, +} + +impl ArchTag { + fn current() -> Self { + #[cfg(feature = "i686-guest")] + { + Self::I686 + } + #[cfg(all(not(feature = "i686-guest"), target_arch = "x86_64"))] + { + Self::X86_64 + } + #[cfg(all(not(feature = "i686-guest"), target_arch = "aarch64"))] + { + Self::Aarch64 + } + } + + fn from_u32(v: u32) -> crate::Result { + match v { + 1 => Ok(Self::X86_64), + 2 => Ok(Self::Aarch64), + 3 => Ok(Self::I686), + _ => Err(crate::new_error!("unknown architecture tag: {}", v)), + } + } +} + +/// Hypervisor tag for snapshot files. +/// +/// Segment register hidden-cache fields (unusable, type_, granularity, +/// db) differ between hypervisors for the same architectural state. +/// Restoring sregs captured on one hypervisor into another may be +/// rejected or produce subtly wrong behavior. The tag ensures +/// snapshots are only loaded on the same hypervisor that created them. 
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub(super) enum HypervisorTag {
+    Kvm = 1,
+    Mshv = 2,
+    Whp = 3,
+}
+
+impl HypervisorTag {
+    /// Tag for the hypervisor available at runtime, or `None` if no
+    /// hypervisor is available.
+    pub(super) fn current() -> Option<Self> {
+        #[allow(unused_imports)]
+        use crate::hypervisor::virtual_machine::HypervisorType;
+        use crate::hypervisor::virtual_machine::get_available_hypervisor;
+
+        match get_available_hypervisor() {
+            #[cfg(kvm)]
+            Some(HypervisorType::Kvm) => Some(Self::Kvm),
+            #[cfg(mshv3)]
+            Some(HypervisorType::Mshv) => Some(Self::Mshv),
+            #[cfg(target_os = "windows")]
+            Some(HypervisorType::Whp) => Some(Self::Whp),
+            None => None,
+        }
+    }
+
+    /// Parse the on-disk `hypervisor` field, rejecting unknown tags.
+    fn from_u64(v: u64) -> crate::Result<Self> {
+        match v {
+            1 => Ok(Self::Kvm),
+            2 => Ok(Self::Mshv),
+            3 => Ok(Self::Whp),
+            _ => Err(crate::new_error!("unknown hypervisor tag: {}", v)),
+        }
+    }
+
+    /// Human-readable name used in mismatch error messages.
+    fn name(&self) -> &'static str {
+        match self {
+            Self::Kvm => "KVM",
+            Self::Mshv => "MSHV",
+            Self::Whp => "WHP",
+        }
+    }
+}
+
+// All raw header structs use little-endian on-disk encoding. Both
+// supported architectures (x86_64, aarch64) are little-endian, so we
+// just refuse to compile on a hypothetical big-endian target rather
+// than byte-swap on every field access.
+#[cfg(not(target_endian = "little"))]
+compile_error!("snapshot file format requires a little-endian target");
+
+/// Memory layout fields stored in the snapshot file (validated form).
+/// These are the primary inputs needed to reconstruct a `SandboxMemoryLayout`.
+struct LayoutFields {
+    input_data_size: usize,
+    output_data_size: usize,
+    heap_size: usize,
+    code_size: usize,
+    init_data_size: usize,
+    init_data_permissions: Option<crate::mem::memory_region::MemoryRegionFlags>,
+    scratch_size: usize,
+    snapshot_size: usize,
+    pt_size: Option<usize>,
+}
+
+/// Fixed preamble at the start of every snapshot file (validated form).
+/// Never changes across format versions so it can always be read to
+/// determine which version-specific header follows.
+struct SnapshotPreamble { + magic: [u8; 4], + format_version: FormatVersion, +} + +/// V1 snapshot header (validated form). +struct SnapshotHeaderV1 { + arch: ArchTag, + abi_version: u32, + stack_top_gva: u64, + entrypoint: NextAction, + layout: LayoutFields, + memory_size: usize, + memory_offset: u64, + has_sregs: bool, + hypervisor: HypervisorTag, + /// Byte length of the host-function-details flatbuffer that + /// follows the fixed header. `0` means no host functions are + /// stored. + host_funcs_size: u64, +} + +// --- Raw POD on-disk structs --- +// +// These mirror the bytes on disk one-for-one. Reading and writing +// goes through `bytemuck`; field-level validation lives in `From` / +// `TryFrom` impls below. + +#[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)] +#[repr(C)] +pub(super) struct RawPreamble { + pub(super) magic: [u8; 4], + pub(super) format_version: u32, +} + +#[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)] +#[repr(C)] +pub(super) struct RawHeaderV1 { + pub(super) arch: u32, + pub(super) abi_version: u32, + pub(super) stack_top_gva: u64, + pub(super) entrypoint_tag: u64, + pub(super) entrypoint_addr: u64, + pub(super) input_data_size: u64, + pub(super) output_data_size: u64, + pub(super) heap_size: u64, + pub(super) code_size: u64, + pub(super) init_data_size: u64, + pub(super) init_data_permissions: u64, + pub(super) scratch_size: u64, + pub(super) snapshot_size: u64, + pub(super) pt_size: u64, + pub(super) memory_size: u64, + pub(super) memory_offset: u64, + pub(super) has_sregs: u64, + pub(super) hypervisor: u64, + pub(super) host_funcs_size: u64, +} + +#[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)] +#[repr(C)] +struct RawSegmentRegister { + base: u64, + limit: u64, + selector: u64, + type_: u64, + present: u64, + dpl: u64, + db: u64, + s: u64, + l: u64, + g: u64, + avl: u64, + unusable: u64, + padding: u64, +} + +#[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)] +#[repr(C)] +struct RawTableRegister { + 
base: u64, + limit: u64, +} + +#[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)] +#[repr(C)] +struct RawSregs { + cs: RawSegmentRegister, + ds: RawSegmentRegister, + es: RawSegmentRegister, + fs: RawSegmentRegister, + gs: RawSegmentRegister, + ss: RawSegmentRegister, + tr: RawSegmentRegister, + ldt: RawSegmentRegister, + gdt: RawTableRegister, + idt: RawTableRegister, + cr0: u64, + cr2: u64, + cr3: u64, + cr4: u64, + cr8: u64, + efer: u64, + apic_base: u64, + interrupt_bitmap: [u64; 4], +} + +/// Integrity hashes. Sits between the fixed structural prefix +/// (preamble + header + sregs) and the variable-length +/// host-functions blob, on the fixed/variable boundary. +/// +/// `header_hash` covers `preamble || header || sregs || +/// host_funcs_blob` (everything except the hashes themselves and +/// the memory blob). Always verified. +/// +/// `blob_hash` covers the memory blob. Skipped by +/// `from_file_unchecked`. `blob_hash` is one of the bytes covered +/// by `header_hash`, so an attacker cannot swap a blob without +/// invalidating the always-checked header hash. +#[derive(Copy, Clone, bytemuck::Pod, bytemuck::Zeroable)] +#[repr(C)] +pub(super) struct RawHashes { + pub(super) header_hash: [u8; 32], + pub(super) blob_hash: [u8; 32], +} + +/// Total byte length of the fixed-position prefix of a V1 snapshot +/// file (preamble + header + sregs + hashes). The `bytemuck::Pod` +/// derives on the raw structs already guarantee they have no +/// padding, so this is exactly the on-disk byte count. 
+pub(super) const FIXED_PREFIX_SIZE: usize = std::mem::size_of::() + + std::mem::size_of::() + + std::mem::size_of::() + + std::mem::size_of::(); + +// --- Raw <-> rich conversions --- + +impl From<&SnapshotPreamble> for RawPreamble { + fn from(p: &SnapshotPreamble) -> Self { + Self { + magic: p.magic, + format_version: p.format_version as u32, + } + } +} + +impl TryFrom for SnapshotPreamble { + type Error = crate::HyperlightError; + fn try_from(raw: RawPreamble) -> crate::Result { + if &raw.magic != SNAPSHOT_MAGIC { + return Err(crate::new_error!( + "invalid snapshot file: bad magic bytes (expected {:?}, got {:?})", + SNAPSHOT_MAGIC, + raw.magic + )); + } + Ok(Self { + magic: raw.magic, + format_version: FormatVersion::from_u32(raw.format_version)?, + }) + } +} + +impl From<&SnapshotHeaderV1> for RawHeaderV1 { + fn from(h: &SnapshotHeaderV1) -> Self { + let (entrypoint_tag, entrypoint_addr) = match h.entrypoint { + NextAction::Initialise(a) => (0u64, a), + NextAction::Call(a) => (1u64, a), + #[cfg(test)] + NextAction::None => (u64::MAX, 0), + }; + let l = &h.layout; + Self { + arch: h.arch as u32, + abi_version: h.abi_version, + stack_top_gva: h.stack_top_gva, + entrypoint_tag, + entrypoint_addr, + input_data_size: l.input_data_size as u64, + output_data_size: l.output_data_size as u64, + heap_size: l.heap_size as u64, + code_size: l.code_size as u64, + init_data_size: l.init_data_size as u64, + init_data_permissions: l.init_data_permissions.map_or(0, |f| f.bits() as u64), + scratch_size: l.scratch_size as u64, + snapshot_size: l.snapshot_size as u64, + pt_size: l.pt_size.map_or(0, |v| v as u64), + memory_size: h.memory_size as u64, + memory_offset: h.memory_offset, + has_sregs: if h.has_sregs { 1 } else { 0 }, + hypervisor: h.hypervisor as u64, + host_funcs_size: h.host_funcs_size, + } + } +} + +impl TryFrom for SnapshotHeaderV1 { + type Error = crate::HyperlightError; + fn try_from(raw: RawHeaderV1) -> crate::Result { + use 
crate::mem::memory_region::MemoryRegionFlags; + + let arch = ArchTag::from_u32(raw.arch)?; + let entrypoint = match raw.entrypoint_tag { + 0 => NextAction::Initialise(raw.entrypoint_addr), + 1 => NextAction::Call(raw.entrypoint_addr), + _ => { + return Err(crate::new_error!( + "invalid entrypoint tag in snapshot: {}", + raw.entrypoint_tag + )); + } + }; + let init_data_permissions = if raw.init_data_permissions == 0 { + None + } else { + // Field is `u64` on disk for layout uniformity but the + // flag set is `u32`. Reject any high bits before + // narrowing so we don't silently truncate them. + let bits = u32::try_from(raw.init_data_permissions).map_err(|_| { + crate::new_error!( + "snapshot init_data_permissions ({:#x}) exceeds u32 range", + raw.init_data_permissions + ) + })?; + Some(MemoryRegionFlags::from_bits(bits).ok_or_else(|| { + crate::new_error!("snapshot contains unknown memory region flags: {:#x}", bits) + })?) + }; + let pt_size = if raw.pt_size == 0 { + None + } else { + Some(raw.pt_size as usize) + }; + let has_sregs = match raw.has_sregs { + 0 => false, + 1 => true, + other => { + return Err(crate::new_error!( + "snapshot has_sregs must be 0 or 1, got {}", + other + )); + } + }; + let hypervisor = HypervisorTag::from_u64(raw.hypervisor)?; + Ok(Self { + arch, + abi_version: raw.abi_version, + stack_top_gva: raw.stack_top_gva, + entrypoint, + layout: LayoutFields { + input_data_size: raw.input_data_size as usize, + output_data_size: raw.output_data_size as usize, + heap_size: raw.heap_size as usize, + code_size: raw.code_size as usize, + init_data_size: raw.init_data_size as usize, + init_data_permissions, + scratch_size: raw.scratch_size as usize, + snapshot_size: raw.snapshot_size as usize, + pt_size, + }, + memory_size: raw.memory_size as usize, + memory_offset: raw.memory_offset, + has_sregs, + hypervisor, + host_funcs_size: raw.host_funcs_size, + }) + } +} + +impl SnapshotHeaderV1 { + /// File-bound and environment validation: checks that a + 
/// well-formed header (already produced by `TryFrom`) is also + /// consistent with the actual file size and the runtime + /// environment (architecture, hypervisor, ABI version). + fn validate_against_file(&self, file_len: u64) -> crate::Result<()> { + if self.arch != ArchTag::current() { + return Err(crate::new_error!( + "snapshot architecture mismatch: expected {:?}, got {:?}", + ArchTag::current(), + self.arch + )); + } + if self.abi_version != SNAPSHOT_ABI_VERSION { + return Err(crate::new_error!( + "snapshot ABI version mismatch: file has ABI version {}, \ + but this build expects {}. The snapshot must be regenerated \ + from the guest binary.", + self.abi_version, + SNAPSHOT_ABI_VERSION + )); + } + let current_hv = HypervisorTag::current() + .ok_or_else(|| crate::new_error!("no hypervisor available to load snapshot"))?; + if self.hypervisor != current_hv { + return Err(crate::new_error!( + "snapshot hypervisor mismatch: file was created on {} but the current hypervisor is {}.", + self.hypervisor.name(), + current_hv.name() + )); + } + + if self.memory_offset == 0 || self.memory_offset % PAGE_SIZE as u64 != 0 { + return Err(crate::new_error!( + "invalid snapshot memory_offset {} (must be a non-zero multiple of PAGE_SIZE)", + self.memory_offset + )); + } + + // host_funcs region must fit between the fixed prefix and + // the page-aligned memory_offset. 
+ if self.host_funcs_size > MAX_HOST_FUNCS_SIZE { + return Err(crate::new_error!( + "snapshot host_funcs_size ({}) exceeds maximum ({})", + self.host_funcs_size, + MAX_HOST_FUNCS_SIZE + )); + } + let after_hf = (FIXED_PREFIX_SIZE as u64) + .checked_add(self.host_funcs_size) + .ok_or_else(|| { + crate::new_error!( + "snapshot host_funcs_size ({}) overflows file offset", + self.host_funcs_size + ) + })?; + if after_hf > self.memory_offset { + return Err(crate::new_error!( + "snapshot host_funcs_size ({}) does not fit between fixed prefix and memory_offset ({})", + self.host_funcs_size, + self.memory_offset + )); + } + // host_funcs region must fit in the file too (defends + // against memory_offset being extended past the file end by + // a malicious header). + let max_host_funcs = file_len.saturating_sub(FIXED_PREFIX_SIZE as u64); + if self.host_funcs_size > max_host_funcs { + return Err(crate::new_error!( + "snapshot host_funcs_size ({}) exceeds remaining file bytes ({})", + self.host_funcs_size, + max_host_funcs + )); + } + + // Memory blob plus the trailing PAGE_SIZE guard must fit in + // the file. + let blob_end = self + .memory_offset + .checked_add(self.memory_size as u64) + .and_then(|n| n.checked_add(PAGE_SIZE as u64)) + .ok_or_else(|| crate::new_error!("snapshot memory blob bounds overflow"))?; + if blob_end > file_len { + return Err(crate::new_error!( + "snapshot memory blob extends past the end of the file (need {} bytes, file has {})", + blob_end, + file_len + )); + } + + // `entrypoint` and `has_sregs` must agree: a `Call` snapshot + // is mid-execution and therefore has a captured sregs state, + // while an `Initialise` snapshot has not yet run on the vCPU + // and has none. Anything else is a malformed header. 
+ let call_entry = matches!(self.entrypoint, NextAction::Call(_)); + if call_entry != self.has_sregs { + return Err(crate::new_error!( + "snapshot entrypoint and has_sregs disagree: entrypoint requires sregs={}, has_sregs={}", + call_entry, + self.has_sregs + )); + } + + Ok(()) + } +} + +impl From<&CommonSpecialRegisters> for RawSregs { + fn from(s: &CommonSpecialRegisters) -> Self { + let seg = |r: &crate::hypervisor::regs::CommonSegmentRegister| RawSegmentRegister { + base: r.base, + limit: r.limit as u64, + selector: r.selector as u64, + type_: r.type_ as u64, + present: r.present as u64, + dpl: r.dpl as u64, + db: r.db as u64, + s: r.s as u64, + l: r.l as u64, + g: r.g as u64, + avl: r.avl as u64, + unusable: r.unusable as u64, + padding: r.padding as u64, + }; + let tab = |r: &crate::hypervisor::regs::CommonTableRegister| RawTableRegister { + base: r.base, + limit: r.limit as u64, + }; + Self { + cs: seg(&s.cs), + ds: seg(&s.ds), + es: seg(&s.es), + fs: seg(&s.fs), + gs: seg(&s.gs), + ss: seg(&s.ss), + tr: seg(&s.tr), + ldt: seg(&s.ldt), + gdt: tab(&s.gdt), + idt: tab(&s.idt), + cr0: s.cr0, + cr2: s.cr2, + cr3: s.cr3, + cr4: s.cr4, + cr8: s.cr8, + efer: s.efer, + apic_base: s.apic_base, + interrupt_bitmap: s.interrupt_bitmap, + } + } +} + +impl From for CommonSpecialRegisters { + fn from(r: RawSregs) -> Self { + use crate::hypervisor::regs::{CommonSegmentRegister, CommonTableRegister}; + // Truncating casts are intentional and lossless on + // well-formed input: the original fields have those widths + // and were widened to u64 only for on-disk uniformity. 
+ let seg = |s: RawSegmentRegister| CommonSegmentRegister { + base: s.base, + limit: s.limit as u32, + selector: s.selector as u16, + type_: s.type_ as u8, + present: s.present as u8, + dpl: s.dpl as u8, + db: s.db as u8, + s: s.s as u8, + l: s.l as u8, + g: s.g as u8, + avl: s.avl as u8, + unusable: s.unusable as u8, + padding: s.padding as u8, + }; + let tab = |t: RawTableRegister| CommonTableRegister { + base: t.base, + limit: t.limit as u16, + }; + Self { + cs: seg(r.cs), + ds: seg(r.ds), + es: seg(r.es), + fs: seg(r.fs), + gs: seg(r.gs), + ss: seg(r.ss), + tr: seg(r.tr), + ldt: seg(r.ldt), + gdt: tab(r.gdt), + idt: tab(r.idt), + cr0: r.cr0, + cr2: r.cr2, + cr3: r.cr3, + cr4: r.cr4, + cr8: r.cr8, + efer: r.efer, + apic_base: r.apic_base, + interrupt_bitmap: r.interrupt_bitmap, + } + } +} + +impl Snapshot { + /// Save this snapshot to a file on disk. + /// + /// The file format uses a page-aligned memory blob that can be + /// mmapped directly on load for zero-copy instantiation. + /// + /// If a file already exists at `path`, it is truncated and + /// overwritten. + /// + /// # Portability + /// + /// Snapshot files are **not portable** across CPU architectures, + /// hypervisors, or operating systems. All three are checked at + /// load time and a mismatch produces an error. + pub fn to_file(&self, path: impl AsRef) -> crate::Result<()> { + use std::io::{BufWriter, Write}; + + let file = std::fs::File::create(path.as_ref()) + .map_err(|e| crate::new_error!("failed to create snapshot file: {}", e))?; + let mut w = BufWriter::new(file); + + let layout = &self.layout; + + // Serialize host-function metadata up-front so we can compute + // `memory_offset` (which depends on `host_funcs_size`) before + // writing the header. 
+ let host_funcs_bytes: Vec = if self + .host_functions + .host_functions + .as_ref() + .is_some_and(|v| !v.is_empty()) + { + (&self.host_functions).try_into().map_err(|e| { + crate::new_error!("failed to serialize host function details: {:?}", e) + })? + } else { + Vec::new() + }; + + // The memory blob sits immediately after the host-function + // blob, page-aligned. With no host functions this lands at + // exactly PAGE_SIZE. + let memory_offset = + (FIXED_PREFIX_SIZE + host_funcs_bytes.len()).next_multiple_of(PAGE_SIZE) as u64; + + let preamble = SnapshotPreamble { + magic: *SNAPSHOT_MAGIC, + format_version: FormatVersion::V1, + }; + let v1 = SnapshotHeaderV1 { + arch: ArchTag::current(), + abi_version: SNAPSHOT_ABI_VERSION, + stack_top_gva: self.stack_top_gva, + entrypoint: self.entrypoint, + layout: LayoutFields { + input_data_size: layout.input_data_size, + output_data_size: layout.output_data_size, + heap_size: layout.heap_size, + code_size: layout.code_size, + init_data_size: layout.init_data_size, + init_data_permissions: layout.init_data_permissions, + scratch_size: layout.get_scratch_size(), + snapshot_size: layout.snapshot_size, + pt_size: layout.pt_size, + }, + memory_size: self.memory.mem_size(), + memory_offset, + has_sregs: self.sregs.is_some(), + hypervisor: HypervisorTag::current() + .ok_or_else(|| crate::new_error!("no hypervisor available to tag snapshot"))?, + host_funcs_size: host_funcs_bytes.len() as u64, + }; + let sregs = self.sregs.unwrap_or_default(); + + let raw_preamble = RawPreamble::from(&preamble); + let raw_header = RawHeaderV1::from(&v1); + let raw_sregs = RawSregs::from(&sregs); + + // `blob_hash` covers the memory blob. `header_hash` covers + // everything else of integrity interest: preamble, header, + // sregs, and the host_funcs blob. `blob_hash` is itself part + // of the bytes covered by `header_hash`, so swapping a blob + // without rewriting the header invalidates the always-checked + // header hash. 
+ let blob_hash: [u8; 32] = blake3::hash(self.memory.as_slice()).into(); + let mut hasher = blake3::Hasher::new(); + hasher.update(bytemuck::bytes_of(&raw_preamble)); + hasher.update(bytemuck::bytes_of(&raw_header)); + hasher.update(bytemuck::bytes_of(&raw_sregs)); + hasher.update(&host_funcs_bytes); + let header_hash: [u8; 32] = hasher.finalize().into(); + let raw_hashes = RawHashes { + header_hash, + blob_hash, + }; + + w.write_all(bytemuck::bytes_of(&raw_preamble)) + .map_err(|e| crate::new_error!("snapshot write error: {}", e))?; + w.write_all(bytemuck::bytes_of(&raw_header)) + .map_err(|e| crate::new_error!("snapshot write error: {}", e))?; + w.write_all(bytemuck::bytes_of(&raw_sregs)) + .map_err(|e| crate::new_error!("snapshot write error: {}", e))?; + w.write_all(bytemuck::bytes_of(&raw_hashes)) + .map_err(|e| crate::new_error!("snapshot write error: {}", e))?; + + // Host function metadata (variable length, not mmapped). + if !host_funcs_bytes.is_empty() { + w.write_all(&host_funcs_bytes) + .map_err(|e| crate::new_error!("snapshot write error: {}", e))?; + } + + // Zero-pad up to the page-aligned memory_offset so the blob + // is mmap-aligned in the file. + let pre_blob_pos = FIXED_PREFIX_SIZE + host_funcs_bytes.len(); + debug_assert!(pre_blob_pos <= memory_offset as usize); + w.write_all(&vec![0u8; memory_offset as usize - pre_blob_pos]) + .map_err(|e| crate::new_error!("snapshot write error: {}", e))?; + + w.write_all(self.memory.as_slice()) + .map_err(|e| crate::new_error!("snapshot write error: {}", e))?; + + // Trailing PAGE_SIZE padding: Windows read-only file mappings + // cannot extend beyond the file's actual size, so the file must + // contain backing bytes for the trailing guard page used by + // ReadonlySharedMemory::from_file_windows. Linux ignores this + // padding (its guard pages come from an anonymous mmap reservation). 
+ w.write_all(&[0u8; PAGE_SIZE]) + .map_err(|e| crate::new_error!("snapshot write error: {}", e))?; + + w.flush() + .map_err(|e| crate::new_error!("snapshot write error: {}", e))?; + + Ok(()) + } + + /// Load a snapshot from a file on disk. + /// + /// The memory blob is mapped directly from the file for zero-copy + /// loading using platform-specific CoW mechanisms. + /// + /// Returns an error if the file is from a different CPU + /// architecture, hypervisor, or OS than this host. See + /// [`Snapshot::to_file`] for the full portability story. + /// + /// Note: ELF unwind info (`LoadInfo`) is not persisted in the + /// snapshot file, so the `mem_profile` feature will not have + /// accurate profiling data for sandboxes created from disk + /// snapshots. + /// + /// # File-mutation hazard + /// + /// The file at `path` must not be modified, truncated, renamed + /// over, or deleted while the returned `Snapshot` (or any + /// [`MultiUseSandbox`](crate::MultiUseSandbox) constructed from + /// it) is still alive. + pub fn from_file(path: impl AsRef) -> crate::Result { + Self::from_file_impl(path, true) + } + + /// Load a snapshot from a file on disk without verifying the + /// memory blob's content hash. The fixed-prefix integrity check + /// (preamble + header + sregs + host_funcs) is still performed. + /// + /// This is faster for large snapshots in trusted environments + /// where blob integrity is guaranteed by other means. All other + /// portability checks (architecture, hypervisor, OS) still + /// apply. See [`Snapshot::to_file`] for details. + /// + /// # File-mutation hazard + /// + /// The file at `path` must not be modified, truncated, renamed + /// over, or deleted while the returned `Snapshot` (or any + /// [`MultiUseSandbox`](crate::MultiUseSandbox) constructed from + /// it) is still alive. 
+ pub fn from_file_unchecked(path: impl AsRef) -> crate::Result { + Self::from_file_impl(path, false) + } + + fn from_file_impl( + path: impl AsRef, + verify_blob_hash: bool, + ) -> crate::Result { + use std::io::BufReader; + + let file = std::fs::File::open(path.as_ref()) + .map_err(|e| crate::new_error!("failed to open snapshot file: {}", e))?; + let file_len = file + .metadata() + .map_err(|e| crate::new_error!("snapshot stat error: {}", e))? + .len(); + let mut r = BufReader::new(&file); + + // Phase 1: read raw bytes into POD structs. + use std::io::Read; + let mut preamble_buf = [0u8; std::mem::size_of::()]; + r.read_exact(&mut preamble_buf) + .map_err(|e| crate::new_error!("snapshot read error: {}", e))?; + let raw_preamble: RawPreamble = bytemuck::pod_read_unaligned(&preamble_buf); + // Validate magic + format version. Future format versions + // would dispatch here on `preamble.format_version`. + let preamble = SnapshotPreamble::try_from(raw_preamble)?; + let mut header_buf = [0u8; std::mem::size_of::()]; + let raw_v1: RawHeaderV1 = match preamble.format_version { + FormatVersion::V1 => { + r.read_exact(&mut header_buf) + .map_err(|e| crate::new_error!("snapshot read error: {}", e))?; + bytemuck::pod_read_unaligned(&header_buf) + } + }; + let mut sregs_buf = [0u8; std::mem::size_of::()]; + r.read_exact(&mut sregs_buf) + .map_err(|e| crate::new_error!("snapshot read error: {}", e))?; + let raw_sregs: RawSregs = bytemuck::pod_read_unaligned(&sregs_buf); + + let mut hashes_buf = [0u8; std::mem::size_of::()]; + r.read_exact(&mut hashes_buf) + .map_err(|e| crate::new_error!("snapshot read error: {}", e))?; + let hashes: RawHashes = bytemuck::pod_read_unaligned(&hashes_buf); + + // Phase 2: parse + validate against the file size and the + // current runtime environment. + let hdr = SnapshotHeaderV1::try_from(raw_v1)?; + hdr.validate_against_file(file_len)?; + + // Read the optional host-function-details blob into a + // buffer. 
It is needed both for `header_hash` verification
+        // and for the flatbuffer parse below.
+        let mut host_funcs_buf = vec![0u8; hdr.host_funcs_size as usize];
+        if !host_funcs_buf.is_empty() {
+            r.read_exact(&mut host_funcs_buf)
+                .map_err(|e| crate::new_error!("snapshot read error: {}", e))?;
+        }
+
+        // Phase 3: verify `header_hash` over (preamble || header ||
+        // sregs || host_funcs). Always performed, regardless of
+        // `verify_blob_hash`. Cheap and closes the malicious-header
+        // surface. The hashes themselves sit on disk between sregs
+        // and host_funcs and are not part of what is hashed.
+        {
+            let mut hasher = blake3::Hasher::new();
+            hasher.update(&preamble_buf);
+            hasher.update(&header_buf);
+            hasher.update(&sregs_buf);
+            hasher.update(&host_funcs_buf);
+            let computed: [u8; 32] = hasher.finalize().into();
+            if computed != hashes.header_hash {
+                return Err(crate::new_error!(
+                    "snapshot header_hash mismatch: file may be corrupted or tampered"
+                ));
+            }
+        }
+
+        // Reconstruct layout
+        let l = &hdr.layout;
+        let mut cfg = crate::sandbox::SandboxConfiguration::default();
+        cfg.set_input_data_size(l.input_data_size);
+        cfg.set_output_data_size(l.output_data_size);
+        cfg.set_heap_size(l.heap_size as u64);
+        cfg.set_scratch_size(l.scratch_size);
+        let mut layout =
+            SandboxMemoryLayout::new(cfg, l.code_size, l.init_data_size, l.init_data_permissions)?;
+        // Order matters: `set_pt_size` mutates `snapshot_size`
+        // internally, so call it before `set_snapshot_size`.
+        if let Some(pt) = l.pt_size {
+            layout.set_pt_size(pt)?;
+        }
+        layout.set_snapshot_size(l.snapshot_size);
+
+        let sregs = if hdr.has_sregs {
+            Some(CommonSpecialRegisters::from(raw_sregs))
+        } else {
+            None
+        };
+
+        let host_functions = if !host_funcs_buf.is_empty() {
+            HostFunctionDetails::try_from(host_funcs_buf.as_slice())
+                .map_err(|e| crate::new_error!("failed to parse host function details: {:?}", e))?
+ } else { + HostFunctionDetails { + host_functions: None, + } + }; + + // Map the memory blob directly from the file (zero-copy CoW). + // When the blob contains a PT tail (memory_size > snapshot_size), + // only snapshot_size bytes should be mapped into guest PA space. + let guest_mapped_size = if hdr.memory_size > layout.snapshot_size { + Some(layout.snapshot_size) + } else { + None + }; + let memory = ReadonlySharedMemory::from_file( + &file, + hdr.memory_offset.try_into().map_err(|_| { + crate::new_error!( + "snapshot memory_offset {} exceeds usize range", + hdr.memory_offset + ) + })?, + hdr.memory_size, + guest_mapped_size, + )?; + + // Phase 4: verify the memory blob's hash. Skipped by + // `from_file_unchecked` since this is the expensive check + // (proportional to blob size). + if verify_blob_hash { + let computed: [u8; 32] = blake3::hash(memory.as_slice()).into(); + if computed != hashes.blob_hash { + return Err(crate::new_error!( + "snapshot hash mismatch: file may be corrupted" + )); + } + } + + Ok(Snapshot { + sandbox_id: SANDBOX_CONFIGURATION_COUNTER + .fetch_add(1, std::sync::atomic::Ordering::Relaxed), + layout, + memory, + regions: Vec::new(), + load_info: crate::mem::exe::LoadInfo::dummy(), + // In-memory `Snapshot::hash` is `blake3(memory)` (matches + // `Snapshot::new`/`Snapshot::from_env`), used as the + // `PartialEq` key. This is the on-disk `blob_hash`. + hash: hashes.blob_hash, + stack_top_gva: hdr.stack_top_gva, + sregs, + entrypoint: hdr.entrypoint, + snapshot_generation: 0, + host_functions, + }) + } +} diff --git a/src/hyperlight_host/src/sandbox/snapshot/file_tests.rs b/src/hyperlight_host/src/sandbox/snapshot/file_tests.rs new file mode 100644 index 000000000..f100a0dfa --- /dev/null +++ b/src/hyperlight_host/src/sandbox/snapshot/file_tests.rs @@ -0,0 +1,1803 @@ +/* +Copyright 2025 The Hyperlight Authors. 
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+//! Tests for the snapshot file format (`super::file`).
+
+#![cfg(test)]
+
+use std::sync::Arc;
+
+use hyperlight_testing::simple_guest_as_string;
+
+use super::file::{FIXED_PREFIX_SIZE, HypervisorTag, RawHeaderV1, RawPreamble};
+use crate::sandbox::snapshot::Snapshot;
+use crate::{GuestBinary, HostFunctions, MultiUseSandbox, UninitializedSandbox};
+
+/// Absolute file offset of a `RawHeaderV1` field. Computed from
+/// the struct definition so it stays correct if the field order
+/// changes.
+macro_rules! v1_offset {
+    ($field:ident) => {
+        std::mem::size_of::<RawPreamble>() + std::mem::offset_of!(RawHeaderV1, $field)
+    };
+}
+
+/// Build an initialized sandbox from the simple test guest binary.
+fn create_test_sandbox() -> MultiUseSandbox {
+    let path = simple_guest_as_string().unwrap();
+    UninitializedSandbox::new(GuestBinary::FilePath(path), None)
+        .unwrap()
+        .evolve()
+        .unwrap()
+}
+
+/// Build a pre-init snapshot directly from the guest binary
+/// (no sandbox is ever run).
+fn create_snapshot_from_binary() -> Snapshot {
+    let path = simple_guest_as_string().unwrap();
+    Snapshot::from_env(
+        GuestBinary::FilePath(path),
+        crate::sandbox::SandboxConfiguration::default(),
+    )
+    .unwrap()
+}
+
+#[test]
+fn from_snapshot_already_initialized_in_memory() {
+    // Test from_snapshot with a snapshot taken from an already-initialized
+    // sandbox (NextAction::Call), directly from memory without file I/O
+    let mut sbox = create_test_sandbox();
+    let snapshot = sbox.snapshot().unwrap();
+
+    let new_snap = Snapshot {
+        sandbox_id: super::SANDBOX_CONFIGURATION_COUNTER
+            .fetch_add(1, std::sync::atomic::Ordering::Relaxed),
+        layout: *snapshot.layout(),
+        memory: snapshot.memory().clone(),
+        regions: snapshot.regions().to_vec(),
+        load_info: snapshot.load_info(),
+        hash: snapshot.hash,
+        stack_top_gva: snapshot.stack_top_gva(),
+        sregs: snapshot.sregs().cloned(),
+        entrypoint: snapshot.entrypoint(),
+        snapshot_generation: snapshot.snapshot_generation(),
+        host_functions: snapshot.host_functions.clone(),
+    };
+
+    let mut sbox2 =
+        MultiUseSandbox::from_snapshot(Arc::new(new_snap), HostFunctions::default(), None).unwrap();
+    let result: i32 = sbox2.call("GetStatic", ()).unwrap();
+    assert_eq!(result, 0);
+}
+
+#[test]
+fn from_snapshot_in_memory() {
+    // Test from_snapshot pathway using the existing Snapshot::from_env
+    let path = simple_guest_as_string().unwrap();
+    let snap = Snapshot::from_env(
+        GuestBinary::FilePath(path),
+        crate::sandbox::SandboxConfiguration::default(),
+    )
+    .unwrap();
+
+    let mut sbox =
+        MultiUseSandbox::from_snapshot(Arc::new(snap), HostFunctions::default(), None).unwrap();
+
+    // from_env creates a snapshot with 
NextAction::Initialise, + // so from_snapshot will run the init code via vm.initialise() + let result: i32 = sbox.call("GetStatic", ()).unwrap(); + assert_eq!(result, 0); +} + +#[test] +fn round_trip_save_load_call() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("test.hls"); + snapshot.to_file(&snap_path).unwrap(); + + let loaded = Snapshot::from_file(&snap_path).unwrap(); + let mut sbox2 = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None).unwrap(); + + let result: String = sbox2.call("Echo", "hello\n".to_string()).unwrap(); + assert_eq!(result, "hello\n"); +} + +#[test] +fn snapshot_and_pt_size_round_trip() { + // Running-sandbox snapshot. + let mut sbox = create_test_sandbox(); + let snap = sbox.snapshot().unwrap(); + let original_snapshot_size = snap.layout().snapshot_size; + let original_pt_size = snap.layout().pt_size; + + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join("running.hls"); + snap.to_file(&path).unwrap(); + + let loaded = Snapshot::from_file(&path).unwrap(); + assert_eq!(loaded.layout().snapshot_size, original_snapshot_size); + assert_eq!(loaded.layout().pt_size, original_pt_size); + + // Pre-init snapshot. 
+ let preinit = create_snapshot_from_binary(); + let preinit_snapshot_size = preinit.layout().snapshot_size; + let preinit_pt_size = preinit.layout().pt_size; + + let path = dir.path().join("preinit.hls"); + preinit.to_file(&path).unwrap(); + + let loaded = Snapshot::from_file(&path).unwrap(); + assert_eq!(loaded.layout().snapshot_size, preinit_snapshot_size); + assert_eq!(loaded.layout().pt_size, preinit_pt_size); +} + +#[test] +fn hash_verification_detects_corruption() { + let snapshot = create_snapshot_from_binary(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("corrupted.hls"); + snapshot.to_file(&snap_path).unwrap(); + + // Corrupt a byte in the memory blob (after the 4096-byte header) + { + use std::io::{Read, Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .read(true) + .write(true) + .open(&snap_path) + .unwrap(); + file.seek(SeekFrom::Start(4096 + 100)).unwrap(); + let mut byte = [0u8; 1]; + file.read_exact(&mut byte).unwrap(); + byte[0] ^= 0xFF; + file.seek(SeekFrom::Start(4096 + 100)).unwrap(); + file.write_all(&byte).unwrap(); + } + + let result = Snapshot::from_file(&snap_path); + let err_msg = match result { + Err(e) => format!("{}", e), + Ok(_) => panic!("expected load to fail with hash mismatch"), + }; + assert!( + err_msg.contains("hash mismatch"), + "expected hash mismatch error, got: {}", + err_msg + ); +} + +#[test] +fn arch_mismatch_rejected() { + let snapshot = create_snapshot_from_binary(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("wrong_arch.hls"); + snapshot.to_file(&snap_path).unwrap(); + + // Overwrite the architecture tag + { + use std::io::{Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .write(true) + .open(&snap_path) + .unwrap(); + file.seek(SeekFrom::Start(v1_offset!(arch) as u64)).unwrap(); + file.write_all(&99u32.to_le_bytes()).unwrap(); + } + + let result = Snapshot::from_file(&snap_path); + let err_msg = match 
result { + Err(e) => format!("{}", e), + Ok(_) => panic!("expected load to fail with arch mismatch"), + }; + assert!( + err_msg.contains("architecture"), + "expected arch-related error, got: {}", + err_msg + ); +} + +#[test] +fn format_version_mismatch_rejected() { + let snapshot = create_snapshot_from_binary(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("wrong_version.hls"); + snapshot.to_file(&snap_path).unwrap(); + + // Overwrite the format version + { + use std::io::{Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .write(true) + .open(&snap_path) + .unwrap(); + file.seek(SeekFrom::Start( + std::mem::offset_of!(RawPreamble, format_version) as u64, + )) + .unwrap(); + file.write_all(&999u32.to_le_bytes()).unwrap(); + } + + let result = Snapshot::from_file(&snap_path); + let err_msg = match result { + Err(e) => format!("{}", e), + Ok(_) => panic!("expected load to fail with version mismatch"), + }; + assert!( + err_msg.contains("format version"), + "expected version mismatch error, got: {}", + err_msg + ); + assert!( + err_msg.contains("convertible"), + "expected hint about convertibility, got: {}", + err_msg + ); +} + +#[test] +fn abi_version_mismatch_rejected() { + let snapshot = create_snapshot_from_binary(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("wrong_abi.hls"); + snapshot.to_file(&snap_path).unwrap(); + + // Overwrite the ABI version + { + use std::io::{Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .write(true) + .open(&snap_path) + .unwrap(); + file.seek(SeekFrom::Start(v1_offset!(abi_version) as u64)) + .unwrap(); + file.write_all(&999u32.to_le_bytes()).unwrap(); + } + + let result = Snapshot::from_file(&snap_path); + let err_msg = match result { + Err(e) => format!("{}", e), + Ok(_) => panic!("expected load to fail with ABI version mismatch"), + }; + assert!( + err_msg.contains("ABI version mismatch"), + "expected ABI version mismatch 
error, got: {}", + err_msg + ); + assert!( + err_msg.contains("regenerated"), + "expected hint about regeneration, got: {}", + err_msg + ); +} + +#[test] +fn hypervisor_mismatch_rejected() { + let snapshot = create_snapshot_from_binary(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("wrong_hv.hls"); + snapshot.to_file(&snap_path).unwrap(); + + // Overwrite the hypervisor tag with a valid but wrong tag. + let current = HypervisorTag::current().unwrap(); + let wrong_tag = match current { + HypervisorTag::Whp => HypervisorTag::Kvm, + _ => HypervisorTag::Whp, + }; + { + use std::io::{Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .write(true) + .open(&snap_path) + .unwrap(); + file.seek(SeekFrom::Start(v1_offset!(hypervisor) as u64)) + .unwrap(); + file.write_all(&(wrong_tag as u64).to_le_bytes()).unwrap(); + } + + let result = Snapshot::from_file(&snap_path); + let err_msg = match result { + Err(e) => format!("{}", e), + Ok(_) => panic!("expected load to fail with hypervisor mismatch"), + }; + assert!( + err_msg.contains("hypervisor mismatch"), + "expected hypervisor mismatch error, got: {}", + err_msg + ); +} + +#[test] +fn restore_from_loaded_snapshot() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("restore.hls"); + snapshot.to_file(&snap_path).unwrap(); + + let loaded = Snapshot::from_file(&snap_path).unwrap(); + let mut sbox = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None).unwrap(); + + // Mutate state + sbox.call::("AddToStatic", 42i32).unwrap(); + let val: i32 = sbox.call("GetStatic", ()).unwrap(); + assert_eq!(val, 42); + + // Take a new snapshot and restore to it + let snap2 = sbox.snapshot().unwrap(); + sbox.call::("AddToStatic", 10i32).unwrap(); + let val: i32 = sbox.call("GetStatic", ()).unwrap(); + assert_eq!(val, 52); + + 
sbox.restore(snap2).unwrap(); + let val: i32 = sbox.call("GetStatic", ()).unwrap(); + assert_eq!(val, 42); +} + +#[test] +fn restore_to_original_file_snapshot() { + let mut sbox = create_test_sandbox(); + sbox.call::("AddToStatic", 10i32).unwrap(); + let snapshot = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("original.hls"); + snapshot.to_file(&snap_path).unwrap(); + + let loaded = Arc::new(Snapshot::from_file(&snap_path).unwrap()); + let mut sbox = + MultiUseSandbox::from_snapshot(loaded.clone(), HostFunctions::default(), None).unwrap(); + + sbox.call::("AddToStatic", 42i32).unwrap(); + let val: i32 = sbox.call("GetStatic", ()).unwrap(); + assert_eq!(val, 52); + + sbox.restore(loaded).unwrap(); + let val: i32 = sbox.call("GetStatic", ()).unwrap(); + assert_eq!(val, 10); +} + +/// Sandboxes built from clones of the same `Arc` must +/// be mutually `restore`-compatible (they share the same +/// `sandbox_id`). Conversely, two `Snapshot::from_file` calls of +/// the same path return distinct snapshots; that property is +/// exercised by `restore_to_different_file_loaded_snapshot_rejected`. +#[test] +fn sandboxes_from_shared_arc_snapshot_can_restore_to_each_other() { + let mut producer = create_test_sandbox(); + let snapshot = producer.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("shared_id.hls"); + snapshot.to_file(&snap_path).unwrap(); + + let loaded = Arc::new(Snapshot::from_file(&snap_path).unwrap()); + let mut sbox1 = + MultiUseSandbox::from_snapshot(loaded.clone(), HostFunctions::default(), None).unwrap(); + + // Take an in-process snapshot from one sibling. That snapshot + // inherits its sandbox's id, which must match every other sandbox + // built from the same `Arc`. 
+ sbox1.call::("AddToStatic", 7i32).unwrap(); + let mid_snap = sbox1.snapshot().unwrap(); + + let mut sbox2 = MultiUseSandbox::from_snapshot(loaded, HostFunctions::default(), None).unwrap(); + // Restoring `sbox2` to a snapshot taken from `sbox1` must + // succeed because they share the same id. + sbox2.restore(mid_snap).unwrap(); + assert_eq!(sbox2.call::("GetStatic", ()).unwrap(), 7); +} + +/// A single `Arc` loaded from disk must be safely shared +/// across many `from_snapshot` calls. Each resulting sandbox gets +/// its own CoW view and must be independent of the others. +#[test] +fn many_sandboxes_share_single_arc_snapshot() { + const N: usize = 8; + + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("shared_arc.hls"); + snapshot.to_file(&snap_path).unwrap(); + + let loaded = Arc::new(Snapshot::from_file(&snap_path).unwrap()); + + let mut sandboxes: Vec = (0..N) + .map(|_| { + MultiUseSandbox::from_snapshot(loaded.clone(), HostFunctions::default(), None).unwrap() + }) + .collect(); + + // Each sandbox writes a unique value and must observe its own write. + for (i, sbox) in sandboxes.iter_mut().enumerate() { + sbox.call::("AddToStatic", (i as i32 + 1) * 10) + .unwrap(); + } + for (i, sbox) in sandboxes.iter_mut().enumerate() { + let val: i32 = sbox.call("GetStatic", ()).unwrap(); + assert_eq!( + val, + (i as i32 + 1) * 10, + "sandbox {i} must observe its own write", + ); + } + + // Dropping the original Arc while sandboxes are still + // alive must not invalidate their CoW mappings. 
+ drop(loaded); + for (i, sbox) in sandboxes.iter_mut().enumerate() { + let val: i32 = sbox.call("GetStatic", ()).unwrap(); + assert_eq!( + val, + (i as i32 + 1) * 10, + "sandbox {i} must still work after the source Arc is dropped", + ); + } +} + +/// Multiple sandboxes built from the same on-disk snapshot must +/// behave correctly under concurrent use from multiple threads. +#[test] +fn concurrent_sandboxes_from_same_file() { + use std::thread; + + const N: usize = 8; + + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("concurrent.hls"); + snapshot.to_file(&snap_path).unwrap(); + + let loaded = Arc::new(Snapshot::from_file(&snap_path).unwrap()); + + let handles: Vec<_> = (0..N) + .map(|i| { + let loaded = loaded.clone(); + thread::spawn(move || { + let mut sbox = + MultiUseSandbox::from_snapshot(loaded, HostFunctions::default(), None).unwrap(); + let increment = (i as i32 + 1) * 7; + for _ in 0..5 { + sbox.call::("AddToStatic", increment).unwrap(); + } + let final_val: i32 = sbox.call("GetStatic", ()).unwrap(); + assert_eq!( + final_val, + increment * 5, + "thread {i} must see its own writes" + ); + }) + }) + .collect(); + + for h in handles { + h.join().expect("thread panicked"); + } +} + +/// Snapshots loaded from the same file must be restorable +/// independently from each other after concurrent mutations. 
+#[test] +fn restore_works_per_sandbox_with_shared_file() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("restore_shared.hls"); + snapshot.to_file(&snap_path).unwrap(); + + let loaded = Arc::new(Snapshot::from_file(&snap_path).unwrap()); + + let mut sbox1 = + MultiUseSandbox::from_snapshot(loaded.clone(), HostFunctions::default(), None).unwrap(); + let mut sbox2 = + MultiUseSandbox::from_snapshot(loaded.clone(), HostFunctions::default(), None).unwrap(); + + sbox1.call::("AddToStatic", 100i32).unwrap(); + sbox2.call::("AddToStatic", 200i32).unwrap(); + + sbox1.restore(loaded.clone()).unwrap(); + assert_eq!(sbox1.call::("GetStatic", ()).unwrap(), 0); + // sbox2 must be unaffected by sbox1's restore. + assert_eq!(sbox2.call::("GetStatic", ()).unwrap(), 200); + + sbox2.restore(loaded).unwrap(); + assert_eq!(sbox2.call::("GetStatic", ()).unwrap(), 0); +} + +/// Pre-init snapshots (NextAction::Initialise) round-tripped through +/// a file must be usable concurrently by multiple sandboxes. This is +/// distinct from already-initialised (`Call`) snapshots because each +/// sandbox runs the guest init code under `vm.initialise()`. 
+#[test] +fn multiple_sandboxes_from_pre_init_file() { + let snapshot = create_snapshot_from_binary(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("preinit_shared.hls"); + snapshot.to_file(&snap_path).unwrap(); + + let loaded = Arc::new(Snapshot::from_file(&snap_path).unwrap()); + + let mut sbox1 = + MultiUseSandbox::from_snapshot(loaded.clone(), HostFunctions::default(), None).unwrap(); + let mut sbox2 = MultiUseSandbox::from_snapshot(loaded, HostFunctions::default(), None).unwrap(); + + sbox1.call::("AddToStatic", 11i32).unwrap(); + assert_eq!(sbox1.call::("GetStatic", ()).unwrap(), 11); + assert_eq!(sbox2.call::("GetStatic", ()).unwrap(), 0); + + sbox2.call::("AddToStatic", 22i32).unwrap(); + assert_eq!(sbox2.call::("GetStatic", ()).unwrap(), 22); + assert_eq!(sbox1.call::("GetStatic", ()).unwrap(), 11); +} + +#[test] +fn snapshot_then_save_round_trip() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path1 = dir.path().join("first.hls"); + snapshot.to_file(&snap_path1).unwrap(); + + // Load, create sandbox, mutate, take snapshot, save again + let loaded = Snapshot::from_file(&snap_path1).unwrap(); + let mut sbox2 = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None).unwrap(); + + sbox2.call::("AddToStatic", 77i32).unwrap(); + let snap2 = sbox2.snapshot().unwrap(); + + let snap_path2 = dir.path().join("second.hls"); + snap2.to_file(&snap_path2).unwrap(); + + // Load the second snapshot and verify mutated state + let loaded2 = Snapshot::from_file(&snap_path2).unwrap(); + let mut sbox3 = + MultiUseSandbox::from_snapshot(Arc::new(loaded2), HostFunctions::default(), None).unwrap(); + + let val: i32 = sbox3.call("GetStatic", ()).unwrap(); + assert_eq!(val, 77); +} + +/// `MultiUseSandbox::from_snapshot` should register the default +/// `HostPrint` host function, just like the regular codepath. 
+#[test] +fn from_snapshot_has_default_host_print() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("test.hls"); + snapshot.to_file(&snap_path).unwrap(); + + let loaded = Snapshot::from_file(&snap_path).unwrap(); + let mut sbox2 = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None).unwrap(); + + let result = sbox2.call::("PrintOutput", "hello from snapshot".to_string()); + assert!( + result.is_ok(), + "PrintOutput should succeed because HostPrint is registered by from_snapshot: {:?}", + result.unwrap_err() + ); +} + +#[test] +fn from_file_unchecked_skips_hash_verification() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("unchecked.hls"); + snapshot.to_file(&snap_path).unwrap(); + + // Corrupt a byte in the memory blob (past the header) + { + use std::io::{Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .write(true) + .open(&snap_path) + .unwrap(); + // Write garbage into the memory blob region + file.seek(SeekFrom::Start(4096 + 64)).unwrap(); + file.write_all(&[0xFF; 16]).unwrap(); + } + + // from_file (with hash check) should fail + let result = Snapshot::from_file(&snap_path); + assert!(result.is_err(), "from_file should detect corruption"); + + // from_file_unchecked should succeed despite corruption + let loaded = Snapshot::from_file_unchecked(&snap_path); + assert!(loaded.is_ok(), "from_file_unchecked should skip hash check"); +} + +/// Sandbox built with a custom host function — a snapshot taken +/// from it must persist the function's signature, and loading +/// requires the same function to be registered. 
+fn create_sandbox_with_custom_host_funcs() -> MultiUseSandbox { + use crate::func::Registerable; + let path = simple_guest_as_string().unwrap(); + let mut u = UninitializedSandbox::new(GuestBinary::FilePath(path), None).unwrap(); + u.register_host_function("Add", |a: i32, b: i32| Ok(a + b)) + .unwrap(); + u.evolve().unwrap() +} + +#[test] +fn from_snapshot_accepts_matching_host_functions() { + use crate::func::Registerable; + + let mut sbox = create_sandbox_with_custom_host_funcs(); + let snapshot = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("with_funcs.hls"); + snapshot.to_file(&snap_path).unwrap(); + + let loaded = Snapshot::from_file(&snap_path).unwrap(); + let mut funcs = HostFunctions::default(); + funcs + .register_host_function("Add", |a: i32, b: i32| Ok(a + b)) + .unwrap(); + let result = MultiUseSandbox::from_snapshot(Arc::new(loaded), funcs, None); + assert!( + result.is_ok(), + "from_snapshot should accept matching host fns: {:?}", + result.err() + ); +} + +#[test] +fn from_snapshot_rejects_missing_host_function() { + let mut sbox = create_sandbox_with_custom_host_funcs(); + let snapshot = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("missing_fn.hls"); + snapshot.to_file(&snap_path).unwrap(); + + let loaded = Snapshot::from_file(&snap_path).unwrap(); + // Don't register "Add" — only the default HostPrint. 
+ let result = MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None); + let err = result.expect_err("expected missing-fn rejection"); + let msg = format!("{}", err); + assert!( + msg.contains("missing") && msg.contains("Add"), + "unexpected error message: {}", + msg + ); +} + +#[test] +fn from_snapshot_rejects_signature_mismatch() { + use crate::func::Registerable; + + let mut sbox = create_sandbox_with_custom_host_funcs(); + let snapshot = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("sig_mismatch.hls"); + snapshot.to_file(&snap_path).unwrap(); + + let loaded = Snapshot::from_file(&snap_path).unwrap(); + let mut funcs = HostFunctions::default(); + // Wrong signature: snapshot has (i32, i32) -> i32, register (String) -> i32. + funcs + .register_host_function("Add", |_s: String| Ok(0i32)) + .unwrap(); + let result = MultiUseSandbox::from_snapshot(Arc::new(loaded), funcs, None); + let err = result.expect_err("expected signature mismatch"); + let msg = format!("{}", err); + assert!( + msg.contains("signature_mismatches") && msg.contains("Add"), + "unexpected error message: {}", + msg + ); +} + +#[test] +fn from_snapshot_allows_extra_host_functions() { + use crate::func::Registerable; + + let mut sbox = create_sandbox_with_custom_host_funcs(); + let snapshot = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("extra_funcs.hls"); + snapshot.to_file(&snap_path).unwrap(); + + let loaded = Snapshot::from_file(&snap_path).unwrap(); + let mut funcs = HostFunctions::default(); + funcs + .register_host_function("Add", |a: i32, b: i32| Ok(a + b)) + .unwrap(); + // Extra functions not in the snapshot — superset is allowed. 
+ funcs + .register_host_function("Extra", |x: i64| Ok(x * 2)) + .unwrap(); + let result = MultiUseSandbox::from_snapshot(Arc::new(loaded), funcs, None); + assert!( + result.is_ok(), + "extras should be allowed (superset semantics): {:?}", + result.err() + ); +} + +/// Register enough host functions on the sandbox that the +/// serialized `HostFunctionDetails` flatbuffer exceeds a single +/// page, exercising the variable-`memory_offset` path. Verifies +/// that the saved file round-trips cleanly and that +/// `from_snapshot` correctly accepts a matching set. +#[test] +fn from_snapshot_with_many_host_functions_round_trip() { + use hyperlight_common::vmem::PAGE_SIZE; + + use crate::func::Registerable; + + let path = simple_guest_as_string().unwrap(); + let mut u = UninitializedSandbox::new(GuestBinary::FilePath(path), None).unwrap(); + // Register many host functions with long names so the + // serialized flatbuffer comfortably exceeds PAGE_SIZE. + const N: usize = 200; + for i in 0..N { + let name = format!("HostFunc_with_a_reasonably_long_name_{:04}", i); + u.register_host_function(&name, |a: i32, b: i32| Ok(a + b)) + .unwrap(); + } + let mut sbox = u.evolve().unwrap(); + let snapshot = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("many_funcs.hls"); + snapshot.to_file(&snap_path).unwrap(); + + // Sanity-check that the file's recorded memory_offset is + // larger than a single page (the host-function blob spilled + // beyond the fixed header). 
+ { + use std::io::{Read, Seek, SeekFrom}; + let mut f = std::fs::File::open(&snap_path).unwrap(); + f.seek(SeekFrom::Start(v1_offset!(memory_offset) as u64)) + .unwrap(); + let mut buf = [0u8; 8]; + f.read_exact(&mut buf).unwrap(); + let memory_offset = u64::from_le_bytes(buf) as usize; + assert!( + memory_offset > PAGE_SIZE, + "expected memory_offset > PAGE_SIZE for large host_funcs (got {})", + memory_offset + ); + } + + let loaded = Snapshot::from_file(&snap_path).unwrap(); + let mut funcs = HostFunctions::default(); + for i in 0..N { + let name = format!("HostFunc_with_a_reasonably_long_name_{:04}", i); + funcs + .register_host_function(&name, |a: i32, b: i32| Ok(a + b)) + .unwrap(); + } + let mut sbox2 = MultiUseSandbox::from_snapshot(Arc::new(loaded), funcs, None).unwrap(); + let result: String = sbox2.call("Echo", "hello\n".to_string()).unwrap(); + assert_eq!(result, "hello\n"); +} + +/// A file with the wrong magic bytes should be rejected with a +/// descriptive error. +#[test] +fn bad_magic_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("bad_magic.hls"); + snapshot.to_file(&snap_path).unwrap(); + + // Overwrite the 4-byte magic at offset 0. + { + use std::io::{Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .write(true) + .open(&snap_path) + .unwrap(); + file.seek(SeekFrom::Start(0)).unwrap(); + file.write_all(b"XXXX").unwrap(); + } + + let err = match Snapshot::from_file(&snap_path) { + Err(e) => e, + Ok(_) => panic!("expected magic mismatch"), + }; + let msg = format!("{}", err); + assert!( + msg.contains("magic"), + "expected magic-related error, got: {}", + msg + ); +} + +/// A file truncated to less than the fixed header should be +/// rejected at header read time, not panic. 
+#[test] +fn truncated_file_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("truncated.hls"); + snapshot.to_file(&snap_path).unwrap(); + + // Truncate to 100 bytes (well below the fixed header). + std::fs::OpenOptions::new() + .write(true) + .open(&snap_path) + .unwrap() + .set_len(100) + .unwrap(); + + let err = match Snapshot::from_file(&snap_path) { + Err(e) => e, + Ok(_) => panic!("expected truncation error"), + }; + let msg = format!("{}", err); + // Either "truncated" (read_bytes) or "snapshot read error" (read_u64) — + // both are acceptable; just assert no panic and an error came back. + assert!( + msg.contains("truncated") || msg.contains("read error"), + "expected truncation/read error, got: {}", + msg + ); +} + +/// A file whose `host_funcs_size` claims more bytes than the +/// host-funcs region actually contains should be rejected +/// without panic. +#[test] +fn corrupt_host_funcs_size_rejected() { + // Use a sandbox with at least one custom host function so the + // host-funcs region exists in the file. + let mut sbox = create_sandbox_with_custom_host_funcs(); + let snapshot = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("bad_hf_size.hls"); + snapshot.to_file(&snap_path).unwrap(); + + // Overwrite host_funcs_size with a huge value that exceeds + // the file. 
+ { + use std::io::{Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .write(true) + .open(&snap_path) + .unwrap(); + file.seek(SeekFrom::Start(v1_offset!(host_funcs_size) as u64)) + .unwrap(); + file.write_all(&u64::MAX.to_le_bytes()).unwrap(); + } + + let err = match Snapshot::from_file(&snap_path) { + Err(e) => e, + Ok(_) => panic!("expected host_funcs_size error"), + }; + let msg = format!("{}", err); + assert!( + msg.contains("host_funcs_size"), + "expected host_funcs_size error, got: {}", + msg + ); +} + +/// A `host_funcs_size` that fits within the file but exceeds the +/// fixed cap must be rejected before the loader tries to allocate +/// a buffer of that size. +#[test] +fn oversized_host_funcs_size_rejected() { + let mut sbox = create_sandbox_with_custom_host_funcs(); + let snapshot = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("oversized_hf.hls"); + snapshot.to_file(&snap_path).unwrap(); + + // Pad the file so a value that's well above the cap still + // fits within `file_len` (otherwise the existing + // "exceeds remaining file bytes" check would catch it first). 
+ let bloated = 64 * 1024 * 1024_u64; + { + use std::io::{Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .write(true) + .open(&snap_path) + .unwrap(); + let cur_len = file.metadata().unwrap().len(); + file.seek(SeekFrom::Start(cur_len)).unwrap(); + file.write_all(&vec![0u8; bloated as usize]).unwrap(); + file.seek(SeekFrom::Start(v1_offset!(host_funcs_size) as u64)) + .unwrap(); + file.write_all(&bloated.to_le_bytes()).unwrap(); + } + + let err = match Snapshot::from_file(&snap_path) { + Err(e) => e, + Ok(_) => panic!("expected oversized host_funcs_size error"), + }; + let msg = format!("{}", err); + assert!( + msg.contains("exceeds maximum"), + "expected cap error, got: {}", + msg + ); +} + +/// `memory_offset` of 0 is structurally invalid because the memory +/// blob would overlap the fixed prefix. +#[test] +fn memory_offset_zero_rejected() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("memory_offset_zero.hls"); + snapshot.to_file(&snap_path).unwrap(); + + { + use std::io::{Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .write(true) + .open(&snap_path) + .unwrap(); + file.seek(SeekFrom::Start(v1_offset!(memory_offset) as u64)) + .unwrap(); + file.write_all(&0u64.to_le_bytes()).unwrap(); + } + + let err = match Snapshot::from_file(&snap_path) { + Err(e) => e, + Ok(_) => panic!("expected memory_offset=0 to be rejected"), + }; + let msg = format!("{}", err); + assert!( + msg.contains("memory_offset"), + "expected memory_offset error, got: {}", + msg + ); +} + +/// `memory_offset` must be a multiple of `PAGE_SIZE` so the memory +/// blob can be mmapped directly. A non-aligned offset must be +/// rejected. 
#[test]
fn memory_offset_unaligned_rejected() {
    use hyperlight_common::vmem::PAGE_SIZE;

    let snapshot = create_snapshot_from_binary();
    let dir = tempfile::tempdir().unwrap();
    let snap_path = dir.path().join("memory_offset_unaligned.hls");
    snapshot.to_file(&snap_path).unwrap();

    // Corrupt the on-disk `memory_offset` field in place with a
    // value that is deliberately one past a page boundary.
    {
        use std::io::{Seek, SeekFrom, Write};
        let mut file = std::fs::OpenOptions::new()
            .write(true)
            .open(&snap_path)
            .unwrap();
        file.seek(SeekFrom::Start(v1_offset!(memory_offset) as u64))
            .unwrap();
        let bad = (PAGE_SIZE as u64) + 1;
        file.write_all(&bad.to_le_bytes()).unwrap();
    }

    let err = match Snapshot::from_file(&snap_path) {
        Err(e) => e,
        Ok(_) => panic!("expected unaligned memory_offset to be rejected"),
    };
    let msg = format!("{}", err);
    assert!(
        msg.contains("memory_offset") && msg.contains("PAGE_SIZE"),
        "expected page-alignment error, got: {}",
        msg
    );
}

/// `memory_size` that would push the memory blob past the end of
/// the file must be rejected.
#[test]
fn memory_blob_extends_past_eof_rejected() {
    let snapshot = create_snapshot_from_binary();
    let dir = tempfile::tempdir().unwrap();
    let snap_path = dir.path().join("memory_size_overflow.hls");
    snapshot.to_file(&snap_path).unwrap();

    {
        use std::io::{Seek, SeekFrom, Write};
        let mut file = std::fs::OpenOptions::new()
            .write(true)
            .open(&snap_path)
            .unwrap();
        let file_len = file.metadata().unwrap().len();
        file.seek(SeekFrom::Start(v1_offset!(memory_size) as u64))
            .unwrap();
        // A value that fits in u64 but is much larger than the
        // file, so the blob bound check trips before any add
        // overflows.
        file.write_all(&(file_len * 2).to_le_bytes()).unwrap();
    }

    let err = match Snapshot::from_file(&snap_path) {
        Err(e) => e,
        Ok(_) => panic!("expected oversized memory blob to be rejected"),
    };
    let msg = format!("{}", err);
    assert!(
        msg.contains("memory blob") && msg.contains("end of the file"),
        "expected blob-end error, got: {}",
        msg
    );
}

/// `entrypoint_tag` is a u64 discriminant for `NextAction`. Only
/// values 0 (Initialise) and 1 (Call) are defined. Anything else
/// must be rejected when parsing the raw header.
#[test]
fn invalid_entrypoint_tag_rejected() {
    let snapshot = create_snapshot_from_binary();
    let dir = tempfile::tempdir().unwrap();
    let snap_path = dir.path().join("bad_entrypoint_tag.hls");
    snapshot.to_file(&snap_path).unwrap();

    {
        use std::io::{Seek, SeekFrom, Write};
        let mut file = std::fs::OpenOptions::new()
            .write(true)
            .open(&snap_path)
            .unwrap();
        file.seek(SeekFrom::Start(v1_offset!(entrypoint_tag) as u64))
            .unwrap();
        // 0xDEAD is neither Initialise (0) nor Call (1).
        file.write_all(&0xDEADu64.to_le_bytes()).unwrap();
    }

    let err = match Snapshot::from_file(&snap_path) {
        Err(e) => e,
        Ok(_) => panic!("expected invalid entrypoint tag to be rejected"),
    };
    let msg = format!("{}", err);
    assert!(
        msg.contains("entrypoint tag"),
        "expected entrypoint-tag error, got: {}",
        msg
    );
}

/// `init_data_permissions` is stored as `u64` on disk but the in
/// memory flag set is `u32`. Any value with bits beyond the u32
/// range must be rejected before narrowing so that high bits do not
/// silently disappear.
#[test]
fn init_data_permissions_oversized_rejected() {
    let snapshot = create_snapshot_from_binary();
    let dir = tempfile::tempdir().unwrap();
    let snap_path = dir.path().join("oversized_perms.hls");
    snapshot.to_file(&snap_path).unwrap();

    // Overwrite the on-disk permissions field with a value that
    // cannot fit in a u32.
    {
        use std::io::{Seek, SeekFrom, Write};
        let mut file = std::fs::OpenOptions::new()
            .write(true)
            .open(&snap_path)
            .unwrap();
        file.seek(SeekFrom::Start(v1_offset!(init_data_permissions) as u64))
            .unwrap();
        // High 32 bits set so `u32::try_from` fails.
        file.write_all(&(1u64 << 33).to_le_bytes()).unwrap();
    }

    let err = match Snapshot::from_file(&snap_path) {
        Err(e) => e,
        Ok(_) => panic!("expected oversized init_data_permissions to be rejected"),
    };
    let msg = format!("{}", err);
    assert!(
        msg.contains("init_data_permissions") && msg.contains("u32"),
        "expected u32-range error, got: {}",
        msg
    );
}

/// `has_sregs` is serialized as `u64` for on-disk uniformity but is
/// semantically a boolean. Any value other than 0 or 1 must be
/// rejected at parse time.
#[test]
fn invalid_has_sregs_value_rejected() {
    let snapshot = create_snapshot_from_binary();
    let dir = tempfile::tempdir().unwrap();
    let snap_path = dir.path().join("bad_has_sregs.hls");
    snapshot.to_file(&snap_path).unwrap();

    {
        use std::io::{Seek, SeekFrom, Write};
        let mut file = std::fs::OpenOptions::new()
            .write(true)
            .open(&snap_path)
            .unwrap();
        file.seek(SeekFrom::Start(v1_offset!(has_sregs) as u64))
            .unwrap();
        // 2 is neither 0 nor 1, so the boolean decode must fail.
        file.write_all(&2u64.to_le_bytes()).unwrap();
    }

    let err = match Snapshot::from_file(&snap_path) {
        Err(e) => e,
        Ok(_) => panic!("expected has_sregs validation error"),
    };
    let msg = format!("{}", err);
    assert!(
        msg.contains("has_sregs"),
        "expected has_sregs error, got: {}",
        msg
    );
}

/// A `Call` snapshot is mid-execution and must carry sregs.
/// Flipping `has_sregs` to 0 on such a snapshot must be rejected.
#[test]
fn call_snapshot_without_sregs_rejected() {
    // A sandbox that has already run produces a `Call` snapshot,
    // which must carry special registers.
    let mut sbox = create_test_sandbox();
    let snapshot = sbox.snapshot().unwrap();
    let dir = tempfile::tempdir().unwrap();
    let snap_path = dir.path().join("call_no_sregs.hls");
    snapshot.to_file(&snap_path).unwrap();

    // Flip `has_sregs` to 0 so the file claims no sregs are present.
    {
        use std::io::{Seek, SeekFrom, Write};
        let mut file = std::fs::OpenOptions::new()
            .write(true)
            .open(&snap_path)
            .unwrap();
        file.seek(SeekFrom::Start(v1_offset!(has_sregs) as u64))
            .unwrap();
        file.write_all(&0u64.to_le_bytes()).unwrap();
    }

    let err = match Snapshot::from_file(&snap_path) {
        Err(e) => e,
        Ok(_) => panic!("expected entrypoint/has_sregs mismatch"),
    };
    let msg = format!("{}", err);
    assert!(
        msg.contains("has_sregs"),
        "expected has_sregs error, got: {}",
        msg
    );
}

/// An `Initialise` snapshot has not yet run on the vCPU and must
/// not carry sregs. Flipping `has_sregs` to 1 must be rejected.
#[test]
fn initialise_snapshot_with_sregs_rejected() {
    let snapshot = create_snapshot_from_binary();
    let dir = tempfile::tempdir().unwrap();
    let snap_path = dir.path().join("init_with_sregs.hls");
    snapshot.to_file(&snap_path).unwrap();

    // Flip `has_sregs` to 1 so the file claims sregs are present.
    {
        use std::io::{Seek, SeekFrom, Write};
        let mut file = std::fs::OpenOptions::new()
            .write(true)
            .open(&snap_path)
            .unwrap();
        file.seek(SeekFrom::Start(v1_offset!(has_sregs) as u64))
            .unwrap();
        file.write_all(&1u64.to_le_bytes()).unwrap();
    }

    let err = match Snapshot::from_file(&snap_path) {
        Err(e) => e,
        Ok(_) => panic!("expected entrypoint/has_sregs mismatch"),
    };
    let msg = format!("{}", err);
    assert!(
        msg.contains("has_sregs"),
        "expected has_sregs error, got: {}",
        msg
    );
}

/// `header_hash` covers the preamble, header, sregs, and host_funcs
/// blob. Any mutation of those regions must trip verification, even
/// via `from_file_unchecked`.
+#[test] +fn header_mutation_caught_by_hash() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("hdr_mut.hls"); + snapshot.to_file(&snap_path).unwrap(); + + // Flip a byte in `stack_top_gva` to mutate the header in place. + { + use std::io::{Read, Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .read(true) + .write(true) + .open(&snap_path) + .unwrap(); + file.seek(SeekFrom::Start(v1_offset!(stack_top_gva) as u64)) + .unwrap(); + let mut buf = [0u8; 8]; + file.read_exact(&mut buf).unwrap(); + buf[0] ^= 0xFF; + file.seek(SeekFrom::Start(v1_offset!(stack_top_gva) as u64)) + .unwrap(); + file.write_all(&buf).unwrap(); + } + + let err = match Snapshot::from_file(&snap_path) { + Err(e) => e, + Ok(_) => panic!("header mutation must be detected"), + }; + assert!( + format!("{}", err).contains("header_hash"), + "expected header_hash error, got: {}", + err + ); + + // `from_file_unchecked` skips the blob hash but still verifies + // the header hash, so it must also reject this. + let err = match Snapshot::from_file_unchecked(&snap_path) { + Err(e) => e, + Ok(_) => panic!("header mutation must be detected even by from_file_unchecked"), + }; + assert!( + format!("{}", err).contains("header_hash"), + "expected header_hash error, got: {}", + err + ); +} + +/// Sregs sit between the header and the host_funcs blob. Mutating +/// any sregs byte must trip `header_hash` verification. +#[test] +fn sregs_mutation_caught_by_hash() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("sregs_mut.hls"); + snapshot.to_file(&snap_path).unwrap(); + + // Flip the first byte of the sregs region (just after the + // RawHeaderV1 ends). 
+ let sregs_offset = std::mem::size_of::() + std::mem::size_of::(); + { + use std::io::{Read, Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .read(true) + .write(true) + .open(&snap_path) + .unwrap(); + file.seek(SeekFrom::Start(sregs_offset as u64)).unwrap(); + let mut byte = [0u8; 1]; + file.read_exact(&mut byte).unwrap(); + byte[0] ^= 0xFF; + file.seek(SeekFrom::Start(sregs_offset as u64)).unwrap(); + file.write_all(&byte).unwrap(); + } + + let err = match Snapshot::from_file(&snap_path) { + Err(e) => e, + Ok(_) => panic!("sregs mutation must be detected"), + }; + assert!( + format!("{}", err).contains("header_hash"), + "expected header_hash error, got: {}", + err + ); +} + +/// The host-functions flatbuffer blob is part of `header_hash`. +/// Mutating its bytes must trip verification. +#[test] +fn host_funcs_mutation_caught_by_hash() { + let mut sbox = create_sandbox_with_custom_host_funcs(); + let snapshot = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("hf_mut.hls"); + snapshot.to_file(&snap_path).unwrap(); + + let hf_offset = FIXED_PREFIX_SIZE; + { + use std::io::{Read, Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .read(true) + .write(true) + .open(&snap_path) + .unwrap(); + file.seek(SeekFrom::Start(hf_offset as u64)).unwrap(); + let mut byte = [0u8; 1]; + file.read_exact(&mut byte).unwrap(); + byte[0] ^= 0xFF; + file.seek(SeekFrom::Start(hf_offset as u64)).unwrap(); + file.write_all(&byte).unwrap(); + } + + let err = match Snapshot::from_file(&snap_path) { + Err(e) => e, + Ok(_) => panic!("host_funcs mutation must be detected"), + }; + assert!( + format!("{}", err).contains("header_hash"), + "expected header_hash error, got: {}", + err + ); +} + +/// `MAP_PRIVATE` / `FILE_MAP_COPY` invariant: guest writes +/// through a file-backed snapshot must NOT modify the on-disk +/// file. 
Verifies this by hashing the raw bytes before and after +/// running guest functions that mutate state. +#[test] +fn cow_does_not_mutate_backing_file() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("cow.hls"); + snapshot.to_file(&snap_path).unwrap(); + + let hash_before: [u8; 32] = blake3::hash(&std::fs::read(&snap_path).unwrap()).into(); + + // Load the snapshot and have the guest write into mapped memory. + let loaded = Snapshot::from_file(&snap_path).unwrap(); + let mut sbox = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None).unwrap(); + sbox.call::("AddToStatic", 999i32).unwrap(); + + // Drop the sandbox to ensure mappings are released before re-reading. + drop(sbox); + + let hash_after: [u8; 32] = blake3::hash(&std::fs::read(&snap_path).unwrap()).into(); + assert_eq!( + hash_before, hash_after, + "guest writes must not propagate to the backing snapshot file" + ); +} + +/// Pre-init snapshot (`from_env`) round-tripped through a file +/// must still complete guest initialisation on load. +#[test] +fn pre_init_snapshot_save_load() { + use super::NextAction; + + let snapshot = create_snapshot_from_binary(); + // Guard: this constructor produces a `NextAction::Initialise` + // snapshot. If that ever changes, this test loses its purpose. 
+ assert!( + matches!(snapshot.entrypoint(), NextAction::Initialise(_)), + "expected pre-init snapshot from from_env" + ); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("preinit.hls"); + snapshot.to_file(&snap_path).unwrap(); + + let loaded = Snapshot::from_file(&snap_path).unwrap(); + assert!( + matches!(loaded.entrypoint(), NextAction::Initialise(_)), + "pre-init entrypoint should round-trip" + ); + + let mut sbox = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None).unwrap(); + // Guest init must run via vm.initialise() before the call works. + let result: i32 = sbox.call("GetStatic", ()).unwrap(); + assert_eq!(result, 0); +} + +/// `from_file_unchecked` skips the blake3 hash check but must +/// still validate the rest of the header (magic, format version, +/// architecture, ABI version, hypervisor tag). +#[test] +fn from_file_unchecked_still_validates_header() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("unchecked_bad_arch.hls"); + snapshot.to_file(&snap_path).unwrap(); + + // Corrupt the architecture tag to a bogus value. + { + use std::io::{Seek, SeekFrom, Write}; + let mut file = std::fs::OpenOptions::new() + .write(true) + .open(&snap_path) + .unwrap(); + file.seek(SeekFrom::Start(v1_offset!(arch) as u64)).unwrap(); + file.write_all(&99u32.to_le_bytes()).unwrap(); + } + + let err = match Snapshot::from_file_unchecked(&snap_path) { + Err(e) => e, + Ok(_) => panic!("expected arch validation to fail even without hash check"), + }; + let msg = format!("{}", err); + assert!( + msg.contains("architecture"), + "expected arch error from from_file_unchecked, got: {}", + msg + ); +} + +// Tests for `MultiUseSandbox::from_snapshot` `SandboxConfiguration` +// plumbing. Layout fields must be silently overridden by the snapshot. +// Runtime fields (interrupt knobs, gdb, crashdump) must take effect. 
+// `interrupt_*` are covered by `interrupt_custom_signal_no_and_retry_delay` +// in `tests/integration_test.rs`. `guest_debug_info` (gdb) needs an +// in-test gdb stub and is not exercised here. + +/// Layout fields supplied via `SandboxConfiguration` must be silently +/// overridden. The snapshot's own layout is authoritative because the +/// on-disk memory blob already encodes those sizes. +#[test] +fn from_snapshot_silently_ignores_layout_overrides() { + use crate::sandbox::SandboxConfiguration; + + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + let original_input = snapshot.layout().input_data_size; + let original_output = snapshot.layout().output_data_size; + let original_heap = snapshot.layout().heap_size; + let original_scratch = snapshot.layout().scratch_size; + + // Build a config whose every layout field is different from the + // snapshot's layout. `from_snapshot` must ignore all of them. + let mut config = SandboxConfiguration::default(); + config.set_input_data_size(original_input * 2); + config.set_output_data_size(original_output * 2); + config.set_heap_size((original_heap as u64) * 2); + config.set_scratch_size(original_scratch * 2); + + let mut sbox2 = + MultiUseSandbox::from_snapshot(snapshot.clone(), HostFunctions::default(), Some(config)) + .unwrap(); + + // The new sandbox must be fully usable. + sbox2.call::("GetStatic", ()).unwrap(); + + // The new sandbox's layout must match the snapshot's, not the + // override config. + let new_snap = sbox2.snapshot().unwrap(); + assert_eq!(new_snap.layout().input_data_size, original_input); + assert_eq!(new_snap.layout().output_data_size, original_output); + assert_eq!(new_snap.layout().heap_size, original_heap); + assert_eq!(new_snap.layout().scratch_size, original_scratch); +} + +/// `from_snapshot` must honor `guest_core_dump=true` from the supplied +/// config so that `generate_crashdump_to_dir` actually writes a file. 
+#[test] +#[cfg(crashdump)] +fn from_snapshot_honors_guest_core_dump_enabled() { + use crate::sandbox::SandboxConfiguration; + + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + + let mut config = SandboxConfiguration::default(); + config.set_guest_core_dump(true); + + let mut sbox2 = + MultiUseSandbox::from_snapshot(snapshot, HostFunctions::default(), Some(config)).unwrap(); + + let dir = tempfile::tempdir().unwrap(); + sbox2 + .generate_crashdump_to_dir(dir.path().to_str().unwrap()) + .unwrap(); + + let entries: Vec<_> = std::fs::read_dir(dir.path()) + .unwrap() + .filter_map(Result::ok) + .collect(); + assert!( + !entries.is_empty(), + "expected core dump file to be created when guest_core_dump=true" + ); +} + +/// `from_snapshot` must honor `guest_core_dump=false` from the supplied +/// config so that `generate_crashdump_to_dir` produces no file. +#[test] +#[cfg(crashdump)] +fn from_snapshot_honors_guest_core_dump_disabled() { + use crate::sandbox::SandboxConfiguration; + + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + + let mut config = SandboxConfiguration::default(); + config.set_guest_core_dump(false); + + let mut sbox2 = + MultiUseSandbox::from_snapshot(snapshot, HostFunctions::default(), Some(config)).unwrap(); + + let dir = tempfile::tempdir().unwrap(); + sbox2 + .generate_crashdump_to_dir(dir.path().to_str().unwrap()) + .unwrap(); + + let entries: Vec<_> = std::fs::read_dir(dir.path()) + .unwrap() + .filter_map(Result::ok) + .collect(); + assert!( + entries.is_empty(), + "expected no core dump file when guest_core_dump=false, found {:?}", + entries.iter().map(|e| e.path()).collect::>() + ); +} + +/// `from_file` on a non-existent path must return an error rather +/// than panicking. 
+#[test] +fn from_file_nonexistent_path_returns_error() { + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("does_not_exist.hls"); + let err = match Snapshot::from_file(&snap_path) { + Err(e) => e, + Ok(_) => panic!("expected I/O error for missing file"), + }; + let msg = format!("{}", err); + assert!( + msg.contains("failed to open snapshot file"), + "expected open-failure message, got: {}", + msg + ); +} + +/// `to_file` must succeed when overwriting an existing file, and +/// the resulting file must be loadable. +#[test] +fn to_file_overwrites_existing() { + let mut sbox = create_test_sandbox(); + let snap1 = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("overwrite.hls"); + + snap1.to_file(&snap_path).unwrap(); + let first_size = std::fs::metadata(&snap_path).unwrap().len(); + assert!(first_size > 0); + + // Mutate sandbox state and snapshot again, overwriting the same file. + sbox.call::("AddToStatic", 314i32).unwrap(); + let snap2 = sbox.snapshot().unwrap(); + snap2.to_file(&snap_path).unwrap(); + + // Load the overwritten file and verify it observes the second + // snapshot's state. + let loaded = Snapshot::from_file(&snap_path).unwrap(); + let mut sbox2 = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None).unwrap(); + assert_eq!(sbox2.call::("GetStatic", ()).unwrap(), 314); +} + +/// `from_snapshot`-built sandbox must support `map_file_cow` of a +/// host file and the guest must read back the file contents. +#[test] +fn map_file_cow_after_from_snapshot() { + use std::io::Write; + + // Build a snapshot from disk. + let mut producer = create_test_sandbox(); + let snap = producer.snapshot().unwrap(); + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("for_map.hls"); + snap.to_file(&snap_path).unwrap(); + + // Build a host file with known contents to map into the sandbox. 
+ let page_size = page_size::get(); + let payload = b"hello from map_file_cow after from_snapshot"; + let mut padded = vec![0u8; page_size]; + padded[..payload.len()].copy_from_slice(payload); + let file_path = dir.path().join("mapped_payload.bin"); + std::fs::File::create(&file_path) + .unwrap() + .write_all(&padded) + .unwrap(); + + // Construct a sandbox from the on-disk snapshot and map the file. + let loaded = Snapshot::from_file(&snap_path).unwrap(); + let mut sbox = + MultiUseSandbox::from_snapshot(Arc::new(loaded), HostFunctions::default(), None).unwrap(); + + let guest_base: u64 = 0x1_0000_0000; + let mapped_size = sbox.map_file_cow(&file_path, guest_base, None).unwrap(); + assert!(mapped_size as usize >= payload.len()); + + // Read back from the guest and verify byte-for-byte equality. + let actual: Vec = sbox + .call("ReadMappedBuffer", (guest_base, payload.len() as u64, true)) + .unwrap(); + assert_eq!(actual, payload); +} + +/// A sandbox restored from a file-loaded snapshot must still be +/// snapshottable, and the new snapshot must save and reload +/// correctly. +#[test] +fn snapshot_after_restore_to_file_loaded_baseline() { + let mut producer = create_test_sandbox(); + let baseline = producer.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let baseline_path = dir.path().join("baseline.hls"); + baseline.to_file(&baseline_path).unwrap(); + + let loaded = Arc::new(Snapshot::from_file(&baseline_path).unwrap()); + let mut sbox = + MultiUseSandbox::from_snapshot(loaded.clone(), HostFunctions::default(), None).unwrap(); + + // Mutate, restore to the file baseline, mutate to a new value, + // then snapshot the post-restore sandbox. 
+ sbox.call::("AddToStatic", 7i32).unwrap(); + sbox.restore(loaded).unwrap(); + assert_eq!(sbox.call::("GetStatic", ()).unwrap(), 0); + sbox.call::("AddToStatic", 99i32).unwrap(); + + let new_snap = sbox.snapshot().unwrap(); + let new_path = dir.path().join("after_restore.hls"); + new_snap.to_file(&new_path).unwrap(); + + // Load the new snapshot in a fresh sandbox and verify state. + let reloaded = Snapshot::from_file(&new_path).unwrap(); + let mut sbox2 = + MultiUseSandbox::from_snapshot(Arc::new(reloaded), HostFunctions::default(), None).unwrap(); + assert_eq!(sbox2.call::("GetStatic", ()).unwrap(), 99); +} + +/// `from_file` on an empty file must return an error rather than +/// panicking. +#[test] +fn from_file_empty_file_returns_error() { + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("empty.hls"); + std::fs::File::create(&snap_path).unwrap(); + let err = match Snapshot::from_file(&snap_path) { + Err(e) => e, + Ok(_) => panic!("expected error from zero-byte file"), + }; + let msg = format!("{}", err); + assert!( + msg.contains("snapshot read error") || msg.contains("truncated"), + "expected truncation/read error, got: {}", + msg + ); +} + +/// `to_file` to a path inside a non-existent directory must return +/// an I/O error rather than panicking. +#[test] +fn to_file_nonexistent_directory_returns_error() { + let snapshot = create_snapshot_from_binary(); + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("does_not_exist").join("snap.hls"); + let err = match snapshot.to_file(&snap_path) { + Err(e) => e, + Ok(_) => panic!("expected error writing to nonexistent directory"), + }; + let msg = format!("{}", err); + assert!( + msg.contains("failed to create snapshot file"), + "expected create-failure message, got: {}", + msg + ); +} + +/// Restore is currently rejected when the target snapshot was +/// loaded from a different file than the sandbox was built from. 
+/// `Snapshot::sandbox_id` is a process-local atomic counter assigned +/// fresh on every `from_file`, so the ids never match. Documented as +/// a known limitation in `MultiUseSandbox::from_snapshot` and tracked +/// by the `TODO` to replace ids with a `SandboxMemoryLayout`-equality +/// check. +#[test] +fn restore_to_different_file_loaded_snapshot_rejected() { + let mut producer = create_test_sandbox(); + + let dir = tempfile::tempdir().unwrap(); + + // Snapshot A: zero state. + let snap_a_path = dir.path().join("a.hls"); + producer.snapshot().unwrap().to_file(&snap_a_path).unwrap(); + + // Snapshot B: state with AddToStatic(50). + producer.call::("AddToStatic", 50i32).unwrap(); + let snap_b_path = dir.path().join("b.hls"); + producer.snapshot().unwrap().to_file(&snap_b_path).unwrap(); + + let loaded_a = Arc::new(Snapshot::from_file(&snap_a_path).unwrap()); + let mut sbox = + MultiUseSandbox::from_snapshot(loaded_a, HostFunctions::default(), None).unwrap(); + + let loaded_b = Arc::new(Snapshot::from_file(&snap_b_path).unwrap()); + let err = match sbox.restore(loaded_b) { + Err(e) => e, + Ok(_) => panic!("expected SnapshotSandboxMismatch from cross-file restore"), + }; + let msg = format!("{:?}", err); + assert!( + msg.contains("SnapshotSandboxMismatch"), + "expected SnapshotSandboxMismatch, got: {}", + msg + ); +} + +/// Two independent `Snapshot::from_file` calls of the same path +/// must each yield a usable snapshot. Sandboxes built from each +/// must work independently and produce isolated CoW state. 
+#[test] +fn multiple_from_file_calls_of_same_path() { + let mut sbox = create_test_sandbox(); + let snapshot = sbox.snapshot().unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("multi_load.hls"); + snapshot.to_file(&snap_path).unwrap(); + + let loaded_a = Arc::new(Snapshot::from_file(&snap_path).unwrap()); + let loaded_b = Arc::new(Snapshot::from_file(&snap_path).unwrap()); + + let mut sbox_a = + MultiUseSandbox::from_snapshot(loaded_a, HostFunctions::default(), None).unwrap(); + let mut sbox_b = + MultiUseSandbox::from_snapshot(loaded_b, HostFunctions::default(), None).unwrap(); + + sbox_a.call::("AddToStatic", 11i32).unwrap(); + sbox_b.call::("AddToStatic", 22i32).unwrap(); + + assert_eq!(sbox_a.call::("GetStatic", ()).unwrap(), 11); + assert_eq!(sbox_b.call::("GetStatic", ()).unwrap(), 22); +} + +/// Loading a file via `Snapshot::from_file` after the file has been +/// rewritten with a different snapshot must observe the new contents. +/// Documents the load-once / no-cache semantic. +#[test] +fn from_file_after_overwrite_observes_new_contents() { + let mut sbox = create_test_sandbox(); + let dir = tempfile::tempdir().unwrap(); + let snap_path = dir.path().join("evolving.hls"); + + // Write a snapshot at state X. + sbox.snapshot().unwrap().to_file(&snap_path).unwrap(); + // Load and immediately drop. On Windows, an overwriting `to_file` + // call on a path with an active mapped view fails with + // `ERROR_USER_MAPPED_FILE` (1224), so the loaded snapshot must be + // released before re-writing the same path. + { + let _loaded_x = Snapshot::from_file(&snap_path).unwrap(); + } + + // Mutate and overwrite with a snapshot at state Y. + sbox.call::("AddToStatic", 55i32).unwrap(); + sbox.snapshot().unwrap().to_file(&snap_path).unwrap(); + + // A subsequent `from_file` of the same path must reflect Y. 
+ let loaded_y = Snapshot::from_file(&snap_path).unwrap(); + let mut sbox_y = + MultiUseSandbox::from_snapshot(Arc::new(loaded_y), HostFunctions::default(), None).unwrap(); + assert_eq!(sbox_y.call::("GetStatic", ()).unwrap(), 55); +} diff --git a/src/hyperlight_host/src/sandbox/snapshot.rs b/src/hyperlight_host/src/sandbox/snapshot/mod.rs similarity index 91% rename from src/hyperlight_host/src/sandbox/snapshot.rs rename to src/hyperlight_host/src/sandbox/snapshot/mod.rs index e4c7b1133..1bb183ea8 100644 --- a/src/hyperlight_host/src/sandbox/snapshot.rs +++ b/src/hyperlight_host/src/sandbox/snapshot/mod.rs @@ -14,9 +14,13 @@ See the License for the specific language governing permissions and limitations under the License. */ +mod file; +mod file_tests; + use std::collections::{BTreeMap, HashMap}; use std::sync::atomic::{AtomicU64, Ordering}; +use hyperlight_common::flatbuffer_wrappers::host_function_details::HostFunctionDetails; use hyperlight_common::layout::{scratch_base_gpa, scratch_base_gva}; use hyperlight_common::vmem; use hyperlight_common::vmem::{ @@ -115,6 +119,16 @@ pub struct Snapshot { /// restored sandbox's guest-visible counter so the guest can tell /// which snapshot it is currently a clone of. snapshot_generation: u64, + + /// Names and signatures of host functions registered on the + /// sandbox at the time this snapshot was taken. Persisted to disk + /// so that [`crate::MultiUseSandbox::from_snapshot`] can reject + /// a `HostFunctions` set that is missing required functions or + /// has mismatched signatures. + /// + /// Empty for snapshots created via test-only constructors that + /// bypass the normal sandbox path. 
+ host_functions: HostFunctionDetails, } impl core::convert::AsRef for Snapshot { fn as_ref(&self) -> &Self { @@ -423,6 +437,9 @@ impl Snapshot { sregs: None, entrypoint: NextAction::Initialise(load_addr + entrypoint_va - base_va), snapshot_generation: 0, + host_functions: HostFunctionDetails { + host_functions: None, + }, }) } @@ -447,6 +464,7 @@ impl Snapshot { sregs: CommonSpecialRegisters, entrypoint: NextAction, snapshot_generation: u64, + host_functions: HostFunctionDetails, ) -> Result { let mut phys_seen = HashMap::::new(); let scratch_gva = scratch_base_gva(layout.get_scratch_size()); @@ -610,6 +628,7 @@ impl Snapshot { sregs: Some(sregs), entrypoint, snapshot_generation, + host_functions, }) } @@ -663,6 +682,65 @@ impl Snapshot { pub(crate) fn entrypoint(&self) -> NextAction { self.entrypoint } + + /// Validate that `provided` is a superset of the host functions + /// recorded in this snapshot: every function that was registered + /// at snapshot time must also be present in `provided` with a + /// matching signature. Extras in `provided` are allowed. + /// + /// A snapshot with no recorded host functions (e.g. one + /// produced by a test-only constructor) accepts any `provided` + /// set. + pub(crate) fn validate_host_functions(&self, provided: &crate::HostFunctions) -> Result<()> { + let required = match &self.host_functions.host_functions { + Some(v) => v, + None => return Ok(()), + }; + if required.is_empty() { + return Ok(()); + } + + // Build a HostFunctionDetails view of the provided registry + // using the existing `From<&FunctionRegistry>` impl. 
+ let provided_details: HostFunctionDetails = provided.inner().into(); + let provided_funcs = provided_details.host_functions.unwrap_or_default(); + + let mut missing: Vec = Vec::new(); + let mut signature_mismatches: Vec = Vec::new(); + + for req in required { + match provided_funcs + .iter() + .find(|f| f.function_name == req.function_name) + { + None => missing.push(req.function_name.clone()), + Some(found) + if found.parameter_types != req.parameter_types + || found.return_type != req.return_type => + { + signature_mismatches.push(format!( + "{}: snapshot has {:?} -> {:?}, registered {:?} -> {:?}", + req.function_name, + req.parameter_types, + req.return_type, + found.parameter_types, + found.return_type, + )); + } + Some(_) => {} + } + } + + if missing.is_empty() && signature_mismatches.is_empty() { + return Ok(()); + } + + Err(crate::new_error!( + "snapshot host function mismatch: missing={:?}, signature_mismatches={:?}", + missing, + signature_mismatches + )) + } } impl PartialEq for Snapshot { @@ -674,6 +752,7 @@ impl PartialEq for Snapshot { #[cfg(test)] #[cfg(not(feature = "i686-guest"))] mod tests { + use hyperlight_common::flatbuffer_wrappers::host_function_details::HostFunctionDetails; use hyperlight_common::vmem::{self, BasicMapping, Mapping, MappingKind, PAGE_SIZE}; use crate::hypervisor::regs::CommonSpecialRegisters; @@ -747,6 +826,7 @@ mod tests { default_sregs(), super::NextAction::None, 1, + HostFunctionDetails::default(), ) .unwrap(); @@ -764,6 +844,7 @@ mod tests { default_sregs(), super::NextAction::None, 2, + HostFunctionDetails::default(), ) .unwrap(); diff --git a/src/hyperlight_host/src/sandbox/uninitialized.rs b/src/hyperlight_host/src/sandbox/uninitialized.rs index 23c01be28..b65b1e655 100644 --- a/src/hyperlight_host/src/sandbox/uninitialized.rs +++ b/src/hyperlight_host/src/sandbox/uninitialized.rs @@ -22,7 +22,7 @@ use std::sync::{Arc, Mutex}; use tracing::{Span, instrument}; use tracing_core::LevelFilter; -use 
super::host_funcs::{FunctionRegistry, default_writer_func}; +use super::host_funcs::FunctionRegistry; use super::snapshot::Snapshot; use super::uninitialized_evolve::evolve_impl_multi_use; use crate::func::host_functions::{HostFunction, register_host_function}; @@ -365,9 +365,9 @@ impl UninitializedSandbox { let mem_mgr_wrapper = SandboxMemoryManager::::from_snapshot(snapshot.as_ref())?; - let host_funcs = Arc::new(Mutex::new(FunctionRegistry::default())); + let host_funcs = Arc::new(Mutex::new(FunctionRegistry::with_default_host_print())); - let mut sandbox = Self { + let sandbox = Self { host_funcs, mgr: mem_mgr_wrapper, max_guest_log_level: None, @@ -383,9 +383,6 @@ impl UninitializedSandbox { pending_file_mappings: Vec::new(), }; - // If we were passed a writer for host print register it otherwise use the default. - sandbox.register_print(default_writer_func)?; - crate::debug!("Sandbox created: {:#?}", sandbox); Ok(sandbox) diff --git a/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs b/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs index 7f0cc1c0d..c037af06e 100644 --- a/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs +++ b/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs @@ -73,7 +73,7 @@ pub(super) fn evolve_impl_multi_use(u_sbox: UninitializedSandbox) -> Result() }; let peb_addr = { - let peb_u64 = u64::try_from(hshm.layout.peb_address)?; + let peb_u64 = u64::try_from(hshm.layout.peb_address())?; RawPtr::from(peb_u64) };