From d439c3909a012a39065054c41bfa85e9dba83926 Mon Sep 17 00:00:00 2001 From: Longfang Zhao Date: Tue, 9 Jun 2026 16:42:26 -0700 Subject: [PATCH 1/2] Cross-load packed weight cache reuse for XNNPACK Summary: Add cross-load reuse + multi-PTE safety to the file-backed packed weight cache (D106673663). The first PTE in a session calls `save_packed_index()` to append a trailer; subsequent process launches mmap the file and pre-populate `name_to_packed_data_metadata_` so `look_up()` hits for every saved weight and `xnn_create_runtime` skips packing entirely. ## Cache file format ``` [packed data regions] (written by reserve_space) [index entries] (written by save_packed_index) each: name_len(4B) | name(N) | file_offset(8B) | data_size(8B) [footer: 20 bytes] index_start(8B) | entry_count(4B) | magic "XPWC"(4B) | version(4B) ``` ## Lifecycle invariants - `cache_loaded_` gate: `load_packed_cache()` runs at most once per process per path. Subsequent PTE inits for the same path reopen the write fd without re-reading the trailer. - `from_load` flag: persistent entries (loaded from trailer or promoted on save) skip `delete_packed_data` cleanup. This keeps the mmap region and metadata alive across PTE unload/reload, so the next init hits the cache instead of repacking. Without this, every PTE destroy/recreate cycle appended a fresh copy to the file (~450 MB per cycle). - No-op save short-circuit: `save_packed_index` returns early when no new `reserve_space` happened since the last save, avoiding the mtime churn that previously made the cache file look modified on every model load. ## Multi-PTE behavior - Multiple PTEs (or methods that don't share weights) in the same model load share one cache file. Each PTE's `reserve_space` extends the file; `finalize_for_runtime` msyncs only newly added regions; `save_packed_index` writes one trailer covering all PTEs at the end of the load. 
- Sibling PTEs that opt out of the mmap path (caller passes empty `packed_cache_path`) early-return from `initialize_for_runtime` and fall through to heap allocation, without touching the singleton's PLLM state. - Cross-model coexistence relies on caller-side discipline: only models that opt in set a non-empty cache path. Setting different non-empty paths concurrently is not supported by this singleton design. ## Caller change `XNNPACKBackend::init` always calls `set_packed_cache_path` (with empty string for non-opted-in PTEs). This keeps the singleton path in sync with the current PTE instead of inheriting a sibling's path. ## Test Plan ``` buck2 test fbcode//executorch/backends/xnnpack/test:test_xnn_weights_cache # 5 pass buck2 build fbsource//xplat/executorch/backends/xnnpack:xnnpack_backendApple buck2 build fbsource//xplat/executorch/backends/xnnpack:xnnpack_backend buck2 build fbcode//executorch/backends/xnnpack:xnnpack_backend ``` On device (iOS Stella build, PLLM + Llama3 runner): - Cold start: load `(1184 entries)` from cache, `reserve_mmap=0` for cached weights - Cache file size stable at ~593 MB across PLLM unload/reload cycles - `app_peak ~700 MB` (vs ~2.5 GB pre-fix) - `compressed ~100 MB` (vs ~1.7 GB pre-fix) Differential Revision: D106717093 --- backends/xnnpack/runtime/XNNPACKBackend.cpp | 12 +- backends/xnnpack/runtime/XNNPACKBackend.h | 3 + backends/xnnpack/runtime/XNNWeightsCache.cpp | 462 ++++++++++++++---- backends/xnnpack/runtime/XNNWeightsCache.h | 35 +- .../test/runtime/test_xnn_weights_cache.cpp | 393 +++++++++++++++ 5 files changed, 807 insertions(+), 98 deletions(-) diff --git a/backends/xnnpack/runtime/XNNPACKBackend.cpp b/backends/xnnpack/runtime/XNNPACKBackend.cpp index 3a5d6ab7958..6e1d3b042a5 100644 --- a/backends/xnnpack/runtime/XNNPACKBackend.cpp +++ b/backends/xnnpack/runtime/XNNPACKBackend.cpp @@ -100,9 +100,7 @@ class XnnpackBackend final lock_weights_cache.lock(); const auto& cache_path = options_.get_packed_cache_path(); - if 
(!cache_path.empty()) { - weights_cache_->set_packed_cache_path(cache_path); - } + weights_cache_->set_packed_cache_path(cache_path); weights_cache_->initialize_for_runtime( context.get_runtime_allocator(), named_data_map); @@ -219,6 +217,14 @@ class XnnpackBackend final BackendOptionContext& context, const Span& backend_options) override { for (const auto& option : backend_options) { + if (strcmp(option.key, xnnpack::save_packed_index_option_key) == 0) { + auto* val = std::get_if(&option.value); + if (val && *val) { + const std::lock_guard lock(weights_cache_mutex_); + return weights_cache_->save_packed_index(); + } + continue; + } Error err = options_.set_option(option); if (err != Error::Ok) { return err; diff --git a/backends/xnnpack/runtime/XNNPACKBackend.h b/backends/xnnpack/runtime/XNNPACKBackend.h index e3492c3f5f3..ac9d42b9fbf 100644 --- a/backends/xnnpack/runtime/XNNPACKBackend.h +++ b/backends/xnnpack/runtime/XNNPACKBackend.h @@ -20,6 +20,9 @@ const char weight_cache_option_key[] = "weight_cache_enabled"; // @lint-ignore CLANGTIDY facebook-hte-CArray const char packed_cache_path_option_key[] = "packed_cache_path"; +/// Trigger saving the packed weight index for cross-load cache reuse. +const char save_packed_index_option_key[] = "save_packed_index"; + /// Workspace sharing mode. This is a backend option that can be set via the /// set_option API to control memory sharing between CALL_DELEGATE instances. /// This is useful for reducing memory consumption. 
diff --git a/backends/xnnpack/runtime/XNNWeightsCache.cpp b/backends/xnnpack/runtime/XNNWeightsCache.cpp index 70c410e5729..9eab694d529 100644 --- a/backends/xnnpack/runtime/XNNWeightsCache.cpp +++ b/backends/xnnpack/runtime/XNNWeightsCache.cpp @@ -24,6 +24,7 @@ #include #include + namespace executorch { namespace backends { namespace xnnpack { @@ -32,6 +33,8 @@ namespace delegate { using executorch::ET_RUNTIME_NAMESPACE::NamedDataMap; using executorch::runtime::MemoryAllocator; + + XNNWeightsCache::XNNWeightsCache() { weights_cache_.context = this; weights_cache_.look_up = (size_t(*)( @@ -63,6 +66,72 @@ XNNWeightsCache::~XNNWeightsCache() { #endif } +// Helpers that serialize the trailer in host byte order (LE on LE hosts). +template +static void append_le(std::vector& buf, T value) { + const auto* p = reinterpret_cast(&value); + buf.insert(buf.end(), p, p + sizeof(T)); +} + +template +static T read_le(const uint8_t* src) { + T value; + memcpy(&value, src, sizeof(T)); + return value; +} + +#ifndef _WIN32 +// Open the cache file and take an advisory exclusive lock. Returns the +// fd, or -1 if open/flock failed (logs the failure). The caller decides +// how to recover (typically: skip the mmap path for this init). +static int open_locked(const std::string& path, int flags) { + int fd = open(path.c_str(), flags, 0600); + if (fd < 0) { + ET_LOG(Error, "open(%s) failed (errno=%d)", path.c_str(), errno); + return -1; + } + if (flock(fd, LOCK_EX | LOCK_NB) != 0) { + ET_LOG(Error, "flock(%s) failed (errno=%d)", path.c_str(), errno); + close(fd); + return -1; + } + return fd; +} + +// Drop in-memory state that referenced a now-truncated cache file. +// Heap-backed entries (live in packed_pointer_to_container_) stay; their +// packed_data_ptrs_ slots remain valid so existing offsets don't shift. 
+void XNNWeightsCache::reset_for_fresh_write() { + for (auto& region : mmap_regions_) { + if (region.addr != nullptr && region.addr != MAP_FAILED) { + munmap(region.addr, region.size); + } + } + mmap_regions_.clear(); + mmap_regions_synced_ = 0; + packed_file_used_ = 0; + ptr_to_file_offset_.clear(); + file_ptr_to_region_index_.clear(); + for (auto it = name_to_packed_data_metadata_.begin(); + it != name_to_packed_data_metadata_.end();) { + bool is_heap_backed = false; + if (it->second.offset < packed_data_ptrs_.size()) { + void* ptr = packed_data_ptrs_[it->second.offset]; + if (ptr != nullptr && + packed_pointer_to_container_.find(ptr) != + packed_pointer_to_container_.end()) { + is_heap_backed = true; + } + } + if (is_heap_backed) { + ++it; + } else { + it = name_to_packed_data_metadata_.erase(it); + } + } +} +#endif + Error XNNWeightsCache::initialize_for_runtime( MemoryAllocator* runtime_allocator, const NamedDataMap* named_data_map) { @@ -71,38 +140,52 @@ Error XNNWeightsCache::initialize_for_runtime( is_finalized_ = false; #ifndef _WIN32 - // Open the file for packed weights. Each reserve_space() call - // independently mmaps a region of the file. Once packed_file_disabled_ - // is set we never re-open — re-opening with O_TRUNC would corrupt any - // still-live mappings into the same path and cause SIGBUS on access. - if (!packed_cache_path_.empty() && packed_file_fd_ < 0 && - !packed_file_disabled_) { - packed_file_fd_ = - open(packed_cache_path_.c_str(), O_RDWR | O_CREAT | O_TRUNC, 0600); - if (packed_file_fd_ < 0) { - ET_LOG( - Error, - "Failed to open packed weight file: %s (errno=%d)", - packed_cache_path_.c_str(), - errno); - } else if (flock(packed_file_fd_, LOCK_EX | LOCK_NB) != 0) { - // Another XNNWeightsCache instance (this process or another) is - // already using this path. O_TRUNC above would corrupt its mappings. 
- // Disable mmap for this instance to prevent collision; fall back to - // heap allocation for the remainder of this cache's lifetime. - ET_LOG( - Error, - "Another instance is using packed weight cache file %s (errno=%d); " - "disabling mmap path", - packed_cache_path_.c_str(), - errno); - close(packed_file_fd_); - packed_file_fd_ = -1; - packed_file_disabled_ = true; - } else { - ET_LOG(Info, "Opened packed weight file: %s", packed_cache_path_.c_str()); - } + if (packed_cache_path_.empty() || packed_file_fd_ >= 0) { + return Error::Ok; + } + + // Already loaded earlier this session; just reopen the write fd that + // save_packed_index() closed. Subsequent reserve_space can extend the + // file for any entries not in the saved trailer. + if (cache_loaded_) { + packed_file_fd_ = open_locked(packed_cache_path_, O_RDWR); + return Error::Ok; + } + + // First init for this path: try to load the saved trailer; on success + // open a write fd for any new entries. If load fails, fall through to + // fresh-write below. + if (load_packed_cache()) { + ET_LOG( + Info, + "Loaded packed weight cache: %s (%zu entries)", + packed_cache_path_.c_str(), + name_to_packed_data_metadata_.size()); + packed_file_fd_ = open_locked(packed_cache_path_, O_RDWR); + return Error::Ok; + } + + // Fresh write. Skip O_TRUNC in open_locked so a concurrent holder's + // mmap stays valid; truncate explicitly only after we hold the lock. 
+ packed_file_fd_ = open_locked(packed_cache_path_, O_RDWR | O_CREAT); + if (packed_file_fd_ < 0) { + return Error::Ok; + } + if (ftruncate(packed_file_fd_, 0) != 0) { + ET_LOG( + Error, + "ftruncate(0) failed for %s (errno=%d); heap fallback this init", + packed_cache_path_.c_str(), + errno); + close(packed_file_fd_); + packed_file_fd_ = -1; + return Error::Ok; } + reset_for_fresh_write(); + ET_LOG( + Info, + "Opened packed weight file for writing: %s", + packed_cache_path_.c_str()); #endif return Error::Ok; @@ -130,6 +213,7 @@ Result> XNNWeightsCache::finalize_for_runtime() { } } + #ifndef _WIN32 // Schedule async flush for newly added regions only. // MS_ASYNC returns immediately; OS flushes in the background. @@ -164,59 +248,86 @@ Result XNNWeightsCache::load_unpacked_data( static_cast(named_data.get().data()); unpacked_data_.push_back(std::move(named_data.get())); unpacked_data_to_name_[data_pointer] = name; - return data_pointer; } +void XNNWeightsCache::release_entry(void* packed_data_ptr) { + packed_pointer_to_container_.erase(packed_data_ptr); +#ifndef _WIN32 + // Per-entry file-backed mmap region: munmap to release VM. The + // packed_data_ptrs_ slot is nulled by the caller so existing offsets + // stay valid. 
+ auto region_it = file_ptr_to_region_index_.find(packed_data_ptr); + if (region_it != file_ptr_to_region_index_.end()) { + MmapRegion& region = mmap_regions_[region_it->second]; + if (region.addr != nullptr && region.addr != MAP_FAILED) { + munmap(region.addr, region.size); + region.addr = nullptr; + region.size = 0; + } + file_ptr_to_region_index_.erase(region_it); + } +#endif +} + +void XNNWeightsCache::full_unload() { +#ifndef _WIN32 + for (auto& region : mmap_regions_) { + if (region.addr != nullptr && region.addr != MAP_FAILED) { + munmap(region.addr, region.size); + region.addr = nullptr; + region.size = 0; + } + } + mmap_regions_.clear(); + mmap_regions_synced_ = 0; + packed_data_ptrs_.clear(); + ptr_to_file_offset_.clear(); + file_ptr_to_region_index_.clear(); + cache_loaded_ = false; + if (packed_file_fd_ >= 0) { + close(packed_file_fd_); + packed_file_fd_ = -1; + } +#endif +} + Error XNNWeightsCache::delete_packed_data( const std::vector& packed_data_names) { if (!is_finalized_) { ET_LOG( Error, - "Error, attempted to delete packed data from the cache but the cache is not finalized"); + "delete_packed_data called before finalize_for_runtime"); return Error::InvalidArgument; } for (const std::string& name : packed_data_names) { auto entry = name_to_packed_data_metadata_.find(name); if (entry == name_to_packed_data_metadata_.end()) { - ET_LOG( - Error, - "Error, attempted to deleted packed data: %s, from the cache but it wasn't found", - name.c_str()); + ET_LOG(Error, "delete_packed_data: '%s' not found", name.c_str()); return Error::InvalidArgument; - } else { - entry->second.ref_count--; - if (entry->second.ref_count == 0) { - void* packed_data_ptr = packed_data_ptrs_[entry->second.offset]; - // Erase the key/value from the map frees the pointer holding the - // packed data. No-op on the file-backed mmap path, where the - // container is not populated. 
- packed_pointer_to_container_.erase(packed_data_ptr); -#ifndef _WIN32 - // File-backed mmap path: munmap the region so VM and page-cache - // usage is released, not just retained until cache destruction. - // The vector slot is set to nullptr below so existing offsets remain - // valid for any concurrent lookups. - auto region_it = file_ptr_to_region_index_.find(packed_data_ptr); - if (region_it != file_ptr_to_region_index_.end()) { - size_t idx = region_it->second; - MmapRegion& region = mmap_regions_[idx]; - if (region.addr != nullptr && region.addr != MAP_FAILED) { - munmap(region.addr, region.size); - region.addr = nullptr; - region.size = 0; - } - file_ptr_to_region_index_.erase(region_it); - } -#endif - // Remove the pointer from packed_data_ptrs_. - packed_data_ptrs_[entry->second.offset] = nullptr; - // Erase the name to packed metadata entry. - name_to_packed_data_metadata_.erase(entry->first); - } } + if (--entry->second.ref_count > 0) { + continue; + } + // Keep from_load entries: their packed bytes live in the cache file + // and stay valid until full unload. Erasing them would force the + // next init to re-pack and append ~450 MB to the file per cycle. + if (entry->second.from_load) { + entry->second.in_current_runtime = false; + continue; + } + release_entry(packed_data_ptrs_[entry->second.offset]); + packed_data_ptrs_[entry->second.offset] = nullptr; + name_to_packed_data_metadata_.erase(entry); } + // Last entry gone: drop all in-memory state. File on disk is preserved + // so the next process can load_packed_cache and skip re-packing. If + // reserve_space after the last save corrupted the trailer, load will + // fall through to fresh-write — same outcome as truncating here. 
+ if (name_to_packed_data_metadata_.empty()) { + full_unload(); + } return Error::Ok; } @@ -226,15 +337,11 @@ size_t XNNWeightsCache::look_up( const void* unpacked_weights_ptr = cache_key->kernel; const void* unpacked_bias_ptr = cache_key->bias; auto entry = context->unpacked_data_to_name_.find(unpacked_weights_ptr); - - // Check if weight_pointer has been cached if (entry == context->unpacked_data_to_name_.end()) { return SIZE_MAX; } - std::string weight_bias_name = entry->second; - // Check if bias_pointer has been cached if (unpacked_bias_ptr != nullptr) { auto bias_entry = context->unpacked_data_to_name_.find(unpacked_bias_ptr); if (bias_entry != context->unpacked_data_to_name_.end()) { @@ -242,14 +349,12 @@ size_t XNNWeightsCache::look_up( } } - // check if weight_bias_name has been packed already auto packed_weight_entry = context->name_to_packed_data_metadata_.find(weight_bias_name); if (packed_weight_entry == context->name_to_packed_data_metadata_.end()) { return SIZE_MAX; } packed_weight_entry->second.in_current_runtime = true; - return packed_weight_entry->second.offset; } @@ -264,16 +369,11 @@ void* XNNWeightsCache::reserve_space(XNNWeightsCache* context, size_t n) { if (ftruncate(context->packed_file_fd_, file_offset + map_size) != 0) { ET_LOG( Error, - "ftruncate to %zu failed (errno=%d)", + "reserve_space ftruncate to %zu failed (errno=%d)", file_offset + map_size, errno); close(context->packed_file_fd_); context->packed_file_fd_ = -1; - // Existing mmap_regions_ still reference this inode. Disable the - // file-backed path permanently so a future initialize_for_runtime - // doesn't re-open + O_TRUNC the same path and trigger SIGBUS on the - // stale mappings. 
- context->packed_file_disabled_ = true; return context->reserve_space_heap(n); } @@ -285,15 +385,18 @@ void* XNNWeightsCache::reserve_space(XNNWeightsCache* context, size_t n) { context->packed_file_fd_, file_offset); if (ptr == MAP_FAILED) { - ET_LOG(Error, "mmap %zu bytes failed (errno=%d)", map_size, errno); + ET_LOG( + Error, + "reserve_space mmap %zu bytes failed (errno=%d)", + map_size, + errno); close(context->packed_file_fd_); context->packed_file_fd_ = -1; - context->packed_file_disabled_ = true; return context->reserve_space_heap(n); } // mmap returns page-aligned (>= 4 KiB), which trivially satisfies the - // 64-byte kPackedAllocationAlignment XNNPACK expects. Assert defensively. + // 64-byte kPackedAllocationAlignment XNNPACK expects. ET_DCHECK_MSG( (reinterpret_cast(ptr) % kPackedAllocationAlignment) == 0, "mmap returned ptr not aligned to %zu bytes", @@ -302,10 +405,10 @@ void* XNNWeightsCache::reserve_space(XNNWeightsCache* context, size_t n) { context->packed_file_used_ = file_offset + map_size; context->file_ptr_to_region_index_[ptr] = context->mmap_regions_.size(); context->mmap_regions_.push_back({ptr, map_size}); + context->ptr_to_file_offset_[ptr] = file_offset; return ptr; } #endif - return context->reserve_space_heap(n); } @@ -343,11 +446,8 @@ size_t XNNWeightsCache::look_up_or_insert( size_t size) { size_t offset = context->look_up(context, cache_key); - // XNNPACK can call this with ptr==nullptr when it previously hit the cache - // and skipped packing. We can't validate against the ptr contents in this - // case, so just return the offset. This might actually be a bug in XNNPACK - // since calling look_up_or_insert with ptr==nullptr doesn't really make - // sense... + // XNNPACK calls with ptr==nullptr after a cache hit (no packing + // happened, nothing to validate against). Return the offset as-is. 
if (ptr == nullptr) { return offset; } @@ -357,7 +457,7 @@ size_t XNNWeightsCache::look_up_or_insert( if (saved_ptr != nullptr && 0 == memcmp(ptr, saved_ptr, size)) { return offset; } - // Failure, cache is out of date + // Cache out of date: name hits but packed bytes differ. return SIZE_MAX; } @@ -376,6 +476,7 @@ size_t XNNWeightsCache::look_up_or_insert( } PackedDataMeta packed_data_metadata; packed_data_metadata.offset = next_offset; + packed_data_metadata.data_size = size; packed_data_metadata.ref_count = 0; // ref_count is only incremented after finalizing for runtime packed_data_metadata.in_current_runtime = true; @@ -408,6 +509,189 @@ void XNNWeightsCache::set_packed_cache_path(const std::string& path) { packed_cache_path_ = path; } +Error XNNWeightsCache::save_packed_index() { +#ifndef _WIN32 + if (packed_file_fd_ < 0) { + return Error::Ok; + } + // Skip no-op saves: identical bytes would still bump mtime via + // pwrite/fsync, making the cache file appear modified on every load. 
+ if (mmap_regions_.size() == mmap_regions_at_last_save_ && + mmap_regions_at_last_save_ > 0) { + return Error::Ok; + } + + size_t index_start = packed_file_used_; + std::vector buf; + uint32_t entry_count = 0; + + // Index entry: [name_len:u32][name][file_offset:u64][data_size:u64] + for (const auto& [name, meta] : name_to_packed_data_metadata_) { + void* ptr = packed_data_ptrs_[meta.offset]; + auto it = ptr_to_file_offset_.find(ptr); + if (it == ptr_to_file_offset_.end()) { + continue; + } + entry_count++; + append_le(buf, static_cast(name.size())); + buf.insert(buf.end(), name.begin(), name.end()); + append_le(buf, static_cast(it->second)); + append_le(buf, static_cast(meta.data_size)); + } + + // Footer: [index_start:u64][entry_count:u32][magic:u32][version:u32] + append_le(buf, static_cast(index_start)); + append_le(buf, entry_count); + append_le(buf, kCacheMagic); + append_le(buf, kCacheVersion); + + if (ftruncate(packed_file_fd_, index_start + buf.size()) != 0) { + ET_LOG(Error, "Failed to extend file for index (errno=%d)", errno); + return Error::Internal; + } + ssize_t written = + pwrite(packed_file_fd_, buf.data(), buf.size(), index_start); + if (written != static_cast(buf.size())) { + ET_LOG(Error, "Failed to write index (errno=%d)", errno); + return Error::Internal; + } + // Ensure trailer is on disk before we declare success. + if (fsync(packed_file_fd_) != 0) { + ET_LOG(Error, "fsync of packed cache failed (errno=%d)", errno); + // Continue — data is in page cache; durability is best-effort. + } + ET_LOG( + Info, + "Saved packed weight index: %u entries at offset %zu", + entry_count, + index_start); + + // Promote freshly-packed entries to from_load now that they're durable + // on disk, so delete_packed_data preserves them across unload/reload. 
+ for (auto& [name, meta] : name_to_packed_data_metadata_) { + if (!meta.from_load && ptr_to_file_offset_.find( + packed_data_ptrs_[meta.offset]) != + ptr_to_file_offset_.end()) { + meta.from_load = true; + } + } + + mmap_regions_at_last_save_ = mmap_regions_.size(); + + // Close the fd so the next init re-enters load_packed_cache and reads + // the trailer we just wrote. + if (close(packed_file_fd_) != 0) { + ET_LOG(Error, "close of packed cache fd failed (errno=%d)", errno); + } + packed_file_fd_ = -1; +#endif + return Error::Ok; +} + +bool XNNWeightsCache::load_packed_cache() { +#ifndef _WIN32 + int fd = open(packed_cache_path_.c_str(), O_RDONLY); + if (fd < 0) { + return false; + } + // Prevent racing with a concurrent writer + if (flock(fd, LOCK_SH | LOCK_NB) != 0) { + close(fd); + return false; + } + struct stat st; + if (fstat(fd, &st) != 0 || st.st_size < 20) { + close(fd); + return false; + } + size_t file_size = static_cast(st.st_size); + + uint8_t footer[20]; + if (pread(fd, footer, 20, file_size - 20) != 20) { + close(fd); + return false; + } + uint64_t index_start = read_le(footer); + uint32_t entry_count = read_le(footer + 8); + uint32_t magic = read_le(footer + 12); + uint32_t version = read_le(footer + 16); + + if (magic != kCacheMagic || version != kCacheVersion || + index_start >= file_size - 20) { + close(fd); + return false; + } + const size_t index_region_end = file_size - 20; + + void* map = mmap(nullptr, file_size, PROT_READ, MAP_SHARED, fd, 0); + close(fd); + if (map == MAP_FAILED) { + return false; + } + mmap_regions_.push_back({map, file_size}); + + const uint8_t* cursor = static_cast(map) + index_start; + const uint8_t* end = static_cast(map) + index_region_end; + + for (uint32_t i = 0; i < entry_count && cursor + 4 <= end; ++i) { + uint32_t name_len = read_le(cursor); + cursor += 4; + if (cursor + name_len + 16 > end) { + break; + } + std::string name(reinterpret_cast(cursor), name_len); + cursor += name_len; + uint64_t file_offset = 
read_le(cursor); + cursor += 8; + uint64_t data_size = read_le(cursor); + cursor += 8; + + // Bounds check: the entry's bytes must lie entirely inside the + // packed-data region + if (file_offset >= index_start || + data_size > index_start - file_offset) { + ET_LOG( + Error, + "load_packed_cache: entry '%s' has invalid bounds (file_offset=%llu, data_size=%llu, index_start=%llu); aborting load", + name.c_str(), + static_cast(file_offset), + static_cast(data_size), + static_cast(index_start)); + // Roll back any partial state. + munmap(map, file_size); + mmap_regions_.pop_back(); + name_to_packed_data_metadata_.clear(); + packed_data_ptrs_.clear(); + ptr_to_file_offset_.clear(); + return false; + } + + size_t ptr_index = packed_data_ptrs_.size(); + void* entry_ptr = static_cast(map) + file_offset; + packed_data_ptrs_.push_back(entry_ptr); + // Tracked so a subsequent save_packed_index can rewrite the trailer + // covering both loaded and newly-packed entries. + ptr_to_file_offset_[entry_ptr] = file_offset; + PackedDataMeta meta; + meta.offset = ptr_index; + meta.data_size = data_size; + meta.ref_count = 0; + meta.in_current_runtime = false; + meta.from_load = true; + name_to_packed_data_metadata_[name] = meta; + } + + cache_loaded_ = true; + packed_file_used_ = index_start; + // In-memory state matches the on-disk trailer; the next save would be + // a no-op. Initialize watermark so save_packed_index short-circuits. 
+ mmap_regions_at_last_save_ = mmap_regions_.size(); + return true; +#else + return false; +#endif +} + } // namespace delegate } // namespace xnnpack } // namespace backends diff --git a/backends/xnnpack/runtime/XNNWeightsCache.h b/backends/xnnpack/runtime/XNNWeightsCache.h index a41fed49fd1..d6910b31ce3 100644 --- a/backends/xnnpack/runtime/XNNWeightsCache.h +++ b/backends/xnnpack/runtime/XNNWeightsCache.h @@ -31,11 +31,19 @@ using executorch::runtime::Result; struct PackedDataMeta { size_t offset; + size_t data_size{0}; // Count number of xnn_runtime_t this packed data is used in size_t ref_count; // true if this packed data was inserted or looked up for the // current runtime being created bool in_current_runtime; + // True if this entry's bytes are persisted in the on-disk cache file + // (either originally loaded via load_packed_cache, or freshly packed + // and then save_packed_index-ed). delete_packed_data keeps such + // entries (only clearing in_current_runtime) instead of releasing + // them when ref_count hits zero, so the next init's look_up still + // hits the cache instead of re-packing. + bool from_load{false}; }; class XNNWeightsCache { @@ -138,7 +146,16 @@ class XNNWeightsCache { */ void set_packed_cache_path(const std::string& path); + /** Save packed weight index so subsequent loads skip packing. */ + Error save_packed_index(); + private: + static constexpr uint32_t kCacheMagic = 0x58505743; // "XPWC" + static constexpr uint32_t kCacheVersion = 1; + bool load_packed_cache(); + void reset_for_fresh_write(); + void release_entry(void* packed_data_ptr); + void full_unload(); // Runtime Allocator used to reserve memory for packed weights MemoryAllocator* runtime_allocator_; @@ -167,18 +184,24 @@ class XNNWeightsCache { std::string packed_cache_path_; int packed_file_fd_{-1}; size_t packed_file_used_{0}; - // Set after an unrecoverable mmap/ftruncate failure. 
Prevents re-opening - // the cache file on subsequent initialize_for_runtime() calls — re-opening - // with O_TRUNC would truncate the inode beneath any still-live mmap pages - // and the next access would raise SIGBUS. Once disabled, all reserve_space - // calls fall back to heap allocation for the lifetime of this cache. - bool packed_file_disabled_{false}; + // True once load_packed_cache() has populated metadata from a saved + // index; later inits then only reopen the write fd instead of + // re-reading the trailer. Reset to false by full_unload(). + bool cache_loaded_{false}; + // Tracks file offset of each file-backed allocation. Used by + // save_packed_index() to serialize (name → offset, size) index. + std::unordered_map ptr_to_file_offset_; struct MmapRegion { void* addr; size_t size; }; std::vector mmap_regions_; size_t mmap_regions_synced_{0}; + // Number of regions present at the time of the most recent successful + // save_packed_index. Used to skip no-op saves (trailer would be byte- + // identical, but pwrite/fsync still bump mtime, making it look like the + // cache file is constantly "modified" when nothing has actually changed). + size_t mmap_regions_at_last_save_{0}; // For file-backed packed allocations, maps the returned ptr to its index // in mmap_regions_, so delete_packed_data() can munmap when ref_count==0. 
std::unordered_map file_ptr_to_region_index_; diff --git a/backends/xnnpack/test/runtime/test_xnn_weights_cache.cpp b/backends/xnnpack/test/runtime/test_xnn_weights_cache.cpp index 83937887e25..4639d96152d 100644 --- a/backends/xnnpack/test/runtime/test_xnn_weights_cache.cpp +++ b/backends/xnnpack/test/runtime/test_xnn_weights_cache.cpp @@ -17,6 +17,9 @@ #include #include #include +#include +#include +#include #include using executorch::backends::xnnpack::delegate::XNNWeightsCache; @@ -352,4 +355,394 @@ TEST_F(XNNWeightsCacheTest, PackedWeightsMmapPathLockCollision) { ::unlink(cache_path.c_str()); } + +// Verify load_packed_cache produces byte-identical inference results to +// a fresh build of the same graph. Guards against weight pointers being +// mis-mapped after cache load. +TEST_F(XNNWeightsCacheTest, SaveAndLoad_PreservesInferenceOutput) { + std::string cache_path = std::string("/tmp/xnn_weights_cache_output_") + + std::to_string(::getpid()) + ".packed_cache"; + ::unlink(cache_path.c_str()); + + std::vector batches{1, 2, 3}; + size_t input_channels = 3; + size_t output_channels = 4; + size_t num_batches = 1 * 2 * 3; + size_t padding = 32; + std::vector input_tensor( + num_batches * input_channels + padding, 1.0f); + + // Run 1: no cache file (pure heap pack). + std::vector output_baseline(num_batches * output_channels, 0.0f); + { + XNNWeightsCache cache; + cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get()); + BuildAndRunGraphWithWeightsCache( + cache, + batches, + input_channels, + output_channels, + input_tensor.data(), + output_baseline.data()); + } + + // Run 2: file-backed mmap path, save trailer. 
+ { + XNNWeightsCache cache; + cache.set_packed_cache_path(cache_path); + cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get()); + std::vector output_write(num_batches * output_channels, 0.0f); + BuildAndRunGraphWithWeightsCache( + cache, + batches, + input_channels, + output_channels, + input_tensor.data(), + output_write.data()); + ASSERT_EQ(cache.save_packed_index(), Error::Ok); + EXPECT_EQ(output_write, output_baseline); + } + + // Run 3: fresh instance loads from disk; output must match. + { + XNNWeightsCache cache; + cache.set_packed_cache_path(cache_path); + cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get()); + ASSERT_GT(cache.get_packed_data_names().size(), 0u); + std::vector output_load(num_batches * output_channels, 0.0f); + BuildAndRunGraphWithWeightsCache( + cache, + batches, + input_channels, + output_channels, + input_tensor.data(), + output_load.data()); + EXPECT_EQ(output_load, output_baseline); + } + + ::unlink(cache_path.c_str()); +} + +// Corrupted cache file must not crash; load_packed_cache returns false and +// the next init falls through to the fresh-build path that overwrites it. +TEST_F(XNNWeightsCacheTest, LoadPackedCache_RejectsCorruptTrailer) { + std::string cache_path = std::string("/tmp/xnn_weights_cache_corrupt_") + + std::to_string(::getpid()) + ".packed_cache"; + ::unlink(cache_path.c_str()); + + // Write a file with valid size but garbage trailer. + { + std::ofstream f(cache_path, std::ios::binary); + std::vector garbage(1024, '\xCC'); + f.write(garbage.data(), garbage.size()); + } + + XNNWeightsCache cache; + cache.set_packed_cache_path(cache_path); + // Must not crash; load returns false → falls through to fresh build. + Error err = + cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get()); + ASSERT_EQ(err, Error::Ok); + + // Fresh build still works. 
+ std::vector batches{1, 2, 3}; + size_t input_channels = 3; + size_t output_channels = 4; + size_t num_batches = 1 * 2 * 3; + size_t padding = 32; + std::vector input(num_batches * input_channels + padding, 1.0f); + std::vector output(num_batches * output_channels, 0.0f); + BuildAndRunGraphWithWeightsCache( + cache, + batches, + input_channels, + output_channels, + input.data(), + output.data()); + + ::unlink(cache_path.c_str()); +} + +// Repeated init+run+save cycles on the same file must not grow the cache +// file. Guards against the regression where each PTE init re-packed weights +// and appended a fresh copy (+500 MB per inference observed in production). +TEST_F(XNNWeightsCacheTest, MultiSessionLoad_DoesNotGrowCacheFile) { + std::string cache_path = std::string("/tmp/xnn_weights_cache_nogrow_") + + std::to_string(::getpid()) + ".packed_cache"; + ::unlink(cache_path.c_str()); + + std::vector batches{1, 2, 3}; + size_t input_channels = 3; + size_t output_channels = 4; + size_t num_batches = 1 * 2 * 3; + size_t padding = 32; + std::vector input(num_batches * input_channels + padding, 1.0f); + std::vector output(num_batches * output_channels, 0.0f); + + // Cycle 1: fresh write of cache. + off_t size_after_first_save = 0; + { + XNNWeightsCache cache; + cache.set_packed_cache_path(cache_path); + cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get()); + BuildAndRunGraphWithWeightsCache( + cache, + batches, + input_channels, + output_channels, + input.data(), + output.data()); + ASSERT_EQ(cache.save_packed_index(), Error::Ok); + struct stat st {}; + ASSERT_EQ(::stat(cache_path.c_str(), &st), 0); + size_after_first_save = st.st_size; + ASSERT_GT(size_after_first_save, 0); + } + + // Cycle 2: fresh instance loads from disk, runs, saves. No new weights + // were packed → file must be byte-for-byte identical in length. 
+ { + XNNWeightsCache cache; + cache.set_packed_cache_path(cache_path); + cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get()); + ASSERT_GT(cache.get_packed_data_names().size(), 0u); + BuildAndRunGraphWithWeightsCache( + cache, + batches, + input_channels, + output_channels, + input.data(), + output.data()); + ASSERT_EQ(cache.save_packed_index(), Error::Ok); + } + { + struct stat st {}; + ASSERT_EQ(::stat(cache_path.c_str(), &st), 0); + EXPECT_EQ(st.st_size, size_after_first_save); + } + + // Cycle 3: simulate PTE destroy + recreate inside the same instance. + // delete_packed_data on from_load entries must not erase metadata, so + // the second init's look_up still hits → no new file append. + { + XNNWeightsCache cache; + cache.set_packed_cache_path(cache_path); + cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get()); + BuildAndRunGraphWithWeightsCache( + cache, + batches, + input_channels, + output_channels, + input.data(), + output.data()); + cache.delete_packed_data(cache.get_packed_data_names()); + cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get()); + BuildAndRunGraphWithWeightsCache( + cache, + batches, + input_channels, + output_channels, + input.data(), + output.data()); + ASSERT_EQ(cache.save_packed_index(), Error::Ok); + } + { + struct stat st {}; + ASSERT_EQ(::stat(cache_path.c_str(), &st), 0); + EXPECT_EQ(st.st_size, size_after_first_save); + } + + ::unlink(cache_path.c_str()); +} + +// After loading from disk, delete_packed_data must skip from_load entries +// so the next init still hits the cache. Bug would re-pack weights from +// scratch each time the backend destroys + recreates a delegate. 
+TEST_F(XNNWeightsCacheTest, DeletePackedData_OnFromLoadEntries_PreservesMetadata) { + std::string cache_path = std::string("/tmp/xnn_weights_cache_fromload_") + + std::to_string(::getpid()) + ".packed_cache"; + ::unlink(cache_path.c_str()); + + std::vector batches{1, 2, 3}; + size_t input_channels = 3; + size_t output_channels = 4; + size_t num_batches = 1 * 2 * 3; + size_t padding = 32; + std::vector input(num_batches * input_channels + padding, 1.0f); + std::vector output(num_batches * output_channels, 0.0f); + + // Seed the cache file. + { + XNNWeightsCache cache; + cache.set_packed_cache_path(cache_path); + cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get()); + BuildAndRunGraphWithWeightsCache( + cache, + batches, + input_channels, + output_channels, + input.data(), + output.data()); + ASSERT_EQ(cache.save_packed_index(), Error::Ok); + } + + // Fresh instance: all populated entries are from_load=true. + XNNWeightsCache cache; + cache.set_packed_cache_path(cache_path); + cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get()); + size_t loaded_count = cache.get_packed_data_names().size(); + ASSERT_GT(loaded_count, 0u); + + BuildAndRunGraphWithWeightsCache( + cache, + batches, + input_channels, + output_channels, + input.data(), + output.data()); + + // Repeated delete must never erase from_load entries — contrast with + // ReusePackedWeights where two delete calls drop the count to 0. + for (int i = 0; i < 5; ++i) { + cache.delete_packed_data(cache.get_packed_data_names()); + EXPECT_EQ(cache.get_packed_data_names().size(), loaded_count) + << "from_load entries should survive delete; iteration " << i; + } + + ::unlink(cache_path.c_str()); +} + +// A model with multiple PTE/method delegates initializes the cache +// sequentially before any one is destroyed. The second PTE's init must +// see the first PTE's packed entries already in the map → look_up hits, +// no new reserve_space, file does not grow per PTE. 
+TEST_F(XNNWeightsCacheTest, MultiplePTEsInSameInstance_NoFileGrowth) { + std::string cache_path = std::string("/tmp/xnn_weights_cache_multipte_") + + std::to_string(::getpid()) + ".packed_cache"; + ::unlink(cache_path.c_str()); + + std::vector batches{1, 2, 3}; + size_t input_channels = 3; + size_t output_channels = 4; + size_t num_batches = 1 * 2 * 3; + size_t padding = 32; + std::vector input(num_batches * input_channels + padding, 1.0f); + std::vector out_pte1(num_batches * output_channels, 0.0f); + std::vector out_pte2(num_batches * output_channels, 0.0f); + + XNNWeightsCache cache; + cache.set_packed_cache_path(cache_path); + + // PTE 1: fresh pack + save. + cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get()); + BuildAndRunGraphWithWeightsCache( + cache, + batches, + input_channels, + output_channels, + input.data(), + out_pte1.data()); + ASSERT_EQ(cache.save_packed_index(), Error::Ok); + + off_t size_after_pte1 = 0; + { + struct stat st {}; + ASSERT_EQ(::stat(cache_path.c_str(), &st), 0); + size_after_pte1 = st.st_size; + ASSERT_GT(size_after_pte1, 0); + } + size_t names_after_pte1 = cache.get_packed_data_names().size(); + ASSERT_GT(names_after_pte1, 0u); + + // PTE 2: sibling delegate, NO destroy between. look_up must hit the + // entry from PTE 1 → no new reserve_space → file size unchanged after + // save. + cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get()); + BuildAndRunGraphWithWeightsCache( + cache, + batches, + input_channels, + output_channels, + input.data(), + out_pte2.data()); + ASSERT_EQ(cache.save_packed_index(), Error::Ok); + + { + struct stat st {}; + ASSERT_EQ(::stat(cache_path.c_str(), &st), 0); + EXPECT_EQ(st.st_size, size_after_pte1) + << "PTE 2 with same weights must not append to the cache file"; + } + EXPECT_EQ(cache.get_packed_data_names().size(), names_after_pte1); + + // Both PTEs produced the same output for the same input (correctness). 
+ EXPECT_EQ(out_pte1, out_pte2); + + // PTE 3: third sibling. Still no growth. + std::vector out_pte3(num_batches * output_channels, 0.0f); + cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get()); + BuildAndRunGraphWithWeightsCache( + cache, + batches, + input_channels, + output_channels, + input.data(), + out_pte3.data()); + ASSERT_EQ(cache.save_packed_index(), Error::Ok); + { + struct stat st {}; + ASSERT_EQ(::stat(cache_path.c_str(), &st), 0); + EXPECT_EQ(st.st_size, size_after_pte1); + } + EXPECT_EQ(out_pte3, out_pte1); + + ::unlink(cache_path.c_str()); +} + +// save_packed_index must be a true no-op when no new reserve_space happened +// since the last save — same content but writing would still bump mtime, +// making the cache file look modified on every model load. +TEST_F(XNNWeightsCacheTest, SavePackedIndex_NoNewReserves_IsNoOp) { + std::string cache_path = std::string("/tmp/xnn_weights_cache_noop_") + + std::to_string(::getpid()) + ".packed_cache"; + ::unlink(cache_path.c_str()); + + std::vector batches{1, 2, 3}; + size_t input_channels = 3; + size_t output_channels = 4; + size_t num_batches = 1 * 2 * 3; + size_t padding = 32; + std::vector input(num_batches * input_channels + padding, 1.0f); + std::vector output(num_batches * output_channels, 0.0f); + + // Seed cache + first save. + XNNWeightsCache cache; + cache.set_packed_cache_path(cache_path); + cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get()); + BuildAndRunGraphWithWeightsCache( + cache, + batches, + input_channels, + output_channels, + input.data(), + output.data()); + ASSERT_EQ(cache.save_packed_index(), Error::Ok); + + struct stat st_before {}; + ASSERT_EQ(::stat(cache_path.c_str(), &st_before), 0); + + // Sleep so mtime would tick if a write actually happened. + ::sleep(1); + + // Second save with no intervening reserve_space → no-op short-circuit. 
+ ASSERT_EQ(cache.save_packed_index(), Error::Ok); + + struct stat st_after {}; + ASSERT_EQ(::stat(cache_path.c_str(), &st_after), 0); + EXPECT_EQ(st_before.st_size, st_after.st_size); + EXPECT_EQ(st_before.st_mtime, st_after.st_mtime); + + ::unlink(cache_path.c_str()); +} + #endif From 7739ca2bdec506610d1027650b16466fbb4cb970 Mon Sep 17 00:00:00 2001 From: Longfang Zhao Date: Tue, 9 Jun 2026 16:42:26 -0700 Subject: [PATCH 2/2] Per-entry seed in XNNPACK weights cache for XNNPACK-upgrade invalidation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: XNNPACK exposes `xnn_weights_cache_look_up_key.seed` — a per-ukernel value that XNNPACK guarantees is consistent across runs of the same ukernel and changes whenever a ukernel implementation changes. Store this seed per cache entry so a stale cached packing produced by an old XNNPACK ukernel is rejected after upgrade, instead of being handed back to a newer ukernel that expects a different layout. Changes: - `PackedDataMeta` gains `uint32_t seed{0}`. - `look_up` rejects (returns `SIZE_MAX`) when a name hit has a stored seed that doesn't match `cache_key->seed`. This forces `look_up_or_insert` to re-pack with the current ukernel and avoids the slow `memcmp` path catching it later. - `look_up_or_insert` records `cache_key->seed` on insert. - On-disk index entry layout extended to `[name_len:u32][name][file_offset:u64][data_size:u64][seed:u32]` (was 16 bytes after the name, now 20). - `load_packed_cache` reads the per-entry seed and bumps the trailing bytes bound check accordingly. - `kCacheVersion` bumped 1 → 2 so existing v1 files (which carry no seed) are rejected at load instead of being loaded with `seed=0` and mismatching every fresh `look_up`. Cleanup of orphaned in-memory and on-disk entries left by an invalidated look-up is a follow-up — this diff only adds the detection. 
Differential Revision: D108082431 --- backends/xnnpack/runtime/XNNWeightsCache.cpp | 23 +- backends/xnnpack/runtime/XNNWeightsCache.h | 13 +- .../test/runtime/test_xnn_weights_cache.cpp | 227 ++++++++++++++++++ 3 files changed, 260 insertions(+), 3 deletions(-) diff --git a/backends/xnnpack/runtime/XNNWeightsCache.cpp b/backends/xnnpack/runtime/XNNWeightsCache.cpp index 9eab694d529..f75ff8adb94 100644 --- a/backends/xnnpack/runtime/XNNWeightsCache.cpp +++ b/backends/xnnpack/runtime/XNNWeightsCache.cpp @@ -354,6 +354,19 @@ size_t XNNWeightsCache::look_up( if (packed_weight_entry == context->name_to_packed_data_metadata_.end()) { return SIZE_MAX; } + // XNNPACK upgrade detection: a ukernel whose implementation changed + // produces a different seed. Reject the cached entry so look_up_or_insert + // falls through to re-pack with the current ukernel. + if (packed_weight_entry->second.seed != cache_key->seed) { + ET_LOG( + Info, + "look_up: seed mismatch for '%s' (cached=0x%08x, current=0x%08x); " + "treating as miss for re-pack", + weight_bias_name.c_str(), + packed_weight_entry->second.seed, + cache_key->seed); + return SIZE_MAX; + } packed_weight_entry->second.in_current_runtime = true; return packed_weight_entry->second.offset; } @@ -480,6 +493,7 @@ size_t XNNWeightsCache::look_up_or_insert( packed_data_metadata.ref_count = 0; // ref_count is only incremented after finalizing for runtime packed_data_metadata.in_current_runtime = true; + packed_data_metadata.seed = cache_key->seed; context->name_to_packed_data_metadata_[weight_bias_name] = packed_data_metadata; } else { @@ -525,7 +539,7 @@ Error XNNWeightsCache::save_packed_index() { std::vector buf; uint32_t entry_count = 0; - // Index entry: [name_len:u32][name][file_offset:u64][data_size:u64] + // Index entry: [name_len:u32][name][file_offset:u64][data_size:u64][seed:u32] for (const auto& [name, meta] : name_to_packed_data_metadata_) { void* ptr = packed_data_ptrs_[meta.offset]; auto it = 
ptr_to_file_offset_.find(ptr); @@ -537,6 +551,7 @@ Error XNNWeightsCache::save_packed_index() { buf.insert(buf.end(), name.begin(), name.end()); append_le(buf, static_cast(it->second)); append_le(buf, static_cast(meta.data_size)); + append_le(buf, meta.seed); } // Footer: [index_start:u64][entry_count:u32][magic:u32][version:u32] @@ -636,7 +651,8 @@ bool XNNWeightsCache::load_packed_cache() { for (uint32_t i = 0; i < entry_count && cursor + 4 <= end; ++i) { uint32_t name_len = read_le(cursor); cursor += 4; - if (cursor + name_len + 16 > end) { + // [file_offset:u64][data_size:u64][seed:u32] = 20 bytes + if (cursor + name_len + 20 > end) { break; } std::string name(reinterpret_cast(cursor), name_len); @@ -645,6 +661,8 @@ bool XNNWeightsCache::load_packed_cache() { cursor += 8; uint64_t data_size = read_le(cursor); cursor += 8; + uint32_t seed = read_le(cursor); + cursor += 4; // Bounds check: the entry's bytes must lie entirely inside the // packed-data region @@ -678,6 +696,7 @@ bool XNNWeightsCache::load_packed_cache() { meta.ref_count = 0; meta.in_current_runtime = false; meta.from_load = true; + meta.seed = seed; name_to_packed_data_metadata_[name] = meta; } diff --git a/backends/xnnpack/runtime/XNNWeightsCache.h b/backends/xnnpack/runtime/XNNWeightsCache.h index d6910b31ce3..f00713e5a1c 100644 --- a/backends/xnnpack/runtime/XNNWeightsCache.h +++ b/backends/xnnpack/runtime/XNNWeightsCache.h @@ -44,6 +44,13 @@ struct PackedDataMeta { // cache_loaded_ is auto-invalidated so the next init re-enters // load_packed_cache and reuses the saved file instead of re-packing. bool from_load{false}; + // Per-ukernel seed from xnn_weights_cache_look_up_key.seed. XNNPACK + // guarantees this is consistent across runs of the same ukernel; when + // XNNPACK upgrades and a ukernel implementation changes, the seed + // changes. 
look_up rejects entries whose stored seed doesn't match + // the caller's seed so that stale cache entries don't deliver wrongly + // packed weights to a newer ukernel. + uint32_t seed{0}; }; class XNNWeightsCache { @@ -151,7 +158,11 @@ class XNNWeightsCache { private: static constexpr uint32_t kCacheMagic = 0x58505743; // "XPWC" - static constexpr uint32_t kCacheVersion = 1; + // Bump when the on-disk layout (footer or per-entry record) changes. + // v2: per-entry seed added — old v1 files don't carry seeds and would + // load with seed=0, mismatching every fresh look_up with a non-zero + // seed, causing a stampede of re-packs. Reject v1 outright. + static constexpr uint32_t kCacheVersion = 2; bool load_packed_cache(); void reset_for_fresh_write(); void release_entry(void* packed_data_ptr); diff --git a/backends/xnnpack/test/runtime/test_xnn_weights_cache.cpp b/backends/xnnpack/test/runtime/test_xnn_weights_cache.cpp index 4639d96152d..80b19865024 100644 --- a/backends/xnnpack/test/runtime/test_xnn_weights_cache.cpp +++ b/backends/xnnpack/test/runtime/test_xnn_weights_cache.cpp @@ -699,6 +699,233 @@ TEST_F(XNNWeightsCacheTest, MultiplePTEsInSameInstance_NoFileGrowth) { ::unlink(cache_path.c_str()); } +namespace { + +// Little-endian decode helpers matching XNNWeightsCache's on-disk format. 
+uint32_t read_le_u32(const uint8_t* p) {
+  uint32_t v = 0;
+  for (int i = 0; i < 4; ++i) {
+    v |= static_cast<uint32_t>(p[i]) << (8 * i);
+  }
+  return v;
+}
+uint64_t read_le_u64(const uint8_t* p) {
+  uint64_t v = 0;
+  for (int i = 0; i < 8; ++i) {
+    v |= static_cast<uint64_t>(p[i]) << (8 * i);
+  }
+  return v;
+}
+// Little-endian encoders, used to hand-build or patch cache files in tests.
+void write_le_u32(std::ostream& f, uint32_t v) {
+  for (int i = 0; i < 4; ++i) {
+    char b = static_cast<char>((v >> (8 * i)) & 0xff);
+    f.write(&b, 1);
+  }
+}
+void write_le_u64(std::ostream& f, uint64_t v) {
+  for (int i = 0; i < 8; ++i) {
+    char b = static_cast<char>((v >> (8 * i)) & 0xff);
+    f.write(&b, 1);
+  }
+}
+
+} // namespace
+
+// A cache file written by older code (kCacheVersion=1) carries no per-entry
+// seed field. Loading such a file with the current schema would yield
+// entries with seed=0 and mismatch every fresh look_up. The version bump
+// must reject it outright so the next init re-packs from scratch.
+TEST_F(XNNWeightsCacheTest, LoadPackedCache_RejectsV1Format) {
+  std::string cache_path = std::string("/tmp/xnn_weights_cache_v1_") +
+      std::to_string(::getpid()) + ".packed_cache";
+  ::unlink(cache_path.c_str());
+
+  // v1 layout: 64 bytes of dummy data, two seedless index records
+  // ([name_len:u32][name][file_offset:u64][data_size:u64]) that each cover
+  // half of the data region, then the 20-byte footer with version=1. The
+  // index is deliberately non-empty: with entry_count=0 the "no entries"
+  // check below would hold even for a loader that ignored the version field.
+  {
+    std::ofstream f(cache_path, std::ios::binary);
+    std::vector<char> data(64, 0);
+    f.write(data.data(), data.size());
+    const std::string names[] = {"w0", "w1"};
+    for (uint64_t i = 0; i < 2; ++i) {
+      write_le_u32(f, static_cast<uint32_t>(names[i].size())); // name_len
+      f.write(names[i].data(), names[i].size()); // name
+      write_le_u64(f, 32 * i); // file_offset
+      write_le_u64(f, 32); // data_size
+    }
+    write_le_u64(f, 64); // index_start
+    write_le_u32(f, 2); // entry_count
+    write_le_u32(f, 0x58505743); // kCacheMagic "XPWC"
+    write_le_u32(f, 1); // OLD kCacheVersion = 1
+    // A fixture that failed to write would make the test pass vacuously.
+    ASSERT_TRUE(f.good());
+  }
+
+  XNNWeightsCache cache;
+  cache.set_packed_cache_path(cache_path);
+  Error err =
+      cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get());
+  ASSERT_EQ(err, Error::Ok);
+  // Version mismatch → load_packed_cache returned false → no entries.
+  EXPECT_EQ(cache.get_packed_data_names().size(), 0u);
+
+  ::unlink(cache_path.c_str());
+}
+
+// Verify save_packed_index writes the schema version 2 footer and embeds a
+// 4-byte seed field in each entry record.
Guards against future refactors
+// silently dropping the seed write.
+TEST_F(XNNWeightsCacheTest, SavePackedIndex_EntryFormatIncludesSeed) {
+  std::string cache_path = std::string("/tmp/xnn_weights_cache_format_") +
+      std::to_string(::getpid()) + ".packed_cache";
+  ::unlink(cache_path.c_str());
+
+  std::vector<size_t> batches{1, 2, 3};
+  size_t input_channels = 3;
+  size_t output_channels = 4;
+  size_t num_batches = 1 * 2 * 3;
+  size_t padding = 32;
+  std::vector<float> input(num_batches * input_channels + padding, 1.0f);
+  std::vector<float> output(num_batches * output_channels, 0.0f);
+
+  {
+    XNNWeightsCache cache;
+    cache.set_packed_cache_path(cache_path);
+    cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get());
+    BuildAndRunGraphWithWeightsCache(
+        cache,
+        batches,
+        input_channels,
+        output_channels,
+        input.data(),
+        output.data());
+    ASSERT_EQ(cache.save_packed_index(), Error::Ok);
+  }
+
+  // Parse footer at file_size - 20.
+  std::ifstream f(cache_path, std::ios::binary);
+  ASSERT_TRUE(f.is_open());
+  f.seekg(0, std::ios::end);
+  size_t file_size = f.tellg();
+  ASSERT_GE(file_size, 24u);
+
+  uint8_t footer[20];
+  f.seekg(file_size - 20);
+  f.read(reinterpret_cast<char*>(footer), 20);
+  uint32_t magic = read_le_u32(footer + 12);
+  uint32_t version = read_le_u32(footer + 16);
+  EXPECT_EQ(magic, 0x58505743u);
+  EXPECT_EQ(version, 2u);
+
+  // Walk every record: [name_len:u32][name][file_offset:u64][data_size:u64]
+  // [seed:u32]. The walk lands exactly on the footer only if each record
+  // carries the seed; without it, the bytes at the seed offset would be the
+  // next record's name_len or the footer, so a non-zero peek proves nothing.
+  const uint64_t footer_start = file_size - 20;
+  const uint64_t index_start = read_le_u64(footer);
+  const uint32_t entry_count = read_le_u32(footer + 8);
+  ASSERT_GT(entry_count, 0u);
+
+  uint64_t cursor = index_start;
+  for (uint32_t i = 0; i < entry_count; ++i) {
+    ASSERT_LE(cursor + 4, footer_start) << "record " << i << " overruns";
+    uint8_t name_len_buf[4];
+    f.seekg(cursor);
+    f.read(reinterpret_cast<char*>(name_len_buf), 4);
+    ASSERT_TRUE(f.good());
+    cursor += uint64_t{4} + read_le_u32(name_len_buf) + 8 + 8 + 4;
+  }
+  EXPECT_EQ(cursor, footer_start);
+
+  ::unlink(cache_path.c_str());
+}
+
+// After loading a cache file whose entry seed has been tampered with
+// (simulating an XNNPACK upgrade where the same ukernel now emits a
+// different seed), the next inference must produce correct output. Either
+// look_up's seed check or look_up_or_insert's memcmp fallback drives the
+// re-pack; this test exercises the end-to-end safety net.
+TEST_F(XNNWeightsCacheTest, LoadPackedCache_CorruptedSeed_ProducesCorrectOutput) {
+  std::string cache_path = std::string("/tmp/xnn_weights_cache_badseed_") +
+      std::to_string(::getpid()) + ".packed_cache";
+  ::unlink(cache_path.c_str());
+
+  std::vector<size_t> batches{1, 2, 3};
+  size_t input_channels = 3;
+  size_t output_channels = 4;
+  size_t num_batches = 1 * 2 * 3;
+  size_t padding = 32;
+  std::vector<float> input(num_batches * input_channels + padding, 1.0f);
+
+  // Baseline: fresh pack, heap-only, no cache file.
+  std::vector<float> baseline(num_batches * output_channels, 0.0f);
+  {
+    XNNWeightsCache cache;
+    cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get());
+    BuildAndRunGraphWithWeightsCache(
+        cache,
+        batches,
+        input_channels,
+        output_channels,
+        input.data(),
+        baseline.data());
+  }
+
+  // Write a valid cache file.
+  {
+    XNNWeightsCache cache;
+    cache.set_packed_cache_path(cache_path);
+    cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get());
+    std::vector<float> out(num_batches * output_channels, 0.0f);
+    BuildAndRunGraphWithWeightsCache(
+        cache,
+        batches,
+        input_channels,
+        output_channels,
+        input.data(),
+        out.data());
+    ASSERT_EQ(cache.save_packed_index(), Error::Ok);
+  }
+
+  // Corrupt the seed field of the first entry to a value no real ukernel
+  // would emit (0xDEADBEEF).
+  {
+    std::fstream f(cache_path, std::ios::binary | std::ios::in | std::ios::out);
+    ASSERT_TRUE(f.is_open());
+    f.seekg(0, std::ios::end);
+    size_t file_size = f.tellg();
+    ASSERT_GE(file_size, 24u);
+
+    uint8_t footer_buf[20];
+    f.seekg(file_size - 20);
+    f.read(reinterpret_cast<char*>(footer_buf), 20);
+    uint64_t index_start = read_le_u64(footer_buf);
+    uint32_t entry_count = read_le_u32(footer_buf + 8);
+    ASSERT_GT(entry_count, 0u);
+
+    f.seekg(index_start);
+    uint8_t name_len_buf[4];
+    f.read(reinterpret_cast<char*>(name_len_buf), 4);
+    uint32_t name_len = read_le_u32(name_len_buf);
+
+    // Seeds are little-endian on disk: use the shared encoder, not host order.
+    f.seekp(index_start + 4 + name_len + 8 + 8);
+    write_le_u32(f, 0xDEADBEEFu);
+    f.close();
+    ASSERT_FALSE(f.fail()) << "could not patch seed in " << cache_path;
+  }
+
+  // Reload and run. Output must still match baseline.
+  std::vector<float> after_corruption(num_batches * output_channels, 0.0f);
+  {
+    XNNWeightsCache cache;
+    cache.set_packed_cache_path(cache_path);
+    cache.initialize_for_runtime(memory_allocator_.get(), data_map_.get());
+    ASSERT_GT(cache.get_packed_data_names().size(), 0u);
+    BuildAndRunGraphWithWeightsCache(
+        cache,
+        batches,
+        input_channels,
+        output_channels,
+        input.data(),
+        after_corruption.data());
+  }
+
+  EXPECT_EQ(after_corruption, baseline);
+
+  ::unlink(cache_path.c_str());
+}
+
 // save_packed_index must be a true no-op when no new reserve_space happened
 // since the last save — same content but writing would still bump mtime,
 // making the cache file look modified on every model load.