From 3577904ad3023ad169ae3f4c400c75f8937df87e Mon Sep 17 00:00:00 2001 From: Goober5000 Date: Sun, 10 May 2026 21:16:06 -0400 Subject: [PATCH] Harden 7-Zip extraction and LZ41 decompression Closes two unfinished items left by recent security commits. 7-Zip path traversal (gap left by e20a3ed): Commit e20a3ed added per-entry IsSubPath validation to the SharpCompress extraction loops in KnUtils, but the 7-Zip path was unguarded. Both Decompressor.SevenZip mode and Auto mode (which falls back to 7za.exe after a SharpCompress failure) shelled out via SevenZipConsoleWrapper with -aoa -y and let 7-Zip extract any entry, including ../traversal and absolute paths. - New SevenZipConsoleWrapper.ListArchiveEntries enumerates entries via "7za l -slt -sccUTF-8" and parses the key=value blocks. Symbolic Link and Hard Link targets are returned alongside Path so the caller can validate them too. - DecompressFile now calls a new ValidateArchiveEntries helper before each extraction pass. Any entry that fails IsSubPath aborts extraction with no files written. - The .tar.gz two-pass case validates both the outer .tar.gz AND the intermediate .tar between the two Run() calls; if the inner tar fails validation the intermediate is deleted before returning false. - StandardOutputEncoding = UTF-8 plus the -sccUTF-8 flag pin the listing output to UTF-8. Without this, on Windows the OEM codepage decoding could mismatch the bytes 7za writes during extraction, allowing a crafted entry to bypass the post-list validation. LZ41 decompressor hardening (gaps left by 5eb206e): Commit 5eb206e added floor checks for numOffsets, length, and blockSize plus an endBlock bound, but three vectors remained: the offset-table entries themselves were unchecked (allowed arbitrary inputStream.Position seeks - sibling-file disclosure for VP archives), blockSize had no upper bound (a hostile footer could trigger gigabyte allocations per block), and exceptions in the worker thread were silently swallowed (caller proceeded with uncompressedSize = 0 on a corrupted footer). - Each offsets[i] read is now validated against the compressed-data region and required to be strictly greater than the previous entry (matches the compressor's strictly-monotonic emission starting at offset 4). - blockSize is capped at 4 MB to match the compressor's Max4MB enum. - cmpBytes (offsets[i+1] - offsets[i]) is checked at decode time against LZ4_compressBound(blockSize); offset-table validation alone is insufficient because the gap can legitimately fit within compressedFileSize while exceeding the cmpBuf allocation. - VPCompression.DecompressStream now wraps the LZ41 worker thread call in try/catch and rethrows via ExceptionDispatchInfo to preserve the original stack trace. The raw Thread (with explicit 1 MB stack) is kept since LZ4's recursive native decode needs it on platforms where Task.Run can't guarantee the stack size. Co-Authored-By: Claude Opus 4.7 (1M context) --- IonKiwi.lz4/LZ4RawUtility.cs | 21 ++- Knossos.NET/Classes/SevenZipConsoleWrapper.cs | 136 +++++++++++++++++- VP.NET/VPCompression.cs | 14 +- 3 files changed, 167 insertions(+), 4 deletions(-) diff --git a/IonKiwi.lz4/LZ4RawUtility.cs b/IonKiwi.lz4/LZ4RawUtility.cs index e5a9219c..95da0172 100644 --- a/IonKiwi.lz4/LZ4RawUtility.cs +++ b/IonKiwi.lz4/LZ4RawUtility.cs @@ -307,16 +307,30 @@ public static unsafe int LZ41_Stream_Decompress(Stream inputStream, Stream outpu inputStream.Position += 4; /* Block Size */ + //Compressor's enum maxes out at Max4MB (see _blockSize values above). Cap at the same value to + //prevent a hostile footer from triggering a multi-GB allocation per block. + const int MaxBlockSize = 4 * 1024 * 1024; int blockSize = br.ReadInt32(); - if (blockSize <= 0) + if (blockSize <= 0 || blockSize > MaxBlockSize) throw new Exception($"Invalid block size in LZ41 footer: {blockSize}"); /* Read the offsets tail */ inputStream.Position = initialPosition + (compressedFileSize.Value - 12 - (numOffsets * 4)); + //Validate each offset as it's read. Compressor emits strictly-monotonic offsets starting at 4 + //(post-header), so out-of-range or non-increasing entries are tampering and would otherwise drive + //arbitrary inputStream.Position seeks (sibling-file disclosure inside a VP archive). + int maxValidOffset = compressedFileSize.Value - 12 - (numOffsets * 4); + int prevOffset = -1; for(var i=0; i maxValidOffset) + throw new Exception($"LZ41: offset table entry out of range at index {i}: {off}"); + if (off <= prevOffset) + throw new Exception($"LZ41: offset table not strictly monotonic at index {i}: {off} <= {prevOffset}"); + offsets.Add(off); + prevOffset = off; } /* The blocks [currentBlock to endBlock] contain the data we want */ @@ -337,6 +351,9 @@ public static unsafe int LZ41_Stream_Decompress(Stream inputStream, Stream outpu { /* The difference in offsets is the size of the block */ int cmpBytes = offsets[currentBlock + 1] - offsets[currentBlock]; + //Even with strictly-monotonic in-range offsets, the gap can still exceed cmpBuf's allocation. + if (cmpBytes < 0 || cmpBytes > lz4.LZ4_compressBound(blockSize)) + throw new Exception($"LZ41: block {currentBlock} compressed size out of range: {cmpBytes}"); byte[] cmpBuf = new byte[lz4.LZ4_compressBound(blockSize)]; inputStream.Read(cmpBuf,0, cmpBytes); fixed (byte* cmpPtr = cmpBuf) diff --git a/Knossos.NET/Classes/SevenZipConsoleWrapper.cs b/Knossos.NET/Classes/SevenZipConsoleWrapper.cs index 8aa224d5..69a207b5 100644 --- a/Knossos.NET/Classes/SevenZipConsoleWrapper.cs +++ b/Knossos.NET/Classes/SevenZipConsoleWrapper.cs @@ -1,7 +1,9 @@ using Avalonia.Platform; using System; +using System.Collections.Generic; using System.Diagnostics; using System.IO; +using System.Text; using System.Text.RegularExpressions; using System.Threading; using System.Threading.Tasks; @@ -54,7 +56,9 @@ public SevenZipConsoleWrapper(Action? progressCallback = null, Cancellation } /// - /// Decompress a .zip, .7z or .tar.gz file to a folder using the 7zip cmdline tool + /// Decompress a .zip, .7z or .tar.gz file to a folder using the 7zip cmdline tool. + /// Each archive (and the intermediate .tar for .tar.gz inputs) is enumerated first via + /// ListArchiveEntries and any entry that escapes destFolder aborts extraction with no files written. /// /// /// @@ -66,6 +70,9 @@ public async Task DecompressFile(string sourceFile, string destFolder, boo if (disposed) throw new ObjectDisposedException("This object was already disposed."); + if (!await ValidateArchiveEntries(sourceFile, destFolder)) + return false; + bool isTarGz = false; if (sourceFile.EndsWith(".tar.gz", StringComparison.OrdinalIgnoreCase)) @@ -83,6 +90,11 @@ public async Task DecompressFile(string sourceFile, string destFolder, boo if (isTarGz && result) { sourceFile = Path.Combine(destFolder, Path.GetFileName(sourceFile).Replace(".tar.gz", ".tar")); + if (!await ValidateArchiveEntries(sourceFile, destFolder)) + { + try { File.Delete(sourceFile); } catch { } + return false; + } if (extractFullPath) cmdline = "x "; else @@ -99,6 +111,128 @@ public async Task DecompressFile(string sourceFile, string destFolder, boo return result; } + /// + /// Verifies every entry in the archive (plus any symlink/hardlink targets) resolves inside destFolder. + /// Returns false if any entry escapes, if the archive can't be enumerated, or on subprocess failure. + /// + private async Task ValidateArchiveEntries(string archivePath, string destFolder) + { + var entries = await ListArchiveEntries(archivePath); + if (entries == null) + { + Log.Add(Log.LogSeverity.Error, "SevenZipConsoleWrapper.ValidateArchiveEntries", "Refusing to extract: could not enumerate entries of " + archivePath); + return false; + } + foreach (var entry in entries) + { + //Mirror the SharpCompress normalization in KnUtils.DecompressFileSharpCompress so backslashes + //in archive entries are treated as separators on POSIX (where Path.GetFullPath would otherwise + //treat them as literal filename characters). + var normalized = entry.Replace('\\', '/').Replace('/', Path.DirectorySeparatorChar); + if (string.IsNullOrEmpty(normalized) || !KnUtils.IsSubPath(destFolder, normalized)) + { + Log.Add(Log.LogSeverity.Error, "SevenZipConsoleWrapper.ValidateArchiveEntries", "Refusing to extract: archive entry escapes destination: " + entry); + return false; + } + } + return true; + } + + /// + /// Enumerates entry paths in an archive without extracting, by running "7za l -slt -sccUTF-8". + /// Returns the list of entry paths plus any Symbolic Link / Hard Link targets. Returns null on + /// subprocess failure (caller should treat as "cannot validate, refuse to extract"). + /// + public async Task?> ListArchiveEntries(string archivePath) + { + if (disposed) + throw new ObjectDisposedException("This object was already disposed."); + + if (pathToConsoleExecutable == null) + { + Log.Add(Log.LogSeverity.Error, "SevenZipConsoleWrapper.ListArchiveEntries", "7z executable not available"); + return null; + } + + var stdoutLines = new List(); + int exitCode = -1; + + try + { + using (var listProcess = new Process()) + { + listProcess.StartInfo.FileName = pathToConsoleExecutable; + listProcess.StartInfo.Arguments = "l -slt -sccUTF-8 \"" + archivePath + "\""; + listProcess.StartInfo.UseShellExecute = false; + listProcess.StartInfo.RedirectStandardOutput = true; + listProcess.StartInfo.RedirectStandardError = true; + listProcess.StartInfo.CreateNoWindow = true; + //Pin UTF-8 — without this, Windows OEM codepage decoding can mismatch the bytes 7za writes + //during extraction, allowing a crafted entry to bypass the post-list validation. + listProcess.StartInfo.StandardOutputEncoding = Encoding.UTF8; + listProcess.StartInfo.StandardErrorEncoding = Encoding.UTF8; + listProcess.OutputDataReceived += (s, e) => { if (e.Data != null) stdoutLines.Add(e.Data); }; + listProcess.ErrorDataReceived += (s, e) => { if (e.Data != null) Log.Add(Log.LogSeverity.Warning, "SevenZipConsoleWrapper.ListArchiveEntries", e.Data); }; + listProcess.Start(); + listProcess.BeginOutputReadLine(); + listProcess.BeginErrorReadLine(); + await listProcess.WaitForExitAsync(); + exitCode = listProcess.ExitCode; + } + } + catch (Exception ex) + { + Log.Add(Log.LogSeverity.Error, "SevenZipConsoleWrapper.ListArchiveEntries", ex); + return null; + } + + if (exitCode != 0) + { + Log.Add(Log.LogSeverity.Error, "SevenZipConsoleWrapper.ListArchiveEntries", "7z list returned exit code " + exitCode + " for " + archivePath); + return null; + } + + //Output format: blocks of "Key = Value" lines separated by blank lines. The first block is the + //archive's own info (Path = , no Size field). Each subsequent block is one entry. + var entries = new List(); + var currentBlock = new Dictionary(); + + void FlushBlock() + { + if (currentBlock.Count > 0) + { + //Skip the archive-info block (no Size field) and any header blocks without a Path. + if (currentBlock.ContainsKey("Size") && currentBlock.TryGetValue("Path", out var path) && !string.IsNullOrEmpty(path)) + { + entries.Add(path); + if (currentBlock.TryGetValue("Symbolic Link", out var symlink) && !string.IsNullOrEmpty(symlink)) + entries.Add(symlink); + if (currentBlock.TryGetValue("Hard Link", out var hardlink) && !string.IsNullOrEmpty(hardlink)) + entries.Add(hardlink); + } + currentBlock.Clear(); + } + } + + foreach (var line in stdoutLines) + { + if (string.IsNullOrEmpty(line)) + { + FlushBlock(); + continue; + } + var idx = line.IndexOf(" = ", StringComparison.Ordinal); + if (idx <= 0) + continue; + var key = line.Substring(0, idx); + var value = line.Substring(idx + 3); + currentBlock[key] = value; + } + FlushBlock(); + + return entries; + } + /// /// Compress a folder into a .7z file with max LZMA2 compression /// destFile must be pass with the ".7z" extension. diff --git a/VP.NET/VPCompression.cs b/VP.NET/VPCompression.cs index 4cbe6d93..8a5b5b8d 100644 --- a/VP.NET/VPCompression.cs +++ b/VP.NET/VPCompression.cs @@ -101,13 +101,25 @@ public static int DecompressStream(Stream input, Stream output, CompressionHeade switch (header) { case CompressionHeader.LZ41: + //The 1 MB stack on the worker thread is needed for LZ4's recursive native decode; Task.Run + //can't guarantee it across all platforms. Marshal exceptions across the thread boundary via + //ExceptionDispatchInfo so a corrupted footer doesn't silently leave uncompressedSize = 0. + System.Runtime.ExceptionServices.ExceptionDispatchInfo? edi = null; var cpThread = new Thread(() => { Thread.CurrentThread.IsBackground = true; - uncompressedSize = LZ4RawUtility.LZ41_Stream_Decompress(input, output, compressedFileSize); + try + { + uncompressedSize = LZ4RawUtility.LZ41_Stream_Decompress(input, output, compressedFileSize); + } + catch (Exception ex) + { + edi = System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(ex); + } }, 1048576); cpThread.Start(); cpThread.Join(); + edi?.Throw(); break; }