diff --git a/.cursor/skills/memory-snapshot-report/SKILL.md b/.cursor/skills/memory-snapshot-report/SKILL.md new file mode 100644 index 0000000..d6dfd37 --- /dev/null +++ b/.cursor/skills/memory-snapshot-report/SKILL.md @@ -0,0 +1,51 @@ +--- +name: memory-snapshot-report +description: Generate and view Unity memory snapshot reports. Use when the user wants to analyze a Unity memory snapshot, export it to a database, or generate/view an HTML report. +--- + +# Memory Snapshot Report + +## When to use + +- User wants to analyze a Unity memory snapshot (`.snap` file). +- User wants to export a snapshot to a DuckDB or SQLite database. +- User wants to generate or view an HTML report from an exported snapshot database. + +## Prerequisites + +- .NET 10 SDK. +- Project path: **MemorySnapshotDataTools** is the project root; run commands from that directory. + +## Steps + +### 1. Export snapshot to database + +From the MemorySnapshotDataTools directory: + +```bash +dotnet run --project Cli/MemorySnapshotDataTools.Cli.csproj -- export path/to/snapshot.snap path/to/output.duckdb --validate minimal --verbose +``` + +- The two positional arguments are required: the input `.snap` file, then the output database path. +- Give the output path a `.duckdb` extension for DuckDB (recommended) or `.db` for SQLite. +- For SQLite add `--destination sqlite`. +- `--verbose` prints progress and timings (parse+extract vs. write). + +### 2. Generate HTML report + +```bash +dotnet run --project Cli/MemorySnapshotDataTools.Cli.csproj -- report path/to/output.duckdb --out report.html --verbose +``` + +- The positional argument is required: the database written by the export step. +- Omit `--out` to write to a temp file and open in the browser. +- Use `--title "My Report"` to set the report title. +- Report works with either DuckDB or SQLite databases produced by the export command. + +### 3. Optional + +- Open the generated HTML file or DB in the user’s preferred viewer. +- For ad-hoc SQL, use the same DB path; tables include `snapshot_info`, `native_objects`, `managed_objects`, `connections`, `native_roots`, `memory_regions`, `native_allocations`. + +## Domain + +- The tool supports **DuckDB** (default) and **SQLite**; report can be generated from either. 
+- The CLI reports **timings**: export shows parse+extract vs. write; report shows query vs. render vs. write. Use `--verbose` to see them. diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000..c57fe07 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,30 @@ +# Build and run unit tests on PRs targeting main and on pushes to main. + +name: CI + +on: + pull_request: + branches: [main] + push: + branches: [main] + +jobs: + build-and-test: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup .NET + uses: actions/setup-dotnet@v4 + with: + dotnet-version: '10.0.x' + + - name: Restore + run: dotnet restore MemorySnapshotDataTools.sln + + - name: Build + run: dotnet build MemorySnapshotDataTools.sln -c Release --no-restore + + - name: Test + run: dotnet test MemorySnapshotDataTools.sln -c Release --no-build -v normal diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e5cb2f1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,25 @@ +# Build / output +bin/ +obj/ +artifacts/ +publish/ + +# IDE +*.user +*.suo +.vs/ +.idea/ + +# OS +.DS_Store +Thumbs.db + +# Report artifacts +*.log +*.duckdb +*.db +*.html + +# Snapshots +*.snap +MemoryCaptures/ diff --git a/Cli/CliOptions.cs b/Cli/CliOptions.cs new file mode 100644 index 0000000..fe2a436 --- /dev/null +++ b/Cli/CliOptions.cs @@ -0,0 +1,53 @@ +using MemorySnapshotDataTools; + +namespace MemorySnapshotDataTools.Cli; + +internal enum CommandKind +{ + Export, + Report, +} + +/// +/// Parsed CLI options passed from System.CommandLine handlers to RunExport/RunReport. +/// +internal sealed class CliOptions +{ + public CommandKind Command { get; set; } = CommandKind.Export; + public string SnapshotPath { get; set; } = string.Empty; + public string OutputDbPath { get; set; } = string.Empty; + public string ReportDbPath { get; set; } = string.Empty; + public string? 
ReportOutputPath { get; set; } + public string ReportTitle { get; set; } = "Memory Snapshot Report"; + public int BatchSize { get; set; } = 2048; + public int QueueCapacity { get; set; } = 256; + public ValidationMode Validate { get; set; } = ValidationMode.Minimal; + public DestinationKind Destination { get; set; } = DestinationKind.DuckDb; + public bool Verbose { get; set; } +} + +internal sealed class ConsoleProgress : IProgressReporter +{ + private readonly bool _verbose; + private readonly object _lock = new(); + private DateTime _lastWrite = DateTime.MinValue; + + public ConsoleProgress(bool verbose) + { + _verbose = verbose; + } + + public void Report(string message, bool force = false) + { + if (!_verbose && !force) + return; + + lock (_lock) + { + if (!force && DateTime.UtcNow - _lastWrite < TimeSpan.FromMilliseconds(250)) + return; + _lastWrite = DateTime.UtcNow; + Console.WriteLine($"[{DateTime.UtcNow:O}] {message}"); + } + } +} diff --git a/Cli/CommandLineBuilder.cs b/Cli/CommandLineBuilder.cs new file mode 100644 index 0000000..5f0b9bf --- /dev/null +++ b/Cli/CommandLineBuilder.cs @@ -0,0 +1,163 @@ +using System.CommandLine; +using MemorySnapshotDataTools; + +namespace MemorySnapshotDataTools.Cli; + +/// +/// Builds the root command and subcommands (export, report) using System.CommandLine. 
+/// +internal static class CommandLineBuilder +{ + public static RootCommand Build(Func runExport, Func runReport) + { + var root = new RootCommand("Export Unity memory snapshots to DuckDB or SQLite and generate HTML reports."); + + // ---- export ---- + var exportCmd = new Command("export", "Export a .snap file to a DuckDB or SQLite database."); + var snapshotArg = new Argument("snapshot") + { + Description = "Path to the Unity memory snapshot (.snap) file.", + Arity = ArgumentArity.ExactlyOne, + }; + var outputArg = new Argument("output") + { + Description = "Path to the output database (.duckdb or .db).", + Arity = ArgumentArity.ExactlyOne, + }; + exportCmd.Add(snapshotArg); + exportCmd.Add(outputArg); + + var batchSizeOpt = new Option("--batch-size") + { + Description = "Rows per produced batch.", + DefaultValueFactory = _ => 2048, + }; + var queueCapacityOpt = new Option("--queue-capacity") + { + Description = "Max queued batches.", + DefaultValueFactory = _ => 256, + }; + var validateOpt = new Option("--validate") + { + Description = "Validation mode: none, minimal, or full.", + DefaultValueFactory = _ => "minimal", + }; + validateOpt.AcceptOnlyFromAmong("none", "minimal", "full"); + var destinationOpt = new Option("--destination") + { + Description = "Export backend: duckdb or sqlite.", + DefaultValueFactory = _ => "duckdb", + }; + destinationOpt.AcceptOnlyFromAmong("duckdb", "sqlite"); + var verboseOpt = new Option("--verbose") + { + Description = "Print progress updates.", + }; + + exportCmd.Add(batchSizeOpt); + exportCmd.Add(queueCapacityOpt); + exportCmd.Add(validateOpt); + exportCmd.Add(destinationOpt); + exportCmd.Add(verboseOpt); + + exportCmd.SetAction((ParseResult parseResult) => + { + var snapshotPath = ExpandPath(parseResult.GetValue(snapshotArg)!); + var outputDbPath = ExpandPath(parseResult.GetValue(outputArg)!); + if (!File.Exists(snapshotPath)) + { + Console.Error.WriteLine($"Snapshot file not found: {snapshotPath}"); + return 1; + } + var 
options = new CliOptions + { + Command = CommandKind.Export, + SnapshotPath = snapshotPath, + OutputDbPath = outputDbPath, + BatchSize = parseResult.GetValue(batchSizeOpt), + QueueCapacity = parseResult.GetValue(queueCapacityOpt), + Validate = ParseValidationMode(parseResult.GetValue(validateOpt)!), + Destination = parseResult.GetValue(destinationOpt)!.ToLowerInvariant() == "sqlite" ? DestinationKind.Sqlite : DestinationKind.DuckDb, + Verbose = parseResult.GetValue(verboseOpt), + }; + return runExport(options); + }); + + // ---- report ---- + var reportCmd = new Command("report", "Generate an HTML report from an exported database."); + var databaseArg = new Argument("database") + { + Description = "Path to the exported database (.duckdb or .db).", + Arity = ArgumentArity.ExactlyOne, + }; + reportCmd.Add(databaseArg); + + var outOpt = new Option("--out") + { + Description = "Output HTML file path (default: temp file + open in browser).", + }; + var titleOpt = new Option("--title") + { + Description = "Report title.", + DefaultValueFactory = _ => "Memory Snapshot Report", + }; + var reportVerboseOpt = new Option("--verbose") + { + Description = "Print progress and timings.", + }; + + reportCmd.Add(outOpt); + reportCmd.Add(titleOpt); + reportCmd.Add(reportVerboseOpt); + + reportCmd.SetAction((ParseResult parseResult) => + { + var reportDbPath = ExpandPath(parseResult.GetValue(databaseArg)!); + if (!File.Exists(reportDbPath)) + { + Console.Error.WriteLine($"Database file not found: {reportDbPath}"); + return 1; + } + var outPath = parseResult.GetValue(outOpt); + var options = new CliOptions + { + Command = CommandKind.Report, + ReportDbPath = reportDbPath, + ReportOutputPath = string.IsNullOrWhiteSpace(outPath) ? 
null : ExpandPath(outPath), + ReportTitle = parseResult.GetValue(titleOpt)!, + Verbose = parseResult.GetValue(reportVerboseOpt), + }; + return runReport(options); + }); + + root.Add(exportCmd); + root.Add(reportCmd); + return root; + } + + private static ValidationMode ParseValidationMode(string value) + { + return value.ToLowerInvariant() switch + { + "none" => ValidationMode.None, + "minimal" => ValidationMode.Minimal, + "full" => ValidationMode.Full, + _ => ValidationMode.Minimal, + }; + } + + private static string ExpandPath(string value) + { + if (string.IsNullOrWhiteSpace(value)) + return value; + + var expanded = Environment.ExpandEnvironmentVariables(value); + if (expanded.StartsWith("~/", StringComparison.Ordinal) || expanded == "~") + { + var home = Environment.GetFolderPath(Environment.SpecialFolder.UserProfile); + var suffix = expanded.Length > 2 ? expanded[2..] : string.Empty; + expanded = Path.Combine(home, suffix); + } + return Path.GetFullPath(expanded); + } +} diff --git a/Cli/MemorySnapshotDataTools.Cli.csproj b/Cli/MemorySnapshotDataTools.Cli.csproj new file mode 100644 index 0000000..8950491 --- /dev/null +++ b/Cli/MemorySnapshotDataTools.Cli.csproj @@ -0,0 +1,19 @@ + + + Exe + net10.0 + enable + enable + latest + MemorySnapshotDataTools.Cli + MemorySnapshotDataTools + 0.1.0 + true + true + + + + + + + diff --git a/Cli/Program.cs b/Cli/Program.cs new file mode 100644 index 0000000..86fdc75 --- /dev/null +++ b/Cli/Program.cs @@ -0,0 +1,126 @@ +using System.Diagnostics; +using MemorySnapshotDataTools; +using MemorySnapshotDataTools.Export; +using MemorySnapshotDataTools.ExportDestination; +using MemorySnapshotDataTools.Parser; +using MemorySnapshotDataTools.Report; + +namespace MemorySnapshotDataTools.Cli; + +internal static class Program +{ + private static int Main(string[] args) + { + var root = CommandLineBuilder.Build(RunExport, RunReport); + return root.Parse(args).Invoke(); + } + + private static int RunExport(CliOptions options) + { + var 
destination = ExportDestinationFactory.Create(options.Destination); + var progress = new ConsoleProgress(options.Verbose); + progress.Report($"Backend: {destination.DestinationName}", force: true); + + using var cts = new CancellationTokenSource(); + Console.CancelKeyPress += (_, e) => + { + e.Cancel = true; + cts.Cancel(); + }; + + try + { + var sw = Stopwatch.StartNew(); + + var exportOptions = new ExportRunOptions + { + OutputDbPath = options.OutputDbPath, + BatchSize = options.BatchSize, + QueueCapacity = options.QueueCapacity, + Validate = options.Validate, + }; + + var extractSw = Stopwatch.StartNew(); + var rawData = RunStage("snapshot-extract", progress, () => SnapshotBridge.ExtractRawData(options.SnapshotPath, progress, cts.Token)); + extractSw.Stop(); + + var pipelineSw = Stopwatch.StartNew(); + var counts = RunStage("pipeline-write", progress, () => ExportPipeline.Run(exportOptions, rawData, destination, progress, cts.Token)); + pipelineSw.Stop(); + + var validationSw = Stopwatch.StartNew(); + RunStage("validation", progress, () => destination.Validate(options.OutputDbPath, rawData, options.Validate)); + validationSw.Stop(); + + counts.TotalMs = sw.ElapsedMilliseconds; + var pipelineRps = pipelineSw.ElapsedMilliseconds > 0 + ? rawData.TotalRows * 1000.0 / pipelineSw.ElapsedMilliseconds + : 0.0; + + progress.Report( + $"Done. 
backend={destination.DestinationName}, native_objects={counts.NativeObjects}, managed_objects={counts.ManagedObjects}, connections={counts.Connections}, native_roots={counts.NativeRoots}, " + + $"memory_regions={counts.MemoryRegions}, native_allocations={counts.NativeAllocations}, " + + $"extract_ms={extractSw.ElapsedMilliseconds}, pipeline_ms={pipelineSw.ElapsedMilliseconds}, validation_ms={validationSw.ElapsedMilliseconds}, total_ms={counts.TotalMs}, " + + $"pipeline_rps={pipelineRps:N0}, backend_insert_ms={counts.BackendInsertMs}, backend_commit_ms={counts.BackendCommitMs}, backend_index_ms={counts.BackendIndexBuildMs}, " + + $"insert_ms_by_table(native={counts.NativeObjectInsertMs}, managed={counts.ManagedObjectInsertMs}, connections={counts.ConnectionInsertMs}, roots={counts.NativeRootInsertMs}, regions={counts.MemoryRegionInsertMs}, allocations={counts.NativeAllocationInsertMs})"); + return 0; + } + catch (OperationCanceledException) + { + Console.Error.WriteLine("Export cancelled."); + return 2; + } + catch (Exception ex) + { + Console.Error.WriteLine("Export failed."); + if (ex is ExportStageException stageEx) + { + Console.Error.WriteLine($"Failure stage: {stageEx.Stage}"); + Console.Error.WriteLine(stageEx.InnerException ?? 
stageEx); + } + else + { + Console.Error.WriteLine(ex); + } + return 3; + } + } + + private static int RunReport(CliOptions options) + { + var reportOptions = new ReportRunOptions + { + ReportDbPath = options.ReportDbPath, + ReportOutputPath = options.ReportOutputPath, + ReportTitle = options.ReportTitle, + }; + var progress = new ConsoleProgress(options.Verbose); + return ReportRunner.Run(reportOptions, progress); + } + + // Runs one named export stage: announces it, then wraps any failure in ExportStageException so the caller can print which stage failed. + // Cancellation is deliberately not wrapped, so OperationCanceledException still reaches RunExport's dedicated catch block (exit code 2). + private static void RunStage(string stage, ConsoleProgress progress, Action action) + { + progress.Report($"[{stage}] start", force: true); + try + { + action(); + } + catch (Exception ex) when (ex is not ExportStageException and not OperationCanceledException) + { + throw new ExportStageException(stage, ex); + } + } + + // Value-returning overload: returns the stage's result and applies the same wrapping rules (failures wrapped, cancellation passed through). + private static T RunStage(string stage, ConsoleProgress progress, Func action) + { + progress.Report($"[{stage}] start", force: true); + try + { + return action(); + } + catch (Exception ex) when (ex is not ExportStageException and not OperationCanceledException) + { + throw new ExportStageException(stage, ex); + } + } +} diff --git a/Core/Export/ExportErrors.cs b/Core/Export/ExportErrors.cs new file mode 100644 index 0000000..d70d434 --- /dev/null +++ b/Core/Export/ExportErrors.cs @@ -0,0 +1,19 @@ +namespace MemorySnapshotDataTools.Export; + +/// +/// Exception thrown when an export stage (e.g. extract, write, validate) fails. Wraps the inner exception and records the stage name. +/// +public sealed class ExportStageException : Exception +{ + /// Creates an exception for a failed export stage. + /// Name of the stage that failed (e.g. "extract", "write"). + /// The underlying exception. + public ExportStageException(string stage, Exception innerException) + : base($"Stage '{stage}' failed.", innerException) + { + Stage = stage; + } + + /// Name of the export stage that failed. 
+ public string Stage { get; } +} diff --git a/Core/Export/ExportPipeline.cs b/Core/Export/ExportPipeline.cs new file mode 100644 index 0000000..cc01aa0 --- /dev/null +++ b/Core/Export/ExportPipeline.cs @@ -0,0 +1,266 @@ +using System.Collections.Concurrent; +using System.Diagnostics; +using System.Runtime.ExceptionServices; +using MemorySnapshotDataTools; +using MemorySnapshotDataTools.ExportDestination; + +namespace MemorySnapshotDataTools.Export; + +/// +/// Orchestrates parallel export of to a database: producers materialize batches per table, +/// a single writer consumes from a bounded queue and writes via . +/// Reports progress and respects cancellation. +/// +public static class ExportPipeline +{ + /// Minimum interval (ms) between progress reports during materialize+write to avoid flooding the console. + private const int ProgressReportIntervalMs = 350; + + /// Sleep (ms) between monitor loop iterations when waiting on producers or writer. + private const int MonitorPollIntervalMs = 125; + + /// + /// Runs the full export: starts the destination writer and parallel producers, monitors until completion, then returns counts and timings. + /// Validates that materialized and written row counts match the raw data. + /// + /// Batch size, queue capacity, output path. + /// Extracted snapshot data to export. + /// Writer implementation (DuckDB or SQLite). + /// Progress reporter. + /// Cancellation token. + /// Row counts and timing statistics. + /// If materialized or written row counts do not match. + /// When is cancelled. 
+ public static ExportCounts Run(ExportRunOptions options, RawSnapshotData rawData, IExportDestinationWriter destination, IProgressReporter progress, CancellationToken token) + { + var counts = new ExportCounts(); + var state = new PipelineState(rawData.TotalRows); + using var cts = CancellationTokenSource.CreateLinkedTokenSource(token); + var queue = new BlockingCollection(options.QueueCapacity); + + progress.Report($"Starting {destination.DestinationName} writer with {rawData.TotalRows:N0} total rows...", force: true); + var writerTask = Task.Run( + () => destination.ConsumeAndWrite(options.OutputDbPath, rawData.SnapshotInfo, queue, state, cts.Token), + cts.Token); + + var materializeSw = Stopwatch.StartNew(); + var producerTasks = new[] + { + Task.Run(() => ProduceNativeRoots(rawData.NativeRoots, queue, state, options.BatchSize, cts.Token), cts.Token), + Task.Run(() => ProduceMemoryRegions(rawData.MemoryRegions, queue, state, options.BatchSize, cts.Token), cts.Token), + Task.Run(() => ProduceNativeAllocations(rawData.NativeAllocations, queue, state, options.BatchSize, cts.Token), cts.Token), + Task.Run(() => ProduceNativeObjects(rawData.NativeObjects, queue, state, options.BatchSize, cts.Token), cts.Token), + Task.Run(() => ProduceManagedObjects(rawData.ManagedObjects, queue, state, options.BatchSize, cts.Token), cts.Token), + Task.Run(() => ProduceConnections(rawData.Connections, queue, state, options.BatchSize, cts.Token), cts.Token), + }; + + MonitorOverlap(producerTasks, writerTask, queue, progress, state, options.QueueCapacity, cts); + materializeSw.Stop(); + + var writeSw = Stopwatch.StartNew(); + MonitorWriter(writerTask, progress, state, options.QueueCapacity, cts); + writeSw.Stop(); + var writeStats = writerTask.GetAwaiter().GetResult(); + + counts.NativeObjects = rawData.NativeObjects.Count; + counts.ManagedObjects = rawData.ManagedObjects.Count; + counts.Connections = rawData.Connections.Count; + counts.NativeRoots = rawData.NativeRoots.Count; + 
counts.MemoryRegions = rawData.MemoryRegions.Count; + counts.NativeAllocations = rawData.NativeAllocations.Count; + counts.MaterializeMs = materializeSw.ElapsedMilliseconds; + counts.WriteMs = writeSw.ElapsedMilliseconds; + counts.BackendInsertMs = writeStats.TotalInsertMs; + counts.BackendCommitMs = writeStats.CommitMs; + counts.BackendIndexBuildMs = writeStats.IndexBuildMs; + counts.NativeObjectInsertMs = writeStats.NativeObjectInsertMs; + counts.ManagedObjectInsertMs = writeStats.ManagedObjectInsertMs; + counts.ConnectionInsertMs = writeStats.ConnectionInsertMs; + counts.NativeRootInsertMs = writeStats.NativeRootInsertMs; + counts.MemoryRegionInsertMs = writeStats.MemoryRegionInsertMs; + counts.NativeAllocationInsertMs = writeStats.NativeAllocationInsertMs; + + if (state.MaterializedRows != rawData.TotalRows) + throw new InvalidOperationException($"Materialized rows mismatch. expected={rawData.TotalRows}, actual={state.MaterializedRows}"); + if (state.WrittenRows != rawData.TotalRows + 1) + throw new InvalidOperationException($"Written rows mismatch. 
expected={rawData.TotalRows + 1}, actual={state.WrittenRows}"); + + return counts; + } + + // Polls until every producer task has finished, reporting progress at most once per ProgressReportIntervalMs, then marks the queue complete so the writer can drain it and stop. + // On any failure the linked token source is cancelled, which releases tasks still blocked on the bounded queue instead of leaving them waiting. + private static void MonitorOverlap( + Task[] producerTasks, + Task writerTask, + BlockingCollection queue, + IProgressReporter progress, + PipelineState state, + int queueCapacity, + CancellationTokenSource cts) + { + var lastWrite = DateTime.MinValue; + try + { + while (producerTasks.Any(t => !t.IsCompleted)) + { + ThrowIfFaulted(producerTasks, writerTask); + var produced = state.MaterializedRows; + var written = Math.Max(0, state.WrittenRows - 1); // WrittenRows includes the single snapshot_info row + if (DateTime.UtcNow - lastWrite > TimeSpan.FromMilliseconds(ProgressReportIntervalMs)) + { + progress.Report( + $"parallel materialize+write: produced={produced:N0}/{state.TotalRows:N0}, written={written:N0}/{state.TotalRows:N0}, queued={state.QueuedBatchCount:N0}/{queueCapacity:N0}"); + lastWrite = DateTime.UtcNow; + } + + Thread.Sleep(MonitorPollIntervalMs); + } + + // A producer that faulted or was cancelled on the final poll is surfaced with its original exception here, not as an AggregateException from WaitAll. + ThrowIfFaulted(producerTasks, writerTask); + Task.WaitAll(producerTasks); + } + catch + { + cts.Cancel(); + throw; + } + + queue.CompleteAdding(); + progress.Report($"Materialization complete ({state.MaterializedRows:N0}/{state.TotalRows:N0}).", force: true); + } + + // Polls until the writer task finishes, reporting progress at most once per ProgressReportIntervalMs. The completion message is printed only when the writer ran to completion. + private static void MonitorWriter(Task writerTask, IProgressReporter progress, PipelineState state, int queueCapacity, CancellationTokenSource cts) + { + var lastWrite = DateTime.MinValue; + while (!writerTask.IsCompleted) + { + if (DateTime.UtcNow - lastWrite > TimeSpan.FromMilliseconds(ProgressReportIntervalMs)) + { + progress.Report($"writing: written={Math.Max(0, state.WrittenRows - 1):N0}/{state.TotalRows:N0}, queued={state.QueuedBatchCount:N0}/{queueCapacity:N0}"); + lastWrite = DateTime.UtcNow; + } + Thread.Sleep(MonitorPollIntervalMs); + } + + // IsFaulted and IsCanceled can only be true once IsCompleted is, so they are checked after the loop rather than inside it. + if (writerTask.IsFaulted) + RethrowTaskException(writerTask, "Writer task failed."); + if (writerTask.IsCanceled) + throw new OperationCanceledException(); + + progress.Report($"Write complete ({Math.Max(0, state.WrittenRows - 1):N0}/{state.TotalRows:N0}).", force: true); + } + + private static void ProduceNativeRoots(List rows, BlockingCollection queue, 
PipelineState state, int batchSize, CancellationToken token) + { + ProduceBatches(rows.Count, batchSize, token, start => + { + var end = Math.Min(start + batchSize, rows.Count); + var buffer = new NativeRootRow[end - start]; + rows.CopyTo(start, buffer, 0, buffer.Length); + queue.Add(WriteBatch.ForNativeRoots(buffer), token); + state.IncrementQueuedBatches(); + state.AddMaterialized(buffer.Length); + }); + } + + private static void ProduceNativeObjects(List rows, BlockingCollection queue, PipelineState state, int batchSize, CancellationToken token) + { + ProduceBatches(rows.Count, batchSize, token, start => + { + var end = Math.Min(start + batchSize, rows.Count); + var buffer = new NativeObjectRow[end - start]; + rows.CopyTo(start, buffer, 0, buffer.Length); + queue.Add(WriteBatch.ForNativeObjects(buffer), token); + state.IncrementQueuedBatches(); + state.AddMaterialized(buffer.Length); + }); + } + + private static void ProduceMemoryRegions(List rows, BlockingCollection queue, PipelineState state, int batchSize, CancellationToken token) + { + ProduceBatches(rows.Count, batchSize, token, start => + { + var end = Math.Min(start + batchSize, rows.Count); + var buffer = new MemoryRegionRow[end - start]; + rows.CopyTo(start, buffer, 0, buffer.Length); + queue.Add(WriteBatch.ForMemoryRegions(buffer), token); + state.IncrementQueuedBatches(); + state.AddMaterialized(buffer.Length); + }); + } + + private static void ProduceNativeAllocations(List rows, BlockingCollection queue, PipelineState state, int batchSize, CancellationToken token) + { + ProduceBatches(rows.Count, batchSize, token, start => + { + var end = Math.Min(start + batchSize, rows.Count); + var buffer = new NativeAllocationRow[end - start]; + rows.CopyTo(start, buffer, 0, buffer.Length); + queue.Add(WriteBatch.ForNativeAllocations(buffer), token); + state.IncrementQueuedBatches(); + state.AddMaterialized(buffer.Length); + }); + } + + private static void ProduceManagedObjects(List rows, BlockingCollection 
queue, PipelineState state, int batchSize, CancellationToken token) + { + ProduceBatches(rows.Count, batchSize, token, start => + { + var end = Math.Min(start + batchSize, rows.Count); + var buffer = new ManagedObjectRow[end - start]; + rows.CopyTo(start, buffer, 0, buffer.Length); + queue.Add(WriteBatch.ForManagedObjects(buffer), token); + state.IncrementQueuedBatches(); + state.AddMaterialized(buffer.Length); + }); + } + + private static void ProduceConnections(List rows, BlockingCollection queue, PipelineState state, int batchSize, CancellationToken token) + { + ProduceBatches(rows.Count, batchSize, token, start => + { + var end = Math.Min(start + batchSize, rows.Count); + var buffer = new ConnectionRow[end - start]; + rows.CopyTo(start, buffer, 0, buffer.Length); + queue.Add(WriteBatch.ForConnections(buffer), token); + state.IncrementQueuedBatches(); + state.AddMaterialized(buffer.Length); + }); + } + + // Splits the range [0, totalCount) into batches of batchSize rows and invokes processBatch with each batch's start index, in parallel and in no guaranteed order. + // Does nothing when totalCount is zero or negative; throws ArgumentOutOfRangeException when there is work to do but batchSize is not positive. + private static void ProduceBatches(int totalCount, int batchSize, CancellationToken token, Action processBatch) + { + if (totalCount <= 0) + return; + + // A zero or negative batch size would otherwise surface as DivideByZeroException or a negative array length. + ArgumentOutOfRangeException.ThrowIfNegativeOrZero(batchSize); + + // Ceiling division in 64-bit so that totalCount + batchSize cannot overflow int. + var batchCount = (int)(((long)totalCount + batchSize - 1) / batchSize); + Parallel.For(0, batchCount, new ParallelOptions + { + CancellationToken = token, + MaxDegreeOfParallelism = Math.Max(1, Environment.ProcessorCount), + }, batchIndex => + { + token.ThrowIfCancellationRequested(); + processBatch(batchIndex * batchSize); // batchIndex * batchSize is below totalCount, so it fits in int + }); + } + + private static void ThrowIfFaulted(Task[] producerTasks, Task writerTask) + { + foreach (var task in producerTasks) + { + if (task.IsFaulted) + RethrowTaskException(task, "Producer task failed."); + if (task.IsCanceled) + throw new OperationCanceledException(); + } + + if (writerTask.IsFaulted) + RethrowTaskException(writerTask, "Writer task failed."); + if (writerTask.IsCanceled) + throw new OperationCanceledException(); + } + + private static void RethrowTaskException(Task task, string fallbackMessage) + { + var 
aggregate = task.Exception; + if (aggregate == null) + throw new InvalidOperationException(fallbackMessage); + + var inner = aggregate.InnerException ?? aggregate; + ExceptionDispatchInfo.Capture(inner).Throw(); + throw new InvalidOperationException(fallbackMessage); + } +} diff --git a/Core/ExportDestination/DuckDbExportDestination.cs b/Core/ExportDestination/DuckDbExportDestination.cs new file mode 100644 index 0000000..84661d1 --- /dev/null +++ b/Core/ExportDestination/DuckDbExportDestination.cs @@ -0,0 +1,426 @@ +using System.Collections.Concurrent; +using System.Diagnostics; +using DuckDB.NET.Data; + +namespace MemorySnapshotDataTools.ExportDestination; + +/// +/// DuckDB implementation of . Writes snapshot tables to a .duckdb file using DuckDB appenders, +/// then builds indexes. Supports validation via row counts and optional referential checks. +/// +internal sealed class DuckDbExportDestination : IExportDestinationWriter +{ + /// + public string DestinationName => "duckdb"; + + #region ConsumeAndWrite + + /// + public WriteStats ConsumeAndWrite( + string dbPath, + SnapshotInfo snapshotInfo, + BlockingCollection queue, + PipelineState state, + CancellationToken token) + { + var directory = Path.GetDirectoryName(dbPath); + if (!string.IsNullOrEmpty(directory)) + Directory.CreateDirectory(directory); + + // Remove any existing DuckDB files so we start fresh. + // DuckDB creates a WAL alongside the main file; both must be deleted to avoid replay. + foreach (var suffix in new[] { "", ".wal" }) + { + var f = dbPath + suffix; + if (File.Exists(f)) + File.Delete(f); + } + + using var connection = new DuckDBConnection($"Data Source={dbPath}"); + connection.Open(); + + var stats = new WriteStats(); + + // Create schema + Exec(connection, SchemaTablesScript); + + // Insert snapshot_info using positional parameters (DuckDB uses ? 
placeholders) + using (var cmd = connection.CreateCommand()) + { + cmd.CommandText = "INSERT INTO snapshot_info(snapshot_path, exported_at_utc, unity_version) VALUES (?, ?, ?);"; + cmd.Parameters.Add(new DuckDBParameter { Value = snapshotInfo.SnapshotPath }); + cmd.Parameters.Add(new DuckDBParameter { Value = snapshotInfo.ExportedAtUtc }); + cmd.Parameters.Add(new DuckDBParameter { Value = snapshotInfo.UnityVersion ?? (object)DBNull.Value }); + cmd.ExecuteNonQuery(); + } + state.AddWritten(1); // the snapshot_info row counts as one written row; ExportPipeline.Run expects WrittenRows == TotalRows + 1 + + var insertSw = Stopwatch.StartNew(); + + // Appenders are scoped so they are all disposed before insertSw is stopped; the disposal (flush+commit) time is therefore part of TotalInsertMs and is not timed separately. + using (var nativeAppender = connection.CreateAppender("native_objects")) + using (var managedAppender = connection.CreateAppender("managed_objects")) + using (var connectionAppender = connection.CreateAppender("connections")) + using (var rootAppender = connection.CreateAppender("native_roots")) + using (var regionAppender = connection.CreateAppender("memory_regions")) + using (var allocationAppender = connection.CreateAppender("native_allocations")) + { + foreach (var batch in queue.GetConsumingEnumerable(token)) + { + token.ThrowIfCancellationRequested(); + state.DecrementQueuedBatches(); // batch has left the queue; keeps the queued count shown in progress messages current + switch (batch.Kind) + { + case WriteBatchKind.NativeObjects: + var nativeSw = Stopwatch.StartNew(); + foreach (var row in batch.NativeObjects) + { + // INTEGER columns get int, BIGINT columns get long (type must match exactly) + nativeAppender.CreateRow() + .AppendValue(row.NativeObjectIndex) // int → INTEGER + .AppendValue(row.InstanceId ?? string.Empty) // string → VARCHAR + .AppendValue(row.Name ?? string.Empty) // string → VARCHAR + .AppendValue(unchecked((long)row.SizeBytes)) // ulong → BIGINT + .AppendValue(row.TypeIndex) // int → INTEGER + .AppendValue(row.NativeTypeName ?? 
string.Empty) // string → VARCHAR + .AppendValue(row.IsDestroyed) // bool → BOOLEAN + .EndRow(); + } + nativeSw.Stop(); + stats.NativeObjectRows += batch.NativeObjects.Length; + stats.NativeObjectInsertMs += nativeSw.ElapsedMilliseconds; + state.AddWritten(batch.NativeObjects.Length); + break; + + case WriteBatchKind.ManagedObjects: + var managedSw = Stopwatch.StartNew(); + foreach (var row in batch.ManagedObjects) + { + var r = managedAppender.CreateRow() + .AppendValue(row.ManagedObjectIndex) // int → INTEGER + .AppendValue(unchecked((long)row.Address)) // ulong → BIGINT + .AppendValue(row.SizeBytes) // long → BIGINT + .AppendValue(row.TypeIndex) // int → INTEGER + .AppendValue(row.ManagedTypeName ?? string.Empty); // VARCHAR + if (row.NativeObjectIndex >= 0) + r.AppendValue(row.NativeObjectIndex); // long → BIGINT + else + r.AppendNullValue(); + r.EndRow(); + } + managedSw.Stop(); + stats.ManagedObjectRows += batch.ManagedObjects.Length; + stats.ManagedObjectInsertMs += managedSw.ElapsedMilliseconds; + state.AddWritten(batch.ManagedObjects.Length); + break; + + case WriteBatchKind.Connections: + var connSw = Stopwatch.StartNew(); + foreach (var row in batch.Connections) + { + connectionAppender.CreateRow() + .AppendValue(row.FromKind ?? string.Empty) + .AppendValue(row.FromIndex) + .AppendValue(row.ToKind ?? string.Empty) + .AppendValue(row.ToIndex) + .AppendValue(row.ConnectionType ?? string.Empty) + .EndRow(); + } + connSw.Stop(); + stats.ConnectionRows += batch.Connections.Length; + stats.ConnectionInsertMs += connSw.ElapsedMilliseconds; + state.AddWritten(batch.Connections.Length); + break; + + case WriteBatchKind.NativeRoots: + var rootSw = Stopwatch.StartNew(); + foreach (var row in batch.NativeRoots) + { + rootAppender.CreateRow() + .AppendValue(row.RootIndex) // int → INTEGER + .AppendValue(row.RootId) // long → BIGINT + .AppendValue(row.AreaName ?? string.Empty) // VARCHAR + .AppendValue(row.ObjectName ?? 
string.Empty) // VARCHAR + .AppendValue(unchecked((long)row.AccumulatedSizeBytes)) // ulong → BIGINT + .EndRow(); + } + rootSw.Stop(); + stats.NativeRootRows += batch.NativeRoots.Length; + stats.NativeRootInsertMs += rootSw.ElapsedMilliseconds; + state.AddWritten(batch.NativeRoots.Length); + break; + + case WriteBatchKind.MemoryRegions: + var regionSw = Stopwatch.StartNew(); + foreach (var row in batch.MemoryRegions) + { + var r = regionAppender.CreateRow() + .AppendValue(row.RegionIndex) // int → INTEGER + .AppendValue(unchecked((long)row.AddressBase)) // ulong → BIGINT + .AppendValue(unchecked((long)row.AddressSize)) // ulong → BIGINT + .AppendValue(row.Name ?? string.Empty); // VARCHAR + if (row.ParentRegionIndex >= 0) + r.AppendValue(row.ParentRegionIndex); // int → INTEGER + else + r.AppendNullValue(); + if (row.FirstAllocationIndex >= 0) + r.AppendValue(row.FirstAllocationIndex); // int → INTEGER + else + r.AppendNullValue(); + r.AppendValue(row.NumAllocations).EndRow(); // int → INTEGER + } + regionSw.Stop(); + stats.MemoryRegionRows += batch.MemoryRegions.Length; + stats.MemoryRegionInsertMs += regionSw.ElapsedMilliseconds; + state.AddWritten(batch.MemoryRegions.Length); + break; + + case WriteBatchKind.NativeAllocations: + var allocSw = Stopwatch.StartNew(); + foreach (var row in batch.NativeAllocations) + { + var r = allocationAppender.CreateRow() + .AppendValue(row.AllocationIndex) // int → INTEGER + .AppendValue(unchecked((long)row.Address)) // ulong → BIGINT + .AppendValue(unchecked((long)row.SizeBytes)) // ulong → BIGINT + .AppendValue(unchecked((long)row.OverheadSizeBytes)) // ulong → BIGINT + .AppendValue(unchecked((long)row.PaddingSizeBytes)); // ulong → BIGINT + if (row.MemoryRegionIndex >= 0) + r.AppendValue(row.MemoryRegionIndex); // int → INTEGER + else + r.AppendNullValue(); + r.EndRow(); + } + allocSw.Stop(); + stats.NativeAllocationRows += batch.NativeAllocations.Length; + stats.NativeAllocationInsertMs += allocSw.ElapsedMilliseconds; + 
state.AddWritten(batch.NativeAllocations.Length);
                    break;
            }
        }
    } // appenders disposed (flushed + committed) here

    insertSw.Stop();
    stats.TotalInsertMs = insertSw.ElapsedMilliseconds;
    // CommitMs is included in TotalInsertMs since disposal happens inside the timed scope.
    stats.CommitMs = 0;

    // Indexes are created only after all rows are in place, and timed separately.
    var indexSw = Stopwatch.StartNew();
    Exec(connection, CreateIndexesScript);
    indexSw.Stop();
    stats.IndexBuildMs = indexSw.ElapsedMilliseconds;

    return stats;
}

#endregion

#region Validation

/// <summary>
/// Re-opens the DuckDB file at <paramref name="dbPath"/> and checks what was persisted.
/// Returns immediately for <see cref="ValidationMode.None"/>. Otherwise compares the row count of each
/// data table with the corresponding collection in <paramref name="rawData"/>; for
/// <see cref="ValidationMode.Full"/> it additionally looks for duplicated key values and for
/// connection / region / allocation references that point at rows which do not exist.
/// </summary>
/// <param name="dbPath">Path of the DuckDB database file to inspect.</param>
/// <param name="rawData">Extracted snapshot data that supplies the expected row counts.</param>
/// <param name="mode">How much checking to perform.</param>
/// <exception cref="InvalidOperationException">A count, uniqueness or reference check failed.</exception>
public void Validate(string dbPath, RawSnapshotData rawData, ValidationMode mode)
{
    if (mode == ValidationMode.None)
    {
        return;
    }

    using var connection = new DuckDBConnection($"Data Source={dbPath}");
    connection.Open();

    // Shorthand so every check below reads as "count of <sql>".
    long Count(string sql) => QueryCount(connection, sql);

    var persistedNative = Count("SELECT COUNT(*) FROM native_objects;");
    var persistedManaged = Count("SELECT COUNT(*) FROM managed_objects;");
    var persistedConnections = Count("SELECT COUNT(*) FROM connections;");
    var persistedRoots = Count("SELECT COUNT(*) FROM native_roots;");
    var persistedRegions = Count("SELECT COUNT(*) FROM memory_regions;");
    var persistedAllocations = Count("SELECT COUNT(*) FROM native_allocations;");

    var countsAgree =
        persistedNative == rawData.NativeObjects.Count &&
        persistedManaged == rawData.ManagedObjects.Count &&
        persistedConnections == rawData.Connections.Count &&
        persistedRoots == rawData.NativeRoots.Count &&
        persistedRegions == rawData.MemoryRegions.Count &&
        persistedAllocations == rawData.NativeAllocations.Count;

    if (!countsAgree)
    {
        throw new InvalidOperationException(
            $"DuckDB validation count mismatch. " +
            $"expected=(native={rawData.NativeObjects.Count}, managed={rawData.ManagedObjects.Count}, " +
            $"connections={rawData.Connections.Count}, roots={rawData.NativeRoots.Count}, " +
            $"regions={rawData.MemoryRegions.Count}, allocations={rawData.NativeAllocations.Count}) " +
            $"actual=(native={persistedNative}, managed={persistedManaged}, connections={persistedConnections}, " +
            $"roots={persistedRoots}, regions={persistedRegions}, allocations={persistedAllocations})");
    }

    // Everything below is only part of full validation.
    if (mode != ValidationMode.Full)
    {
        return;
    }

    // Key uniqueness: each inner query lists key values that occur more than once.
    var repeatedNativeKeys = Count("SELECT COUNT(*) FROM (SELECT native_object_index, COUNT(*) c FROM native_objects GROUP BY native_object_index HAVING c > 1);");
    var repeatedManagedKeys = Count("SELECT COUNT(*) FROM (SELECT managed_object_index, COUNT(*) c FROM managed_objects GROUP BY managed_object_index HAVING c > 1);");
    var repeatedRegionKeys = Count("SELECT COUNT(*) FROM (SELECT region_index, COUNT(*) c FROM memory_regions GROUP BY region_index HAVING c > 1);");
    var repeatedAllocationKeys = Count("SELECT COUNT(*) FROM (SELECT allocation_index, COUNT(*) c FROM native_allocations GROUP BY allocation_index HAVING c > 1);");

    var anyRepeatedKey =
        repeatedNativeKeys > 0 ||
        repeatedManagedKeys > 0 ||
        repeatedRegionKeys > 0 ||
        repeatedAllocationKeys > 0;
    if (anyRepeatedKey)
    {
        throw new InvalidOperationException("DuckDB validation failed: duplicate primary key rows found.");
    }

    // Connection endpoints must resolve to an existing managed or native object row.
    var danglingFromManaged = Count("""
        SELECT COUNT(*) FROM connections c
        WHERE c.from_kind = 'managed_object'
          AND NOT EXISTS (
            SELECT 1 FROM managed_objects m WHERE m.managed_object_index = c.from_index
          );
        """);
    var danglingFromNative = Count("""
        SELECT COUNT(*) FROM connections c
        WHERE c.from_kind = 'native_object'
          AND NOT EXISTS (
            SELECT 1 FROM native_objects n WHERE n.native_object_index = c.from_index
          );
        """);
    var danglingToManaged = Count("""
        SELECT COUNT(*) FROM connections c
        WHERE c.to_kind = 'managed_object'
          AND NOT EXISTS (
            SELECT 1 FROM managed_objects m WHERE m.managed_object_index = c.to_index
          );
        """);
    var danglingToNative = Count("""
        SELECT COUNT(*) FROM connections c
        WHERE c.to_kind = 'native_object'
          AND NOT EXISTS (
            SELECT 1 FROM native_objects n WHERE n.native_object_index = c.to_index
          );
        """);
    // Only the two kind strings above are accepted on either end of a connection.
    var unrecognisedKinds = Count("""
        SELECT COUNT(*) FROM connections
        WHERE from_kind NOT IN ('managed_object','native_object')
           OR to_kind NOT IN ('managed_object','native_object');
        """);
    // Memory map: non-NULL cross references between regions and allocations must resolve.
    var danglingAllocationRegions = Count("""
        SELECT COUNT(*) FROM native_allocations a
        WHERE a.memory_region_index IS NOT NULL
          AND NOT EXISTS (
            SELECT 1 FROM memory_regions r WHERE r.region_index = a.memory_region_index
          );
        """);
    var danglingRegionFirstAllocations = Count("""
        SELECT COUNT(*) FROM memory_regions r
        WHERE r.first_allocation_index IS NOT NULL
          AND NOT EXISTS (
            SELECT 1 FROM native_allocations a WHERE a.allocation_index = r.first_allocation_index
          );
        """);

    var referencesAreClean =
        danglingFromManaged <= 0 &&
        danglingFromNative <= 0 &&
        danglingToManaged <= 0 &&
        danglingToNative <= 0 &&
        unrecognisedKinds <= 0 &&
        danglingAllocationRegions <= 0 &&
        danglingRegionFirstAllocations <= 0;
    if (referencesAreClean)
    {
        return;
    }

    throw new InvalidOperationException(
        $"DuckDB validation failed: invalid graph or memory-map references. " +
        $"orphan_from_managed={danglingFromManaged}, orphan_from_native={danglingFromNative}, " +
        $"orphan_to_managed={danglingToManaged}, orphan_to_native={danglingToNative}, unknown_kinds={unrecognisedKinds}, " +
        $"orphan_allocation_region_refs={danglingAllocationRegions}, orphan_region_first_allocation_refs={danglingRegionFirstAllocations}");
}

#endregion

#region Helpers

/// <summary>
/// Runs a script against <paramref name="connection"/> one statement at a time.
/// </summary>
/// <param name="connection">Open DuckDB connection.</param>
/// <param name="sql">One or more statements separated by semicolons.</param>
private static void Exec(DuckDBConnection connection, string sql)
{
    // The script is cut at every ';' and each non-empty, trimmed piece is executed on its own,
    // because a multi-statement ExecuteNonQuery call is not relied upon here.
    const StringSplitOptions pieces = StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries;
    foreach (var statement in sql.Split(';', pieces))
    {
        using var command = connection.CreateCommand();
        command.CommandText = statement;
        command.ExecuteNonQuery();
    }
}

/// <summary>
/// Executes a scalar query and converts its single result to a 64-bit integer.
/// </summary>
/// <param name="connection">Open DuckDB connection.</param>
/// <param name="sql">Query whose first column of the first row is the count.</param>
/// <returns>The scalar result converted with <see cref="Convert.ToInt64(object)"/>.</returns>
private static long QueryCount(DuckDBConnection connection, string sql)
{
    using var command = connection.CreateCommand();
    command.CommandText = sql;
    return Convert.ToInt64(command.ExecuteScalar());
}

#endregion

#region Schema

// Column types must match C# value types passed to the Appender exactly
// (DuckDB Appender reads raw bytes; passing int to BIGINT column corrupts data).
// int → INTEGER (32-bit), long/ulong-cast → BIGINT (64-bit).
// DDL for every table written by the export. CREATE OR REPLACE is used, so running the script
// against a file that already has these tables replaces them.
private const string SchemaTablesScript = """
CREATE OR REPLACE TABLE snapshot_info (
    snapshot_path VARCHAR NOT NULL,
    exported_at_utc VARCHAR NOT NULL,
    unity_version VARCHAR
);

CREATE OR REPLACE TABLE native_objects (
    native_object_index INTEGER PRIMARY KEY,
    instance_id VARCHAR,
    name VARCHAR,
    size_bytes BIGINT NOT NULL,
    type_index INTEGER,
    native_type_name VARCHAR,
    is_destroyed BOOLEAN NOT NULL
);

CREATE OR REPLACE TABLE managed_objects (
    managed_object_index INTEGER PRIMARY KEY,
    address BIGINT NOT NULL,
    size_bytes BIGINT NOT NULL,
    type_index INTEGER,
    managed_type_name VARCHAR,
    native_object_index BIGINT
);

CREATE OR REPLACE TABLE connections (
    from_kind VARCHAR NOT NULL,
    from_index BIGINT NOT NULL,
    to_kind VARCHAR NOT NULL,
    to_index BIGINT NOT NULL,
    connection_type VARCHAR NOT NULL
);

CREATE OR REPLACE TABLE native_roots (
    root_index INTEGER PRIMARY KEY,
    root_id BIGINT NOT NULL,
    area_name VARCHAR,
    object_name VARCHAR,
    accumulated_size_bytes BIGINT NOT NULL
);

CREATE OR REPLACE TABLE memory_regions (
    region_index INTEGER PRIMARY KEY,
    address_base BIGINT NOT NULL,
    address_size BIGINT NOT NULL,
    name VARCHAR,
    parent_region_index INTEGER,
    first_allocation_index INTEGER,
    num_allocations INTEGER NOT NULL
);

CREATE OR REPLACE TABLE native_allocations (
    allocation_index INTEGER PRIMARY KEY,
    address BIGINT NOT NULL,
    size_bytes BIGINT NOT NULL,
    overhead_size_bytes BIGINT NOT NULL,
    padding_size_bytes BIGINT NOT NULL,
    memory_region_index INTEGER
);
""";

// Secondary indexes on the lookup columns (connection endpoints, instance id, destroyed flag,
// addresses, allocation-to-region link). Kept apart from the table DDL so it can run as its own step.
private const string CreateIndexesScript = """
CREATE INDEX idx_connections_from ON connections(from_kind, from_index);
CREATE INDEX idx_connections_to ON connections(to_kind, to_index);
CREATE INDEX idx_native_objects_instance_id ON native_objects(instance_id);
CREATE INDEX idx_native_objects_is_destroyed ON native_objects(is_destroyed);
CREATE INDEX idx_managed_objects_address ON managed_objects(address);
CREATE INDEX idx_memory_regions_address_base ON memory_regions(address_base);
CREATE INDEX idx_native_allocations_address ON native_allocations(address);
CREATE INDEX idx_native_allocations_region ON native_allocations(memory_region_index);
""";

#endregion
}
diff --git a/Core/ExportDestination/ExportDestinationFactory.cs b/Core/ExportDestination/ExportDestinationFactory.cs
new file mode 100644
index 0000000..6d89c4f
--- /dev/null
+++ b/Core/ExportDestination/ExportDestinationFactory.cs
@@ -0,0 +1,18 @@
namespace MemorySnapshotDataTools.ExportDestination;

/// <summary>
/// Factory for creating the appropriate <see cref="IExportDestinationWriter"/> based on <see cref="DestinationKind"/>.
/// </summary>
public static class ExportDestinationFactory
{
    /// <summary>Creates a writer for the specified database backend.</summary>
    /// <param name="kind">DuckDB or SQLite.</param>
    /// <returns>A new <see cref="IExportDestinationWriter"/> instance for that backend.</returns>
    /// <exception cref="ArgumentOutOfRangeException">If <paramref name="kind"/> is not a known value.</exception>
    public static IExportDestinationWriter Create(DestinationKind kind) => kind switch
    {
        DestinationKind.DuckDb => new DuckDbExportDestination(),
        DestinationKind.Sqlite => new SqliteExportDestination(),
        _ => throw new ArgumentOutOfRangeException(nameof(kind), kind, null),
    };
}
diff --git a/Core/ExportDestination/IExportDestinationWriter.cs b/Core/ExportDestination/IExportDestinationWriter.cs
new file mode 100644
index 0000000..37dec7e
--- /dev/null
+++ b/Core/ExportDestination/IExportDestinationWriter.cs
@@ -0,0 +1,38 @@
using System.Collections.Concurrent;

namespace MemorySnapshotDataTools.ExportDestination;

/// <summary>
/// Abstraction for writing snapshot data to a database. Implementations (e.g. DuckDB, SQLite) consume from a queue,
/// write to the given path, update <see cref="PipelineState"/>, and optionally support post-write validation.
/// </summary>
public interface IExportDestinationWriter
{
    /// <summary>Name of the destination for progress and errors (the SQLite implementation returns "sqlite").</summary>
    string DestinationName { get; }

    /// <summary>
    /// Consumes batches from <paramref name="queue"/> until it has been completed and drained, writes all tables
    /// to the database, and returns per-table row counts and timings. Updates <paramref name="state"/> as batches are written.
    /// </summary>
    /// <param name="dbPath">Output database file path.</param>
    /// <param name="snapshotInfo">Metadata to write (e.g. to snapshot_info table).</param>
    /// <param name="queue">Bounded queue of write batches; adding is completed by the pipeline when producers finish.</param>
    /// <param name="state">Shared state to update (written rows, queued batch count).</param>
    /// <param name="token">Cancellation token.</param>
    /// <returns>Per-table row counts and insert/commit/index timings.</returns>
    // NOTE(review): the element type of the queue is not visible in this listing — confirm against the real file.
    WriteStats ConsumeAndWrite(
        string dbPath,
        SnapshotInfo snapshotInfo,
        BlockingCollection queue,
        PipelineState state,
        CancellationToken token);

    /// <summary>
    /// Runs optional validation on the written database (e.g. row count checks, referential integrity) according to <paramref name="mode"/>.
    /// </summary>
    /// <param name="dbPath">Path to the database file.</param>
    /// <param name="rawData">Original snapshot data used for expected counts.</param>
    /// <param name="mode">Validation level (none, minimal, full).</param>
    void Validate(string dbPath, RawSnapshotData rawData, ValidationMode mode);
}
diff --git a/Core/ExportDestination/SqliteExportDestination.cs b/Core/ExportDestination/SqliteExportDestination.cs
new file mode 100644
index 0000000..dfa2b64
--- /dev/null
+++ b/Core/ExportDestination/SqliteExportDestination.cs
@@ -0,0 +1,26 @@
using System.Collections.Concurrent;

namespace MemorySnapshotDataTools.ExportDestination;

/// <summary>
/// SQLite implementation of <see cref="IExportDestinationWriter"/>. Delegates to <see cref="SqliteWriter"/> for writing and validation.
/// The writer it delegates to switches the database to WAL journal mode and inserts inside a transaction.
/// </summary>
internal sealed class SqliteExportDestination : IExportDestinationWriter
{
    /// <inheritdoc />
    public string DestinationName => "sqlite";

    /// <inheritdoc />
    public WriteStats ConsumeAndWrite(
        string dbPath,
        SnapshotInfo snapshotInfo,
        BlockingCollection queue,
        PipelineState state,
        CancellationToken token)
        => SqliteWriter.ConsumeAndWrite(dbPath, snapshotInfo, queue, state, token);

    /// <inheritdoc />
    public void Validate(string dbPath, RawSnapshotData rawData, ValidationMode mode)
        => SqliteWriter.Validate(dbPath, rawData, mode);
}
diff --git a/Core/ExportDestination/SqliteWriter.cs b/Core/ExportDestination/SqliteWriter.cs
new file mode 100644
index 0000000..7a01266
--- /dev/null
+++ b/Core/ExportDestination/SqliteWriter.cs
@@ -0,0 +1,721 @@
using System.Collections.Concurrent;
using System.Diagnostics;
using System.Text;
using Microsoft.Data.Sqlite;
using MemorySnapshotDataTools;

namespace MemorySnapshotDataTools.ExportDestination;

/// <summary>
/// Static helper for writing snapshot data to SQLite: schema creation, inserts from the queue,
/// and optional validation (row counts and referential integrity).
/// Used by <see cref="SqliteExportDestination"/>.
/// </summary>
internal static class SqliteWriter
{
    // Cap on bound parameters in one multi-row INSERT; divided by the column count to get rows per statement.
    // NOTE(review): presumably chosen to stay under SQLite's bound-variable limit — confirm.
    private const int MaxSqlParametersPerStatement = 900;

    // Upper bound on rows per multi-row INSERT, applied together with the parameter cap above.
    private const int DefaultRowsPerBulkInsert = 128;

    #region Validation

    /// <summary>
    /// Validates the database at <paramref name="dbPath"/>: for minimal mode checks row counts against <paramref name="rawData"/>;
    /// for full mode also checks primary key uniqueness and connection/region/allocation referential integrity.
    /// </summary>
    /// <param name="dbPath">Path to the SQLite database file.</param>
    /// <param name="rawData">Expected snapshot data for count comparison.</param>
    /// <param name="mode">Validation level (none, minimal, full).</param>
    /// <exception cref="InvalidOperationException">If counts or referential checks fail.</exception>
public static void Validate(string dbPath, RawSnapshotData rawData, ValidationMode mode)
{
    // ValidationMode.None: nothing to check, and the database is not even opened.
    if (mode == ValidationMode.None)
        return;

    using var connection = new SqliteConnection($"Data Source={dbPath}");
    connection.Open();

    // Minimal and full modes: every data table must hold exactly as many rows as were extracted.
    var nativeCount = QueryCount(connection, "SELECT COUNT(*) FROM native_objects;");
    var managedCount = QueryCount(connection, "SELECT COUNT(*) FROM managed_objects;");
    var connectionCount = QueryCount(connection, "SELECT COUNT(*) FROM connections;");
    var rootCount = QueryCount(connection, "SELECT COUNT(*) FROM native_roots;");
    var regionCount = QueryCount(connection, "SELECT COUNT(*) FROM memory_regions;");
    var allocationCount = QueryCount(connection, "SELECT COUNT(*) FROM native_allocations;");

    if (nativeCount != rawData.NativeObjects.Count ||
        managedCount != rawData.ManagedObjects.Count ||
        connectionCount != rawData.Connections.Count ||
        rootCount != rawData.NativeRoots.Count ||
        regionCount != rawData.MemoryRegions.Count ||
        allocationCount != rawData.NativeAllocations.Count)
    {
        // The original sentence is kept as the prefix; the expected and actual counts are appended so the
        // failing table can be identified from the exception alone, as the DuckDB validator already does.
        throw new InvalidOperationException(
            "SQLite validation count mismatch between extracted rows and persisted rows. " +
            $"expected=(native={rawData.NativeObjects.Count}, managed={rawData.ManagedObjects.Count}, " +
            $"connections={rawData.Connections.Count}, roots={rawData.NativeRoots.Count}, " +
            $"regions={rawData.MemoryRegions.Count}, allocations={rawData.NativeAllocations.Count}) " +
            $"actual=(native={nativeCount}, managed={managedCount}, connections={connectionCount}, " +
            $"roots={rootCount}, regions={regionCount}, allocations={allocationCount})");
    }

    if (mode == ValidationMode.Full)
    {
        // Quick full-mode sanity check on key uniqueness and not-null semantics.
        var duplicateNativeKeys = QueryCount(connection, "SELECT COUNT(*) FROM (SELECT native_object_index, COUNT(*) c FROM native_objects GROUP BY native_object_index HAVING c > 1);");
        var duplicateManagedKeys = QueryCount(connection, "SELECT COUNT(*) FROM (SELECT managed_object_index, COUNT(*) c FROM managed_objects GROUP BY managed_object_index HAVING c > 1);");
        var duplicateRegionKeys = QueryCount(connection, "SELECT COUNT(*) FROM (SELECT region_index, COUNT(*) c FROM memory_regions GROUP BY region_index HAVING c > 1);");
        var duplicateAllocationKeys = QueryCount(connection, "SELECT COUNT(*) FROM (SELECT allocation_index, COUNT(*) c FROM native_allocations GROUP BY allocation_index HAVING c > 1);");
        if (duplicateNativeKeys > 0 || duplicateManagedKeys > 0 || duplicateRegionKeys > 0 || duplicateAllocationKeys > 0)
            throw new InvalidOperationException("SQLite validation failed: duplicate primary key rows found.");

        // Each connection endpoint must resolve to an existing row of the table named by its kind.
        var orphanFromManaged = QueryCount(connection, """
            SELECT COUNT(*) FROM connections c
            WHERE c.from_kind = 'managed_object'
              AND NOT EXISTS (
                SELECT 1
                FROM managed_objects m
                WHERE m.managed_object_index = c.from_index
              );
            """);
        var orphanFromNative = QueryCount(connection, """
            SELECT COUNT(*) FROM connections c
            WHERE c.from_kind = 'native_object'
              AND NOT EXISTS (
                SELECT 1
                FROM native_objects n
                WHERE n.native_object_index = c.from_index
              );
            """);
        var orphanToManaged = QueryCount(connection, """
            SELECT COUNT(*) FROM connections c
            WHERE c.to_kind = 'managed_object'
              AND NOT EXISTS (
                SELECT 1
                FROM managed_objects m
                WHERE m.managed_object_index = c.to_index
              );
            """);
        var orphanToNative = QueryCount(connection, """
            SELECT COUNT(*) FROM connections c
            WHERE c.to_kind = 'native_object'
              AND NOT EXISTS (
                SELECT 1
                FROM native_objects n
                WHERE n.native_object_index = c.to_index
              );
            """);
        // Any kind string other than the two checked above is rejected.
        var unknownKinds = QueryCount(connection, """
            SELECT COUNT(*) FROM connections
            WHERE from_kind NOT IN ('managed_object','native_object')
               OR to_kind NOT IN ('managed_object','native_object');
            """);
        // Memory map: non-NULL links between allocations and regions must resolve in both directions.
        var orphanAllocationRegionRefs = QueryCount(connection, """
            SELECT COUNT(*) FROM native_allocations a
            WHERE a.memory_region_index IS NOT NULL
              AND NOT EXISTS (
                SELECT 1
                FROM memory_regions r
                WHERE r.region_index = a.memory_region_index
              );
            """);
        var orphanRegionFirstAllocationRefs = QueryCount(connection, """
            SELECT COUNT(*) FROM memory_regions r
            WHERE r.first_allocation_index IS NOT NULL
              AND NOT EXISTS (
                SELECT 1
                FROM native_allocations a
                WHERE a.allocation_index = r.first_allocation_index
              );
            """);

        if (orphanFromManaged > 0 || orphanFromNative > 0 || orphanToManaged > 0 || orphanToNative > 0 || unknownKinds > 0 ||
            orphanAllocationRegionRefs > 0 || orphanRegionFirstAllocationRefs > 0)
        {
            throw new InvalidOperationException(
                $"SQLite validation failed: invalid graph or memory-map references. " +
                $"orphan_from_managed={orphanFromManaged}, orphan_from_native={orphanFromNative}, " +
                $"orphan_to_managed={orphanToManaged}, orphan_to_native={orphanToNative}, unknown_kinds={unknownKinds}, " +
                $"orphan_allocation_region_refs={orphanAllocationRegionRefs}, orphan_region_first_allocation_refs={orphanRegionFirstAllocationRefs}");
        }
    }
}

#endregion

#region ConsumeAndWrite

/// <summary>
/// Consumes batches from the queue, writes all tables to the SQLite database, and returns per-table row counts and timings.
/// Creates the directory for <paramref name="dbPath"/> if needed, enables WAL mode, and runs schema creation and the row
/// inserts inside one transaction; indexes are created afterwards in a second transaction.
/// </summary>
/// <param name="dbPath">Output database file path.</param>
/// <param name="snapshotInfo">Metadata to insert into snapshot_info.</param>
/// <param name="queue">Bounded queue of write batches.</param>
/// <param name="state">Shared pipeline state to update.</param>
/// <param name="token">Cancellation token.</param>
/// <returns>Per-table row counts and insert/commit/index timings.</returns>
public static WriteStats ConsumeAndWrite(
    string dbPath,
    SnapshotInfo snapshotInfo,
    BlockingCollection queue,
    PipelineState state,
    CancellationToken token)
{
    // NOTE(review): the element type of the queue is not visible in this listing — confirm against the real file.

    // Make sure the target folder exists; a bare file name has no directory part and is skipped.
    var directory = Path.GetDirectoryName(dbPath);
    if (!string.IsNullOrEmpty(directory))
        Directory.CreateDirectory(directory);

    using var connection = new SqliteConnection($"Data Source={dbPath}");
    connection.Open();
    // Connection-level settings, issued outside any transaction (null transaction argument).
    Exec(connection, null, "PRAGMA journal_mode=WAL;");
    Exec(connection, null, "PRAGMA synchronous=NORMAL;");
    Exec(connection, null, "PRAGMA temp_store=MEMORY;");
    Exec(connection, null, "PRAGMA cache_size=-200000;");

    var stats = new WriteStats();

    // Schema creation, the snapshot_info row and all data rows share this one transaction.
    using var transaction = connection.BeginTransaction();
    try
    {
        ExecScript(connection, transaction, SchemaTablesScript);

        using var snapshotCmd = connection.CreateCommand();
        snapshotCmd.Transaction = transaction;
        snapshotCmd.CommandText = "INSERT INTO snapshot_info(snapshot_path, exported_at_utc, unity_version) VALUES ($p, $e, $u);";
        snapshotCmd.Parameters.AddWithValue("$p", snapshotInfo.SnapshotPath);
        snapshotCmd.Parameters.AddWithValue("$e", snapshotInfo.ExportedAtUtc);
        // A missing Unity version must be bound as DBNull: Microsoft.Data.Sqlite refuses to execute a
        // command whose parameter value is a CLR null.
        snapshotCmd.Parameters.AddWithValue("$u", (object?)snapshotInfo.UnityVersion ?? DBNull.Value);
        snapshotCmd.ExecuteNonQuery();
        state.AddWritten(1);

        // One reusable command per table; only the parameter values change from row to row.
        var insertSw = Stopwatch.StartNew();
        using var nativeCmd = PrepareNativeInsert(connection, transaction);
        using var managedCmd = PrepareManagedInsert(connection, transaction);
        using var connectionCmd = PrepareConnectionInsert(connection, transaction);
        using var rootCmd = PrepareRootInsert(connection, transaction);
        using var regionCmd = PrepareRegionInsert(connection, transaction);
        using var allocationCmd = PrepareAllocationInsert(connection, transaction);

        // Blocks until the queue is completed and empty, or the token is cancelled.
        foreach (var batch in queue.GetConsumingEnumerable(token))
        {
            token.ThrowIfCancellationRequested();
            state.DecrementQueuedBatches();
            switch (batch.Kind)
            {
                case WriteBatchKind.NativeObjects:
                    var nativeSw = Stopwatch.StartNew();
                    foreach (var row in batch.NativeObjects)
                    {
                        nativeCmd.Parameters[0].Value = row.NativeObjectIndex;
                        nativeCmd.Parameters[1].Value = row.InstanceId ?? string.Empty;
                        nativeCmd.Parameters[2].Value = row.Name ?? string.Empty;
                        // Unsigned sizes/addresses are reinterpreted as signed 64-bit values for storage.
                        nativeCmd.Parameters[3].Value = unchecked((long)row.SizeBytes);
                        nativeCmd.Parameters[4].Value = row.TypeIndex;
                        nativeCmd.Parameters[5].Value = row.NativeTypeName ?? string.Empty;
                        nativeCmd.Parameters[6].Value = row.IsDestroyed ? 1 : 0;
                        nativeCmd.ExecuteNonQuery();
                    }
                    nativeSw.Stop();
                    stats.NativeObjectRows += batch.NativeObjects.Length;
                    stats.NativeObjectInsertMs += nativeSw.ElapsedMilliseconds;
                    state.AddWritten(batch.NativeObjects.Length);
                    break;

                case WriteBatchKind.ManagedObjects:
                    var managedSw = Stopwatch.StartNew();
                    foreach (var row in batch.ManagedObjects)
                    {
                        managedCmd.Parameters[0].Value = row.ManagedObjectIndex;
                        managedCmd.Parameters[1].Value = unchecked((long)row.Address);
                        managedCmd.Parameters[2].Value = row.SizeBytes;
                        managedCmd.Parameters[3].Value = row.TypeIndex;
                        managedCmd.Parameters[4].Value = row.ManagedTypeName ?? string.Empty;
                        // A negative index means "no linked native object" and is stored as NULL.
                        managedCmd.Parameters[5].Value = row.NativeObjectIndex >= 0 ? row.NativeObjectIndex : DBNull.Value;
                        managedCmd.ExecuteNonQuery();
                    }
                    managedSw.Stop();
                    stats.ManagedObjectRows += batch.ManagedObjects.Length;
                    stats.ManagedObjectInsertMs += managedSw.ElapsedMilliseconds;
                    state.AddWritten(batch.ManagedObjects.Length);
                    break;

                case WriteBatchKind.Connections:
                    var connectionSw = Stopwatch.StartNew();
                    foreach (var row in batch.Connections)
                    {
                        connectionCmd.Parameters[0].Value = row.FromKind ?? string.Empty;
                        connectionCmd.Parameters[1].Value = row.FromIndex;
                        connectionCmd.Parameters[2].Value = row.ToKind ?? string.Empty;
                        connectionCmd.Parameters[3].Value = row.ToIndex;
                        connectionCmd.Parameters[4].Value = row.ConnectionType ?? string.Empty;
                        connectionCmd.ExecuteNonQuery();
                    }
                    connectionSw.Stop();
                    stats.ConnectionRows += batch.Connections.Length;
                    stats.ConnectionInsertMs += connectionSw.ElapsedMilliseconds;
                    state.AddWritten(batch.Connections.Length);
                    break;

                case WriteBatchKind.NativeRoots:
                    var rootSw = Stopwatch.StartNew();
                    foreach (var row in batch.NativeRoots)
                    {
                        rootCmd.Parameters[0].Value = row.RootIndex;
                        rootCmd.Parameters[1].Value = row.RootId;
                        rootCmd.Parameters[2].Value = row.AreaName ?? string.Empty;
                        rootCmd.Parameters[3].Value = row.ObjectName ?? string.Empty;
                        rootCmd.Parameters[4].Value = unchecked((long)row.AccumulatedSizeBytes);
                        rootCmd.ExecuteNonQuery();
                    }
                    rootSw.Stop();
                    stats.NativeRootRows += batch.NativeRoots.Length;
                    stats.NativeRootInsertMs += rootSw.ElapsedMilliseconds;
                    state.AddWritten(batch.NativeRoots.Length);
                    break;

                case WriteBatchKind.MemoryRegions:
                    var regionSw = Stopwatch.StartNew();
                    foreach (var row in batch.MemoryRegions)
                    {
                        regionCmd.Parameters[0].Value = row.RegionIndex;
                        regionCmd.Parameters[1].Value = unchecked((long)row.AddressBase);
                        regionCmd.Parameters[2].Value = unchecked((long)row.AddressSize);
                        regionCmd.Parameters[3].Value = row.Name ?? string.Empty;
                        // Negative parent / first-allocation indexes are stored as NULL.
                        regionCmd.Parameters[4].Value = row.ParentRegionIndex >= 0 ? row.ParentRegionIndex : DBNull.Value;
                        regionCmd.Parameters[5].Value = row.FirstAllocationIndex >= 0 ? row.FirstAllocationIndex : DBNull.Value;
                        regionCmd.Parameters[6].Value = row.NumAllocations;
                        regionCmd.ExecuteNonQuery();
                    }
                    regionSw.Stop();
                    stats.MemoryRegionRows += batch.MemoryRegions.Length;
                    stats.MemoryRegionInsertMs += regionSw.ElapsedMilliseconds;
                    state.AddWritten(batch.MemoryRegions.Length);
                    break;

                case WriteBatchKind.NativeAllocations:
                    var allocationSw = Stopwatch.StartNew();
                    foreach (var row in batch.NativeAllocations)
                    {
                        allocationCmd.Parameters[0].Value = row.AllocationIndex;
                        allocationCmd.Parameters[1].Value = unchecked((long)row.Address);
                        allocationCmd.Parameters[2].Value = unchecked((long)row.SizeBytes);
                        allocationCmd.Parameters[3].Value = unchecked((long)row.OverheadSizeBytes);
                        allocationCmd.Parameters[4].Value = unchecked((long)row.PaddingSizeBytes);
                        allocationCmd.Parameters[5].Value = row.MemoryRegionIndex >= 0 ? row.MemoryRegionIndex : DBNull.Value;
                        allocationCmd.ExecuteNonQuery();
                    }
                    allocationSw.Stop();
                    stats.NativeAllocationRows += batch.NativeAllocations.Length;
                    stats.NativeAllocationInsertMs += allocationSw.ElapsedMilliseconds;
                    state.AddWritten(batch.NativeAllocations.Length);
                    break;
            }
        }
        insertSw.Stop();
        stats.TotalInsertMs = insertSw.ElapsedMilliseconds;

        var commitSw = Stopwatch.StartNew();
        transaction.Commit();
        commitSw.Stop();
        stats.CommitMs = commitSw.ElapsedMilliseconds;

        // Indexes are built after the data is committed, in a transaction of their own.
        var indexSw = Stopwatch.StartNew();
        using (var indexTransaction = connection.BeginTransaction())
        {
            ExecScript(connection, indexTransaction, CreateIndexesScript);
            indexTransaction.Commit();
        }
        indexSw.Stop();
        stats.IndexBuildMs = indexSw.ElapsedMilliseconds;
        return stats;
    }
    catch
    {
        // Best-effort rollback; a failure here must not hide the exception that brought us here.
        try
        {
            transaction.Rollback();
        }
        catch
        {
            // Keep original failure.
        }
        throw;
    }
}

#endregion

#region Schema

/// <summary>
/// Number of rows to place in one multi-row INSERT for a table with <paramref name="columnCount"/> columns.
/// </summary>
/// <param name="columnCount">Columns (and therefore parameters) per row; values below 1 are treated as 1.</param>
/// <returns>
/// At least 1 and at most <see cref="DefaultRowsPerBulkInsert"/>, further limited so that rows times columns
/// stays within <see cref="MaxSqlParametersPerStatement"/>.
/// </returns>
private static int RowsPerStatement(int columnCount)
{
    var byParams = Math.Max(1, MaxSqlParametersPerStatement / Math.Max(1, columnCount));
    return Math.Max(1, Math.Min(DefaultRowsPerBulkInsert, byParams));
}

/// <summary>Builds the reusable single-row INSERT for native_objects; parameters are addressed by position.</summary>
/// <param name="connection">Open connection that owns the command.</param>
/// <param name="tx">Transaction the command is enlisted in.</param>
/// <returns>A command the caller must dispose.</returns>
private static SqliteCommand PrepareNativeInsert(SqliteConnection connection, SqliteTransaction tx)
{
    var command = connection.CreateCommand();
    command.Transaction = tx;
    command.CommandText = "INSERT INTO native_objects(native_object_index, instance_id, name, size_bytes, type_index, native_type_name, is_destroyed) VALUES ($i, $id, $n, $s, $t, $tn, $d);";
    _ = command.Parameters.Add("$i", SqliteType.Integer);
    _ = command.Parameters.Add("$id", SqliteType.Text);
    _ = command.Parameters.Add("$n", SqliteType.Text);
    _ = command.Parameters.Add("$s", SqliteType.Integer);
    _ = command.Parameters.Add("$t", SqliteType.Integer);
    _ = command.Parameters.Add("$tn", SqliteType.Text);
    _ = command.Parameters.Add("$d", SqliteType.Integer);
    return command;
}

/// <summary>Builds the reusable single-row INSERT for managed_objects; parameters are addressed by position.</summary>
/// <param name="connection">Open connection that owns the command.</param>
/// <param name="tx">Transaction the command is enlisted in.</param>
/// <returns>A command the caller must dispose.</returns>
private static SqliteCommand PrepareManagedInsert(SqliteConnection connection, SqliteTransaction tx)
{
    var command = connection.CreateCommand();
    command.Transaction = tx;
    command.CommandText = "INSERT INTO managed_objects(managed_object_index, address, size_bytes, type_index, managed_type_name, native_object_index) VALUES ($i, $a, $s, $t, $tn, $ni);";
    _ = command.Parameters.Add("$i", SqliteType.Integer);
    _ = command.Parameters.Add("$a", SqliteType.Integer);
    _ = command.Parameters.Add("$s", SqliteType.Integer);
    _ = command.Parameters.Add("$t", SqliteType.Integer);
    _ = command.Parameters.Add("$tn", SqliteType.Text);
    _ = command.Parameters.Add("$ni", SqliteType.Integer);
    return command;
}

/// <summary>Builds the reusable single-row INSERT for connections; parameters are addressed by position.</summary>
/// <param name="connection">Open connection that owns the command.</param>
/// <param name="tx">Transaction the command is enlisted in.</param>
/// <returns>A command the caller must dispose.</returns>
private static SqliteCommand PrepareConnectionInsert(SqliteConnection connection, SqliteTransaction tx)
{
    var command = connection.CreateCommand();
    command.Transaction = tx;
    command.CommandText = "INSERT INTO connections(from_kind, from_index, to_kind, to_index, connection_type) VALUES ($fk, $fi, $tk, $ti, $ct);";
    _ = command.Parameters.Add("$fk", SqliteType.Text);
    _ = command.Parameters.Add("$fi", SqliteType.Integer);
    _ = command.Parameters.Add("$tk", SqliteType.Text);
    _ = command.Parameters.Add("$ti", SqliteType.Integer);
    _ = command.Parameters.Add("$ct", SqliteType.Text);
    return command;
}

/// <summary>Builds the reusable single-row INSERT for native_roots; parameters are addressed by position.</summary>
/// <param name="connection">Open connection that owns the command.</param>
/// <param name="tx">Transaction the command is enlisted in.</param>
/// <returns>A command the caller must dispose.</returns>
private static SqliteCommand PrepareRootInsert(SqliteConnection connection, SqliteTransaction tx)
{
    var command = connection.CreateCommand();
    command.Transaction = tx;
    command.CommandText = "INSERT INTO native_roots(root_index, root_id, area_name, object_name, accumulated_size_bytes) VALUES ($i, $rid, $a, $o, $s);";
    _ = command.Parameters.Add("$i", SqliteType.Integer);
    _ = command.Parameters.Add("$rid", SqliteType.Integer);
    _ = command.Parameters.Add("$a", SqliteType.Text);
    _ = command.Parameters.Add("$o", SqliteType.Text);
    _ = command.Parameters.Add("$s", SqliteType.Integer);
    return command;
}

/// <summary>Builds the reusable single-row INSERT for memory_regions; parameters are addressed by position.</summary>
/// <param name="connection">Open connection that owns the command.</param>
/// <param name="tx">Transaction the command is enlisted in.</param>
/// <returns>A command the caller must dispose.</returns>
private static SqliteCommand PrepareRegionInsert(SqliteConnection connection, SqliteTransaction tx)
{
    var command = connection.CreateCommand();
    command.Transaction = tx;
    command.CommandText = "INSERT INTO memory_regions(region_index, address_base, address_size, name, parent_region_index, first_allocation_index, num_allocations) VALUES ($i, $ab, $as, $n, $p, $f, $c);";
    _ = command.Parameters.Add("$i", SqliteType.Integer);
    _ = command.Parameters.Add("$ab", SqliteType.Integer);
    _ = command.Parameters.Add("$as", SqliteType.Integer);
    _ = command.Parameters.Add("$n", SqliteType.Text);
    _ = command.Parameters.Add("$p", SqliteType.Integer);
    _ = command.Parameters.Add("$f", SqliteType.Integer);
    _ = command.Parameters.Add("$c", SqliteType.Integer);
    return command;
}

/// <summary>Builds the reusable single-row INSERT for native_allocations; parameters are addressed by position.</summary>
/// <param name="connection">Open connection that owns the command.</param>
/// <param name="tx">Transaction the command is enlisted in.</param>
/// <returns>A command the caller must dispose.</returns>
private static SqliteCommand PrepareAllocationInsert(SqliteConnection connection, SqliteTransaction tx)
{
    var command = connection.CreateCommand();
command.Transaction = tx; + command.CommandText = "INSERT INTO native_allocations(allocation_index, address, size_bytes, overhead_size_bytes, padding_size_bytes, memory_region_index) VALUES ($i, $a, $s, $o, $p, $r);"; + _ = command.Parameters.Add("$i", SqliteType.Integer); + _ = command.Parameters.Add("$a", SqliteType.Integer); + _ = command.Parameters.Add("$s", SqliteType.Integer); + _ = command.Parameters.Add("$o", SqliteType.Integer); + _ = command.Parameters.Add("$p", SqliteType.Integer); + _ = command.Parameters.Add("$r", SqliteType.Integer); + return command; + } + + #endregion + + #region Bulk insert + + private static SqliteCommand CreateBulkInsertCommand( + SqliteConnection connection, + SqliteTransaction tx, + string insertPrefix, + int rowCount, + int columnCount) + { + var command = connection.CreateCommand(); + command.Transaction = tx; + var sql = new StringBuilder(insertPrefix.Length + rowCount * (columnCount * 6 + 3)); + sql.Append(insertPrefix); + for (var row = 0; row < rowCount; row++) + { + if (row > 0) + sql.Append(','); + + sql.Append('('); + for (var col = 0; col < columnCount; col++) + { + if (col > 0) + sql.Append(','); + sql.Append("$p").Append(row * columnCount + col); + } + sql.Append(')'); + } + + command.CommandText = sql.ToString(); + return command; + } + + private static void WriteNativeObjectRows(SqliteConnection connection, SqliteTransaction tx, NativeObjectRow[] rows) + { + const int cols = 7; + const string insertPrefix = "INSERT INTO native_objects(native_object_index, instance_id, name, size_bytes, type_index, native_type_name, is_destroyed) VALUES "; + var rowsPerStatement = RowsPerStatement(cols); + for (var start = 0; start < rows.Length; start += rowsPerStatement) + { + var count = Math.Min(rowsPerStatement, rows.Length - start); + using var command = CreateBulkInsertCommand(connection, tx, insertPrefix, count, cols); + for (var i = 0; i < count; i++) + { + var row = rows[start + i]; + var p = i * cols; + 
command.Parameters.AddWithValue($"$p{p}", row.NativeObjectIndex); + command.Parameters.AddWithValue($"$p{p + 1}", row.InstanceId ?? string.Empty); + command.Parameters.AddWithValue($"$p{p + 2}", row.Name ?? string.Empty); + command.Parameters.AddWithValue($"$p{p + 3}", unchecked((long)row.SizeBytes)); + command.Parameters.AddWithValue($"$p{p + 4}", row.TypeIndex); + command.Parameters.AddWithValue($"$p{p + 5}", row.NativeTypeName ?? string.Empty); + command.Parameters.AddWithValue($"$p{p + 6}", row.IsDestroyed ? 1 : 0); + } + command.ExecuteNonQuery(); + } + } + + private static void WriteManagedObjectRows(SqliteConnection connection, SqliteTransaction tx, ManagedObjectRow[] rows) + { + const int cols = 6; + const string insertPrefix = "INSERT INTO managed_objects(managed_object_index, address, size_bytes, type_index, managed_type_name, native_object_index) VALUES "; + var rowsPerStatement = RowsPerStatement(cols); + for (var start = 0; start < rows.Length; start += rowsPerStatement) + { + var count = Math.Min(rowsPerStatement, rows.Length - start); + using var command = CreateBulkInsertCommand(connection, tx, insertPrefix, count, cols); + for (var i = 0; i < count; i++) + { + var row = rows[start + i]; + var p = i * cols; + command.Parameters.AddWithValue($"$p{p}", row.ManagedObjectIndex); + command.Parameters.AddWithValue($"$p{p + 1}", unchecked((long)row.Address)); + command.Parameters.AddWithValue($"$p{p + 2}", row.SizeBytes); + command.Parameters.AddWithValue($"$p{p + 3}", row.TypeIndex); + command.Parameters.AddWithValue($"$p{p + 4}", row.ManagedTypeName ?? string.Empty); + command.Parameters.AddWithValue($"$p{p + 5}", row.NativeObjectIndex >= 0 ? 
/// <summary>
/// Inserts <paramref name="rows"/> into the <c>connections</c> table using multi-row INSERT
/// statements, each holding at most <c>RowsPerStatement(5)</c> rows.
/// </summary>
/// <param name="connection">Open connection to write to.</param>
/// <param name="tx">Transaction every statement is executed in.</param>
/// <param name="rows">Rows to insert; null string fields are written as empty strings.</param>
private static void WriteConnectionRows(SqliteConnection connection, SqliteTransaction tx, ConnectionRow[] rows)
{
    const int columnsPerRow = 5;
    const string insertPrefix = "INSERT INTO connections(from_kind, from_index, to_kind, to_index, connection_type) VALUES ";

    var chunkCapacity = RowsPerStatement(columnsPerRow);
    var offset = 0;
    while (offset < rows.Length)
    {
        var chunkLength = Math.Min(chunkCapacity, rows.Length - offset);
        using (var insert = CreateBulkInsertCommand(connection, tx, insertPrefix, chunkLength, columnsPerRow))
        {
            var parameters = insert.Parameters;

            // Placeholder numbers continue across the rows of one statement.
            var ordinal = 0;
            for (var n = 0; n < chunkLength; n++)
            {
                var current = rows[offset + n];
                parameters.AddWithValue($"$p{ordinal++}", current.FromKind ?? string.Empty);
                parameters.AddWithValue($"$p{ordinal++}", current.FromIndex);
                parameters.AddWithValue($"$p{ordinal++}", current.ToKind ?? string.Empty);
                parameters.AddWithValue($"$p{ordinal++}", current.ToIndex);
                parameters.AddWithValue($"$p{ordinal++}", current.ConnectionType ?? string.Empty);
            }

            insert.ExecuteNonQuery();
        }

        offset += chunkCapacity;
    }
}
string.Empty); + command.Parameters.AddWithValue($"$p{p + 3}", row.ObjectName ?? string.Empty); + command.Parameters.AddWithValue($"$p{p + 4}", unchecked((long)row.AccumulatedSizeBytes)); + } + command.ExecuteNonQuery(); + } + } + + private static void WriteMemoryRegionRows(SqliteConnection connection, SqliteTransaction tx, MemoryRegionRow[] rows) + { + const int cols = 7; + const string insertPrefix = "INSERT INTO memory_regions(region_index, address_base, address_size, name, parent_region_index, first_allocation_index, num_allocations) VALUES "; + var rowsPerStatement = RowsPerStatement(cols); + for (var start = 0; start < rows.Length; start += rowsPerStatement) + { + var count = Math.Min(rowsPerStatement, rows.Length - start); + using var command = CreateBulkInsertCommand(connection, tx, insertPrefix, count, cols); + for (var i = 0; i < count; i++) + { + var row = rows[start + i]; + var p = i * cols; + command.Parameters.AddWithValue($"$p{p}", row.RegionIndex); + command.Parameters.AddWithValue($"$p{p + 1}", unchecked((long)row.AddressBase)); + command.Parameters.AddWithValue($"$p{p + 2}", unchecked((long)row.AddressSize)); + command.Parameters.AddWithValue($"$p{p + 3}", row.Name ?? string.Empty); + command.Parameters.AddWithValue($"$p{p + 4}", row.ParentRegionIndex >= 0 ? row.ParentRegionIndex : DBNull.Value); + command.Parameters.AddWithValue($"$p{p + 5}", row.FirstAllocationIndex >= 0 ? 
/// <summary>
/// Inserts <paramref name="rows"/> into the <c>native_allocations</c> table using multi-row INSERT
/// statements, each holding at most <c>RowsPerStatement(6)</c> rows.
/// </summary>
/// <param name="connection">Open connection to write to.</param>
/// <param name="tx">Transaction every statement is executed in.</param>
/// <param name="rows">Rows to insert.</param>
private static void WriteNativeAllocationRows(SqliteConnection connection, SqliteTransaction tx, NativeAllocationRow[] rows)
{
    const int columnsPerRow = 6;
    const string insertPrefix = "INSERT INTO native_allocations(allocation_index, address, size_bytes, overhead_size_bytes, padding_size_bytes, memory_region_index) VALUES ";

    var chunkCapacity = RowsPerStatement(columnsPerRow);
    var offset = 0;
    while (offset < rows.Length)
    {
        var chunkLength = Math.Min(chunkCapacity, rows.Length - offset);
        using (var insert = CreateBulkInsertCommand(connection, tx, insertPrefix, chunkLength, columnsPerRow))
        {
            var parameters = insert.Parameters;

            // Placeholder numbers continue across the rows of one statement.
            var ordinal = 0;
            for (var n = 0; n < chunkLength; n++)
            {
                var current = rows[offset + n];
                parameters.AddWithValue($"$p{ordinal++}", current.AllocationIndex);

                // Unsigned 64-bit values are reinterpreted as signed so they fit an INTEGER column.
                parameters.AddWithValue($"$p{ordinal++}", unchecked((long)current.Address));
                parameters.AddWithValue($"$p{ordinal++}", unchecked((long)current.SizeBytes));
                parameters.AddWithValue($"$p{ordinal++}", unchecked((long)current.OverheadSizeBytes));
                parameters.AddWithValue($"$p{ordinal++}", unchecked((long)current.PaddingSizeBytes));

                // A negative region index is stored as NULL.
                parameters.AddWithValue($"$p{ordinal++}", current.MemoryRegionIndex >= 0 ? current.MemoryRegionIndex : DBNull.Value);
            }

            insert.ExecuteNonQuery();
        }

        offset += chunkCapacity;
    }
}
/// <summary>
/// Executes <paramref name="sql"/> as a single command on <paramref name="connection"/> inside
/// <paramref name="tx"/> and discards the affected-row count.
/// </summary>
/// <remarks>
/// This used to be a statement-for-statement copy of <c>Exec</c>. It now forwards to it so the
/// command setup lives in one place. The separate name is kept for call sites that run scripts
/// and always have a transaction.
/// </remarks>
/// <param name="connection">Open connection to run the script on.</param>
/// <param name="tx">Transaction the command is assigned to.</param>
/// <param name="sql">SQL text to execute.</param>
private static void ExecScript(SqliteConnection connection, SqliteTransaction tx, string sql)
    => Exec(connection, tx, sql);
/// <summary>
/// SQL script that creates the secondary indexes on the exported tables:
/// both endpoints of <c>connections</c>, <c>instance_id</c> and <c>is_destroyed</c> on
/// <c>native_objects</c>, <c>address</c> on <c>managed_objects</c>, <c>address_base</c> on
/// <c>memory_regions</c>, and <c>address</c> / <c>memory_region_index</c> on <c>native_allocations</c>.
/// </summary>
/// <remarks>
/// The statements use plain <c>CREATE INDEX</c> (no <c>IF NOT EXISTS</c>), so the script fails if an
/// index of the same name already exists.
/// NOTE(review): presumably run after the bulk inserts so the indexes are built once - confirm against the caller.
/// </remarks>
private const string CreateIndexesScript = """
CREATE INDEX idx_connections_from ON connections(from_kind, from_index);
CREATE INDEX idx_connections_to ON connections(to_kind, to_index);
CREATE INDEX idx_native_objects_instance_id ON native_objects(instance_id);
CREATE INDEX idx_native_objects_is_destroyed ON native_objects(is_destroyed);
CREATE INDEX idx_managed_objects_address ON managed_objects(address);
CREATE INDEX idx_memory_regions_address_base ON memory_regions(address_base);
CREATE INDEX idx_native_allocations_address ON native_allocations(address);
CREATE INDEX idx_native_allocations_region ON native_allocations(memory_region_index);
""";
/// <summary>
/// Kind of batch in the producer/consumer pipeline: each batch carries one table's rows.
/// </summary>
public enum WriteBatchKind
{
    /// <summary>Batch of native object rows.</summary>
    NativeObjects,

    /// <summary>Batch of managed object rows.</summary>
    ManagedObjects,

    /// <summary>Batch of connection rows.</summary>
    Connections,

    /// <summary>Batch of native root rows.</summary>
    NativeRoots,

    /// <summary>Batch of memory region rows.</summary>
    MemoryRegions,

    /// <summary>Batch of native allocation rows.</summary>
    NativeAllocations,
}
/// <summary>
/// Progress counters shared between the producers and the writer of the export pipeline:
/// the expected row total, rows materialized, rows written, and batches currently queued.
/// All mutable counters are updated and read through <see cref="Interlocked"/> / <see cref="Volatile"/>.
/// </summary>
public sealed class PipelineState
{
    private long _rowsMaterialized;
    private long _rowsWritten;
    private int _batchesInQueue;

    /// <summary>
    /// Creates the state for one run.
    /// </summary>
    /// <param name="totalRows">Expected number of rows; negative values are stored as zero.</param>
    public PipelineState(long totalRows)
    {
        TotalRows = Math.Max(0, totalRows);
    }

    /// <summary>Expected number of rows for the run (never negative).</summary>
    public long TotalRows { get; }

    /// <summary>Number of rows reported through <see cref="AddMaterialized"/> so far.</summary>
    public long MaterializedRows
    {
        get { return Interlocked.Read(ref _rowsMaterialized); }
    }

    /// <summary>Number of rows reported through <see cref="AddWritten"/> so far.</summary>
    public long WrittenRows
    {
        get { return Interlocked.Read(ref _rowsWritten); }
    }

    /// <summary>Increments minus decrements of the queued-batch counter.</summary>
    public int QueuedBatchCount
    {
        get { return Volatile.Read(ref _batchesInQueue); }
    }

    /// <summary>Adds <paramref name="count"/> to the materialized-row counter.</summary>
    public void AddMaterialized(int count)
    {
        Interlocked.Add(ref _rowsMaterialized, count);
    }

    /// <summary>Adds <paramref name="count"/> to the written-row counter.</summary>
    public void AddWritten(int count)
    {
        Interlocked.Add(ref _rowsWritten, count);
    }

    /// <summary>Records that one more batch has been enqueued.</summary>
    public void IncrementQueuedBatches()
    {
        Interlocked.Increment(ref _batchesInQueue);
    }

    /// <summary>Records that one batch has been dequeued.</summary>
    public void DecrementQueuedBatches()
    {
        Interlocked.Decrement(ref _batchesInQueue);
    }
}
+ public int NativeObjects; + + /// Number of managed objects written. + public int ManagedObjects; + + /// Number of connections written. + public int Connections; + + /// Number of native roots written. + public int NativeRoots; + + /// Number of memory regions written. + public int MemoryRegions; + + /// Number of native allocations written. + public int NativeAllocations; + + /// Time spent materializing batches (ms). + public long MaterializeMs; + + /// Time spent in the writer (ms). + public long WriteMs; + + /// Total export time (ms); typically set by the CLI after the run. + public long TotalMs; + + /// Backend total insert time (ms). + public long BackendInsertMs; + + /// Backend commit time (ms). + public long BackendCommitMs; + + /// Backend index build time (ms). + public long BackendIndexBuildMs; + + /// Per-table insert times (ms). + public long NativeObjectInsertMs; + + /// Per-table insert times (ms). + public long ManagedObjectInsertMs; + + /// Per-table insert times (ms). + public long ConnectionInsertMs; + + /// Per-table insert times (ms). + public long NativeRootInsertMs; + + /// Per-table insert times (ms). + public long MemoryRegionInsertMs; + + /// Per-table insert times (ms). + public long NativeAllocationInsertMs; +} + +/// +/// Per-run statistics returned by : +/// row counts and timings for inserts, commit, and index build. +/// +public sealed class WriteStats +{ + /// Rows written per table. + public long NativeObjectRows; + + /// Rows written per table. + public long ManagedObjectRows; + + /// Rows written per table. + public long ConnectionRows; + + /// Rows written per table. + public long NativeRootRows; + + /// Rows written per table. + public long MemoryRegionRows; + + /// Rows written per table. + public long NativeAllocationRows; + + /// Insert time per table (ms). + public long NativeObjectInsertMs; + + /// Insert time per table (ms). + public long ManagedObjectInsertMs; + + /// Insert time per table (ms). 
/// <summary>
/// Abstraction for progress and status reporting during long-running operations.
/// Implemented by the CLI (e.g. ConsoleProgress) and passed into Core APIs
/// so that extraction, export, and report steps can report progress without depending on the host.
/// </summary>
public interface IProgressReporter
{
    /// <summary>
    /// Report a progress or status message.
    /// </summary>
    /// <param name="message">Message to report (e.g. "Extracting...", "Written 10000 rows").</param>
    /// <param name="force">If true, report immediately; otherwise the implementation may throttle (e.g. by time).</param>
    void Report(string message, bool force = false);
}
/// <summary>
/// Settings for one report-generation run: which database to read, where to write the HTML,
/// and the title to show. Created by the CLI from parsed arguments.
/// </summary>
public sealed class ReportRunOptions
{
    /// <summary>
    /// Path to the exported database (DuckDB or SQLite). Defaults to the empty string.
    /// </summary>
    public string ReportDbPath { get; set; } = "";

    /// <summary>
    /// Output HTML path. When null (the default), a temp file is used and the report is opened in the browser.
    /// </summary>
    public string? ReportOutputPath { get; set; }

    /// <summary>
    /// Title shown in the generated report. Defaults to "Memory Snapshot Report".
    /// </summary>
    public string ReportTitle { get; set; } = "Memory Snapshot Report";
}
/// <summary>
/// Total number of data rows (all lists combined); used for pipeline progress.
/// </summary>
/// <remarks>
/// Each <c>Count</c> is a 32-bit <see cref="int"/>. The first operand is widened to
/// <see cref="long"/> so the whole sum is carried out in 64-bit arithmetic. Adding the counts
/// as ints and widening only the result could wrap silently once the combined row count
/// exceeds <see cref="int.MaxValue"/>.
/// </remarks>
public long TotalRows =>
    (long)NativeObjects.Count
    + ManagedObjects.Count
    + Connections.Count
    + NativeRoots.Count
    + MemoryRegions.Count
    + NativeAllocations.Count;
"Texture2D", "GameObject"). + public string NativeTypeName; + + /// Whether the object is marked destroyed. + public bool IsDestroyed; +} + +/// +/// One row from the managed_objects table: a managed heap object. +/// +public struct ManagedObjectRow +{ + /// Zero-based index in the managed objects list. + public int ManagedObjectIndex; + + /// Address on the managed heap. + public ulong Address; + + /// Size in bytes. + public long SizeBytes; + + /// Index into the managed type descriptions. + public int TypeIndex; + + /// Resolved managed type name. + public string ManagedTypeName; + + /// Linked native object index, or -1 if none. + public long NativeObjectIndex; +} + +/// +/// One row from the connections table: an edge between two objects (e.g. reference, field). +/// +public struct ConnectionRow +{ + /// Source kind: "native_object" or "managed_object". + public string FromKind; + + /// Source object index (native_object_index or managed_object_index). + public long FromIndex; + + /// Target kind: "native_object" or "managed_object". + public string ToKind; + + /// Target object index. + public long ToIndex; + + /// Connection type label (e.g. "GCHandle", "Field"). + public string ConnectionType; +} + +/// +/// One row from the native_roots table: a root reference (e.g. Scene, DontDestroyOnLoad) with accumulated size. +/// +public struct NativeRootRow +{ + /// Zero-based root index. + public int RootIndex; + + /// Root ID from the snapshot. + public long RootId; + + /// Area name (e.g. "Scene", "DontDestroyOnLoad"). + public string AreaName; + + /// Object name for the root. + public string ObjectName; + + /// Accumulated size in bytes for this root. + public ulong AccumulatedSizeBytes; +} + +/// +/// One row from the memory_regions table: a native memory region (address range, hierarchy). +/// +public struct MemoryRegionRow +{ + /// Zero-based region index. + public int RegionIndex; + + /// Base address of the region. 
/// <summary>
/// One row from the native_allocations table: an allocation within a native memory region.
/// </summary>
public struct NativeAllocationRow
{
    /// <summary>Zero-based allocation index.</summary>
    public int AllocationIndex;

    /// <summary>Allocation address.</summary>
    public ulong Address;

    /// <summary>Size in bytes.</summary>
    public ulong SizeBytes;

    /// <summary>Overhead size in bytes.</summary>
    public ulong OverheadSizeBytes;

    /// <summary>Padding size in bytes.</summary>
    public ulong PaddingSizeBytes;

    /// <summary>Containing memory region index, or -1.</summary>
    public int MemoryRegionIndex;
}
/// <summary>
/// Prepares a crawler for <paramref name="snapshot"/>: stores the snapshot and its VM information,
/// rejects unsupported pointer sizes, then builds the managed heap sections, the type-info lookup
/// and the managed-address-to-native-object lookup.
/// </summary>
/// <param name="snapshot">Decoded snapshot to crawl.</param>
/// <exception cref="InvalidOperationException">The VM pointer size is neither 4 nor 8.</exception>
public ManagedSnapshotCrawler(DecodedSnapshot snapshot)
{
    _snapshot = snapshot;
    _vm = snapshot.VirtualMachineInformation;

    // Only 32-bit and 64-bit pointers are supported.
    var pointerSizeSupported = _vm.PointerSize is 4 or 8;
    if (!pointerSizeSupported)
        throw new InvalidOperationException($"Unsupported VM pointer size: {_vm.PointerSize}");

    // The lookups are built after validation, in the same order as before.
    _sections = BuildManagedHeapSections(snapshot);
    _typeInfoToIndex = BuildTypeInfoIndex(snapshot);
    _managedAddressToNativeObjectIndex = BuildManagedAddressToNativeMap(snapshot);
}
/// <summary>
/// Runs the crawl: registers every non-zero GC handle target as a managed object, then drains the
/// work queue, following each object's outgoing references and recording edges.
/// </summary>
/// <returns>The accumulated crawl result.</returns>
private ManagedCrawlResult CrawlInternal()
{
    // Seed the queue from the GC handle table; zero entries are skipped.
    for (var handle = 0; handle < _snapshot.GcHandleTargets.Length; handle++)
    {
        var rootAddress = _snapshot.GcHandleTargets[handle];
        if (rootAddress != 0)
            TryEnsureManagedObject(rootAddress, $"gc-handle[{handle}]");
    }

    // Objects discovered while following references are enqueued by TryEnsureManagedObject.
    while (_crawlQueue.Count > 0)
    {
        var currentAddress = _crawlQueue.Dequeue();
        var ownerIndex = _result.ManagedIndexByAddress[currentAddress];
        var owner = _result.ManagedObjects[ownerIndex];

        foreach (var referenced in EnumerateOutgoingManagedReferences(currentAddress, owner.TypeIndex))
        {
            if (referenced == 0)
                continue;

            // A null index means the target could not be parsed; no edge is recorded for it.
            var referencedIndex = TryEnsureManagedObject(referenced, $"reference from managed index {ownerIndex}");
            if (referencedIndex.HasValue)
                AddManagedEdge(ownerIndex, referencedIndex.Value, "managed_reference");
        }

        if (owner.NativeObjectIndex < 0)
            continue;

        // Objects linked to a native object get an edge in both directions.
        AddManagedToNativeEdge(ownerIndex, owner.NativeObjectIndex, "managed_native_bridge");
        AddNativeToManagedEdge(owner.NativeObjectIndex, ownerIndex, "native_gc_handle_bridge");
    }

    return _result;
}
/// <summary>
/// Maps the pointer read from an object header to a managed type index.
/// The pointer is first looked up directly in the type-info table. If that misses, it is
/// dereferenced once and the pointer read from that location is looked up instead.
/// </summary>
/// <param name="ptrIdentity">Pointer value read from the start of the object.</param>
/// <param name="reason">Context label for diagnostics; not used by this method.</param>
/// <param name="typeIndex">Resolved type index on success; 0 on failure.</param>
/// <returns><c>true</c> when either lookup succeeded; otherwise <c>false</c>.</returns>
private bool TryResolveTypeIndex(ulong ptrIdentity, string reason, out int typeIndex)
{
    // Direct hit: the header pointer is itself a known type-info address.
    if (_typeInfoToIndex.TryGetValue(ptrIdentity, out var directIndex))
    {
        typeIndex = directIndex;
        return true;
    }

    // One level of indirection: read the pointer stored at that address and look it up.
    if (TryReadPointer(ptrIdentity, out var dereferenced)
        && _typeInfoToIndex.TryGetValue(dereferenced, out var indirectIndex))
    {
        typeIndex = indirectIndex;
        return true;
    }

    typeIndex = 0;
    return false;
}
/// <summary>
/// Reads the total element count of the managed array at <paramref name="address"/>.
/// When the bounds pointer in the array header is zero, the count is the 32-bit size field in the
/// header. Otherwise it is the product of the per-dimension lengths read from the bounds block,
/// one 32-bit length every 8 bytes.
/// </summary>
/// <param name="address">Address of the array object.</param>
/// <param name="arrayTypeIndex">Type index of the array; its flags supply the rank.</param>
/// <param name="reason">Context label included in read diagnostics and error messages.</param>
/// <returns>Total number of elements.</returns>
/// <exception cref="InvalidOperationException">The rank is not positive, or a dimension length is negative.</exception>
/// <exception cref="OverflowException">The product of the dimension lengths does not fit in a <see cref="long"/>.</exception>
private long ReadArrayLength(ulong address, int arrayTypeIndex, string reason)
{
    var boundsPointer = ReadPointerStrict(address + _vm.ArrayBoundsOffsetInHeader, $"array bounds for {reason}");
    if (boundsPointer == 0)
    {
        // No bounds block: the length is stored directly in the header.
        return ReadInt32Strict(address + _vm.ArraySizeOffsetInHeader, $"array size for {reason}");
    }

    // The rank is taken from the upper 16 bits of the type flags.
    var rank = (_snapshot.ManagedTypeFlags[arrayTypeIndex] & TypeFlagArrayRankMask) >> 16;
    if (rank <= 0)
        throw new InvalidOperationException($"Invalid array rank {rank} for '{GetTypeName(arrayTypeIndex)}'. reason={reason}");

    long totalLength = 1;
    var dimension = 0;
    while (dimension < rank)
    {
        var extent = ReadInt32Strict(boundsPointer + (ulong)(dimension * 8), $"array rank[{dimension}] length for {reason}");
        if (extent < 0)
            throw new InvalidOperationException($"Negative array dimension length {extent} for '{GetTypeName(arrayTypeIndex)}'. reason={reason}");

        totalLength = checked(totalLength * extent);
        dimension++;
    }

    return totalLength;
}
= 0; i < length; i++) + { + var ptrAddress = checked(arrayDataAddress + checked((ulong)(i * (long)_vm.PointerSize))); + var targetAddress = ReadPointerStrict(ptrAddress, $"array element pointer for '{GetTypeName(arrayTypeIndex)}'"); + if (targetAddress != 0) + yield return targetAddress; + } + } + } + +/* Yields the non-zero reference targets stored in the fields of the object at objectAddress. Static fields and fields with a negative offset are skipped; value-type fields are expanded through EnumerateValueTypeReferences, every other field is read as a pointer at objectAddress + offset. Throws InvalidOperationException when a field index is outside FieldTypeIndices. */ private IEnumerable EnumerateReferenceTypeFieldReferences(ulong objectAddress, int typeIndex) + { + var instanceFields = GetInstanceFieldIndices(typeIndex); + for (var instanceFieldIdx = 0; instanceFieldIdx < instanceFields.Length; instanceFieldIdx++) + { + var fieldIndex = instanceFields[instanceFieldIdx]; + if ((uint)fieldIndex >= (uint)_snapshot.FieldTypeIndices.Length) + throw new InvalidOperationException($"Field index out of range: {fieldIndex} for type '{GetTypeName(typeIndex)}'."); + if (_snapshot.FieldIsStatic[fieldIndex] != 0) + continue; + + var fieldOffset = _snapshot.FieldOffsets[fieldIndex]; + if (fieldOffset < 0) + continue; + + var fieldTypeIndex = _snapshot.FieldTypeIndices[fieldIndex]; + EnsureValidTypeIndex(fieldTypeIndex, $"field '{_snapshot.FieldNames[fieldIndex]}' on '{GetTypeName(typeIndex)}'"); + + var fieldAddress = checked(objectAddress + (ulong)fieldOffset); + if (IsValueType(fieldTypeIndex)) + { + foreach (var reference in EnumerateValueTypeReferences(fieldAddress, fieldTypeIndex, recursionDepth: 0)) + yield return reference; + } + else + { + var targetAddress = ReadPointerStrict(fieldAddress, $"field '{_snapshot.FieldNames[fieldIndex]}' on '{GetTypeName(typeIndex)}'"); + if (targetAddress != 0) + yield return targetAddress; + } + } + } + +/* Yields the non-zero reference targets reachable from a value type stored inline at valueBaseAddress, recursing into nested value-type fields. Throws InvalidOperationException once recursionDepth exceeds 24. */ private IEnumerable EnumerateValueTypeReferences(ulong valueBaseAddress, int valueTypeIndex, int recursionDepth) + { + if (recursionDepth > 24) + throw new InvalidOperationException($"Value-type recursion depth exceeded for '{GetTypeName(valueTypeIndex)}'."); + + var instanceFields = GetInstanceFieldIndices(valueTypeIndex); + for (var instanceFieldIdx = 0; instanceFieldIdx < instanceFields.Length; 
instanceFieldIdx++) + { + var fieldIndex = instanceFields[instanceFieldIdx]; + if ((uint)fieldIndex >= (uint)_snapshot.FieldTypeIndices.Length) + throw new InvalidOperationException($"Value-type field index out of range: {fieldIndex} for '{GetTypeName(valueTypeIndex)}'."); + if (_snapshot.FieldIsStatic[fieldIndex] != 0) + continue; + +/* The recorded offset is reduced by the object header size before use; presumably offsets are stored relative to a boxed instance while the value here is stored inline without a header - confirm against the snapshot format. Fields that end up with a negative offset are skipped. */ var adjustedOffset = _snapshot.FieldOffsets[fieldIndex] - (int)_vm.ObjectHeaderSize; + if (adjustedOffset < 0) + continue; + + var fieldTypeIndex = _snapshot.FieldTypeIndices[fieldIndex]; + EnsureValidTypeIndex(fieldTypeIndex, $"value-type field '{_snapshot.FieldNames[fieldIndex]}' on '{GetTypeName(valueTypeIndex)}'"); + + var fieldAddress = checked(valueBaseAddress + (ulong)adjustedOffset); + if (IsValueType(fieldTypeIndex)) + { + if (fieldTypeIndex == valueTypeIndex) + continue; + foreach (var nested in EnumerateValueTypeReferences(fieldAddress, fieldTypeIndex, recursionDepth + 1)) + yield return nested; + } + else + { + var targetAddress = ReadPointerStrict(fieldAddress, $"value-type field '{_snapshot.FieldNames[fieldIndex]}'"); + if (targetAddress != 0) + yield return targetAddress; + } + } + } + +/* Returns the field indices declared by typeIndex and by every type reached through ManagedTypeBaseOrElementTypeIndices, ordered from the end of that chain (outermost base) down to typeIndex itself. Results are cached per type index in _instanceFieldIndexCache. Static fields are NOT filtered here; callers skip them via FieldIsStatic. Throws InvalidOperationException for an invalid typeIndex or when the chain revisits a type. NOTE(review): only the starting index is validated - a base index at or beyond the array length would surface as IndexOutOfRangeException rather than InvalidOperationException. */ private int[] GetInstanceFieldIndices(int typeIndex) + { + EnsureValidTypeIndex(typeIndex, "enumerate fields"); + if (_instanceFieldIndexCache.TryGetValue(typeIndex, out var cached)) + return cached; + + var chain = new List(8); + var visited = new HashSet(); + var current = typeIndex; + while (current >= 0) + { + if (!visited.Add(current)) + throw new InvalidOperationException($"Cyclic managed base-type chain detected at type index {current}."); + chain.Add(current); + current = _snapshot.ManagedTypeBaseOrElementTypeIndices[current]; + } + + var fields = new List(16); + for (var i = chain.Count - 1; i >= 0; i--) + { + var chainTypeIndex = chain[i]; + var fieldIndices = _snapshot.ManagedTypeFieldIndices[chainTypeIndex]; + for (var fieldIndex = 0; fieldIndex < fieldIndices.Length; fieldIndex++) + fields.Add(fieldIndices[fieldIndex]); + 
} + + cached = fields.ToArray(); + _instanceFieldIndexCache[typeIndex] = cached; + return cached; + } + +/* Appends a managed_object -> managed_object row to _result.ManagedConnections unless the same (from, to, ManagedToManaged) key was already recorded in _edgeDedup. */ private void AddManagedEdge(long fromManagedIndex, long toManagedIndex, string type) + { + if (_edgeDedup.Add(new EdgeKey(fromManagedIndex, toManagedIndex, EdgeType.ManagedToManaged))) + { + _result.ManagedConnections.Add(new ConnectionRow + { + FromKind = "managed_object", + FromIndex = fromManagedIndex, + ToKind = "managed_object", + ToIndex = toManagedIndex, + ConnectionType = type, + }); + } + } + +/* Appends a managed_object -> native_object row once per (from, to, ManagedToNative) key; the row goes into the same ManagedConnections list as the other edge kinds. */ private void AddManagedToNativeEdge(long fromManagedIndex, long toNativeIndex, string type) + { + if (_edgeDedup.Add(new EdgeKey(fromManagedIndex, toNativeIndex, EdgeType.ManagedToNative))) + { + _result.ManagedConnections.Add(new ConnectionRow + { + FromKind = "managed_object", + FromIndex = fromManagedIndex, + ToKind = "native_object", + ToIndex = toNativeIndex, + ConnectionType = type, + }); + } + } + +/* Appends a native_object -> managed_object row once per (from, to, NativeToManaged) key; also stored in ManagedConnections. */ private void AddNativeToManagedEdge(long fromNativeIndex, long toManagedIndex, string type) + { + if (_edgeDedup.Add(new EdgeKey(fromNativeIndex, toManagedIndex, EdgeType.NativeToManaged))) + { + _result.ManagedConnections.Add(new ConnectionRow + { + FromKind = "native_object", + FromIndex = fromNativeIndex, + ToKind = "managed_object", + ToIndex = toManagedIndex, + ConnectionType = type, + }); + } + } + +/* The three type predicates below index the snapshot arrays directly and perform no bounds check of their own. */ private bool IsArrayType(int typeIndex) => (_snapshot.ManagedTypeFlags[typeIndex] & TypeFlagArray) != 0; + + private bool IsValueType(int typeIndex) => (_snapshot.ManagedTypeFlags[typeIndex] & TypeFlagValueType) != 0; + +/* String detection is an ordinal comparison of the type name with "System.String". */ private bool IsStringType(int typeIndex) + => string.Equals(_snapshot.ManagedTypeNames[typeIndex], "System.String", StringComparison.Ordinal); + +/* Display name for diagnostics: the recorded name (empty string when null), or "type#<index>" when the index is outside ManagedTypeNames. Never throws for a bad index. */ private string GetTypeName(int typeIndex) + => typeIndex >= 0 && typeIndex < _snapshot.ManagedTypeNames.Length + ? _snapshot.ManagedTypeNames[typeIndex] ?? 
string.Empty + : $"type#{typeIndex}"; + +/* Throws InvalidOperationException when typeIndex is negative or not below ManagedTypeNames.Length; reason is appended to the message. */ private void EnsureValidTypeIndex(int typeIndex, string reason) + { + if (typeIndex < 0 || typeIndex >= _snapshot.ManagedTypeNames.Length) + throw new InvalidOperationException($"Invalid managed type index {typeIndex}. reason={reason}"); + } + +/* Throwing wrapper around TryReadPointer: returns the pointer value or throws InvalidOperationException naming the address and reason. */ private ulong ReadPointerStrict(ulong address, string reason) + { + if (!TryReadPointer(address, out var value)) + throw new InvalidOperationException($"Unable to read pointer at 0x{address:X16}. reason={reason}"); + return value; + } + +/* Throwing wrapper around TryReadInt32. */ private int ReadInt32Strict(ulong address, string reason) + { + if (!TryReadInt32(address, out var value)) + throw new InvalidOperationException($"Unable to read int32 at 0x{address:X16}. reason={reason}"); + return value; + } + +/* Verifies that byteCount bytes starting at address lie inside a single managed heap section; throws InvalidOperationException for a negative count or an unreadable range. */ private void EnsureReadable(ulong address, long byteCount, string reason) + { + if (byteCount < 0) + throw new InvalidOperationException($"Negative readability check size {byteCount}. reason={reason}"); + if (!TryGetReadableWindow(address, checked((ulong)byteCount), out _, out _)) + throw new InvalidOperationException($"Managed heap read out of range at 0x{address:X16} len={byteCount}. 
reason={reason}"); + } + +/* Reads a little-endian pointer of _vm.PointerSize bytes from the managed heap; returns false (value = 0) when the range is not readable. NOTE(review): every pointer size other than 8 takes the 4-byte path, while the window was validated for PointerSize bytes - a size below 4 could slice past the validated range. Confirm PointerSize is always 4 or 8. */ private bool TryReadPointer(ulong address, out ulong value) + { + value = 0; + if (!TryGetReadableWindow(address, _vm.PointerSize, out var section, out var offset)) + return false; + + if (_vm.PointerSize == 8) + { + value = BinaryPrimitives.ReadUInt64LittleEndian(section.Bytes.AsSpan(offset, 8)); + return true; + } + + value = BinaryPrimitives.ReadUInt32LittleEndian(section.Bytes.AsSpan(offset, 4)); + return true; + } + +/* Reads a little-endian Int32 from the managed heap; returns false (value = 0) when the 4 bytes are not readable. */ private bool TryReadInt32(ulong address, out int value) + { + value = 0; + if (!TryGetReadableWindow(address, 4, out var section, out var offset)) + return false; + value = BinaryPrimitives.ReadInt32LittleEndian(section.Bytes.AsSpan(offset, 4)); + return true; + } + +/* Locates the heap section containing address and checks that byteCount bytes fit before the end of that section's byte array. On success outputs the section and the offset of address inside it; a read that would cross a section boundary is rejected. A byteCount of 0 only requires the address to fall inside a section. */ private bool TryGetReadableWindow(ulong address, ulong byteCount, out ManagedHeapSection section, out int offsetInSection) + { + section = default; + offsetInSection = 0; + if (_sections.Count == 0) + return false; + + var sectionIndex = FindSectionIndex(address); + if (sectionIndex < 0) + return false; + + var candidate = _sections[sectionIndex]; + var localOffset = checked((long)(address - candidate.StartAddress)); + if (localOffset < 0 || localOffset > candidate.Bytes.Length) + return false; + + if (byteCount > 0 && checked((ulong)localOffset + byteCount) > (ulong)candidate.Bytes.Length) + return false; + + section = candidate; + offsetInSection = (int)localOffset; + return true; + } + +/* Binary search for the last section whose StartAddress is less than or equal to address; returns its index when address is below that section's EndAddressExclusive, otherwise -1. Relies on _sections being ordered by StartAddress - presumably the list produced by BuildManagedHeapSections, which sorts; verify where _sections is assigned. */ private int FindSectionIndex(ulong address) + { + var lo = 0; + var hi = _sections.Count - 1; + var found = -1; + while (lo <= hi) + { + var mid = lo + ((hi - lo) / 2); + var start = _sections[mid].StartAddress; + if (start <= address) + { + found = mid; + lo = mid + 1; + } + else + { + hi = mid - 1; + } + } + + if (found < 0) + return -1; + + var section = _sections[found]; + return address < section.EndAddressExclusive ? 
found : -1; + } + +/* Builds a lookup from type-info address to managed type index. Zero addresses are skipped; when an address repeats, the first type index is kept (TryAdd). */ private static Dictionary BuildTypeInfoIndex(DecodedSnapshot snapshot) + { + var map = new Dictionary(snapshot.ManagedTypeInfoAddresses.Length); + for (var i = 0; i < snapshot.ManagedTypeInfoAddresses.Length; i++) + { + var typeInfoAddress = snapshot.ManagedTypeInfoAddresses[i]; + if (typeInfoAddress == 0) + continue; + map.TryAdd(typeInfoAddress, i); + } + return map; + } + +/* Builds a lookup from managed object address to native object index by going native object -> GC handle index -> GcHandleTargets[handle]. Negative or out-of-range handle indices and zero target addresses are ignored. For a repeated handle index the first native object is kept (TryAdd); for a repeated target address the later entry overwrites the earlier one (indexer assignment). */ private static Dictionary BuildManagedAddressToNativeMap(DecodedSnapshot snapshot) + { + var gcHandleToNativeObject = new Dictionary(snapshot.NativeObjectGcHandleIndices.Length); + for (var nativeIndex = 0; nativeIndex < snapshot.NativeObjectGcHandleIndices.Length; nativeIndex++) + { + var gcHandleIndex = snapshot.NativeObjectGcHandleIndices[nativeIndex]; + if (gcHandleIndex >= 0) + gcHandleToNativeObject.TryAdd(gcHandleIndex, nativeIndex); + } + + var map = new Dictionary(gcHandleToNativeObject.Count); + foreach (var (gcHandleIndex, nativeObjectIndex) in gcHandleToNativeObject) + { + if (gcHandleIndex < 0 || gcHandleIndex >= snapshot.GcHandleTargets.Length) + continue; + var address = snapshot.GcHandleTargets[gcHandleIndex]; + if (address != 0) + map[address] = nativeObjectIndex; + } + + return map; + } + +/* Pairs each section start address with the byte array at the same index and returns the sections sorted by start address. NOTE(review): ManagedHeapSectionBytes is indexed up to the length of ManagedHeapSectionStartAddresses without checking that the two arrays have equal length. */ private static List BuildManagedHeapSections(DecodedSnapshot snapshot) + { + var sections = new List(snapshot.ManagedHeapSectionStartAddresses.Length); + for (var i = 0; i < snapshot.ManagedHeapSectionStartAddresses.Length; i++) + { + sections.Add(new ManagedHeapSection(snapshot.ManagedHeapSectionStartAddresses[i], snapshot.ManagedHeapSectionBytes[i])); + } + + sections.Sort((a, b) => a.StartAddress.CompareTo(b.StartAddress)); + return sections; + } + + private readonly record struct ParsedManagedObject(int TypeIndex, long SizeBytes); + +/* One contiguous range of captured managed heap memory; EndAddressExclusive is StartAddress plus the byte count. */ private readonly record struct ManagedHeapSection(ulong StartAddress, byte[] Bytes) + { + public ulong EndAddressExclusive => StartAddress + (ulong)Bytes.Length; + } + +/* Direction of a recorded edge; part of the de-duplication key. */ private enum EdgeType : byte + { + ManagedToManaged = 0, + 
ManagedToNative = 1, + NativeToManaged = 2, + } + +/* Key stored in _edgeDedup so that each (from, to, direction) edge is recorded only once. */ private readonly record struct EdgeKey(long FromIndex, long ToIndex, EdgeType Type); +} diff --git a/Core/Parser/SnapDataModel.cs b/Core/Parser/SnapDataModel.cs new file mode 100644 index 0000000..3984c62 --- /dev/null +++ b/Core/Parser/SnapDataModel.cs @@ -0,0 +1,239 @@ +namespace MemorySnapshotDataTools.Parser; + +/// Format of a snapshot entry: how element count and data are stored. +internal enum SnapEntryFormat : ushort +{ + /// Entry not present. + Undefined = 0, + + /// Single value; size stored in metadata. + SingleElement = 1, + + /// Fixed-size elements; count and element size in metadata. + ConstantSizeElementArray = 2, + + /// Variable-length elements; offsets array defines boundaries. + DynamicSizeElementArray = 3, +} + +/// Identifiers for snapshot file sections (metadata, native types, connections, heap, etc.). +internal enum SnapEntryType : ushort +{ +/* The numeric values are sparse (for example 2, 3, 12 and 14 are not declared here). SnapReader casts a value to int to index its entry table, so the numbers must not be changed. */ Metadata_Version = 0, + Metadata_RecordDate = 1, + Metadata_VirtualMachineInformation = 4, + NativeTypes_Name = 5, + NativeTypes_NativeBaseTypeArrayIndex = 6, + NativeObjects_NativeTypeArrayIndex = 7, + NativeObjects_HideFlags = 8, + NativeObjects_Flags = 9, + NativeObjects_InstanceId = 10, + NativeObjects_Name = 11, + NativeObjects_Size = 13, + GCHandles_Target = 15, + Connections_From = 16, + Connections_To = 17, + ManagedHeapSections_StartAddress = 18, + ManagedHeapSections_Bytes = 19, + TypeDescriptions_Flags = 22, + TypeDescriptions_Name = 23, + TypeDescriptions_Assembly = 24, + TypeDescriptions_FieldIndices = 25, + TypeDescriptions_BaseOrElementTypeIndex = 27, + TypeDescriptions_Size = 28, + TypeDescriptions_TypeInfoAddress = 29, + FieldDescriptions_Offset = 31, + FieldDescriptions_TypeIndex = 32, + FieldDescriptions_Name = 33, + FieldDescriptions_IsStatic = 34, + NativeRootReferences_Id = 35, + NativeRootReferences_AreaName = 36, + NativeRootReferences_ObjectName = 37, + NativeRootReferences_AccumulatedSize = 38, + 
NativeAllocations_MemoryRegionIndex = 39, + NativeAllocations_Address = 42, + NativeAllocations_Size = 43, + NativeAllocations_OverheadSize = 44, + NativeAllocations_PaddingSize = 45, + NativeMemoryRegions_Name = 46, + NativeMemoryRegions_ParentIndex = 47, + NativeMemoryRegions_AddressBase = 48, + NativeMemoryRegions_AddressSize = 49, + NativeMemoryRegions_FirstAllocationIndex = 50, + NativeMemoryRegions_NumAllocations = 51, + NativeMemoryLabels_Name = 52, + NativeObjects_GCHandleIndex = 58, + NativeObjects_GCHandleIndex_Legacy = 62, +} + +/// Format version constants used when decoding snapshot entries (e.g. instance IDs, heap sections). +internal static class SnapFormatVersion +{ + /// Version at which native connections use instance IDs. + public const uint NativeConnectionsAsInstanceIdsVersion = 10; + + /// Version at which entity IDs are 8-byte structs. + public const uint EntityIDAs8ByteStructs = 18; + + /// Version for memory label size and heap ID in heap section metadata. + public const uint MemLabelSizeAndHeapIdVersion = 12; +} + +/// +/// Decoded virtual machine layout from snapshot metadata (pointer size, header layout, allocation granularity). +/// Used by to interpret managed heap layout. +/// +public sealed class DecodedVirtualMachineInfo +{ +/* NOTE(review): the summary sentence "Used by to interpret managed heap layout" has lost the name of the consuming type; restore the reference. All members are mutable uint properties; the heap crawler reads PointerSize, ObjectHeaderSize and ArrayHeaderSize directly and treats any PointerSize other than 8 as 4 bytes. */ /// Size of a pointer in bytes (4 or 8). + public uint PointerSize { get; set; } + + /// Object header size in bytes. + public uint ObjectHeaderSize { get; set; } + + /// Array object header size in bytes. + public uint ArrayHeaderSize { get; set; } + + /// Offset of array bounds in the array header. + public uint ArrayBoundsOffsetInHeader { get; set; } + + /// Offset of array length/size in the array header. + public uint ArraySizeOffsetInHeader { get; set; } + + /// Allocation granularity in bytes. + public uint AllocationGranularity { get; set; } +} + +/// +/// Fully decoded in-memory snapshot: all native and managed metadata and raw arrays as read from the .snap file. +/// Produced by and consumed by and . 
+/// +public sealed class DecodedSnapshot +{ +/* Plain data holder: every array property defaults to an empty array, so a freshly constructed instance can be enumerated without null checks. NOTE(review): the type summary ("Produced by and consumed by and .") has lost its type references; restore them. */ /// Snapshot format version from metadata. + public uint FormatVersion { get; set; } + + /// Record date in .NET ticks (UTC). + public long RecordDateTicksUtc { get; set; } + + /// Native type display names. + public string[] NativeTypeNames { get; set; } = []; + + /// Per-native-object index into . + public int[] NativeObjectTypeIndices { get; set; } = []; + + /// Per-native-object instance ID. + public ulong[] NativeObjectInstanceIds { get; set; } = []; + + /// Per-native-object name. + public string[] NativeObjectNames { get; set; } = []; + + /// Per-native-object size in bytes. + public ulong[] NativeObjectSizes { get; set; } = []; + + /// Per-native-object flags (e.g. destroyed). + public int[] NativeObjectFlags { get; set; } = []; + + /// Per-native-object GC handle index, or -1. + public int[] NativeObjectGcHandleIndices { get; set; } = []; + + /// GC handle target addresses (managed heap). + public ulong[] GcHandleTargets { get; set; } = []; + + /// Connection source unified indices. + public int[] ConnectionsFrom { get; set; } = []; + + /// Connection target unified indices. + public int[] ConnectionsTo { get; set; } = []; + + /// Native root reference IDs. + public long[] NativeRootIds { get; set; } = []; + + /// Native root area names (e.g. Scene, DontDestroyOnLoad). + public string[] NativeRootAreaNames { get; set; } = []; + + /// Native root object names. + public string[] NativeRootObjectNames { get; set; } = []; + + /// Native root accumulated sizes in bytes. + public ulong[] NativeRootAccumulatedSizes { get; set; } = []; + + /// Native memory region names. + public string[] NativeMemoryRegionNames { get; set; } = []; + + /// Parent region index per region, or -1. + public int[] NativeMemoryRegionParentIndices { get; set; } = []; + + /// Base address per region. + public ulong[] NativeMemoryRegionAddressBases { get; set; } = []; + + /// Size in bytes per region. 
+ public ulong[] NativeMemoryRegionAddressSizes { get; set; } = []; + + /// First allocation index per region, or -1. + public int[] NativeMemoryRegionFirstAllocationIndices { get; set; } = []; + + /// Number of allocations per region. + public int[] NativeMemoryRegionNumAllocations { get; set; } = []; + + /// Native memory label names. + public string[] NativeMemoryLabelNames { get; set; } = []; + + /// Native allocation addresses. + public ulong[] NativeAllocationAddresses { get; set; } = []; + + /// Native allocation sizes in bytes. + public ulong[] NativeAllocationSizes { get; set; } = []; + + /// Native allocation overhead sizes in bytes. + public ulong[] NativeAllocationOverheadSizes { get; set; } = []; + + /// Native allocation padding sizes in bytes. + public ulong[] NativeAllocationPaddingSizes { get; set; } = []; + + /// Memory region index per allocation, or -1. + public int[] NativeAllocationMemoryRegionIndices { get; set; } = []; + + /// VM layout (pointer size, header offsets). + public DecodedVirtualMachineInfo VirtualMachineInformation { get; set; } = new(); + +/* The next two arrays are parallel: the heap crawler pairs ManagedHeapSectionStartAddresses[i] with ManagedHeapSectionBytes[i], so they must have the same length. */ /// Start address of each managed heap section. + public ulong[] ManagedHeapSectionStartAddresses { get; set; } = []; + + /// Raw bytes of each managed heap section. + public byte[][] ManagedHeapSectionBytes { get; set; } = []; + + /// Managed type flags (value type, array, etc.). + public int[] ManagedTypeFlags { get; set; } = []; + + /// Managed type names. + public string[] ManagedTypeNames { get; set; } = []; + + /// Managed type assembly names. + public string[] ManagedTypeAssemblies { get; set; } = []; + + /// Base or element type index per managed type. + public int[] ManagedTypeBaseOrElementTypeIndices { get; set; } = []; + + /// Managed type size in bytes. + public int[] ManagedTypeSizes { get; set; } = []; + + /// Type info address per managed type (for type resolution on heap). 
+ public ulong[] ManagedTypeInfoAddresses { get; set; } = []; + + /// Per-type array of field description indices. + public int[][] ManagedTypeFieldIndices { get; set; } = []; + +/* The four Field* arrays below are parallel and indexed by field description index (the values stored in ManagedTypeFieldIndices). */ /// Field offset in bytes. + public int[] FieldOffsets { get; set; } = []; + + /// Field type index. + public int[] FieldTypeIndices { get; set; } = []; + + /// Field name. + public string[] FieldNames { get; set; } = []; + + /// Non-zero if field is static. + public byte[] FieldIsStatic { get; set; } = []; +} + diff --git a/Core/Parser/SnapReader.cs b/Core/Parser/SnapReader.cs new file mode 100644 index 0000000..3bf006e --- /dev/null +++ b/Core/Parser/SnapReader.cs @@ -0,0 +1,529 @@ +using System.Buffers.Binary; +using System.Buffers; +using System.Runtime.InteropServices; +using System.Text; + +namespace MemorySnapshotDataTools.Parser; + +/// +/// Low-level reader for Unity memory snapshot (.snap) files. Parses the file header, chapter directory, +/// block and entry metadata, and provides typed access to snapshot entries (primitive arrays, strings, dynamic arrays). +/// Call to create an instance; use and before reading. +/// +internal sealed class SnapReader : IDisposable +{ +/* File-format magic numbers and section versions. DirectorySignature and ChapterSectionVersion are compared in Open; the remaining constants are presumably checked by ValidateSignatures and the block reader, which are not visible in this part of the file. */ private const uint HeaderSignature = 0xAEABCDCD; + private const uint DirectorySignature = 0xCDCDAEAB; + private const uint FooterSignature = 0xABCDCDAE; + private const uint ChapterSectionVersion = 0x20170724; + private const uint BlockSectionVersion = 0x20170724; + + private readonly FileStream _stream; + private readonly BinaryReader _reader; + private readonly EntryData[] _entries; + private readonly BlockData[] _blocks; + +/* Private: instances are created through Open, which hands over an already opened stream and the parsed entry and block tables. */ private SnapReader(FileStream stream, BinaryReader reader, EntryData[] entries, BlockData[] blocks) + { + _stream = stream; + _reader = reader; + _entries = entries; + _blocks = blocks; + } + + /// + /// Opens a snapshot file and initializes the reader. Validates header/footer signatures and chapter directory, then loads block and entry metadata. + /// + /// Path to the .snap file. 
+ /// A configured ready for entry reads. + /// If file format is invalid or unsupported. + public static SnapReader Open(string snapshotPath) + { + var stream = new FileStream(snapshotPath, FileMode.Open, FileAccess.Read, FileShare.Read); + var reader = new BinaryReader(stream, Encoding.UTF8, leaveOpen: true); + try + { + ValidateSignatures(reader, stream.Length, out var chapterDirectoryOffset); + + stream.Position = chapterDirectoryOffset; + var directorySig = reader.ReadUInt32(); + var chapterVersion = reader.ReadUInt32(); + if (directorySig != DirectorySignature) + throw new InvalidOperationException($"Invalid snapshot chapter directory signature: 0x{directorySig:X8}"); + if (chapterVersion != ChapterSectionVersion) + throw new InvalidOperationException($"Unsupported chapter section version: 0x{chapterVersion:X8}"); + +/* The directory header is 16 bytes (signature uint32, version uint32, block-section offset int64); the entry offset table is read from the position directly after it. */ var blockSectionOffset = reader.ReadInt64(); + var entryDirectoryOffset = chapterDirectoryOffset + sizeof(uint) + sizeof(uint) + sizeof(long); + var entryOffsets = ReadEntryOffsets(reader, entryDirectoryOffset); + var blockOffsets = ReadBlockOffsets(reader, blockSectionOffset); + var blocks = ReadBlocks(reader, blockOffsets); + var entries = ReadEntries(reader, entryOffsets); + + return new SnapReader(stream, reader, entries, blocks); + } + catch + { +/* On any failure release both handles before rethrowing so an invalid file does not leave the FileStream open; the reader was created with leaveOpen: true, so the stream must be disposed explicitly. */ reader.Dispose(); + stream.Dispose(); + throw; + } + } + + /// Returns whether the snapshot contains data for the given entry type. + /// The snapshot entry type to check. + /// True if the entry is present and defined. + public bool HasEntry(SnapEntryType entryType) => (int)entryType < _entries.Length && _entries[(int)entryType].IsDefined; + + /// Returns the number of elements in the given entry (1 for single-element, array length otherwise). + /// The snapshot entry type. + /// Element count for the entry. + /// If the entry is missing or index out of range. 
+ public uint GetEntryCount(SnapEntryType entryType) + { + EnsureDefined(entryType); + return _entries[(int)entryType].Count; + } + + /// Reads the snapshot format version number from metadata. + /// Format version (e.g. 10, 18). + public uint ReadMetadataVersion() => ReadSingle(SnapEntryType.Metadata_Version); + + /// Reads the snapshot record date as .NET ticks (UTC), or 0 if the entry is missing. + /// Ticks value or 0. + public long ReadMetadataRecordDateTicks() + { + if (!HasEntry(SnapEntryType.Metadata_RecordDate)) + return 0; + return ReadSingle(SnapEntryType.Metadata_RecordDate); + } + + /// + /// Reads an entry as an array of unmanaged primitives. Supports single-element, constant-size, and dynamic-size entry formats. + /// + /// Unmanaged type (e.g. int, long, ulong). + /// The entry to read. + /// Array of values; may be empty if the entry has no data. + /// If entry is missing, format is unsupported, or size mismatch. + public T[] ReadPrimitiveArray(SnapEntryType entryType) where T : unmanaged + { + EnsureDefined(entryType); + var entry = _entries[(int)entryType]; +/* NOTE(review): Marshal.SizeOf reports the marshaled (unmanaged) size, which differs from the in-memory size for bool and char, while the copies below go through MemoryMarshal.AsBytes, which uses the in-memory size. The two agree for the integer types; confirm that no caller passes bool or char, or switch to Unsafe.SizeOf. */ var elementSize = Marshal.SizeOf(); + if (entry.Format == SnapEntryFormat.SingleElement) + { +/* A single-element entry may still hold several values of T: the stored byte length is divided by the element size. */ var bytes = ReadConstEntryBytes(entry, 0, 1); + if (bytes.Length == 0) + return []; + if (bytes.Length % elementSize != 0) + { + throw new InvalidOperationException( + $"Entry '{entryType}' byte-size {bytes.Length} is not divisible by element size {elementSize}."); + } + + var singleCount = bytes.Length / elementSize; + var output = new T[singleCount]; + bytes.AsSpan().CopyTo(MemoryMarshal.AsBytes(output.AsSpan())); + return output; + } + + var count = checked((int)entry.Count); + if (count == 0) + return []; + + if (entry.Format == SnapEntryFormat.ConstantSizeElementArray) + { +/* Constant-size format: all elements are read in one call and the total byte length must equal count * elementSize exactly. */ var expectedBytes = checked(count * elementSize); + var bytes = ReadConstEntryBytes(entry, 0, count); + if (bytes.Length != expectedBytes) + throw new InvalidOperationException($"Entry '{entryType}' byte-size mismatch. 
expected={expectedBytes}, actual={bytes.Length}"); + + var output = new T[count]; + var source = bytes.AsSpan(); + var destination = MemoryMarshal.AsBytes(output.AsSpan()); + source.CopyTo(destination); + return output; + } + + if (entry.Format == SnapEntryFormat.DynamicSizeElementArray) + { +/* Dynamic format: elements are read one at a time and each must be exactly elementSize bytes long. A 256-byte stack buffer is reused across iterations; a heap array is allocated per element only when T is larger than that. */ var output = new T[count]; + Span smallBuffer = stackalloc byte[256]; + for (var i = 0; i < count; i++) + { + GetDynamicElementBounds(entry, i, out var start, out var length); + if (length != elementSize) + throw new InvalidOperationException( + $"Dynamic entry '{entryType}' element {i} has unexpected size {length}, expected {elementSize}."); + + Span bytes = elementSize <= 256 ? smallBuffer[..elementSize] : new byte[elementSize]; + ReadBlockRange(_blocks[checked((int)entry.BlockIndex)], start, bytes[..elementSize]); + output[i] = MemoryMarshal.Read(bytes); + } + return output; + } + + throw new InvalidOperationException($"Entry '{entryType}' has unsupported format '{entry.Format}'."); + } + + /// + /// Reads an entry as an array of UTF-8 strings. The entry must be in dynamic-size element array format. + /// + /// The entry to read. + /// Array of decoded strings. + /// If entry is missing or not a dynamic string array. 
+ public string[] ReadUtf8StringArray(SnapEntryType entryType) + { + EnsureDefined(entryType); + var entry = _entries[(int)entryType]; + if (entry.Format != SnapEntryFormat.DynamicSizeElementArray) + throw new InvalidOperationException($"Entry '{entryType}' is not a dynamic string array."); + + var count = checked((int)entry.Count); + var output = new string[count]; + for (var i = 0; i < count; i++) + { + GetDynamicElementBounds(entry, i, out var start, out var length); + if (length == 0) + { + output[i] = string.Empty; + continue; + } + +/* A pooled buffer can be longer than requested, so only the first `length` bytes are filled and decoded; the buffer is returned to the pool even when the read throws. */ var rented = ArrayPool.Shared.Rent(length); + try + { + ReadBlockRange(_blocks[checked((int)entry.BlockIndex)], start, rented.AsSpan(0, length)); + output[i] = Encoding.UTF8.GetString(rented, 0, length); + } + finally + { + ArrayPool.Shared.Return(rented); + } + } + + return output; + } + + /// + /// Reads an entry as an array of variable-length byte arrays (dynamic-size element array format). + /// + /// The entry to read. + /// Array of byte arrays, one per element. + /// If entry is missing or not dynamic. + public byte[][] ReadDynamicByteArrays(SnapEntryType entryType) + { + EnsureDefined(entryType); + var entry = _entries[(int)entryType]; + if (entry.Format != SnapEntryFormat.DynamicSizeElementArray) + throw new InvalidOperationException($"Entry '{entryType}' is not a dynamic array."); + +/* Each element is copied into its own freshly allocated array (no pooling), so the result can be kept by the caller. */ var count = checked((int)entry.Count); + var output = new byte[count][]; + for (var i = 0; i < count; i++) + output[i] = ReadDynamicElementBytes(entry, i); + return output; + } + + /// + /// Reads an entry as an array of variable-length primitive arrays. Each element is a byte array decoded into T[]. + /// + /// Unmanaged element type. + /// The entry to read. + /// Jagged array of primitive arrays. + /// If entry is missing or element length is not divisible by sizeof(T). 
+ public T[][] ReadDynamicPrimitiveArrays(SnapEntryType entryType) where T : unmanaged + { + var bytes = ReadDynamicByteArrays(entryType); + var output = new T[bytes.Length][]; +/* NOTE(review): same caveat as in ReadPrimitiveArray - Marshal.SizeOf is the marshaled size, which differs from the in-memory size for bool and char. */ var elementSize = Marshal.SizeOf(); + for (var i = 0; i < bytes.Length; i++) + { + if (bytes[i].Length % elementSize != 0) + { + throw new InvalidOperationException( + $"Dynamic entry '{entryType}' element {i} length {bytes[i].Length} is not divisible by element size {elementSize}."); + } + + var elementCount = bytes[i].Length / elementSize; + var row = new T[elementCount]; + bytes[i].AsSpan().CopyTo(MemoryMarshal.AsBytes(row.AsSpan())); + output[i] = row; + } + + return output; + } + + /// Releases the file stream and binary reader. + public void Dispose() + { + _reader.Dispose(); + _stream.Dispose(); + } + +/* Reads the entry as a primitive array and returns its first value; throws InvalidOperationException when the entry holds no elements. */ private T ReadSingle(SnapEntryType entryType) where T : unmanaged + { + var arr = ReadPrimitiveArray(entryType); + if (arr.Length == 0) + throw new InvalidOperationException($"Entry '{entryType}' has no elements."); + return arr[0]; + } + +/* Returns the raw bytes of `count` elements starting at startIndex. For a single-element entry (startIndex 0, count 1) HeaderMeta is used as the block-relative offset and EntriesMeta as the byte length; for a constant-size array EntriesMeta is the per-element size. Any other format throws InvalidOperationException. */ private byte[] ReadConstEntryBytes(EntryData entry, int startIndex, int count) + { + if (entry.Format == SnapEntryFormat.SingleElement && startIndex == 0 && count == 1) + return ReadBlockRange(_blocks[checked((int)entry.BlockIndex)], checked((long)entry.HeaderMeta), checked((int)entry.EntriesMeta)); + + if (entry.Format != SnapEntryFormat.ConstantSizeElementArray) + throw new InvalidOperationException($"Entry '{entry.EntryType}' is not a constant-size array."); + + var byteOffset = checked((long)entry.EntriesMeta * startIndex); + var byteLength = checked((int)(entry.EntriesMeta * (uint)count)); + return ReadBlockRange(_blocks[checked((int)entry.BlockIndex)], byteOffset, byteLength); + } + +/* Returns a copy of the bytes of one element of a dynamic-size entry. */ private byte[] ReadDynamicElementBytes(EntryData entry, int elementIndex) + { + GetDynamicElementBounds(entry, elementIndex, out var start, out var length); + return ReadBlockRange(_blocks[checked((int)entry.BlockIndex)], start, length); + } + +/* Computes the block-relative start and byte length of one element of a dynamic-size entry: element i runs from DynamicOffsets[i] to DynamicOffsets[i + 1], and the last element ends at HeaderMeta. Throws InvalidOperationException when the entry has no offsets or the offsets are not ascending, ArgumentOutOfRangeException for a bad index. */ private static 
void GetDynamicElementBounds(EntryData entry, int elementIndex, out long start, out int length) + { + if (entry.DynamicOffsets == null) + throw new InvalidOperationException($"Entry '{entry.EntryType}' has no dynamic offsets."); + if (elementIndex < 0 || elementIndex >= entry.DynamicOffsets.Length) + throw new ArgumentOutOfRangeException(nameof(elementIndex)); + + start = entry.DynamicOffsets[elementIndex]; + var end = elementIndex == entry.DynamicOffsets.Length - 1 + ? checked((long)entry.HeaderMeta) + : entry.DynamicOffsets[elementIndex + 1]; + length = checked((int)(end - start)); + if (length < 0) + throw new InvalidOperationException($"Entry '{entry.EntryType}' has invalid dynamic offsets."); + } + +/* Allocating overload: returns a new array of byteLength bytes read from the block (an empty array for zero length, without touching the stream). */ private byte[] ReadBlockRange(BlockData block, long blockRelativeOffset, int byteLength) + { + var output = new byte[byteLength]; + if (byteLength == 0) + return output; + + ReadBlockRange(block, blockRelativeOffset, output); + return output; + } + +/* Fills destination from a block that is stored as fixed-size chunks: each pass maps the running offset to a chunk index and an offset inside that chunk, seeks to ChunkOffsets[chunk] + offsetInChunk and reads up to the end of that chunk. */ private void ReadBlockRange(BlockData block, long blockRelativeOffset, Span destination) + { + if (destination.Length == 0) + return; + + var chunkSize = checked((long)block.ChunkSize); + var readCursor = 0; + var offset = blockRelativeOffset; + while (readCursor < destination.Length) + { + var chunkIndex = checked((int)(offset / chunkSize)); + if (chunkIndex < 0 || chunkIndex >= block.ChunkOffsets.Length) + throw new InvalidOperationException("Chunk index out of range while reading snapshot block."); + + var offsetInChunk = offset % chunkSize; + var availableInChunk = chunkSize - offsetInChunk; + var toRead = (int)Math.Min(availableInChunk, destination.Length - readCursor); + var absoluteFileOffset = checked(block.ChunkOffsets[chunkIndex] + offsetInChunk); + +/* NOTE(review): Stream.Read is allowed to return fewer bytes than requested before the end of the stream; this code reports any short read as EOF. Stream.ReadExactly would remove that assumption. */ _stream.Position = absoluteFileOffset; + var read = _stream.Read(destination.Slice(readCursor, toRead)); + if (read != toRead) + throw new InvalidOperationException("Unexpected EOF while reading snapshot block."); + + readCursor += toRead; + offset += toRead; + 
} + } + + private void EnsureDefined(SnapEntryType entryType) + { + var idx = (int)entryType; + if (idx < 0 || idx >= _entries.Length) + throw new InvalidOperationException($"Entry type index out of range: {entryType}"); + if (!_entries[idx].IsDefined) + throw new InvalidOperationException($"Entry '{entryType}' is missing in this snapshot."); + } + + private static void ValidateSignatures(BinaryReader reader, long fileLength, out long chapterDirectoryOffset) + { + if (fileLength < 16) + throw new InvalidOperationException("Snapshot file is too small."); + + reader.BaseStream.Position = 0; + var headerSig = reader.ReadUInt32(); + if (headerSig != HeaderSignature) + throw new InvalidOperationException($"Invalid snapshot header signature: 0x{headerSig:X8}"); + + reader.BaseStream.Position = fileLength - sizeof(uint); + var footerSig = reader.ReadUInt32(); + if (footerSig != FooterSignature) + throw new InvalidOperationException($"Invalid snapshot footer signature: 0x{footerSig:X8}"); + + reader.BaseStream.Position = fileLength - sizeof(uint) - sizeof(long); + chapterDirectoryOffset = reader.ReadInt64(); + if (chapterDirectoryOffset <= 0 || chapterDirectoryOffset >= fileLength) + throw new InvalidOperationException("Snapshot chapter directory offset is invalid."); + } + + private static long[] ReadEntryOffsets(BinaryReader reader, long entryDirectoryOffset) + { + reader.BaseStream.Position = entryDirectoryOffset; + var entryCount = reader.ReadInt32(); + if (entryCount <= 0) + return []; + + var offsets = new long[entryCount]; + for (var i = 0; i < entryCount; i++) + offsets[i] = reader.ReadInt64(); + return offsets; + } + + private static long[] ReadBlockOffsets(BinaryReader reader, long blockSectionOffset) + { + reader.BaseStream.Position = blockSectionOffset; + var blockVersion = reader.ReadUInt32(); + if (blockVersion != BlockSectionVersion) + throw new InvalidOperationException($"Unsupported block section version: 0x{blockVersion:X8}"); + + var blockCount = 
reader.ReadInt32(); + if (blockCount <= 0) + throw new InvalidOperationException("Snapshot block section has no blocks."); + + var offsets = new long[blockCount]; + for (var i = 0; i < blockCount; i++) + offsets[i] = reader.ReadInt64(); + return offsets; + } + + private static BlockData[] ReadBlocks(BinaryReader reader, long[] blockOffsets) + { + var blocks = new BlockData[blockOffsets.Length]; + for (var i = 0; i < blockOffsets.Length; i++) + { + reader.BaseStream.Position = blockOffsets[i]; + var chunkSize = reader.ReadUInt64(); + var totalBytes = reader.ReadUInt64(); + if (chunkSize == 0) + throw new InvalidOperationException($"Block {i} has zero chunk size."); + + var offsetCount = (int)(totalBytes / chunkSize + (totalBytes % chunkSize == 0 ? 0UL : 1UL)); + var chunkOffsets = new long[offsetCount]; + for (var c = 0; c < offsetCount; c++) + chunkOffsets[c] = reader.ReadInt64(); + + blocks[i] = new BlockData(chunkSize, totalBytes, chunkOffsets); + } + + return blocks; + } + + private static EntryData[] ReadEntries(BinaryReader reader, long[] entryOffsets) + { + var entries = new EntryData[entryOffsets.Length]; + for (var i = 0; i < entries.Length; i++) + entries[i] = EntryData.Undefined((SnapEntryType)i); + + for (var i = 0; i < entryOffsets.Length; i++) + { + var offset = entryOffsets[i]; + if (offset == 0) + continue; + + reader.BaseStream.Position = offset; + var format = (SnapEntryFormat)reader.ReadUInt16(); + var blockIndex = reader.ReadUInt32(); + var entriesMeta = reader.ReadUInt32(); + var headerMeta = reader.ReadUInt64(); + long[]? 
dynamicOffsets = null; + + if (format == SnapEntryFormat.DynamicSizeElementArray) + { + var count = checked((int)entriesMeta); + dynamicOffsets = new long[count]; + for (var d = 0; d < count; d++) + dynamicOffsets[d] = reader.ReadInt64(); + + if (count > 0) + { + var totalSize = dynamicOffsets[count - 1]; + for (var d = count - 1; d >= 1; d--) + dynamicOffsets[d] = dynamicOffsets[d - 1]; + dynamicOffsets[0] = checked((long)headerMeta); + headerMeta = checked((ulong)totalSize); + } + } + + entries[i] = new EntryData( + (SnapEntryType)i, + true, + format, + blockIndex, + entriesMeta, + headerMeta, + dynamicOffsets); + } + + return entries; + } + + private sealed class BlockData + { + public BlockData(ulong chunkSize, ulong totalBytes, long[] chunkOffsets) + { + ChunkSize = chunkSize; + TotalBytes = totalBytes; + ChunkOffsets = chunkOffsets; + } + + public ulong ChunkSize { get; } + public ulong TotalBytes { get; } + public long[] ChunkOffsets { get; } + } + + private sealed class EntryData + { + public EntryData( + SnapEntryType entryType, + bool isDefined, + SnapEntryFormat format, + uint blockIndex, + uint entriesMeta, + ulong headerMeta, + long[]? dynamicOffsets) + { + EntryType = entryType; + IsDefined = isDefined; + Format = format; + BlockIndex = blockIndex; + EntriesMeta = entriesMeta; + HeaderMeta = headerMeta; + DynamicOffsets = dynamicOffsets; + } + + public static EntryData Undefined(SnapEntryType type) => new(type, false, SnapEntryFormat.Undefined, 0, 0, 0, null); + + public SnapEntryType EntryType { get; } + public bool IsDefined { get; } + public SnapEntryFormat Format { get; } + public uint BlockIndex { get; } + public uint EntriesMeta { get; } + public ulong HeaderMeta { get; } + public long[]? 
DynamicOffsets { get; } + public uint Count => Format switch + { + SnapEntryFormat.SingleElement => 1, + SnapEntryFormat.ConstantSizeElementArray => (uint)HeaderMeta, + SnapEntryFormat.DynamicSizeElementArray => EntriesMeta, + _ => 0 + }; + } +} + diff --git a/Core/Parser/SnapSectionDecoders.cs b/Core/Parser/SnapSectionDecoders.cs new file mode 100644 index 0000000..86afbd8 --- /dev/null +++ b/Core/Parser/SnapSectionDecoders.cs @@ -0,0 +1,423 @@ +namespace MemorySnapshotDataTools.Parser; + +/// +/// Decodes all snapshot sections from a into a single . +/// Reads metadata, native types/objects, connections, roots, memory regions, allocations, managed heap sections, type descriptions, and fields. +/// Validates array length consistency before returning. +/// +internal static class SnapSectionDecoders +{ + private const ulong HeapSectionTypeFlagMask = 1UL << 63; + + /// + /// Reads every required and optional entry from the snapshot and populates a . + /// + /// Open snapshot reader (e.g. from ). + /// A fully populated decoded snapshot; throws if required entries are missing or lengths are inconsistent. + /// If a required entry is missing or array lengths do not match. 
+ public static DecodedSnapshot DecodeAll(SnapReader reader) + { + var formatVersion = reader.ReadMetadataVersion(); + var nativeObjectTypeIndices = ReadInts(reader, SnapEntryType.NativeObjects_NativeTypeArrayIndex); + var nativeObjectCount = nativeObjectTypeIndices.Length; + var nativeObjectInstanceIds = ReadInstanceIds(reader, formatVersion); + var nativeObjectGcHandleIndices = ReadNativeObjectGcHandleIndices(reader, formatVersion, nativeObjectCount); + var gcHandleTargets = ReadOptionalULongs(reader, SnapEntryType.GCHandles_Target); + var (connectionsFrom, connectionsTo) = ReadConnections( + reader, + formatVersion, + nativeObjectInstanceIds, + nativeObjectGcHandleIndices, + gcHandleTargets.Length); + var nativeMemoryRegionAddressBases = ReadULongs(reader, SnapEntryType.NativeMemoryRegions_AddressBase); + var nativeMemoryRegionCount = nativeMemoryRegionAddressBases.Length; + var nativeAllocationAddresses = ReadULongs(reader, SnapEntryType.NativeAllocations_Address); + var nativeAllocationCount = nativeAllocationAddresses.Length; + + var snapshot = new DecodedSnapshot + { + FormatVersion = formatVersion, + RecordDateTicksUtc = reader.ReadMetadataRecordDateTicks(), + NativeObjectTypeIndices = nativeObjectTypeIndices, + NativeObjectInstanceIds = nativeObjectInstanceIds, + NativeObjectSizes = ReadULongs(reader, SnapEntryType.NativeObjects_Size), + NativeObjectFlags = ReadIntsWithCount(reader, SnapEntryType.NativeObjects_Flags, nativeObjectCount, 0), + NativeObjectGcHandleIndices = nativeObjectGcHandleIndices, + GcHandleTargets = gcHandleTargets, + ConnectionsFrom = connectionsFrom, + ConnectionsTo = connectionsTo, + NativeRootIds = ReadLongs(reader, SnapEntryType.NativeRootReferences_Id), + NativeRootAccumulatedSizes = ReadULongs(reader, SnapEntryType.NativeRootReferences_AccumulatedSize), + NativeMemoryRegionAddressBases = nativeMemoryRegionAddressBases, + NativeMemoryRegionAddressSizes = ReadULongsWithCount(reader, SnapEntryType.NativeMemoryRegions_AddressSize, 
nativeMemoryRegionCount), + NativeMemoryRegionParentIndices = ReadIntsWithCount(reader, SnapEntryType.NativeMemoryRegions_ParentIndex, nativeMemoryRegionCount, -1), + NativeMemoryRegionFirstAllocationIndices = ReadIntsWithCount(reader, SnapEntryType.NativeMemoryRegions_FirstAllocationIndex, nativeMemoryRegionCount, -1), + NativeMemoryRegionNumAllocations = ReadIntsWithCount(reader, SnapEntryType.NativeMemoryRegions_NumAllocations, nativeMemoryRegionCount, 0), + NativeAllocationAddresses = nativeAllocationAddresses, + NativeAllocationSizes = ReadULongsWithCount(reader, SnapEntryType.NativeAllocations_Size, nativeAllocationCount), + NativeAllocationOverheadSizes = ReadULongsWithCount(reader, SnapEntryType.NativeAllocations_OverheadSize, nativeAllocationCount), + NativeAllocationPaddingSizes = ReadULongsWithCount(reader, SnapEntryType.NativeAllocations_PaddingSize, nativeAllocationCount), + NativeAllocationMemoryRegionIndices = ReadIntsWithCount(reader, SnapEntryType.NativeAllocations_MemoryRegionIndex, nativeAllocationCount, -1), + VirtualMachineInformation = ReadVirtualMachineInfo(reader), + ManagedHeapSectionStartAddresses = ReadManagedHeapSectionStartAddresses(reader, formatVersion), + ManagedHeapSectionBytes = ReadRequiredDynamicBytes(reader, SnapEntryType.ManagedHeapSections_Bytes), + ManagedTypeFlags = ReadRequiredInts(reader, SnapEntryType.TypeDescriptions_Flags), + ManagedTypeNames = ReadRequiredStrings(reader, SnapEntryType.TypeDescriptions_Name), + ManagedTypeAssemblies = ReadRequiredStrings(reader, SnapEntryType.TypeDescriptions_Assembly), + ManagedTypeFieldIndices = ReadRequiredDynamicInts(reader, SnapEntryType.TypeDescriptions_FieldIndices), + ManagedTypeBaseOrElementTypeIndices = ReadRequiredInts(reader, SnapEntryType.TypeDescriptions_BaseOrElementTypeIndex), + ManagedTypeSizes = ReadRequiredInts(reader, SnapEntryType.TypeDescriptions_Size), + ManagedTypeInfoAddresses = ReadRequiredULongs(reader, SnapEntryType.TypeDescriptions_TypeInfoAddress), + 
FieldOffsets = ReadRequiredInts(reader, SnapEntryType.FieldDescriptions_Offset), + FieldTypeIndices = ReadRequiredInts(reader, SnapEntryType.FieldDescriptions_TypeIndex), + FieldNames = ReadRequiredStrings(reader, SnapEntryType.FieldDescriptions_Name), + FieldIsStatic = ReadRequiredBytes(reader, SnapEntryType.FieldDescriptions_IsStatic), + }; + + snapshot.NativeTypeNames = ReadStringsWithCount(reader, SnapEntryType.NativeTypes_Name, 0); + snapshot.NativeObjectNames = ReadStringsWithCount(reader, SnapEntryType.NativeObjects_Name, snapshot.NativeObjectTypeIndices.Length); + snapshot.NativeRootAreaNames = ReadStringsWithCount(reader, SnapEntryType.NativeRootReferences_AreaName, snapshot.NativeRootIds.Length); + snapshot.NativeRootObjectNames = ReadStringsWithCount(reader, SnapEntryType.NativeRootReferences_ObjectName, snapshot.NativeRootIds.Length); + snapshot.NativeMemoryRegionNames = ReadStringsWithCount(reader, SnapEntryType.NativeMemoryRegions_Name, snapshot.NativeMemoryRegionAddressBases.Length); + snapshot.NativeMemoryLabelNames = ReadStrings(reader, SnapEntryType.NativeMemoryLabels_Name); + + ValidateLengths(snapshot); + return snapshot; + } + + private static void ValidateLengths(DecodedSnapshot snapshot) + { + var nativeCount = snapshot.NativeObjectNames.Length; + if (nativeCount > 0) + { + EnsureArrayLength(nativeCount, snapshot.NativeObjectTypeIndices.Length, "NativeObjects_NativeTypeArrayIndex"); + EnsureArrayLength(nativeCount, snapshot.NativeObjectInstanceIds.Length, "NativeObjects_InstanceId"); + EnsureArrayLength(nativeCount, snapshot.NativeObjectSizes.Length, "NativeObjects_Size"); + EnsureArrayLength(nativeCount, snapshot.NativeObjectGcHandleIndices.Length, "NativeObjects_GCHandleIndex"); + if (snapshot.NativeObjectFlags.Length > 0) + EnsureArrayLength(nativeCount, snapshot.NativeObjectFlags.Length, "NativeObjects_Flags"); + } + + var rootsCount = snapshot.NativeRootIds.Length; + if (snapshot.NativeRootAreaNames.Length > 0) + 
EnsureArrayLength(rootsCount, snapshot.NativeRootAreaNames.Length, "NativeRootReferences_AreaName"); + if (snapshot.NativeRootObjectNames.Length > 0) + EnsureArrayLength(rootsCount, snapshot.NativeRootObjectNames.Length, "NativeRootReferences_ObjectName"); + EnsureArrayLength(rootsCount, snapshot.NativeRootAccumulatedSizes.Length, "NativeRootReferences_AccumulatedSize"); + + EnsureArrayLength(snapshot.ConnectionsFrom.Length, snapshot.ConnectionsTo.Length, "Connections_To"); + + var regionCount = snapshot.NativeMemoryRegionAddressBases.Length; + EnsureArrayLength(regionCount, snapshot.NativeMemoryRegionAddressSizes.Length, "NativeMemoryRegions_AddressSize"); + EnsureArrayLength(regionCount, snapshot.NativeMemoryRegionParentIndices.Length, "NativeMemoryRegions_ParentIndex"); + EnsureArrayLength(regionCount, snapshot.NativeMemoryRegionFirstAllocationIndices.Length, "NativeMemoryRegions_FirstAllocationIndex"); + EnsureArrayLength(regionCount, snapshot.NativeMemoryRegionNumAllocations.Length, "NativeMemoryRegions_NumAllocations"); + if (snapshot.NativeMemoryRegionNames.Length > 0) + EnsureArrayLength(regionCount, snapshot.NativeMemoryRegionNames.Length, "NativeMemoryRegions_Name"); + + var allocationCount = snapshot.NativeAllocationAddresses.Length; + EnsureArrayLength(allocationCount, snapshot.NativeAllocationSizes.Length, "NativeAllocations_Size"); + EnsureArrayLength(allocationCount, snapshot.NativeAllocationOverheadSizes.Length, "NativeAllocations_OverheadSize"); + EnsureArrayLength(allocationCount, snapshot.NativeAllocationPaddingSizes.Length, "NativeAllocations_PaddingSize"); + EnsureArrayLength(allocationCount, snapshot.NativeAllocationMemoryRegionIndices.Length, "NativeAllocations_MemoryRegionIndex"); + + EnsureArrayLength(snapshot.ManagedHeapSectionStartAddresses.Length, snapshot.ManagedHeapSectionBytes.Length, "ManagedHeapSections_Bytes"); + + var managedTypeCount = snapshot.ManagedTypeNames.Length; + EnsureArrayLength(managedTypeCount, 
snapshot.ManagedTypeFlags.Length, "TypeDescriptions_Flags"); + EnsureArrayLength(managedTypeCount, snapshot.ManagedTypeAssemblies.Length, "TypeDescriptions_Assembly"); + EnsureArrayLength(managedTypeCount, snapshot.ManagedTypeFieldIndices.Length, "TypeDescriptions_FieldIndices"); + EnsureArrayLength(managedTypeCount, snapshot.ManagedTypeBaseOrElementTypeIndices.Length, "TypeDescriptions_BaseOrElementTypeIndex"); + EnsureArrayLength(managedTypeCount, snapshot.ManagedTypeSizes.Length, "TypeDescriptions_Size"); + EnsureArrayLength(managedTypeCount, snapshot.ManagedTypeInfoAddresses.Length, "TypeDescriptions_TypeInfoAddress"); + + var fieldCount = snapshot.FieldNames.Length; + EnsureArrayLength(fieldCount, snapshot.FieldOffsets.Length, "FieldDescriptions_Offset"); + EnsureArrayLength(fieldCount, snapshot.FieldTypeIndices.Length, "FieldDescriptions_TypeIndex"); + EnsureArrayLength(fieldCount, snapshot.FieldIsStatic.Length, "FieldDescriptions_IsStatic"); + } + + private static void EnsureArrayLength(int expected, int actual, string name) + { + if (expected != actual) + throw new InvalidOperationException($"Array length mismatch for {name}. expected={expected}, actual={actual}"); + } + + private static string[] ReadStrings(SnapReader reader, SnapEntryType type) + => reader.HasEntry(type) ? reader.ReadUtf8StringArray(type) : []; + + private static string[] ReadStringsWithCount(SnapReader reader, SnapEntryType type, int fallbackCount) + { + if (!reader.HasEntry(type)) + return fallbackCount > 0 ? Enumerable.Repeat(string.Empty, fallbackCount).ToArray() : []; + + try + { + return reader.ReadUtf8StringArray(type); + } + catch + { + var count = fallbackCount; + if (count <= 0) + { + try + { + count = checked((int)reader.GetEntryCount(type)); + } + catch + { + count = 0; + } + } + + return count > 0 ? Enumerable.Repeat(string.Empty, count).ToArray() : []; + } + } + + private static int[] ReadInts(SnapReader reader, SnapEntryType type) + => reader.HasEntry(type) ? 
reader.ReadPrimitiveArray(type) : []; + + private static int[] ReadRequiredInts(SnapReader reader, SnapEntryType type) + { + EnsureEntryExists(reader, type); + return reader.ReadPrimitiveArray(type); + } + + private static int[] ReadIntsWithCount(SnapReader reader, SnapEntryType type, int fallbackCount, int fallbackValue = 0) + { + var values = ReadOptionalInts(reader, type); + if (values.Length > 0) + return values; + + return fallbackCount > 0 ? Enumerable.Repeat(fallbackValue, fallbackCount).ToArray() : []; + } + + private static long[] ReadRequiredLongs(SnapReader reader, SnapEntryType type) + { + EnsureEntryExists(reader, type); + return reader.ReadPrimitiveArray(type); + } + + private static ulong[] ReadRequiredULongs(SnapReader reader, SnapEntryType type) + { + EnsureEntryExists(reader, type); + return reader.ReadPrimitiveArray(type); + } + + private static ulong[] ReadULongsWithCount(SnapReader reader, SnapEntryType type, int fallbackCount) + { + var values = ReadOptionalULongs(reader, type); + if (values.Length > 0) + return values; + + return fallbackCount > 0 ? 
new ulong[fallbackCount] : []; + } + + private static byte[] ReadRequiredBytes(SnapReader reader, SnapEntryType type) + { + EnsureEntryExists(reader, type); + return reader.ReadPrimitiveArray(type); + } + + private static string[] ReadRequiredStrings(SnapReader reader, SnapEntryType type) + { + EnsureEntryExists(reader, type); + return reader.ReadUtf8StringArray(type); + } + + private static int[][] ReadRequiredDynamicInts(SnapReader reader, SnapEntryType type) + { + EnsureEntryExists(reader, type); + return reader.ReadDynamicPrimitiveArrays(type); + } + + private static byte[][] ReadRequiredDynamicBytes(SnapReader reader, SnapEntryType type) + { + EnsureEntryExists(reader, type); + return reader.ReadDynamicByteArrays(type); + } + + private static DecodedVirtualMachineInfo ReadVirtualMachineInfo(SnapReader reader) + { + EnsureEntryExists(reader, SnapEntryType.Metadata_VirtualMachineInformation); + var values = reader.ReadPrimitiveArray(SnapEntryType.Metadata_VirtualMachineInformation); + if (values.Length < 6) + { + throw new InvalidOperationException( + $"Metadata_VirtualMachineInformation expected at least 6 uints, found {values.Length}."); + } + + return new DecodedVirtualMachineInfo + { + PointerSize = values[0], + ObjectHeaderSize = values[1], + ArrayHeaderSize = values[2], + ArrayBoundsOffsetInHeader = values[3], + ArraySizeOffsetInHeader = values[4], + AllocationGranularity = values[5], + }; + } + + private static ulong[] ReadManagedHeapSectionStartAddresses(SnapReader reader, uint formatVersion) + { + var starts = ReadRequiredULongs(reader, SnapEntryType.ManagedHeapSections_StartAddress); + if (formatVersion < SnapFormatVersion.MemLabelSizeAndHeapIdVersion) + return starts; + + var unmasked = new ulong[starts.Length]; + for (var i = 0; i < starts.Length; i++) + unmasked[i] = starts[i] & ~HeapSectionTypeFlagMask; + return unmasked; + } + + private static ulong[] ReadInstanceIds(SnapReader reader, uint formatVersion) + { + if 
(!reader.HasEntry(SnapEntryType.NativeObjects_InstanceId)) + return []; + + if (formatVersion >= SnapFormatVersion.EntityIDAs8ByteStructs) + return reader.ReadPrimitiveArray(SnapEntryType.NativeObjects_InstanceId); + + var ids32 = reader.ReadPrimitiveArray(SnapEntryType.NativeObjects_InstanceId); + var ids = new ulong[ids32.Length]; + for (var i = 0; i < ids32.Length; i++) + ids[i] = unchecked((uint)ids32[i]); + return ids; + } + + private static int[] ReadNativeObjectGcHandleIndices(SnapReader reader, uint formatVersion, int nativeObjectCount) + { + if (formatVersion < SnapFormatVersion.NativeConnectionsAsInstanceIdsVersion) + return Enumerable.Repeat(-1, nativeObjectCount).ToArray(); + + var gcHandleIndices = ReadOptionalInts(reader, SnapEntryType.NativeObjects_GCHandleIndex); + if (gcHandleIndices.Length == 0) + gcHandleIndices = ReadOptionalInts(reader, SnapEntryType.NativeObjects_GCHandleIndex_Legacy); + if (gcHandleIndices.Length == nativeObjectCount) + return gcHandleIndices; + + var fallback = Enumerable.Repeat(-1, nativeObjectCount).ToArray(); + if (gcHandleIndices.Length == 0) + return fallback; + + Array.Copy(gcHandleIndices, fallback, Math.Min(gcHandleIndices.Length, fallback.Length)); + return fallback; + } + + private static (int[] from, int[] to) ReadConnections( + SnapReader reader, + uint formatVersion, + ulong[] nativeObjectInstanceIds, + int[] nativeObjectGcHandleIndices, + int gcHandleCount) + { + if (!reader.HasEntry(SnapEntryType.Connections_From) || !reader.HasEntry(SnapEntryType.Connections_To)) + return ([], []); + + if (formatVersion < SnapFormatVersion.NativeConnectionsAsInstanceIdsVersion) + { + var fromUnified = reader.ReadPrimitiveArray(SnapEntryType.Connections_From); + var toUnified = reader.ReadPrimitiveArray(SnapEntryType.Connections_To); + if (fromUnified.Length != toUnified.Length) + throw new InvalidOperationException($"Array length mismatch for Connections_To. 
expected={fromUnified.Length}, actual={toUnified.Length}"); + return (fromUnified, toUnified); + } + + ulong[] fromInstanceIds; + ulong[] toInstanceIds; + if (formatVersion >= SnapFormatVersion.EntityIDAs8ByteStructs) + { + fromInstanceIds = reader.ReadPrimitiveArray(SnapEntryType.Connections_From); + toInstanceIds = reader.ReadPrimitiveArray(SnapEntryType.Connections_To); + } + else + { + var from32 = reader.ReadPrimitiveArray(SnapEntryType.Connections_From); + var to32 = reader.ReadPrimitiveArray(SnapEntryType.Connections_To); + fromInstanceIds = new ulong[from32.Length]; + toInstanceIds = new ulong[to32.Length]; + for (var i = 0; i < from32.Length; i++) + fromInstanceIds[i] = unchecked((uint)from32[i]); + for (var i = 0; i < to32.Length; i++) + toInstanceIds[i] = unchecked((uint)to32[i]); + } + + if (fromInstanceIds.Length != toInstanceIds.Length) + throw new InvalidOperationException($"Array length mismatch for Connections_To. expected={fromInstanceIds.Length}, actual={toInstanceIds.Length}"); + + var instanceIdToUnifiedIndex = new Dictionary(nativeObjectInstanceIds.Length); + var instanceIdToGcHandleIndex = new Dictionary(nativeObjectInstanceIds.Length); + for (var i = 0; i < nativeObjectInstanceIds.Length; i++) + { + var instanceId = nativeObjectInstanceIds[i]; + instanceIdToUnifiedIndex[instanceId] = gcHandleCount + i; + var gcHandleIndex = i < nativeObjectGcHandleIndices.Length ? 
nativeObjectGcHandleIndices[i] : -1; + if (gcHandleIndex >= 0) + instanceIdToGcHandleIndex[instanceId] = gcHandleIndex; + } + + var remappedFrom = new List(fromInstanceIds.Length + instanceIdToGcHandleIndex.Count); + var remappedTo = new List(toInstanceIds.Length + instanceIdToGcHandleIndex.Count); + for (var i = 0; i < fromInstanceIds.Length; i++) + { + if (!instanceIdToUnifiedIndex.TryGetValue(fromInstanceIds[i], out var fromUnified)) + continue; + if (!instanceIdToUnifiedIndex.TryGetValue(toInstanceIds[i], out var toUnified)) + continue; + remappedFrom.Add(fromUnified); + remappedTo.Add(toUnified); + } + + foreach (var (instanceId, gcHandleIndex) in instanceIdToGcHandleIndex) + { + if (!instanceIdToUnifiedIndex.TryGetValue(instanceId, out var fromUnified)) + continue; + remappedFrom.Add(fromUnified); + remappedTo.Add(gcHandleIndex); + } + + return (remappedFrom.ToArray(), remappedTo.ToArray()); + } + + private static int[] ReadOptionalInts(SnapReader reader, SnapEntryType type) + { + if (!reader.HasEntry(type)) + return []; + + try + { + return reader.ReadPrimitiveArray(type); + } + catch + { + return []; + } + } + + private static long[] ReadLongs(SnapReader reader, SnapEntryType type) + => reader.HasEntry(type) ? reader.ReadPrimitiveArray(type) : []; + + private static ulong[] ReadULongs(SnapReader reader, SnapEntryType type) + => reader.HasEntry(type) ? 
reader.ReadPrimitiveArray(type) : []; + + private static ulong[] ReadOptionalULongs(SnapReader reader, SnapEntryType type) + { + if (!reader.HasEntry(type)) + return []; + try + { + return reader.ReadPrimitiveArray(type); + } + catch + { + return []; + } + } + + private static void EnsureEntryExists(SnapReader reader, SnapEntryType type) + { + if (!reader.HasEntry(type)) + throw new InvalidOperationException($"Required snapshot entry '{type}' is missing."); + } +} + diff --git a/Core/Parser/SnapshotBridge.cs b/Core/Parser/SnapshotBridge.cs new file mode 100644 index 0000000..be50aa6 --- /dev/null +++ b/Core/Parser/SnapshotBridge.cs @@ -0,0 +1,261 @@ +using System.Globalization; +using MemorySnapshotDataTools.Parser; + +namespace MemorySnapshotDataTools.Parser; + +/// +/// Bridge between raw Unity .snap file format and . +/// Reads a snapshot via , decodes sections with , +/// then extracts native objects, managed heap objects, connections, roots, memory regions, and allocations. +/// +public static class SnapshotBridge +{ + /// + /// Reads the snapshot from disk, decodes all sections, and extracts raw data into a instance. + /// Reports progress via and respects for cancellation. + /// + /// Full path to the .snap file. + /// Reporter for status messages during extraction. + /// Cancellation token. + /// Fully populated raw snapshot data, validated in memory. + /// When is cancelled. + public static RawSnapshotData ExtractRawData(string snapshotPath, IProgressReporter progress, CancellationToken token) + { + progress.Report("Reading snapshot sections..."); + using var reader = SnapReader.Open(snapshotPath); + var decoded = SnapSectionDecoders.DecodeAll(reader); + token.ThrowIfCancellationRequested(); + return ExtractFromDecoded(decoded, snapshotPath); + } + + /// + /// Extracts raw snapshot data from an already-decoded snapshot. Used by tests and by after decoding. 
+ /// Populates native roots, memory regions, allocations, native objects, managed objects (via crawler), and connections, then validates. + /// + /// Decoded snapshot from . + /// Path to the source .snap file (stored in ). + /// Validated . + public static RawSnapshotData ExtractFromDecoded(DecodedSnapshot decoded, string snapshotPath) + { + var data = new RawSnapshotData + { + SnapshotInfo = new SnapshotInfo + { + SnapshotPath = snapshotPath, + ExportedAtUtc = DateTime.UtcNow.ToString("O", CultureInfo.InvariantCulture), + UnityVersion = $"format:{decoded.FormatVersion}", + } + }; + + ExtractNativeRoots(decoded, data.NativeRoots); + ExtractMemoryRegions(decoded, data.MemoryRegions); + ExtractNativeAllocations(decoded, data.NativeAllocations); + ExtractNativeObjects(decoded, data.NativeObjects); + var managedCrawl = ManagedSnapshotCrawler.Crawl(decoded); + data.ManagedObjects.AddRange(managedCrawl.ManagedObjects); + ExtractConnections(decoded, managedCrawl.ManagedConnections, data.Connections); + ValidateStrictInMemory(data); + return data; + } + + private static void ExtractNativeRoots(DecodedSnapshot decoded, List output) + { + output.Capacity = decoded.NativeRootIds.Length; + for (var i = 0; i < decoded.NativeRootIds.Length; i++) + { + output.Add(new NativeRootRow + { + RootIndex = i, + RootId = decoded.NativeRootIds[i], + AreaName = decoded.NativeRootAreaNames[i] ?? string.Empty, + ObjectName = decoded.NativeRootObjectNames[i] ?? 
string.Empty,
                AccumulatedSizeBytes = decoded.NativeRootAccumulatedSizes[i],
            });
        }
    }

    /// <summary>
    /// Appends one <see cref="NativeObjectRow"/> per entry of <c>decoded.NativeObjectNames</c>.
    /// The type name is resolved through <c>NativeTypeNames</c> and falls back to an empty string
    /// when the type index is out of range.
    /// </summary>
    /// <param name="decoded">Decoded snapshot whose native-object arrays are read by index.</param>
    /// <param name="output">List that receives the rows.</param>
    private static void ExtractNativeObjects(DecodedSnapshot decoded, List<NativeObjectRow> output)
    {
        var count = decoded.NativeObjectNames.Length;

        // EnsureCapacity only ever grows the list; assigning Capacity throws when the list
        // already holds more rows than the requested value.
        output.EnsureCapacity(output.Count + count);

        for (var i = 0; i < count; i++)
        {
            var typeIndex = decoded.NativeObjectTypeIndices[i];
            output.Add(new NativeObjectRow
            {
                NativeObjectIndex = i,
                InstanceId = decoded.NativeObjectInstanceIds[i].ToString(CultureInfo.InvariantCulture),
                Name = decoded.NativeObjectNames[i] ?? string.Empty,
                SizeBytes = decoded.NativeObjectSizes[i],
                TypeIndex = typeIndex,
                NativeTypeName = typeIndex >= 0 && typeIndex < decoded.NativeTypeNames.Length
                    ? decoded.NativeTypeNames[typeIndex] ?? string.Empty
                    : string.Empty,
                // NOTE(review): bit 0x8 is treated as the "destroyed" flag; confirm against the snapshot format.
                // The flags array may be shorter than the object array, in which case the object is not flagged.
                IsDestroyed = i < decoded.NativeObjectFlags.Length && (decoded.NativeObjectFlags[i] & 0x8) != 0,
            });
        }
    }

    /// <summary>
    /// Appends one <see cref="MemoryRegionRow"/> per entry of <c>decoded.NativeMemoryRegionAddressBases</c>.
    /// </summary>
    /// <param name="decoded">Decoded snapshot whose memory-region arrays are read by index.</param>
    /// <param name="output">List that receives the rows.</param>
    private static void ExtractMemoryRegions(DecodedSnapshot decoded, List<MemoryRegionRow> output)
    {
        var count = decoded.NativeMemoryRegionAddressBases.Length;
        output.EnsureCapacity(output.Count + count);

        for (var i = 0; i < count; i++)
        {
            output.Add(new MemoryRegionRow
            {
                RegionIndex = i,
                AddressBase = decoded.NativeMemoryRegionAddressBases[i],
                AddressSize = decoded.NativeMemoryRegionAddressSizes[i],
                Name = decoded.NativeMemoryRegionNames[i] ?? string.Empty,
                ParentRegionIndex = decoded.NativeMemoryRegionParentIndices[i],
                FirstAllocationIndex = decoded.NativeMemoryRegionFirstAllocationIndices[i],
                NumAllocations = decoded.NativeMemoryRegionNumAllocations[i],
            });
        }
    }

    /// <summary>
    /// Appends one <see cref="NativeAllocationRow"/> per entry of <c>decoded.NativeAllocationAddresses</c>.
    /// </summary>
    /// <param name="decoded">Decoded snapshot whose native-allocation arrays are read by index.</param>
    /// <param name="output">List that receives the rows.</param>
    private static void ExtractNativeAllocations(DecodedSnapshot decoded, List<NativeAllocationRow> output)
    {
        var count = decoded.NativeAllocationAddresses.Length;
        output.EnsureCapacity(output.Count + count);

        for (var i = 0; i < count; i++)
        {
            output.Add(new NativeAllocationRow
            {
                AllocationIndex = i,
                Address = decoded.NativeAllocationAddresses[i],
                SizeBytes = decoded.NativeAllocationSizes[i],
                OverheadSizeBytes = decoded.NativeAllocationOverheadSizes[i],
                PaddingSizeBytes = decoded.NativeAllocationPaddingSizes[i],
                MemoryRegionIndex = decoded.NativeAllocationMemoryRegionIndices[i],
            });
        }
    }

    /// <summary>
    /// Builds the connection rows: first the snapshot's own from/to pairs (tagged
    /// <c>native_connection</c>), then the supplied managed connections. Rows whose
    /// (from, to, type) key was already emitted by this call are dropped.
    /// </summary>
    /// <param name="decoded">Decoded snapshot providing <c>ConnectionsFrom</c>/<c>ConnectionsTo</c> and <c>GcHandleTargets</c>.</param>
    /// <param name="managedConnections">Connections produced elsewhere that are appended after the native ones.</param>
    /// <param name="output">List that receives the de-duplicated rows.</param>
    private static void ExtractConnections(DecodedSnapshot decoded, List<ConnectionRow> managedConnections, List<ConnectionRow> output)
    {
        var dedupe = new HashSet<ConnectionKey>();
        var gcHandleUniqueCount = decoded.GcHandleTargets.Length;
        var count = decoded.ConnectionsFrom.Length;

        // Upper bound only: duplicates are skipped, so fewer rows may actually be added.
        output.EnsureCapacity(output.Count + count + managedConnections.Count);

        for (var i = 0; i < count; i++)
        {
            var fromSource = MapUnifiedIndexToSource(decoded.ConnectionsFrom[i], gcHandleUniqueCount);
            var toSource = MapUnifiedIndexToSource(decoded.ConnectionsTo[i], gcHandleUniqueCount);

            var row = new ConnectionRow
            {
                FromKind = fromSource.Kind,
                FromIndex = fromSource.Index,
                ToKind = toSource.Kind,
                ToIndex = toSource.Index,
                ConnectionType = "native_connection",
            };
            AddConnectionIfNew(output, dedupe, row);
        }

        for (var i = 0; i < managedConnections.Count; i++)
            AddConnectionIfNew(output, dedupe, managedConnections[i]);
    }

    /// <summary>
    /// Splits a unified connection index into an endpoint kind and a kind-local index:
    /// negative values map to <c>unknown</c>, values below <paramref name="gcHandleUniqueCount"/>
    /// to <c>managed_object</c>, and the rest to <c>native_object</c> (offset by the GC handle count).
    /// </summary>
    private static SourceRef MapUnifiedIndexToSource(int unifiedIndex, int gcHandleUniqueCount)
        => unifiedIndex < 0
            ? new SourceRef("unknown", unifiedIndex)
            : unifiedIndex < gcHandleUniqueCount
                ? new SourceRef("managed_object", unifiedIndex)
                : new SourceRef("native_object", unifiedIndex - gcHandleUniqueCount);

    /// <summary>Endpoint kind plus the index within that kind.</summary>
    private readonly struct SourceRef(string kind, long index)
    {
        public string Kind { get; } = kind;
        public long Index { get; } = index;
    }

    /// <summary>Adds <paramref name="row"/> to <paramref name="output"/> unless an equal key is already in <paramref name="dedupe"/>.</summary>
    private static void AddConnectionIfNew(
        List<ConnectionRow> output,
        HashSet<ConnectionKey> dedupe,
        ConnectionRow row)
    {
        var key = new ConnectionKey(row.FromKind, row.FromIndex, row.ToKind, row.ToIndex, row.ConnectionType);
        if (dedupe.Add(key))
            output.Add(row);
    }

    /// <summary>Value-equality key used to detect duplicate connections.</summary>
    private readonly record struct ConnectionKey(
        string FromKind,
        long FromIndex,
        string ToKind,
        long ToIndex,
        string ConnectionType);

    /// <summary>
    /// Checks the extracted tables for internal consistency: row indices must equal their list
    /// position, cross-table indices must be in range (with -1 accepted as "none"), managed
    /// objects must have an address, a positive size and resolved type metadata, and every
    /// connection endpoint must resolve to an existing managed or native object.
    /// </summary>
    /// <param name="data">Extracted snapshot tables to validate.</param>
    /// <exception cref="InvalidOperationException">Thrown on the first violation found.</exception>
    private static void ValidateStrictInMemory(RawSnapshotData data)
    {
        for (var i = 0; i < data.ManagedObjects.Count; i++)
        {
            var row = data.ManagedObjects[i];
            if (row.ManagedObjectIndex != i)
                throw new InvalidOperationException($"Managed object index mismatch. expected={i}, actual={row.ManagedObjectIndex}");
            if (row.Address == 0)
                throw new InvalidOperationException($"Managed object {i} has null address.");
            if (row.SizeBytes <= 0)
                throw new InvalidOperationException($"Managed object {i} has non-positive size {row.SizeBytes}.");
            if (row.TypeIndex < 0 || string.IsNullOrWhiteSpace(row.ManagedTypeName))
                throw new InvalidOperationException($"Managed object {i} has unresolved managed type metadata.");
            if (row.NativeObjectIndex < -1 || row.NativeObjectIndex >= data.NativeObjects.Count)
                throw new InvalidOperationException($"Managed object {i} has invalid native_object_index {row.NativeObjectIndex}.");
        }

        for (var i = 0; i < data.NativeObjects.Count; i++)
        {
            var row = data.NativeObjects[i];
            if (row.NativeObjectIndex != i)
                throw new InvalidOperationException($"Native object index mismatch. expected={i}, actual={row.NativeObjectIndex}");
        }

        for (var i = 0; i < data.MemoryRegions.Count; i++)
        {
            var row = data.MemoryRegions[i];
            if (row.RegionIndex != i)
                throw new InvalidOperationException($"Memory region index mismatch. expected={i}, actual={row.RegionIndex}");

            // Same rule as native_object_index above: -1 is tolerated, anything lower is corrupt.
            if (row.ParentRegionIndex < -1 || row.ParentRegionIndex >= data.MemoryRegions.Count)
                throw new InvalidOperationException($"Memory region {i} has invalid parent_region_index {row.ParentRegionIndex}.");
            if (row.FirstAllocationIndex < -1 || row.FirstAllocationIndex >= data.NativeAllocations.Count)
                throw new InvalidOperationException($"Memory region {i} has invalid first_allocation_index {row.FirstAllocationIndex}.");
            if (row.NumAllocations < 0)
                throw new InvalidOperationException($"Memory region {i} has negative num_allocations {row.NumAllocations}.");
        }

        for (var i = 0; i < data.NativeAllocations.Count; i++)
        {
            var row = data.NativeAllocations[i];
            if (row.AllocationIndex != i)
                throw new InvalidOperationException($"Native allocation index mismatch. expected={i}, actual={row.AllocationIndex}");
            if (row.MemoryRegionIndex < -1 || row.MemoryRegionIndex >= data.MemoryRegions.Count)
                throw new InvalidOperationException($"Native allocation {i} has invalid memory_region_index {row.MemoryRegionIndex}.");
        }

        for (var i = 0; i < data.Connections.Count; i++)
        {
            var c = data.Connections[i];
            ValidateEndpoint(c.FromKind, c.FromIndex, data, $"connections[{i}].from");
            ValidateEndpoint(c.ToKind, c.ToIndex, data, $"connections[{i}].to");
        }
    }

    /// <summary>
    /// Verifies that a connection endpoint is a <c>managed_object</c> or <c>native_object</c>
    /// whose index lies inside the corresponding table.
    /// </summary>
    /// <param name="kind">Endpoint kind string.</param>
    /// <param name="index">Index within that kind's table.</param>
    /// <param name="data">Tables the index is checked against.</param>
    /// <param name="label">Prefix used in the exception message to identify the endpoint.</param>
    /// <exception cref="InvalidOperationException">The index is out of range or the kind is not supported (this includes <c>unknown</c>).</exception>
    private static void ValidateEndpoint(string kind, long index, RawSnapshotData data, string label)
    {
        if (kind == "managed_object")
        {
            if (index < 0 || index >= data.ManagedObjects.Count)
                throw new InvalidOperationException($"{label} points to out-of-range managed object index {index}.");
            return;
        }

        if (kind == "native_object")
        {
            if (index < 0 || index >= data.NativeObjects.Count)
                throw new InvalidOperationException($"{label} points to out-of-range native object index {index}.");
            return;
        }

        throw new InvalidOperationException($"{label} has unsupported endpoint kind '{kind}'.");
    }
}
diff --git a/Core/Report/Queries/DuckDbReportQueries.cs b/Core/Report/Queries/DuckDbReportQueries.cs
new file mode 100644
index 0000000..80651da
--- /dev/null
+++ b/Core/Report/Queries/DuckDbReportQueries.cs
@@ -0,0 +1,58 @@
using DuckDB.NET.Data;

namespace MemorySnapshotDataTools.Report.Queries;

/// <summary>DuckDB implementation of <see cref="IReportQueryBackend"/>. Opens the database at construction and executes report SQL via DuckDB.NET.</summary>
internal sealed class DuckDbReportQueries : IReportQueryBackend
{
    private readonly DuckDBConnection _connection;

    /// <summary>Opens a connection to the DuckDB database at the given path.</summary>
    /// <param name="dbPath">Path to the .duckdb file.</param>
public DuckDbReportQueries(string dbPath)
    {
        // Open through a local so a failed Open() disposes the connection before the exception
        // propagates; otherwise a caller that catches the failure would leave it undisposed.
        var connection = new DuckDBConnection($"Data Source={dbPath}");
        try
        {
            connection.Open();
        }
        catch
        {
            connection.Dispose();
            throw;
        }

        _connection = connection;
    }

    /// <inheritdoc />
    public ReportBackendDialect Dialect => ReportBackendDialect.DuckDb;

    /// <summary>
    /// Runs <paramref name="sql"/> and materializes the whole result set: column names in
    /// result order, and one <c>object?[]</c> per row with database NULLs mapped to <c>null</c>.
    /// </summary>
    /// <param name="sql">SQL text to execute.</param>
    /// <returns>Column names and the fully-read rows.</returns>
    public (string[] Columns, List<object?[]> Rows) ExecuteQuery(string sql)
    {
        using var cmd = _connection.CreateCommand();
        cmd.CommandText = sql;
        using var reader = cmd.ExecuteReader();

        var fieldCount = reader.FieldCount;
        var columns = new string[fieldCount];
        for (var i = 0; i < fieldCount; i++)
            columns[i] = reader.GetName(i);

        var rows = new List<object?[]>();
        while (reader.Read())
        {
            var row = new object?[fieldCount];
            for (var i = 0; i < fieldCount; i++)
                row[i] = reader.IsDBNull(i) ? null : reader.GetValue(i);
            rows.Add(row);
        }

        return (columns, rows);
    }

    /// <summary>
    /// Returns whether <paramref name="tableName"/> in schema <c>main</c> has a column named
    /// <paramref name="columnName"/>, looked up through <c>information_schema.columns</c>.
    /// </summary>
    /// <param name="tableName">Table name.</param>
    /// <param name="columnName">Column name.</param>
    /// <returns><c>true</c> if the column is found; <c>false</c> if it is not or the lookup fails.</returns>
    public bool HasColumn(string tableName, string columnName)
    {
        // Single quotes are doubled so the names stay inside their SQL string literals.
        var table = tableName.Replace("'", "''");
        var column = columnName.Replace("'", "''");

        try
        {
            var (_, rows) = ExecuteQuery(
                $"SELECT 1 FROM information_schema.columns WHERE table_schema = 'main' AND table_name = '{table}' AND column_name = '{column}' LIMIT 1");
            return rows.Count > 0;
        }
        catch
        {
            // Deliberate best-effort probe: any failure is reported as "column not present".
            return false;
        }
    }

    /// <summary>Disposes the underlying connection.</summary>
    public void Dispose() => _connection.Dispose();
}
diff --git a/Core/Report/Queries/IReportQueryBackend.cs b/Core/Report/Queries/IReportQueryBackend.cs
new file mode 100644
index 0000000..6fafafc
--- /dev/null
+++ b/Core/Report/Queries/IReportQueryBackend.cs
@@ -0,0 +1,32 @@
namespace MemorySnapshotDataTools.Report.Queries;

/// <summary>Database dialect used for report queries (affects SQL for e.g. LOG/rounding).</summary>
internal enum ReportBackendDialect
{
    /// <summary>DuckDB backend.</summary>
    DuckDb,

    /// <summary>SQLite backend.</summary>
    Sqlite,
}

///
/// Abstraction for running report queries against an exported snapshot database.
/// Implementations exist for DuckDB and SQLite so the report generator is backend-agnostic.
///
internal interface IReportQueryBackend : IDisposable
{
    /// <summary>Dialect of the connected database (used to choose dialect-specific SQL).</summary>
    ReportBackendDialect Dialect { get; }

    /// <summary>Executes the given SQL and returns column names and rows (null for missing values).</summary>
    /// <param name="sql">SQL query (single statement).</param>
    /// <returns>Column names and list of row arrays.</returns>
    (string[] Columns, List<object?[]> Rows) ExecuteQuery(string sql);

    /// <summary>Returns whether the table has a column with the given name.</summary>
    /// <param name="tableName">Table name.</param>
    /// <param name="columnName">Column name.</param>
    /// <returns>True if the column exists.</returns>
    bool HasColumn(string tableName, string columnName);
}
diff --git a/Core/Report/Queries/ReportQueryFactory.cs b/Core/Report/Queries/ReportQueryFactory.cs
new file mode 100644
index 0000000..9dc8b67
--- /dev/null
+++ b/Core/Report/Queries/ReportQueryFactory.cs
@@ -0,0 +1,34 @@
namespace MemorySnapshotDataTools.Report.Queries;

/// <summary>
/// Creates an <see cref="IReportQueryBackend"/> based on the database file extension (.duckdb, .db, .sqlite, .sqlite3).
/// If extension is unknown, tries DuckDB first, then falls back to SQLite.
/// </summary>
internal static class ReportQueryFactory
{
    /// <summary>Opens the database at the given path and returns the appropriate query backend.</summary>
    /// <param name="dbPath">Path to the exported database file.</param>
    /// <returns>A backend connected to the database; caller must dispose.</returns>
public static IReportQueryBackend Create(string dbPath)
    {
        // The extension is lower-cased so that e.g. ".DuckDB" and ".duckdb" select the same backend.
        switch (Path.GetExtension(dbPath).ToLowerInvariant())
        {
            case ".duckdb":
                return new DuckDbReportQueries(dbPath);

            case ".db":
            case ".sqlite":
            case ".sqlite3":
                return new SqliteReportQueries(dbPath);

            default:
                return TryOpenAsDuckDb(dbPath);
        }
    }

    /// <summary>
    /// Fallback for unrecognized extensions: attempts the DuckDB backend and, if constructing it
    /// throws for any reason, opens the path with the SQLite backend instead.
    /// </summary>
    private static IReportQueryBackend TryOpenAsDuckDb(string dbPath)
    {
        IReportQueryBackend backend;
        try
        {
            backend = new DuckDbReportQueries(dbPath);
        }
        catch
        {
            backend = new SqliteReportQueries(dbPath);
        }

        return backend;
    }
}
diff --git a/Core/Report/Queries/ReportSql.cs b/Core/Report/Queries/ReportSql.cs
new file mode 100644
index 0000000..cac5145
--- /dev/null
+++ b/Core/Report/Queries/ReportSql.cs
@@ -0,0 +1,449 @@
namespace MemorySnapshotDataTools.Report.Queries;

/// <summary>
/// Central home of the report's SQL text. Most queries are constants; queries that differ per
/// database engine are methods taking a <see cref="ReportBackendDialect"/>.
/// </summary>
internal static class ReportSql
{
    /// <summary>Selects snapshot_path, exported_at_utc and unity_version from snapshot_info.</summary>
    public const string SnapshotInfo = "SELECT snapshot_path, exported_at_utc, unity_version FROM snapshot_info;";

    /// <summary>Row count of each exported table, one (table_name, row_count) row per table.</summary>
    public const string TableCounts = """
        SELECT 'native_objects' AS table_name, COUNT(*) AS row_count FROM native_objects
        UNION ALL SELECT 'managed_objects', COUNT(*) FROM managed_objects
        UNION ALL SELECT 'connections', COUNT(*) FROM connections
        UNION ALL SELECT 'native_roots', COUNT(*) FROM native_roots
        UNION ALL SELECT 'memory_regions', COUNT(*) FROM memory_regions
        UNION ALL SELECT 'native_allocations', COUNT(*) FROM native_allocations
        ORDER BY 1;
        """;

    /// <summary>Single-row totals for native_objects: count, total MB/GB, average KB, largest object MB, distinct type count.</summary>
    public const string NativeOverview = """
        SELECT
            COUNT(*) AS total_objects,
            ROUND(SUM(size_bytes) / 1024.0 / 1024, 2) AS total_native_mb,
            ROUND(SUM(size_bytes) / 1024.0 / 1024 / 1024, 3) AS total_native_gb,
            ROUND(AVG(size_bytes) / 1024.0, 2) AS avg_size_kb,
            ROUND(MAX(size_bytes) / 1024.0 / 1024, 2) AS max_single_object_mb,
            COUNT(DISTINCT native_type_name) AS distinct_types
        FROM native_objects;
        """;

public const string NativeTypes = """
        SELECT
            COALESCE(native_type_name, '(unknown)') AS type_name,
            COUNT(*) AS obj_count,
            ROUND(SUM(size_bytes) / 1024.0 / 1024, 2) AS total_mb,
            ROUND(100.0 * SUM(size_bytes) / NULLIF(SUM(SUM(size_bytes)) OVER (), 0), 2) AS pct_of_total
        FROM native_objects
        GROUP BY native_type_name
        ORDER BY total_mb DESC
        LIMIT 40;
        """;

    /// <summary>Returns SQL for native object size distribution by log4 bucket. DuckDB uses LOG(4,x); SQLite uses log(x)/log(4).</summary>
    /// <param name="dialect">Backend dialect for LOG function.</param>
    /// <returns>SQL string for the size bucket query.</returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="dialect"/> is not a known dialect.</exception>
    public static string SizeBucketDistribution(ReportBackendDialect dialect) => dialect switch
    {
        ReportBackendDialect.DuckDb => """
            SELECT
                CAST(FLOOR(LOG(4, NULLIF(size_bytes, 0))) AS INTEGER) AS log4_bucket,
                ROUND(POWER(4.0, CAST(FLOOR(LOG(4, NULLIF(size_bytes, 0))) AS INTEGER)) / 1024.0 / 1024, 3) AS bucket_floor_mb,
                COUNT(*) AS obj_count,
                ROUND(SUM(size_bytes) / 1024.0 / 1024, 2) AS total_mb
            FROM native_objects
            WHERE size_bytes > 0
            GROUP BY log4_bucket
            ORDER BY log4_bucket DESC;
            """,
        ReportBackendDialect.Sqlite => """
            SELECT
                CAST(FLOOR(CAST(log(NULLIF(size_bytes, 0)) / log(4) AS REAL)) AS INTEGER) AS log4_bucket,
                ROUND(POWER(4.0, CAST(FLOOR(CAST(log(NULLIF(size_bytes, 0)) / log(4) AS REAL)) AS INTEGER)) / 1024.0 / 1024, 3) AS bucket_floor_mb,
                COUNT(*) AS obj_count,
                ROUND(SUM(size_bytes) / 1024.0 / 1024, 2) AS total_mb
            FROM native_objects
            WHERE size_bytes > 0
            GROUP BY log4_bucket
            ORDER BY log4_bucket DESC;
            """,
        _ => throw new ArgumentOutOfRangeException(nameof(dialect)),
    };

    /// <summary>The 50 largest native objects by size_bytes, with name, type and size in MB.</summary>
    public const string TopNativeObjects = """
        SELECT
            native_object_index,
            COALESCE(name, '(unnamed)') AS name,
            COALESCE(native_type_name, '(unknown)') AS type_name,
            ROUND(size_bytes / 1024.0 / 1024, 3) AS size_mb
        FROM native_objects
        ORDER BY size_bytes DESC
        LIMIT 50;
        """;

    /// <summary>Combined share (percent of native bytes) of the five largest native types.</summary>
    public const string NativeTypesTop5Pct = """
        SELECT ROUND(SUM(pct), 1) AS top5_pct
        FROM (
            SELECT ROUND(100.0 * SUM(size_bytes) / NULLIF(SUM(SUM(size_bytes)) OVER (), 0), 2) AS pct
            FROM native_objects
            GROUP BY native_type_name
            ORDER BY SUM(size_bytes) DESC
            LIMIT 5
        ) t;
        """;

    /// <summary>
    /// Top 50 groups of native objects sharing the same (name, type), ordered by estimated waste
    /// ((count - 1) * average size). Unnamed objects are excluded: both NULL and empty names,
    /// since an empty name does not identify an asset and would otherwise form one large bogus group.
    /// </summary>
    public const string DuplicateAssets = """
        SELECT
            COALESCE(name, '(unnamed)') AS name,
            COALESCE(native_type_name, '(unknown)') AS type_name,
            COUNT(*) AS duplicate_count,
            ROUND(MIN(size_bytes) / 1024.0 / 1024, 3) AS min_size_mb,
            ROUND(MAX(size_bytes) / 1024.0 / 1024, 3) AS max_size_mb,
            ROUND(SUM(size_bytes) / 1024.0 / 1024, 3) AS total_mb,
            ROUND((COUNT(*) - 1) * AVG(size_bytes) / 1024.0 / 1024, 3) AS wasted_mb
        FROM native_objects
        WHERE name IS NOT NULL AND name <> ''
        GROUP BY name, native_type_name
        HAVING COUNT(*) > 1
        ORDER BY wasted_mb DESC
        LIMIT 50;
        """;

    /// <summary>
    /// Single-row summary over the same duplicate groups as <see cref="DuplicateAssets"/>
    /// (same name filter): group count, extra instances, estimated wasted MB and its share of native memory.
    /// </summary>
    public const string DuplicateSummary = """
        SELECT
            COUNT(*) AS duplicate_groups,
            SUM(dup_count) - COUNT(*) AS extra_instances,
            ROUND(SUM(wasted_bytes) / 1024.0 / 1024, 2) AS total_wasted_mb,
            ROUND(100.0 * SUM(wasted_bytes) / NULLIF((SELECT SUM(size_bytes) FROM native_objects), 0), 1) AS pct_of_native_total
        FROM (
            SELECT COUNT(*) AS dup_count, (COUNT(*) - 1) * AVG(size_bytes) AS wasted_bytes
            FROM native_objects
            WHERE name IS NOT NULL AND name <> ''
            GROUP BY name, native_type_name
            HAVING COUNT(*) > 1
        ) t;
        """;

    /// <summary>Single-row totals for managed_objects: count, total MB, average bytes, distinct types, and rows with a non-NULL native_object_index.</summary>
    public const string ManagedOverview = """
        SELECT
            COUNT(*) AS total_objects,
            ROUND(SUM(size_bytes) / 1024.0 / 1024, 2) AS total_managed_mb,
            ROUND(AVG(size_bytes), 0) AS avg_size_bytes,
            COUNT(DISTINCT managed_type_name) AS distinct_types,
            COUNT(native_object_index) AS objects_with_native_ref
        FROM managed_objects;
        """;

    /// <summary>Top 40 managed types by total size, with object count and share of managed bytes.</summary>
    public const string ManagedTypes = """
        SELECT
            COALESCE(managed_type_name, '(unknown)') AS type_name,
            COUNT(*) AS obj_count,
            ROUND(SUM(size_bytes) / 1024.0 / 1024, 2) AS total_mb,
            ROUND(100.0 * SUM(size_bytes) / NULLIF(SUM(SUM(size_bytes)) OVER (), 0), 2) AS pct_of_total
        FROM managed_objects
        GROUP BY managed_type_name
        ORDER BY total_mb DESC
        LIMIT 40;
        """;

    /// <summary>Native roots grouped by area_name: root count and accumulated size in MB.</summary>
    public const string NativeRootsByArea = """
        SELECT
            COALESCE(area_name, '(unknown)') AS area_name,
            COUNT(*) AS root_count,
            ROUND(SUM(accumulated_size_bytes) / 1024.0 / 1024, 2) AS total_accumulated_mb
        FROM native_roots
        GROUP BY area_name
        ORDER BY total_accumulated_mb DESC;
        """;

    /// <summary>The 30 native roots with the largest accumulated_size_bytes.</summary>
    public const string NativeRootsTop = """
        SELECT
            root_id,
            COALESCE(area_name, '(unknown)') AS area_name,
            COALESCE(object_name, '(unnamed)') AS object_name,
            ROUND(accumulated_size_bytes / 1024.0 / 1024, 3) AS accumulated_mb
        FROM native_roots
        ORDER BY accumulated_size_bytes DESC
        LIMIT 30;
        """;

    /// <summary>The 40 largest memory regions by address_size, with the parent region's name resolved by self-join.</summary>
    public const string MemoryRegions = """
        SELECT
            r.region_index,
            COALESCE(r.name, '(unnamed)') AS region_name,
            COALESCE(p.name, '—') AS parent_name,
            ROUND(r.address_size / 1024.0 / 1024, 2) AS size_mb,
            r.num_allocations
        FROM memory_regions r
        LEFT JOIN memory_regions p ON p.region_index = r.parent_region_index
        ORDER BY r.address_size DESC
        LIMIT 40;
        """;

    /// <summary>
    /// Per-region payload, overhead and padding (MB) summed from native_allocations, plus payload as
    /// a percentage of the region size. Regions without any allocation row are dropped by the HAVING clause.
    /// </summary>
    public const string AllocationEfficiency = """
        SELECT
            COALESCE(r.name, '(unnamed)') AS region_name,
            r.num_allocations,
            ROUND(r.address_size / 1024.0 / 1024, 2) AS region_size_mb,
            ROUND(SUM(a.size_bytes) / 1024.0 / 1024, 2) AS payload_mb,
            ROUND(SUM(a.overhead_size_bytes) / 1024.0 / 1024, 2) AS overhead_mb,
            ROUND(SUM(a.padding_size_bytes) / 1024.0 / 1024, 2) AS padding_mb,
            ROUND(100.0 * SUM(a.size_bytes) / NULLIF(r.address_size, 0), 1) AS utilization_pct
        FROM memory_regions r
        LEFT JOIN native_allocations a ON a.memory_region_index = r.region_index
        GROUP BY r.region_index, r.name, r.address_size, r.num_allocations
        HAVING SUM(a.size_bytes) IS NOT NULL
        ORDER BY payload_mb DESC NULLS LAST
        LIMIT 30;
        """;

    /// <summary>Edge count per connection_type.</summary>
    public const string ConnectionTypes = """
        SELECT connection_type, COUNT(*) AS edge_count
        FROM connections
        GROUP BY connection_type
        ORDER BY edge_count DESC;
        """;

    /// <summary>The 20 native objects with the most inbound connections.</summary>
    public const string MostReferenced = """
        SELECT
            n.native_object_index,
            COALESCE(n.name, '(unnamed)') AS name,
            COALESCE(n.native_type_name, '(unknown)') AS type_name,
            ROUND(n.size_bytes / 1024.0 / 1024, 2) AS size_mb,
            COUNT(c.from_index) AS inbound_refs
        FROM connections c
        JOIN native_objects n ON n.native_object_index = c.to_index
        WHERE c.to_kind = 'native_object'
        GROUP BY n.native_object_index, n.name, n.native_type_name, n.size_bytes
        ORDER BY inbound_refs DESC
        LIMIT 20;
        """;

    /// <summary>
    /// Same as <see cref="MostReferenced"/> but without objects of type MonoScript. The type name is
    /// coalesced before comparing so objects with a NULL type are kept: a bare
    /// <c>!= 'MonoScript'</c> evaluates to NULL for them and would filter them out.
    /// </summary>
    public const string MostReferencedExclMonoScript = """
        SELECT
            n.native_object_index,
            COALESCE(n.name, '(unnamed)') AS name,
            COALESCE(n.native_type_name, '(unknown)') AS type_name,
            ROUND(n.size_bytes / 1024.0 / 1024, 2) AS size_mb,
            COUNT(c.from_index) AS inbound_refs
        FROM connections c
        JOIN native_objects n ON n.native_object_index = c.to_index
        WHERE c.to_kind = 'native_object' AND COALESCE(n.native_type_name, '') != 'MonoScript'
        GROUP BY n.native_object_index, n.name, n.native_type_name, n.size_bytes
        ORDER BY inbound_refs DESC
        LIMIT 20;
        """;

    /// <summary>The 20 native objects with the most outbound connections.</summary>
    public const string MostOutbound = """
        SELECT
            n.native_object_index,
            COALESCE(n.name, '(unnamed)') AS name,
            COALESCE(n.native_type_name, '(unknown)') AS type_name,
            ROUND(n.size_bytes / 1024.0 / 1024, 2) AS size_mb,
            COUNT(c.to_index) AS outbound_refs
        FROM connections c
        JOIN native_objects n ON n.native_object_index = c.from_index
        WHERE c.from_kind = 'native_object'
        GROUP BY n.native_object_index, n.name, n.native_type_name, n.size_bytes
        ORDER BY outbound_refs DESC
        LIMIT 20;
        """;

    /// <summary>Count, total MB and share of native memory of the 50 largest native objects.</summary>
    public const string Top50Summary = """
        SELECT
            COUNT(*) AS obj_count,
            ROUND(SUM(size_bytes) / 1024.0 / 1024, 2) AS total_mb,
            ROUND(100.0 * SUM(size_bytes) / NULLIF((SELECT SUM(size_bytes) FROM native_objects), 0), 1) AS pct_of_native_total
        FROM (SELECT size_bytes FROM native_objects ORDER BY size_bytes DESC LIMIT 50) t;
        """;

    // ---------------------------------------------------------------------------
    // Leaked Shell analysis
    // Pattern A:
// native object still in memory but is_destroyed=true
    // Pattern B: native object freed; managed C# wrapper is orphaned (native_object_index IS NULL)
    // ---------------------------------------------------------------------------

    /// <summary>
    /// Pattern B by type: managed objects with a NULL native_object_index, counted per managed type.
    /// Only types that also have at least one instance with a non-NULL native_object_index are included.
    /// </summary>
    public const string LeakedBByType = """
        SELECT
            COALESCE(m.managed_type_name, '(unknown)') AS managed_type,
            COUNT(1) AS leaked_count
        FROM managed_objects m
        WHERE m.native_object_index IS NULL
          AND EXISTS (
              SELECT 1 FROM managed_objects m2
              WHERE m2.managed_type_name = m.managed_type_name
                AND m2.native_object_index IS NOT NULL
          )
        GROUP BY 1
        ORDER BY leaked_count DESC;
        """;

    /// <summary>Pattern B total: number of managed objects matched by the same filter as <see cref="LeakedBByType"/>.</summary>
    public const string LeakedBStats = """
        SELECT COUNT(1) AS total_orphaned
        FROM managed_objects m
        WHERE m.native_object_index IS NULL
          AND EXISTS (
              SELECT 1 FROM managed_objects m2
              WHERE m2.managed_type_name = m.managed_type_name
                AND m2.native_object_index IS NOT NULL
          )
        ;
        """;

    /// <summary>
    /// Patterns A and B in one result set, one row per (pattern, native type, managed type) with the
    /// leaked count and retained native MB. Pattern B rows carry a fixed native type label and 0.0 MB.
    /// Reads native_objects.is_destroyed.
    /// </summary>
    public const string LeakedCombined = """
        SELECT
            pattern,
            COALESCE(native_type_name, 'unknown (freed)') AS native_type_name,
            managed_type_name,
            leaked_count,
            ROUND(native_mb_retained, 2) AS native_mb_retained
        FROM (
            SELECT
                'A: Destroyed (native still in memory)' AS pattern,
                n.native_type_name,
                m.managed_type_name,
                COUNT(1) AS leaked_count,
                SUM(n.size_bytes) / 1024.0 / 1024 AS native_mb_retained
            FROM managed_objects m
            JOIN native_objects n ON m.native_object_index = n.native_object_index
            WHERE n.is_destroyed = true
            GROUP BY 2, 3

            UNION ALL

            SELECT
                'B: Orphaned (native freed)',
                'unknown (freed)',
                m.managed_type_name,
                COUNT(1),
                0.0
            FROM managed_objects m
            WHERE m.native_object_index IS NULL
              AND EXISTS (
                  SELECT 1 FROM managed_objects m2
                  WHERE m2.managed_type_name = m.managed_type_name
                    AND m2.native_object_index IS NOT NULL
              )
            GROUP BY 3
        ) combined
        ORDER BY leaked_count DESC;
        """;

    /// <summary>Pattern A totals: managed objects joined to a destroyed native object — count, retained native MB and share of native memory.</summary>
    public const string LeakedAStats = """
        SELECT
            COUNT(*) AS total_leaked_count,
            ROUND(SUM(n.size_bytes) / 1024.0 / 1024, 2) AS native_mb_retained,
            ROUND(
                100.0 * SUM(n.size_bytes) / NULLIF((SELECT SUM(size_bytes) FROM native_objects), 0),
                1
            ) AS pct_of_native_total
        FROM managed_objects m
        JOIN native_objects n ON m.native_object_index = n.native_object_index
        WHERE n.is_destroyed = true;
        """;

    /// <summary>Pattern A grouped by (native type, managed type): leaked count and retained native MB, largest first.</summary>
    public const string LeakedAByType = """
        SELECT
            COALESCE(n.native_type_name, '(unknown)') AS native_type,
            COALESCE(m.managed_type_name, '(unknown)') AS managed_type,
            COUNT(1) AS leaked_count,
            ROUND(SUM(n.size_bytes) / 1024.0 / 1024, 2) AS native_mb_retained
        FROM managed_objects m
        JOIN native_objects n ON m.native_object_index = n.native_object_index
        WHERE n.is_destroyed = true
        GROUP BY 1, 2
        ORDER BY native_mb_retained DESC;
        """;

    /// <summary>Pattern A: the 20 largest destroyed native objects that still have a managed wrapper.</summary>
    public const string LeakedATopObjects = """
        SELECT
            n.native_object_index,
            COALESCE(n.name, '(unnamed)') AS name,
            COALESCE(n.native_type_name, '(unknown)') AS native_type,
            COALESCE(m.managed_type_name, '(unknown)') AS managed_type,
            ROUND(n.size_bytes / 1024.0 / 1024, 2) AS own_size_mb
        FROM managed_objects m
        JOIN native_objects n ON m.native_object_index = n.native_object_index
        WHERE n.is_destroyed = true
        ORDER BY n.size_bytes DESC
        LIMIT 20;
        """;

    /// <summary>All destroyed native objects (with or without a managed wrapper) grouped by native type.</summary>
    public const string AllDestroyedNatives = """
        SELECT
            COALESCE(native_type_name, '(unknown)') AS native_type,
            COUNT(1) AS destroyed_count,
            ROUND(SUM(size_bytes) / 1024.0 / 1024, 2) AS native_mb_retained
        FROM native_objects
        WHERE is_destroyed = true
        GROUP BY 1
        ORDER BY native_mb_retained DESC;
        """;

    /// <summary>Totals over all destroyed native objects: count, retained MB and share of native memory.</summary>
    public const string AllDestroyedStats = """
        SELECT
            COUNT(*) AS total_destroyed,
            ROUND(SUM(size_bytes) / 1024.0 / 1024, 2) AS native_mb_retained,
            ROUND(
                100.0 * SUM(size_bytes) / NULLIF((SELECT SUM(size_bytes) FROM native_objects), 0),
                1
            ) AS pct_of_native_total
        FROM native_objects
        WHERE is_destroyed = true;
        """;

    ///
    /// Returns SQL for downstream_mb and exclusive_mb for a single
/// native root.
    /// rootIdx must be from our own query result (safe to interpolate).
    ///
    /// <param name="rootIdx">Index of the starting object; matched against connections.from_index for native_object endpoints.</param>
    /// <returns>
    /// A single-statement query yielding one row: downstream_mb (size of everything reachable from the
    /// start over native_connection edges) and exclusive_mb (the reachable objects that have no inbound
    /// native_connection edge from outside the reachable set other than from the start itself).
    /// </returns>
    public static string DownstreamStats(long rootIdx)
    {
        // Format the number with the invariant culture so the generated SQL does not depend on
        // the machine's culture settings (which influence how interpolated numbers are rendered).
        var rootId = rootIdx.ToString(System.Globalization.CultureInfo.InvariantCulture);

        return $"""
            WITH RECURSIVE
            reachable(node_index) AS (
                SELECT c.to_index
                FROM connections c
                WHERE c.from_index = {rootId}
                  AND c.from_kind = 'native_object'
                  AND c.to_kind = 'native_object'
                  AND c.connection_type = 'native_connection'
                UNION
                SELECT c.to_index
                FROM reachable r
                JOIN connections c ON c.from_index = r.node_index
                WHERE c.from_kind = 'native_object'
                  AND c.to_kind = 'native_object'
                  AND c.connection_type = 'native_connection'
            ),
            reachable_set AS (SELECT DISTINCT node_index FROM reachable),
            externally_referenced AS (
                SELECT DISTINCT c.to_index AS node_index
                FROM connections c
                JOIN reachable_set rs_to ON rs_to.node_index = c.to_index
                LEFT JOIN reachable_set rs_from ON rs_from.node_index = c.from_index
                WHERE c.from_kind = 'native_object'
                  AND c.to_kind = 'native_object'
                  AND c.connection_type = 'native_connection'
                  AND c.from_index != {rootId}
                  AND rs_from.node_index IS NULL
            ),
            exclusive_set AS (
                SELECT rs.node_index
                FROM reachable_set rs
                LEFT JOIN externally_referenced ext ON ext.node_index = rs.node_index
                WHERE ext.node_index IS NULL
            )
            SELECT
                COALESCE(
                    (SELECT ROUND(SUM(n.size_bytes) / 1024.0 / 1024, 2)
                     FROM reachable_set rs
                     JOIN native_objects n ON n.native_object_index = rs.node_index),
                    0.0) AS downstream_mb,
                COALESCE(
                    (SELECT ROUND(SUM(n.size_bytes) / 1024.0 / 1024, 2)
                     FROM exclusive_set es
                     JOIN native_objects n ON n.native_object_index = es.node_index),
                    0.0) AS exclusive_mb;
            """;
    }
}
diff --git a/Core/Report/Queries/SqliteReportQueries.cs b/Core/Report/Queries/SqliteReportQueries.cs
new file mode 100644
index 0000000..d3b8240
--- /dev/null
+++ b/Core/Report/Queries/SqliteReportQueries.cs
@@ -0,0 +1,61 @@
using Microsoft.Data.Sqlite;

namespace MemorySnapshotDataTools.Report.Queries;

/// SQLite implementation of <see cref="IReportQueryBackend"/>.
/// Opens the database at construction and executes report SQL via Microsoft.Data.Sqlite.
internal sealed class SqliteReportQueries : IReportQueryBackend
{
    private readonly SqliteConnection _connection;

    /// <summary>Opens a connection to the SQLite database at the given path.</summary>
    /// <param name="dbPath">Path to the .db or .sqlite file.</param>
    public SqliteReportQueries(string dbPath)
    {
        // Open through a local so a failed Open() disposes the connection before the exception
        // propagates instead of leaving it for the finalizer.
        var connection = new SqliteConnection($"Data Source={dbPath}");
        try
        {
            connection.Open();
        }
        catch
        {
            connection.Dispose();
            throw;
        }

        _connection = connection;
    }

    /// <inheritdoc />
    public ReportBackendDialect Dialect => ReportBackendDialect.Sqlite;

    /// <summary>
    /// Runs <paramref name="sql"/> and materializes the whole result set: column names in
    /// result order, and one <c>object?[]</c> per row with database NULLs mapped to <c>null</c>.
    /// </summary>
    /// <param name="sql">SQL text to execute.</param>
    /// <returns>Column names and the fully-read rows.</returns>
    public (string[] Columns, List<object?[]> Rows) ExecuteQuery(string sql)
    {
        using var cmd = _connection.CreateCommand();
        cmd.CommandText = sql;
        using var reader = cmd.ExecuteReader();

        var fieldCount = reader.FieldCount;
        var columns = new string[fieldCount];
        for (var i = 0; i < fieldCount; i++)
            columns[i] = reader.GetName(i);

        var rows = new List<object?[]>();
        while (reader.Read())
        {
            var row = new object?[fieldCount];
            for (var i = 0; i < fieldCount; i++)
                row[i] = reader.IsDBNull(i) ? null : reader.GetValue(i);
            rows.Add(row);
        }

        return (columns, rows);
    }

    /// <summary>
    /// Returns whether <paramref name="tableName"/> has a column named <paramref name="columnName"/>,
    /// using a parameterized <c>pragma_table_info</c> lookup.
    /// </summary>
    /// <param name="tableName">Table name.</param>
    /// <param name="columnName">Column name.</param>
    /// <returns><c>true</c> if the column is found; <c>false</c> if it is not or the lookup fails.</returns>
    public bool HasColumn(string tableName, string columnName)
    {
        try
        {
            using var cmd = _connection.CreateCommand();
            cmd.CommandText = "SELECT name FROM pragma_table_info($t) WHERE name = $c";
            cmd.Parameters.AddWithValue("$t", tableName);
            cmd.Parameters.AddWithValue("$c", columnName);
            using var reader = cmd.ExecuteReader();
            return reader.Read();
        }
        catch
        {
            // Deliberate best-effort probe: any failure is reported as "column not present".
            return false;
        }
    }

    /// <summary>Disposes the underlying connection.</summary>
    public void Dispose() => _connection.Dispose();
}
diff --git a/Core/Report/ReportBuilder.cs b/Core/Report/ReportBuilder.cs
new file mode 100644
index 0000000..9997c2d
--- /dev/null
+++ b/Core/Report/ReportBuilder.cs
@@ -0,0 +1,365 @@
using System.Globalization;
using System.Linq;
using MemorySnapshotDataTools.Report.Queries;

namespace MemorySnapshotDataTools.Report;

///
/// Builds a by executing report SQL via and assembling groups/sections
/// (snapshot info, native objects, managed heap, connections, roots, memory regions). Uses for table and insight HTML.
///
internal static class ReportBuilder
{
    ///
    /// Runs all report queries against the backend, maps results into sections and groups, and returns a fully populated report model.
    ///
    /// Query backend (DuckDB or SQLite) connected to the report database.
    /// Report title.
    /// Database path (for display).
    /// Generated timestamp string (UTC).
    /// Populated ready for .
    public static ReportModel Build(IReportQueryBackend backend, string title, string dbPath, string generatedAtUtc)
    {
        var model = new ReportModel
        {
            Title = title,
            DbPath = dbPath,
            GeneratedAtUtc = generatedAtUtc,
        };

        var (infoCols, infoRows) = backend.ExecuteQuery(ReportSql.SnapshotInfo);
        var (countCols, countRows) = backend.ExecuteQuery(ReportSql.TableCounts);

        var kv = new Dictionary();
        if (infoRows.Count > 0)
        {
            var r = infoRows[0];
            kv["Snapshot Path"] = r.Length > 0 ?
r[0] : null; + kv["Exported At (UTC)"] = r.Length > 1 ? r[1] : null; + kv["Unity Version"] = r.Length > 2 ? r[2] : null; + } + kv["Report Generated"] = generatedAtUtc; + + var totalRows = countRows.Sum(row => row.Length > 1 && row[1] != null ? Convert.ToInt64(row[1]) : 0); + var insightSnap = ReportHtmlHelper.RenderInsight( + $"Snapshot captured from Unity {ReportHtmlHelper.Escape(kv.GetValueOrDefault("Unity Version") ?? "—")} " + + $"containing {ReportHtmlHelper.FmtNum(totalRows)} rows across {ReportHtmlHelper.FmtNum(countRows.Count)} tables. " + + "If table counts appear unexpectedly low, verify the snapshot was captured with Capture All Objects and native memory collection enabled."); + + var snapContent = insightSnap + ReportHtmlHelper.RenderKv(kv) + ReportHtmlHelper.RenderTable(countCols, countRows); + var snapSection = new ReportSection + { + Anchor = "snapshot-info", + SectionTitle = "📋 Source & Table Counts", + ContentHtml = ReportHtmlHelper.Section("snapshot-info", "📋 Source & Table Counts", snapContent, null), + }; + var snapGroup = new ReportGroup + { + GroupTitle = "📸 Snapshot Info", + GroupDesc = "", + }; + snapGroup.Sections.Add(snapSection); + AddNav(model, snapGroup); + model.Groups.Add(snapGroup); + + // Native Objects group + var (natOvCols, natOvRows) = backend.ExecuteQuery(ReportSql.NativeOverview); + var (natTyCols, natTyRows) = backend.ExecuteQuery(ReportSql.NativeTypes); + var (bktCols, bktRows) = backend.ExecuteQuery(ReportSql.SizeBucketDistribution(backend.Dialect)); + var (top50Cols, top50Rows) = backend.ExecuteQuery(ReportSql.TopNativeObjects); + var (dupCols, dupRows) = backend.ExecuteQuery(ReportSql.DuplicateAssets); + var (dsCols, dsRows) = backend.ExecuteQuery(ReportSql.DuplicateSummary); + var (t50Cols, t50Rows) = backend.ExecuteQuery(ReportSql.Top50Summary); + var (_, top5PctRows) = natTyRows.Count > 0 ? 
backend.ExecuteQuery(ReportSql.NativeTypesTop5Pct) : (Array.Empty(), new List()); + var top5PctVal = top5PctRows.Count > 0 && top5PctRows[0].Length > 0 ? top5PctRows[0][0] : null; + + var natTotalObjects = natOvRows.Count > 0 && natOvRows[0].Length > 0 ? natOvRows[0][0] : 0; + var natTotalMb = natOvRows.Count > 0 && natOvRows[0].Length > 1 ? natOvRows[0][1] : 0; + var natAvgKb = natOvRows.Count > 0 && natOvRows[0].Length > 3 ? natOvRows[0][3] : 0; + var natMaxMb = natOvRows.Count > 0 && natOvRows[0].Length > 4 ? natOvRows[0][4] : 0; + var natDistinctTypes = natOvRows.Count > 0 && natOvRows[0].Length > 5 ? natOvRows[0][5] : 0; + + var insightNatOv = ReportHtmlHelper.RenderInsight( + $"{ReportHtmlHelper.FmtNum(natTotalObjects)} native objects across " + + $"{ReportHtmlHelper.FmtNum(natDistinctTypes)} types occupy {ReportHtmlHelper.FmtNum(natTotalMb)} MB total " + + $"(avg {ReportHtmlHelper.FmtNum(natAvgKb)} KB; largest single object {ReportHtmlHelper.FmtNum(natMaxMb)} MB). " + + "This is your native memory baseline — compare it against your platform budget to gauge whether a reduction pass is needed."); + + var top5Names = natTyRows.Count > 0 ? string.Join(", ", natTyRows.Take(5).Select(r => r[0]?.ToString() ?? "—")) : "—"; + var insightTypes = ReportHtmlHelper.RenderInsight( + $"The top 5 types — {ReportHtmlHelper.Escape(top5Names)} — account for {ReportHtmlHelper.FmtNum(top5PctVal)}% of all native memory. " + + "These types are your highest-leverage optimization targets."); + + var t50Mb = t50Rows.Count > 0 && t50Rows[0].Length > 1 ? t50Rows[0][1] : 0; + var t50Pct = t50Rows.Count > 0 && t50Rows[0].Length > 2 ? t50Rows[0][2] : 0; + var insightTop50 = ReportHtmlHelper.RenderInsight( + $"The 50 largest individual objects account for {ReportHtmlHelper.FmtNum(t50Mb)} MB{ReportHtmlHelper.FmtNum(t50Pct)}% of all native memory. 
" + + "A small number of objects driving a large share of memory means optimizing even one large asset can have measurable impact.", + pills: [("Objects shown", "50", ""), ("Combined size", $"{t50Mb}", ""), ("% of native total", $"{t50Pct}%", "")]); + + var dsGroups = dsRows.Count > 0 && dsRows[0].Length > 0 ? dsRows[0][0] : 0; + var dsWastedMb = dsRows.Count > 0 && dsRows[0].Length > 2 ? dsRows[0][2] : 0; + var dsPct = dsRows.Count > 0 && dsRows[0].Length > 3 ? dsRows[0][3] : 0; + var insightDups = ReportHtmlHelper.RenderInsight( + $"{ReportHtmlHelper.FmtNum(dsGroups)} asset name-collision groups were found, " + + $"with an upper-bound waste estimate of {ReportHtmlHelper.FmtNum(dsWastedMb)} MB ({ReportHtmlHelper.FmtNum(dsPct)}% of native memory). " + + "True asset duplication wastes memory proportional to its count.", + pills: [ + ("Name-collision groups", ReportHtmlHelper.FmtNum(dsGroups), dsGroups is int i && i > 0 ? "warn" : "good"), + ("Est. wasted memory", $"{dsWastedMb} MB", "warn"), + ]); + + var nativeGroup = new ReportGroup { GroupTitle = "🧱 Native Objects", GroupDesc = "Native Unity objects — types, sizes, and duplication" }; + nativeGroup.Sections.Add(new ReportSection { Anchor = "native-overview", SectionTitle = "📊 Overview", ContentHtml = ReportHtmlHelper.Section("native-overview", "📊 Overview", insightNatOv + ReportHtmlHelper.RenderTable(natOvCols, natOvRows), null) }); + nativeGroup.Sections.Add(new ReportSection { Anchor = "native-types", SectionTitle = "🏆 Top Types by Size", ContentHtml = ReportHtmlHelper.Section("native-types", "🏆 Top Types by Size", insightTypes + ReportHtmlHelper.RenderTable(natTyCols, natTyRows, truncateCols: new HashSet { "type_name" }), natTyRows.Count), RowCount = natTyRows.Count }); + nativeGroup.Sections.Add(new ReportSection { Anchor = "size-buckets", SectionTitle = "📐 Size Distribution (log₄)", ContentHtml = ReportHtmlHelper.Section("size-buckets", "📐 Size Distribution (log₄)", ReportHtmlHelper.RenderTable(bktCols, 
bktRows), bktRows.Count), RowCount = bktRows.Count }); + nativeGroup.Sections.Add(new ReportSection { Anchor = "top-objects", SectionTitle = "🔝 Top 50 Largest Objects", ContentHtml = ReportHtmlHelper.Section("top-objects", "🔝 Top 50 Largest Objects", insightTop50 + ReportHtmlHelper.RenderTable(top50Cols, top50Rows, truncateCols: new HashSet { "name" }), top50Rows.Count), RowCount = top50Rows.Count }); + nativeGroup.Sections.Add(new ReportSection { Anchor = "duplicates", SectionTitle = "⚠️ Duplicate Assets", ContentHtml = ReportHtmlHelper.Section("duplicates", "⚠️ Duplicate Assets", insightDups + ReportHtmlHelper.RenderTable(dupCols, dupRows, warnCol: "wasted_mb", truncateCols: new HashSet { "name" }), dupRows.Count), RowCount = dupRows.Count }); + AddNav(model, nativeGroup); + model.Groups.Add(nativeGroup); + + // Managed Heap + var (mgOvCols, mgOvRows) = backend.ExecuteQuery(ReportSql.ManagedOverview); + var (mgTyCols, mgTyRows) = backend.ExecuteQuery(ReportSql.ManagedTypes); + var mgTotal = mgOvRows.Count > 0 ? mgOvRows[0][0] : 0; + var mgMb = mgOvRows.Count > 0 ? mgOvRows[0][1] : 0; + var mgTypes = mgOvRows.Count > 0 && mgOvRows[0].Length > 3 ? mgOvRows[0][3] : 0; + var mgBridged = mgOvRows.Count > 0 && mgOvRows[0].Length > 4 ? mgOvRows[0][4] : 0; + var insightMgOv = ReportHtmlHelper.RenderInsight( + $"{ReportHtmlHelper.FmtNum(mgTotal)} managed objects across {ReportHtmlHelper.FmtNum(mgTypes)} types occupy {ReportHtmlHelper.FmtNum(mgMb)} MB; " + + $"{ReportHtmlHelper.FmtNum(mgBridged)} have a corresponding native object. " + + "Large managed heaps increase GC pressure."); + var topMg = mgTyRows.Count > 0 ? mgTyRows[0][0]?.ToString() ?? "—" : "—"; + var topMgMb = mgTyRows.Count > 0 && mgTyRows[0].Length > 2 ? mgTyRows[0][2] : 0; + var insightMgTy = ReportHtmlHelper.RenderInsight( + $"{ReportHtmlHelper.Escape(topMg)} is the largest managed allocator at {ReportHtmlHelper.FmtNum(topMgMb)} MB. 
" + + "This type is the primary driver of managed heap size and therefore GC pause duration."); + + var managedGroup = new ReportGroup { GroupTitle = "🧠 Managed Heap", GroupDesc = "GC-managed objects and type allocations" }; + managedGroup.Sections.Add(new ReportSection { Anchor = "managed-overview", SectionTitle = "📊 Overview", ContentHtml = ReportHtmlHelper.Section("managed-overview", "📊 Overview", insightMgOv + ReportHtmlHelper.RenderTable(mgOvCols, mgOvRows), null) }); + managedGroup.Sections.Add(new ReportSection { Anchor = "managed-types", SectionTitle = "🏆 Top Types by Size", ContentHtml = ReportHtmlHelper.Section("managed-types", "🏆 Top Types by Size", insightMgTy + ReportHtmlHelper.RenderTable(mgTyCols, mgTyRows, truncateCols: new HashSet { "type_name" }), mgTyRows.Count), RowCount = mgTyRows.Count }); + AddNav(model, managedGroup); + model.Groups.Add(managedGroup); + + // Leaked Shells + var hasIsDestroyed = backend.HasColumn("native_objects", "is_destroyed"); + var (lbCols, lbRows) = backend.ExecuteQuery(ReportSql.LeakedBByType); + var (lbsCols, lbsRows) = backend.ExecuteQuery(ReportSql.LeakedBStats); + var bTotal = lbsRows.Count > 0 && lbsRows[0].Length > 0 ? Convert.ToInt64(lbsRows[0][0] ?? 0) : 0L; + var topBType = lbRows.Count > 0 && lbRows[0].Length > 0 ? lbRows[0][0]?.ToString() ?? "—" : "—"; + var topBCount = lbRows.Count > 0 && lbRows[0].Length > 1 ? lbRows[0][1] : (object?)0; + var insightLb = ReportHtmlHelper.RenderInsight( + $"{ReportHtmlHelper.FmtNum(bTotal)} orphaned managed wrappers detected — " + + "C# objects whose native counterpart was completely freed but whose GC references were never cleared. " + + $"The most common type is {ReportHtmlHelper.Escape(topBType)} " + + $"with {ReportHtmlHelper.FmtNum(topBCount)} instances. " + + "These objects waste managed heap space and GC scan time despite having no functional native backing. 
" + + "Fix by hooking OnDestroy (or equivalent) and nulling all strong C# references so the GC " + + "can collect them."); + + var leakedSections = new List(); + if (hasIsDestroyed) + { + var (lcCols, lcRows) = backend.ExecuteQuery(ReportSql.LeakedCombined); + var (lasCols, lasRows) = backend.ExecuteQuery(ReportSql.LeakedAStats); + var laTotal = lasRows.Count > 0 && lasRows[0].Length > 0 ? Convert.ToInt64(lasRows[0][0] ?? 0) : 0L; + var laMb = lasRows.Count > 0 && lasRows[0].Length > 1 ? ToDouble(lasRows[0][1]) : 0.0; + var laPct = lasRows.Count > 0 && lasRows[0].Length > 2 ? ToDouble(lasRows[0][2]) : 0.0; + var combinedTotal = laTotal + bTotal; + var insightLc = ReportHtmlHelper.RenderInsight( + $"{ReportHtmlHelper.FmtNum(combinedTotal)} leaked C# shell objects detected: " + + $"{ReportHtmlHelper.FmtNum(laTotal)} Pattern A (native destroyed but still occupying " + + $"{laMb:N2} MB of native memory) and " + + $"{ReportHtmlHelper.FmtNum(bTotal)} Pattern B (native fully freed, managed wrapper orphaned). " + + "Leaked shells waste memory and can cause MissingReferenceException crashes at runtime. " + + "Prioritise Pattern A by native_mb_retained — each MB is real engine memory the runtime " + + "cannot reclaim until the C# reference chain is broken.", + pills: [ + ("Pattern A (destroyed)", ReportHtmlHelper.FmtNum(laTotal), laTotal > 0 ? "warn" : "good"), + ("Native MB retained", $"{laMb:N2} MB", laMb > 0 ? "warn" : "good"), + ("Pattern B (orphaned)", ReportHtmlHelper.FmtNum(bTotal), bTotal > 0 ? 
"warn" : "good"), + ]); + leakedSections.Add(new ReportSection + { + Anchor = "leaked-summary", + SectionTitle = "📊 Summary (Both Patterns)", + ContentHtml = ReportHtmlHelper.Section("leaked-summary", "📊 Summary (Both Patterns)", insightLc + ReportHtmlHelper.RenderTable(lcCols, lcRows, truncateCols: new HashSet { "managed_type_name" }), lcRows.Count), + RowCount = lcRows.Count, + }); + + var (latCols, latRows) = backend.ExecuteQuery(ReportSql.LeakedAByType); + var topLatNative = latRows.Count > 0 && latRows[0].Length > 0 ? latRows[0][0]?.ToString() ?? "—" : "—"; + var topLatMb = latRows.Count > 0 && latRows[0].Length > 3 ? ToDouble(latRows[0][3]) : 0.0; + var insightLat = ReportHtmlHelper.RenderInsight( + $"{ReportHtmlHelper.FmtNum(latRows.Count)} native/managed type pair(s) show Pattern A leaks. " + + "The worst offender by retained memory is " + + $"{ReportHtmlHelper.Escape(topLatNative)} holding " + + $"{topLatMb:N2} MB despite being destroyed. " + + "These native objects remain alive because managed C# references block GC collection. " + + "Track down the code paths that hold a reference to these types after Destroy() — " + + "common culprits: static caches, event listener captures, and async/coroutine closures."); + leakedSections.Add(new ReportSection + { + Anchor = "leaked-a-types", + SectionTitle = "💥 Pattern A: Destroyed-but-Retained (by Type)", + ContentHtml = ReportHtmlHelper.Section("leaked-a-types", "💥 Pattern A: Destroyed-but-Retained (by Type)", insightLat + ReportHtmlHelper.RenderTable(latCols, latRows, truncateCols: new HashSet { "managed_type" }), latRows.Count), + RowCount = latRows.Count, + }); + + var (laoColsRaw, laoRows) = backend.ExecuteQuery(ReportSql.LeakedATopObjects); + var augLaoCols = laoColsRaw.Concat(new[] { "downstream_mb", "exclusive_mb", "total_freed_mb" }).ToArray(); + var augLaoRows = new List(); + foreach (var row in laoRows) + { + var rootIdx = row.Length > 0 ? Convert.ToInt64(row[0] ?? 0) : 0L; + var ownMb = row.Length > 4 ? 
ToDouble(row[4]) : 0.0; + var (downstreamCols, downstreamRows) = backend.ExecuteQuery(ReportSql.DownstreamStats(rootIdx)); + var dsMb = downstreamRows.Count > 0 && downstreamRows[0].Length > 0 ? ToDouble(downstreamRows[0][0]) : 0.0; + var exclMb = downstreamRows.Count > 0 && downstreamRows[0].Length > 1 ? ToDouble(downstreamRows[0][1]) : 0.0; + var totalFreed = Math.Round(ownMb + exclMb, 2); + augLaoRows.Add(row.Concat(new object?[] { Math.Round(dsMb, 2), Math.Round(exclMb, 2), totalFreed }).ToArray()); + } + augLaoRows = augLaoRows.OrderByDescending(r => r.Length > 0 ? ToDouble(r[^1]) : 0.0).ToList(); + + var topLaoName = augLaoRows.Count > 0 && augLaoRows[0].Length > 1 ? augLaoRows[0][1]?.ToString() ?? "—" : "—"; + var topLaoOwn = augLaoRows.Count > 0 && augLaoRows[0].Length > 4 ? ToDouble(augLaoRows[0][4]) : 0.0; + var topLaoExcl = augLaoRows.Count > 0 && augLaoRows[0].Length >= 2 ? ToDouble(augLaoRows[0][^2]) : 0.0; + var topLaoFreed = augLaoRows.Count > 0 && augLaoRows[0].Length > 0 ? ToDouble(augLaoRows[0][^1]) : 0.0; + var topLaoNameTrunc = topLaoName.Length > 30 ? topLaoName[..30] : topLaoName; + var insightLao = ReportHtmlHelper.RenderInsight( + "Top Pattern A leaked objects ranked by total_freed_mb " + + "(own size + exclusively-owned downstream memory). " + + "The highest-impact leak is " + + $"{ReportHtmlHelper.Escape(topLaoName)}: " + + $"fixing it would free {topLaoFreed:N2} MB total " + + $"({topLaoOwn:N2} MB own + {topLaoExcl:N2} MB exclusive downstream). " + + "exclusive_mb counts only downstream objects reachable solely through this object " + + "— assets shared with other live objects are excluded, so this is a conservative lower bound. 
" + + "Prioritise objects with large total_freed_mb: ensure Destroy() is always " + + "paired with reference nulling, and that no event listeners or coroutines capture a reference " + + "past the object's intended lifetime.", + pills: [ + ("Top leak", topLaoNameTrunc, "warn"), + ("Own size", $"{topLaoOwn:N2} MB", "warn"), + ("Excl. downstream", $"{topLaoExcl:N2} MB", "warn"), + ("Total freed", $"{topLaoFreed:N2} MB", "warn"), + ]); + leakedSections.Add(new ReportSection + { + Anchor = "leaked-a-objects", + SectionTitle = "🔬 Pattern A: Top Individual Leaks + Exclusive Cost", + ContentHtml = ReportHtmlHelper.Section("leaked-a-objects", "🔬 Pattern A: Top Individual Leaks + Exclusive Cost", insightLao + ReportHtmlHelper.RenderTable(augLaoCols, augLaoRows, warnCol: "total_freed_mb", truncateCols: new HashSet { "name" }), augLaoRows.Count), + RowCount = augLaoRows.Count, + }); + + var (adnCols, adnRows) = backend.ExecuteQuery(ReportSql.AllDestroyedNatives); + var (adnsCols, adnsRows) = backend.ExecuteQuery(ReportSql.AllDestroyedStats); + var adnTotal = adnsRows.Count > 0 && adnsRows[0].Length > 0 ? Convert.ToInt64(adnsRows[0][0] ?? 0) : 0L; + var adnMb = adnsRows.Count > 0 && adnsRows[0].Length > 1 ? ToDouble(adnsRows[0][1]) : 0.0; + var adnPct = adnsRows.Count > 0 && adnsRows[0].Length > 2 ? ToDouble(adnsRows[0][2]) : 0.0; + var insightAdn = ReportHtmlHelper.RenderInsight( + $"{ReportHtmlHelper.FmtNum(adnTotal)} native objects across " + + $"{ReportHtmlHelper.FmtNum(adnRows.Count)} type(s) carry is_destroyed=true, " + + $"retaining {adnMb:N2} MB ({adnPct:N1}% of total native memory). " + + "This is the full native cost of pending destructions — Pattern A leaks are a subset of this " + + "(only those with a surviving managed wrapper). 
" + + "A high count here that drops significantly after calling " + + "Resources.UnloadUnusedAssets() + GC.Collect() indicates the allocator " + + "is cleaning up but GC hasn't run yet; a persistently high count across snapshots points to " + + "genuine managed-side leaks blocking reclaim."); + leakedSections.Add(new ReportSection + { + Anchor = "all-destroyed", + SectionTitle = "🗑️ All Destroyed Natives (by Type)", + ContentHtml = ReportHtmlHelper.Section("all-destroyed", "🗑️ All Destroyed Natives (by Type)", insightAdn + ReportHtmlHelper.RenderTable(adnCols, adnRows), adnRows.Count), + RowCount = adnRows.Count, + }); + } + else + { + var schemaNoticeContent = ReportHtmlHelper.RenderInsight( + "Pattern A analysis (destroyed-but-retained natives) requires the " + + "is_destroyed column which is not present in this database. " + + "Re-export the snapshot with the latest version of the exporter to enable this analysis. " + + "Pattern B (orphaned managed wrappers) below is available without it."); + leakedSections.Add(new ReportSection + { + Anchor = "leaked-schema-notice", + SectionTitle = "⚠️ Schema Notice", + ContentHtml = ReportHtmlHelper.Section("leaked-schema-notice", "⚠️ Schema Notice", schemaNoticeContent, null), + }); + } + + leakedSections.Add(new ReportSection + { + Anchor = "leaked-b", + SectionTitle = "👻 Pattern B: Orphaned Managed Wrappers", + ContentHtml = ReportHtmlHelper.Section("leaked-b", "👻 Pattern B: Orphaned Managed Wrappers", insightLb + ReportHtmlHelper.RenderTable(lbCols, lbRows), lbRows.Count), + RowCount = lbRows.Count, + }); + + var leakedGroup = new ReportGroup + { + GroupTitle = "🧟 Leaked Shells", + GroupDesc = "C# managed wrappers alive past their native object's destruction", + }; + foreach (var sec in leakedSections) + leakedGroup.Sections.Add(sec); + AddNav(model, leakedGroup); + model.Groups.Add(leakedGroup); + + // Native Roots + var (nrAreaCols, nrAreaRows) = backend.ExecuteQuery(ReportSql.NativeRootsByArea); + var (nrTopCols, 
nrTopRows) = backend.ExecuteQuery(ReportSql.NativeRootsTop); + var insightRoots = ReportHtmlHelper.RenderInsight("Native roots by area and top 30 by retained size."); + var rootsGroup = new ReportGroup { GroupTitle = "📍 Native Roots", GroupDesc = "Root references and retained size" }; + rootsGroup.Sections.Add(new ReportSection { Anchor = "roots-area", SectionTitle = "📍 By Area", ContentHtml = ReportHtmlHelper.Section("roots-area", "📍 By Area", insightRoots + ReportHtmlHelper.RenderTable(nrAreaCols, nrAreaRows), nrAreaRows.Count), RowCount = nrAreaRows.Count }); + rootsGroup.Sections.Add(new ReportSection { Anchor = "roots-top", SectionTitle = "🥇 Top 30 by Retained Size", ContentHtml = ReportHtmlHelper.Section("roots-top", "🥇 Top 30 by Retained Size", ReportHtmlHelper.RenderTable(nrTopCols, nrTopRows), nrTopRows.Count), RowCount = nrTopRows.Count }); + AddNav(model, rootsGroup); + model.Groups.Add(rootsGroup); + + // Memory Regions & Allocation Efficiency + var (regCols, regRows) = backend.ExecuteQuery(ReportSql.MemoryRegions); + var (aeCols, aeRows) = backend.ExecuteQuery(ReportSql.AllocationEfficiency); + var regionsGroup = new ReportGroup { GroupTitle = "🗂️ Memory & Allocations", GroupDesc = "Memory regions and allocation efficiency" }; + regionsGroup.Sections.Add(new ReportSection { Anchor = "regions", SectionTitle = "🗂️ Memory Regions", ContentHtml = ReportHtmlHelper.Section("regions", "🗂️ Memory Regions", ReportHtmlHelper.RenderTable(regCols, regRows), regRows.Count), RowCount = regRows.Count }); + regionsGroup.Sections.Add(new ReportSection { Anchor = "alloc-efficiency", SectionTitle = "⚡ Allocation Efficiency", ContentHtml = ReportHtmlHelper.Section("alloc-efficiency", "⚡ Allocation Efficiency", ReportHtmlHelper.RenderTable(aeCols, aeRows), aeRows.Count), RowCount = aeRows.Count }); + AddNav(model, regionsGroup); + model.Groups.Add(regionsGroup); + + // Connections + var (ctCols, ctRows) = backend.ExecuteQuery(ReportSql.ConnectionTypes); + var (mrCols, 
mrRows) = backend.ExecuteQuery(ReportSql.MostReferenced); + var (mrExCols, mrExRows) = backend.ExecuteQuery(ReportSql.MostReferencedExclMonoScript); + var (obCols, obRows) = backend.ExecuteQuery(ReportSql.MostOutbound); + var insightConn = ReportHtmlHelper.RenderInsight("Connection types and most-referenced / most-outbound native objects."); + var connGroup = new ReportGroup { GroupTitle = "🔗 Connections", GroupDesc = "Reference graph and connection types" }; + connGroup.Sections.Add(new ReportSection { Anchor = "connection-types", SectionTitle = "Connection Types", ContentHtml = ReportHtmlHelper.Section("connection-types", "Connection Types", insightConn + ReportHtmlHelper.RenderTable(ctCols, ctRows), ctRows.Count), RowCount = ctRows.Count }); + connGroup.Sections.Add(new ReportSection { Anchor = "most-referenced", SectionTitle = "Most Referenced (incl. MonoScript)", ContentHtml = ReportHtmlHelper.Section("most-referenced", "Most Referenced (incl. MonoScript)", ReportHtmlHelper.RenderTable(mrCols, mrRows, truncateCols: new HashSet { "name" }), mrRows.Count), RowCount = mrRows.Count }); + connGroup.Sections.Add(new ReportSection { Anchor = "most-referenced-excl", SectionTitle = "Most Referenced (excl. MonoScript)", ContentHtml = ReportHtmlHelper.Section("most-referenced-excl", "Most Referenced (excl. 
MonoScript)", ReportHtmlHelper.RenderTable(mrExCols, mrExRows, truncateCols: new HashSet { "name" }), mrExRows.Count), RowCount = mrExRows.Count });
+        connGroup.Sections.Add(new ReportSection { Anchor = "most-outbound", SectionTitle = "Most Outbound", ContentHtml = ReportHtmlHelper.Section("most-outbound", "Most Outbound", ReportHtmlHelper.RenderTable(obCols, obRows, truncateCols: new HashSet { "name" }), obRows.Count), RowCount = obRows.Count });
+        AddNav(model, connGroup);
+        model.Groups.Add(connGroup);
+
+        return model;
+    }
+
+    /// <summary>Appends a navigation group to the model that mirrors <paramref name="group"/>: same title, one link per section in section order.</summary>
+    /// <param name="model">Report model whose <c>NavGroups</c> list receives the new entry.</param>
+    /// <param name="group">Content group whose sections become nav links (anchor + section title).</param>
+    private static void AddNav(ReportModel model, ReportGroup group)
+    {
+        var nav = new NavGroup { GroupTitle = group.GroupTitle };
+        foreach (var section in group.Sections)
+        {
+            var link = new NavItem { Anchor = section.Anchor, Title = section.SectionTitle };
+            nav.Items.Add(link);
+        }
+
+        model.NavGroups.Add(nav);
+    }
+
+    /// <summary>
+    /// Lenient conversion of a query cell to <see cref="double"/>: null and unparseable values become 0.0,
+    /// the common numeric CLR types are widened directly, and anything else is parsed from its string form
+    /// using the invariant culture.
+    /// </summary>
+    /// <param name="o">Cell value (may be null).</param>
+    /// <returns>The numeric value, or 0.0 when no number can be obtained.</returns>
+    private static double ToDouble(object? o) => o switch
+    {
+        null => 0.0,
+        double asDouble => asDouble,
+        float asFloat => asFloat,
+        decimal asDecimal => (double)asDecimal,
+        int asInt => asInt,
+        long asLong => asLong,
+        _ => double.TryParse(o.ToString(), NumberStyles.Any, CultureInfo.InvariantCulture, out var parsed) ? parsed : 0.0,
+    };
+}
diff --git a/Core/Report/ReportHtmlHelper.cs b/Core/Report/ReportHtmlHelper.cs
new file mode 100644
index 0000000..e333c6a
--- /dev/null
+++ b/Core/Report/ReportHtmlHelper.cs
@@ -0,0 +1,199 @@
+using System.Globalization;
+using System.Text;
+
+namespace MemorySnapshotDataTools.Report;
+
+///
+/// Helpers for building report HTML: escaping, number/percent formatting, table and KV rendering, insight blocks, and section/group wrappers.
+///
+internal static class ReportHtmlHelper
+{
+    /// <summary>Lower-case column names that <see cref="IsNumericCol"/> always treats as numeric.</summary>
+    private static readonly HashSet<string> NumericCols = [
+        "obj_count", "edge_count", "root_count", "num_allocations", "inbound_refs", "outbound_refs",
+        "duplicate_count", "duplicate_groups", "extra_instances", "total_objects",
+        "distinct_types", "objects_with_native_ref", "region_count", "row_count", "log4_bucket",
+        "total_orphaned", "total_destroyed", "destroyed_count", "leaked_count", "total_leaked_count"
+    ];
+
+    /// <summary>Lower-case column names that <see cref="IsPctCol"/> always treats as percentages.</summary>
+    private static readonly HashSet<string> PctCols = [
+        "pct_of_total", "pct_of_native_total", "utilization_pct", "overall_utilization_pct"
+    ];
+
+    /// <summary>HTML-encodes a value for safe inclusion in the report; null yields the placeholder text "null".</summary>
+    /// <remarks>NOTE(review): the original comment calls the placeholder "styled"; only plain text is visible here — confirm whether markup was lost.</remarks>
+    /// <param name="val">Value to escape (null allowed).</param>
+    /// <returns>Encoded string or null placeholder.</returns>
+    public static string Escape(object? val)
+    {
+        if (val == null) return "null";
+        return System.Net.WebUtility.HtmlEncode(val.ToString() ?? "");
+    }
+
+    /// <summary>
+    /// Formats a value as a number using the invariant culture: N0 for every integral type
+    /// (including whole-valued doubles below 1e15), N2 for other doubles, floats and decimals.
+    /// Null, NaN/infinity and non-numeric values are passed to <see cref="Escape"/>.
+    /// </summary>
+    /// <param name="val">Value to format.</param>
+    /// <returns>Formatted string or escaped placeholder.</returns>
+    public static string FmtNum(object? val)
+    {
+        switch (val)
+        {
+            case null:
+                return Escape(val);
+            case int i:
+                return i.ToString("N0", CultureInfo.InvariantCulture);
+            case long l:
+                return l.ToString("N0", CultureInfo.InvariantCulture);
+            // Narrower, unsigned and arbitrary-precision integers previously fell through to Escape
+            // and were printed without thousands separators; format them like int/long.
+            case sbyte or byte or short or ushort or uint or ulong or System.Numerics.BigInteger:
+                return ((IFormattable)val).ToString("N0", CultureInfo.InvariantCulture);
+            case double d when double.IsNaN(d) || double.IsInfinity(d):
+                return Escape(val);
+            // Whole-valued doubles print as integers; the 1e15 bound keeps the long cast exact.
+            case double d when d == Math.Truncate(d) && Math.Abs(d) < 1e15:
+                return ((long)d).ToString("N0", CultureInfo.InvariantCulture);
+            case double d:
+                return d.ToString("N2", CultureInfo.InvariantCulture);
+            case float f when float.IsNaN(f) || float.IsInfinity(f):
+                return Escape(val);
+            case float f:
+                return f.ToString("N2", CultureInfo.InvariantCulture);
+            case decimal m:
+                return m.ToString("N2", CultureInfo.InvariantCulture);
+            default:
+                return Escape(val);
+        }
+    }
+
+    /// <summary>Returns true if the column name is treated as numeric (right-aligned, N0/N2 formatting).</summary>
+    /// <param name="col">Column name (case-insensitive).</param>
+    /// <returns>True if numeric.</returns>
+    public static bool IsNumericCol(string col)
+    {
+        var lower = col.ToLowerInvariant();
+        if (NumericCols.Contains(lower) || PctCols.Contains(lower)) return true;
+
+        // "_pct" is listed so that every column IsPctCol accepts is also numeric; otherwise a
+        // percent-formatted column would miss the "num" class that RenderTable derives from this method.
+        return lower.EndsWith("_mb", StringComparison.Ordinal)
+            || lower.EndsWith("_gb", StringComparison.Ordinal)
+            || lower.EndsWith("_kb", StringComparison.Ordinal)
+            || lower.EndsWith("_count", StringComparison.Ordinal)
+            || lower.EndsWith("_pct", StringComparison.Ordinal);
+    }
+
+    /// <summary>Returns true if the column is displayed as a percentage (suffix %).</summary>
+    /// <param name="col">Column name (case-insensitive).</param>
+    /// <returns>True if percentage column.</returns>
+    public static bool IsPctCol(string col)
+    {
+        // Lower-case once and reuse for both the set lookup and the suffix test.
+        var lower = col.ToLowerInvariant();
+        return PctCols.Contains(lower) || lower.EndsWith("_pct", StringComparison.Ordinal);
+    }
+
+    /// <summary>Formats a cell value for the given column (percent, number, or escaped text).</summary>
+    /// <param name="col">Column name (determines format).</param>
+    /// <param name="val">Cell value.</param>
+    /// <returns>HTML-safe formatted string.</returns>
+    public static string FmtCell(string col, object? val)
+    {
+        if (val == null) return "null";
+
+        // Percent columns take precedence; a value that cannot be read as a number falls through.
+        if (IsPctCol(col) && TryDouble(val, out var pct)) return pct.ToString("N1", CultureInfo.InvariantCulture) + "%";
+        if (IsNumericCol(col)) return FmtNum(val);
+        return Escape(val);
+    }
+
+    /// <summary>Tries to read a cell value as a double: numeric CLR types are widened, anything else is parsed from its string form (invariant culture).</summary>
+    /// <param name="o">Cell value (may be null).</param>
+    /// <param name="d">Receives the numeric value, or 0 when the method returns false.</param>
+    /// <returns>True when a number was obtained.</returns>
+    private static bool TryDouble(object? o, out double d)
+    {
+        switch (o)
+        {
+            case null:
+                d = 0;
+                return false;
+            case double x:
+                d = x;
+                return true;
+            case float f:
+                d = f;
+                return true;
+            case decimal m:
+                d = (double)m;
+                return true;
+            case int i:
+                d = i;
+                return true;
+            case long l:
+                d = l;
+                return true;
+            default:
+                return double.TryParse(o.ToString(), NumberStyles.Any, CultureInfo.InvariantCulture, out d);
+        }
+    }
+
+    /// <summary>Renders a sortable HTML table from column names and row arrays; optional warn column and truncation set.</summary>
+    /// <param name="columns">Column headers.</param>
+    /// <param name="rows">Rows of cell values (length may vary per row).</param>
+    /// <param name="warnCol">If set, cells in this column with value > 0 get a warning style.</param>
+    /// <param name="truncateCols">Column names to truncate with ellipsis and title=full value.</param>
+ /// <returns>HTML fragment (table wrapped in div).</returns> + /// <remarks>NOTE(review): the string literals below look like they have lost their HTML markup, and List/IReadOnlySet their type arguments, in this copy — confirm against the repository file.</remarks> + public static string RenderTable(string[] columns, List rows, string? warnCol = null, IReadOnlySet? truncateCols = null) + { + if (rows.Count == 0) + return "

No data available for this section.

"; + + var sb = new StringBuilder(); + sb.Append("
"); + foreach (var c in columns) + { + var numClass = IsNumericCol(c) ? " num" : ""; + sb.Append($""); + } + sb.Append(""); + + foreach (var row in rows) + { + sb.Append(""); + for (var i = 0; i < columns.Length; i++) + { + var col = columns[i]; + var val = i < row.Length ? row[i] : null; + var isNum = IsNumericCol(col); + var isTrunc = truncateCols != null && truncateCols.Contains(col); + var isWarn = warnCol == col && val != null && TryDouble(val, out var v) && v > 0; + var classes = new List(); + if (isNum) classes.Add("num"); + if (isTrunc) classes.Add("trunc"); + if (isWarn) classes.Add("warn"); + var cls = classes.Count > 0 ? " class=\"" + string.Join(" ", classes) + "\"" : ""; + var title = isTrunc && val != null ? " title=\"" + Escape(val) + "\"" : ""; + sb.Append($"{FmtCell(col, val)}"); + } + sb.Append(""); + } + sb.Append("
{Escape(c)}
"); + return sb.ToString(); + } + + /// <summary>Renders a key-value grid (e.g. snapshot path, version, generated date).</summary> + /// <param name="kv">Label-to-value map.</param> + /// <returns>HTML fragment (kv-grid div).</returns> + public static string RenderKv(IReadOnlyDictionary kv) + { + var sb = new StringBuilder(); + sb.Append("
"); + foreach (var (label, value) in kv) + { + var mono = label.Contains("path", StringComparison.OrdinalIgnoreCase) || label.Contains("version", StringComparison.OrdinalIgnoreCase) || label.Contains("date", StringComparison.OrdinalIgnoreCase); + var cls = mono ? "kv-value mono" : "kv-value"; + var display = value is int or long or double or float or decimal ? FmtNum(value) : Escape(value); + sb.Append($"
{Escape(label)}
{display}
"); + } + sb.Append("
"); + return sb.ToString(); + } + + /// <summary>Renders an insight block: paragraph plus optional stat pills (label, value, tone class e.g. "warn" or "good").</summary> + /// <param name="text">Main text (may contain HTML); it is appended as-is, so callers must escape untrusted parts.</param> + /// <param name="pills">Optional list of (label, value, tone) for pill display.</param> + /// <returns>HTML fragment (insight div).</returns> + public static string RenderInsight(string text, List<(string Label, string Value, string Tone)>? pills = null) + { + var sb = new StringBuilder(); + sb.Append("

").Append(text).Append("

"); + if (pills != null && pills.Count > 0) + { + sb.Append("
"); + foreach (var (label, value, tone) in pills) + { + var toneClass = string.IsNullOrEmpty(tone) ? "" : " " + tone; + sb.Append($"
{Escape(label)}
{Escape(value)}
"); + } + sb.Append("
"); + } + sb.Append("
"); + return sb.ToString(); + } + + /// <summary>Wraps content in a section div with id, title, optional row-count badge.</summary> + /// <param name="anchor">Id for the section.</param> + /// <param name="title">Section title.</param> + /// <param name="content">Inner HTML.</param> + /// <param name="rowCount">If set, shows "N rows" badge.</param> + /// <returns>HTML fragment.</returns> + public static string Section(string anchor, string title, string content, int? rowCount = null) + { + var badge = rowCount.HasValue ? $"{FmtNum(rowCount.Value)} rows" : ""; + return $"

{Escape(title)}

{badge}
{content}
"; + } + + /// <summary>Wraps inner HTML in a group div with title and description.</summary> + /// <param name="groupTitle">Group heading.</param> + /// <param name="groupDesc">Optional description.</param> + /// <param name="innerHtml">Inner HTML (sections).</param> + /// <returns>HTML fragment.</returns> + public static string Group(string groupTitle, string groupDesc, string innerHtml) => + $"

{Escape(groupTitle)}

{Escape(groupDesc)}
{innerHtml}
"; +} diff --git a/Core/Report/ReportModel.cs b/Core/Report/ReportModel.cs new file mode 100644 index 0000000..a5ae390 --- /dev/null +++ b/Core/Report/ReportModel.cs @@ -0,0 +1,69 @@ +namespace MemorySnapshotDataTools.Report; + +/// <summary>Root model for the HTML report: title, db path, generated timestamp, and ordered groups with nav.</summary> +internal sealed class ReportModel +{ + /// <summary>Report title (e.g. "Memory Snapshot Report").</summary> + public string Title { get; set; } = string.Empty; + + /// <summary>Database path shown in the subtitle.</summary> + public string DbPath { get; set; } = string.Empty; + + /// <summary>When the report was generated (UTC string).</summary> + public string GeneratedAtUtc { get; set; } = string.Empty; + + /// <summary>Content groups (Snapshot Info, Native Objects, Managed Heap, etc.).</summary> + public List Groups { get; } = []; + + /// <summary>Navigation groups for the sidebar (mirrors group/section structure).</summary> + public List NavGroups { get; } = []; +} + +/// <summary>Logical group of sections (e.g. "Native Objects") with a title and optional description.</summary> +internal sealed class ReportGroup +{ + /// <summary>Group heading (e.g. "Native Objects").</summary> + public string GroupTitle { get; set; } = string.Empty; + + /// <summary>Optional short description.</summary> + public string GroupDesc { get; set; } = string.Empty; + + /// <summary>Sections within this group.</summary> + public List Sections { get; } = []; +} + +/// <summary>Single report section: anchor id, title, HTML content, and optional row count badge.</summary> +internal sealed class ReportSection +{ + /// <summary>Fragment id for nav links (e.g. "native-overview").</summary> + public string Anchor { get; set; } = string.Empty; + + /// <summary>Section heading.</summary> + public string SectionTitle { get; set; } = string.Empty; + + /// <summary>Rendered HTML for the section body.</summary> + public string ContentHtml { get; set; } = string.Empty; + + /// <summary>Optional row count for badge display.</summary> + public int? RowCount { get; set; } +} + +/// <summary>Single navigation link (anchor + display title).</summary> +internal sealed class NavItem +{ + /// <summary>Fragment id matching a section anchor.</summary> 
+ public string Anchor { get; set; } = string.Empty; + + /// <summary>Link text.</summary> + public string Title { get; set; } = string.Empty; +} + +/// <summary>Navigation group: label and list of links.</summary> +internal sealed class NavGroup +{ + /// <summary>Group label in the nav (e.g. "Native Objects").</summary> + public string GroupTitle { get; set; } = string.Empty; + + /// <summary>Links in this nav group.</summary> + public List Items { get; } = []; +} diff --git a/Core/Report/ReportRenderer.cs b/Core/Report/ReportRenderer.cs new file mode 100644 index 0000000..072e11c --- /dev/null +++ b/Core/Report/ReportRenderer.cs @@ -0,0 +1,125 @@ +using System.Text; + +namespace MemorySnapshotDataTools.Report; + +/// <summary> +/// Renders a <see cref="ReportModel"/> to a single self-contained HTML string with embedded CSS and sortable-table script. +/// Produces a fixed nav, main content with groups and sections, and consistent styling. +/// </summary> +internal static class ReportRenderer +{ + private const string SortableScript = """ + document.querySelectorAll('table.sortable thead th').forEach(function(th) { + th.style.cursor = 'pointer'; + th.addEventListener('click', function() { + var table = th.closest('table'); + var tbody = table.querySelector('tbody'); + var rows = Array.from(tbody.querySelectorAll('tr')); + var headerCells = table.querySelectorAll('thead th'); + var col = Array.prototype.indexOf.call(headerCells, th); + var isNum = th.classList.contains('num'); + var dir = table.dataset.sortDir === 'asc' ? -1 : 1; + table.dataset.sortDir = table.dataset.sortDir === 'asc' ? 'desc' : 'asc'; + rows.sort(function(a, b) { + var ac = a.cells[col]; + var bc = b.cells[col]; + var av = ac ? ac.textContent.trim() : ''; + var bv = bc ? 
bc.textContent.trim() : ''; + if (isNum) { + var an = parseFloat(av.replace(/,/g, '')) || 0; + var bn = parseFloat(bv.replace(/,/g, '')) || 0; + return dir * (an - bn); + } + return dir * (av.localeCompare(bv)); + }); + rows.forEach(function(r) { tbody.appendChild(r); }); + }); + }); + """; + + private static readonly string Css = """ + *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; } + body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; font-size: 13px; background: #f0f2f5; color: #1a1a2e; padding: 24px; line-height: 1.5; } + h1 { font-size: 22px; font-weight: 700; margin-bottom: 4px; color: #1a1a2e; } + .subtitle { font-size: 12px; color: #666; margin-bottom: 32px; font-family: "SF Mono", "Fira Code", Consolas, monospace; } + nav { position: fixed; top: 24px; right: 24px; width: 210px; background: #fff; border-radius: 8px; box-shadow: 0 1px 4px rgba(0,0,0,.1); padding: 12px 0; z-index: 100; max-height: calc(100vh - 48px); overflow-y: auto; } + nav > h3 { font-size: 10px; text-transform: uppercase; letter-spacing: .06em; color: #aaa; padding: 0 14px 8px; } + .nav-group-label { font-size: 10px; font-weight: 700; text-transform: uppercase; letter-spacing: .06em; color: #1a1a2e; padding: 8px 14px 4px; border-top: 1px solid #f0f2f5; margin-top: 4px; } + .nav-group:first-child .nav-group-label { border-top: none; margin-top: 0; } + nav a { display: block; font-size: 11px; color: #555; text-decoration: none; padding: 3px 14px 3px 20px; border-left: 2px solid transparent; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; } + nav a:hover { background: #f0f2f5; border-left-color: #1a73e8; color: #1a73e8; } + main { max-width: 1100px; } + .group { margin-bottom: 8px; } + .group-header { display: flex; align-items: baseline; gap: 10px; padding: 20px 4px 10px; } + .group-header h2 { font-size: 17px; font-weight: 700; color: #1a1a2e; letter-spacing: -.01em; } + .group-header .group-desc { font-size: 
12px; color: #888; font-style: italic; } + .section { background: #fff; border-radius: 8px; box-shadow: 0 1px 4px rgba(0,0,0,.08); margin-bottom: 16px; overflow: hidden; } + .section-header { display: flex; align-items: baseline; gap: 10px; padding: 14px 18px 10px; border-bottom: 1px solid #e8eaed; } + h3.section-title { font-size: 14px; font-weight: 600; color: #1a1a2e; } + .badge { font-size: 11px; font-weight: 500; background: #e8f0fe; color: #1a73e8; border-radius: 12px; padding: 2px 8px; } + .insight { padding: 10px 18px; background: #f8f9fb; border-bottom: 1px solid #e8eaed; font-size: 12px; color: #444; line-height: 1.6; } + .insight strong { color: #1a1a2e; } + .insight .stat-pills { display: flex; flex-wrap: wrap; gap: 8px; margin-top: 6px; } + .insight .pill { background: #fff; border: 1px solid #dde1e9; border-radius: 6px; padding: 4px 10px; font-size: 12px; line-height: 1.3; } + .insight .pill-label { color: #888; font-size: 10px; text-transform: uppercase; letter-spacing: .04em; } + .insight .pill-value { font-weight: 600; color: #1a1a2e; } + .insight .pill.warn .pill-value { color: #c0392b; } + .insight .pill.good .pill-value { color: #27ae60; } + .table-wrap { overflow-x: auto; } + table { width: 100%; border-collapse: collapse; } + thead th { background: #1a1a2e; color: #fff; font-size: 11px; font-weight: 600; text-transform: uppercase; letter-spacing: .04em; padding: 8px 12px; text-align: left; position: sticky; top: 0; white-space: nowrap; } + thead th.num { text-align: right; } + tbody tr:nth-child(even) { background: #f8f9fb; } + tbody tr:hover { background: #eef2ff; } + td { padding: 6px 12px; border-bottom: 1px solid #f0f2f5; white-space: nowrap; } + td.num { text-align: right; font-variant-numeric: tabular-nums; font-family: "SF Mono", "Fira Code", Consolas, monospace; font-size: 12px; color: #333; } + td.warn { color: #c0392b; font-weight: 600; } + td.trunc { max-width: 320px; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; 
cursor: default; } + .empty { padding: 18px; color: #999; font-style: italic; } + .kv-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(220px, 1fr)); gap: 12px; padding: 16px 18px; border-bottom: 1px solid #f0f2f5; } + .kv-label { font-size: 10px; color: #888; text-transform: uppercase; letter-spacing: .05em; } + .kv-value { font-size: 15px; font-weight: 600; color: #1a1a2e; margin-top: 2px; } + .kv-value.mono { font-family: "SF Mono", "Fira Code", Consolas, monospace; font-size: 11px; font-weight: 400; color: #444; word-break: break-all; white-space: normal; } + """; + + /// Builds the full HTML document from the report model (nav, title, groups, sections). + /// Populated report model from . + /// Complete HTML string (UTF-8). + public static string Render(ReportModel model) + { + var titleEsc = System.Net.WebUtility.HtmlEncode(model.Title); + var dbPathEsc = System.Net.WebUtility.HtmlEncode(model.DbPath); + + var sb = new StringBuilder(); + sb.Append("\n\n\n\n\n"); + sb.Append(titleEsc); + sb.Append("\n\n\n\n\n
\n

").Append(titleEsc).Append("

\n

").Append(dbPathEsc).Append("

\n"); + + foreach (var group in model.Groups) + { + sb.Append("
\n

").Append(System.Net.WebUtility.HtmlEncode(group.GroupTitle)).Append("

").Append(System.Net.WebUtility.HtmlEncode(group.GroupDesc)).Append("
\n"); + foreach (var section in group.Sections) + { + sb.Append(section.ContentHtml); + } + sb.Append("
\n"); + } + + sb.Append("
/// <summary>
/// Entry point for report generation: builds a <see cref="ReportModel"/> from the exported database via
/// <see cref="ReportBuilder"/>, renders HTML with <see cref="ReportRenderer"/>, writes it to a file, and opens
/// the report in the default browser when no output path was requested.
/// </summary>
public static class ReportRunner
{
    /// <summary>
    /// Generates the memory snapshot report: queries the database, builds the model, renders HTML and writes it
    /// to <see cref="ReportRunOptions.ReportOutputPath"/>. When that path is null or empty the report is written
    /// to a uniquely named file in the temp directory and opened in the default browser instead.
    /// </summary>
    /// <param name="options">Database path, output path (null or empty = temp file + open browser), and report title.</param>
    /// <param name="progress">Progress reporter for status messages.</param>
    /// <returns>Exit code 0 on success. Failures while querying, rendering or writing propagate as exceptions.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="options"/> or <paramref name="progress"/> is null.</exception>
    public static int Run(ReportRunOptions options, IProgressReporter progress)
    {
        ArgumentNullException.ThrowIfNull(options);
        ArgumentNullException.ThrowIfNull(progress);

        var generatedAt = DateTime.UtcNow.ToString("yyyy-MM-dd HH:mm:ss", System.Globalization.CultureInfo.InvariantCulture) + " UTC";

        progress.Report($"Report: {options.ReportDbPath} -> {options.ReportOutputPath ?? "(temp + browser)"}", force: true);

        using var backend = ReportQueryFactory.Create(options.ReportDbPath);
        progress.Report($"Backend: {backend.Dialect}", force: true);

        // The total deliberately excludes opening the backend; it covers query + render + write only.
        var swTotal = Stopwatch.StartNew();

        var swQuery = Stopwatch.StartNew();
        var model = ReportBuilder.Build(backend, options.ReportTitle, options.ReportDbPath, generatedAt);
        swQuery.Stop();

        var swRender = Stopwatch.StartNew();
        var html = ReportRenderer.Render(model);
        swRender.Stop();

        var outPath = options.ReportOutputPath;
        var openBrowser = string.IsNullOrEmpty(outPath);
        // The check is repeated directly on outPath (rather than reusing openBrowser) so nullable
        // flow analysis treats outPath as non-null after this statement.
        if (string.IsNullOrEmpty(outPath))
        {
            outPath = Path.Combine(Path.GetTempPath(), "memsnapshot_report_" + Guid.NewGuid().ToString("N")[..8] + ".html");
        }
        else
        {
            // A bare file name has no directory component; only create one when the path names it.
            var dir = Path.GetDirectoryName(outPath);
            if (!string.IsNullOrEmpty(dir))
            {
                Directory.CreateDirectory(dir);
            }
        }

        var swWrite = Stopwatch.StartNew();
        File.WriteAllText(outPath, html, System.Text.Encoding.UTF8);
        swWrite.Stop();
        swTotal.Stop();

        progress.Report($"Report written → {outPath}", force: true);
        progress.Report(
            $"Timings: query_ms={swQuery.ElapsedMilliseconds}, render_ms={swRender.ElapsedMilliseconds}, write_ms={swWrite.ElapsedMilliseconds}, total_ms={swTotal.ElapsedMilliseconds}", force: true);
        progress.Report($"Report completed in {swTotal.Elapsed.TotalSeconds:F1}s (query {swQuery.Elapsed.TotalSeconds:F1}s, render {swRender.Elapsed.TotalSeconds:F1}s, write {swWrite.Elapsed.TotalSeconds:F1}s)", force: true);

        if (openBrowser)
        {
            TryOpenInBrowser(outPath, progress);
        }

        return 0;
    }

    /// <summary>
    /// Best-effort launch of <paramref name="path"/> through the OS shell. A failure is reported via
    /// <paramref name="progress"/> and never thrown: the report file has already been written at this point.
    /// </summary>
    private static void TryOpenInBrowser(string path, IProgressReporter progress)
    {
        try
        {
            var startInfo = new ProcessStartInfo { FileName = new Uri(path).AbsoluteUri, UseShellExecute = true };
            // Process.Start can return null when no new process resource is started; 'using' tolerates
            // null and otherwise releases the process handle instead of leaking it.
            using var launched = Process.Start(startInfo);
        }
        catch (Exception ex)
        {
            progress.Report($"Could not open browser ({ex.Message}). Open manually: {path}", force: true);
        }
    }
}
CPU + {B2C3D4E5-F6A7-8901-BCDE-F12345678901}.Release|Any CPU.ActiveCfg = Release|Any CPU + {B2C3D4E5-F6A7-8901-BCDE-F12345678901}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/README.md b/README.md index ee75514..ae0a7f8 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,104 @@ -# cse-memory-snapshot-data-tool -[View this project in Unity Internal Developer Portal](https://developer.portal.internal.unity.com/catalog/default/component/cse-memory-snapshot-data-tool)
-# Converting to public repository -Any and all Unity software of any description (including components) (1) whose source is to be made available other than under a Unity source code license or (2) in respect of which a public announcement is to be made concerning its inner workings, may be licensed and released only upon the prior approval of Legal. +# Memory Snapshot Data Tools + +Single CLI to **export** Unity memory snapshots (`.snap`) to DuckDB or SQLite and **generate** HTML reports from those databases. + +## What it does + +- **Export:** Reads a `.snap` file, parses and extracts snapshot data, and writes it to a DuckDB (default) or SQLite file. +- **Report:** Connects to an exported database (DuckDB or SQLite), runs report queries, and produces a self-contained HTML report with sortable tables. + +## Prerequisites + +- [.NET 10 SDK](https://dotnet.microsoft.com/download/dotnet/10.0) + +## How it works + +- **Cli** (exe): entry point and options; **Core** (library): Parser (extraction), Export + ExportDestination (write DBs), Report (query + render). Shared data lives in Core (Models). +- **Export:** reads `.snap` via Parser, extracts rows (SnapshotBridge), writes to DuckDB or SQLite via a producer/consumer pipeline. +- **Report:** opens the DB with Report/Queries backend, runs SQL, builds ReportModel, renders HTML (ReportRenderer + ReportHtmlHelper). + +## How to use + +Use the **MemorySnapshotDataTools** directory as the project root. Run the CLI with the Cli project: + +```bash +dotnet run --project Cli/MemorySnapshotDataTools.Cli.csproj -- <command> [args...] +``` + +Or from the `Cli` directory: `dotnet run -- <command> [args...]`. + +### Export a snapshot to a database + +```bash +dotnet run --project Cli/MemorySnapshotDataTools.Cli.csproj -- export <snapshot.snap> <output.duckdb> [options] +``` + +- Use a `.duckdb` extension for DuckDB (default) or `.db` for SQLite. +- **Options:** `--destination duckdb|sqlite`, `--validate none|minimal|full`, `--verbose` (progress and timings). 
+ +**Example (DuckDB):** + +```bash +dotnet run --project Cli/MemorySnapshotDataTools.Cli.csproj -- export ./memory.snap ./out.duckdb --validate minimal --verbose +``` + +**Example (SQLite):** + +```bash +dotnet run --project Cli/MemorySnapshotDataTools.Cli.csproj -- export ./memory.snap ./out.db --destination sqlite --validate minimal --verbose +``` + +### Generate a report from a database + +```bash +dotnet run --project Cli/MemorySnapshotDataTools.Cli.csproj -- report <database> [--out report.html] [options] +``` + +- **`--out <path>`:** where to write the HTML file. If omitted, writes to a temp file and opens it in the browser. +- **`--title "Title"`:** report title (default: "Memory Snapshot Report"). +- **`--verbose`:** print timings (query, render, write). + +**Example:** + +```bash +dotnet run --project Cli/MemorySnapshotDataTools.Cli.csproj -- report ./out.duckdb --out report.html --verbose +``` + +## Output + +- **Export:** Creates a `.duckdb` or `.db` file with tables: `snapshot_info`, `native_objects`, `managed_objects`, `connections`, `native_roots`, `memory_regions`, `native_allocations`. +- **Report:** Produces one HTML file with navigation, sections (Snapshot Info, Native Objects, Managed Heap, Roots, Regions, Connections), and sortable tables. +- **Timings:** With `--verbose`, export prints parse+extract vs. write; report prints query vs. render vs. write and a one-line summary (e.g. `Report completed in 2.3s (query 1.1s, render 0.5s, write 0.1s)`). 
+ +## Schema (for ad-hoc queries) + +| Table | Description | +|---------------------|--------------------------------------------------| +| `snapshot_info` | Snapshot path, export timestamp, Unity version | +| `native_objects` | Native Unity objects (size, type, name) | +| `managed_objects` | Managed heap objects (address, type, size) | +| `connections` | Edges: from_kind/from_index → to_kind/to_index | +| `native_roots` | Root references and accumulated size | +| `memory_regions` | Native memory regions (address, size, hierarchy) | +| `native_allocations`| Allocations within regions | + +Use any DuckDB or SQLite client to query these tables. + +## Build and test + +From the project root: + +```bash +dotnet build +dotnet test +``` + +To run the CLI: `dotnet run --project Cli/MemorySnapshotDataTools.Cli.csproj -- <command> [args...]` or publish the Cli project (see below). + +## Publish (versioned artifacts) + +From the project root, run `./publish.sh` (macOS/Linux) or `./publish.ps1` (Windows). These publish the **Cli** project and produce `artifacts/MemorySnapshotDataTools-<version>-<rid>.zip` for each runtime (win-x64, linux-x64, osx-x64, osx-arm64). + +## AI IDE integration + +A project skill for Cursor (and similar AI IDEs) is in `.cursor/skills/memory-snapshot-report/`. It describes the export and report workflow and when to use it. diff --git a/README_UNITY.md b/README_UNITY.md new file mode 100644 index 0000000..ee75514 --- /dev/null +++ b/README_UNITY.md @@ -0,0 +1,4 @@ +# cse-memory-snapshot-data-tool +[View this project in Unity Internal Developer Portal](https://developer.portal.internal.unity.com/catalog/default/component/cse-memory-snapshot-data-tool)
+# Converting to public repository +Any and all Unity software of any description (including components) (1) whose source is to be made available other than under a Unity source code license or (2) in respect of which a public announcement is to be made concerning its inner workings, may be licensed and released only upon the prior approval of Legal. diff --git a/Tests/MemorySnapshotDataTools.Tests.csproj b/Tests/MemorySnapshotDataTools.Tests.csproj new file mode 100644 index 0000000..a43344d --- /dev/null +++ b/Tests/MemorySnapshotDataTools.Tests.csproj @@ -0,0 +1,17 @@ + + + net10.0 + enable + enable + latest + false + + + + + + + + + + diff --git a/Tests/SnapshotBridgeTests.cs b/Tests/SnapshotBridgeTests.cs new file mode 100644 index 0000000..e251d1a --- /dev/null +++ b/Tests/SnapshotBridgeTests.cs @@ -0,0 +1,101 @@ +using MemorySnapshotDataTools; +using MemorySnapshotDataTools.Parser; +using Xunit; + +namespace MemorySnapshotDataTools.Tests; + +public sealed class SnapshotBridgeTests +{ + /// + /// Builds a minimal DecodedSnapshot that passes ExtractFromDecoded validation: + /// no managed objects, no connections, no memory regions/allocations. 
+ /// + private static DecodedSnapshot CreateMinimalDecoded() + { + return new DecodedSnapshot + { + FormatVersion = 1, + NativeTypeNames = [], + NativeObjectTypeIndices = [], + NativeObjectInstanceIds = [], + NativeObjectNames = [], + NativeObjectSizes = [], + NativeObjectFlags = [], + NativeObjectGcHandleIndices = [], + GcHandleTargets = [], + ConnectionsFrom = [], + ConnectionsTo = [], + NativeRootIds = [], + NativeRootAreaNames = [], + NativeRootObjectNames = [], + NativeRootAccumulatedSizes = [], + NativeMemoryRegionNames = [], + NativeMemoryRegionParentIndices = [], + NativeMemoryRegionAddressBases = [], + NativeMemoryRegionAddressSizes = [], + NativeMemoryRegionFirstAllocationIndices = [], + NativeMemoryRegionNumAllocations = [], + NativeAllocationAddresses = [], + NativeAllocationSizes = [], + NativeAllocationOverheadSizes = [], + NativeAllocationPaddingSizes = [], + NativeAllocationMemoryRegionIndices = [], + VirtualMachineInformation = new DecodedVirtualMachineInfo { PointerSize = 8 }, + ManagedHeapSectionStartAddresses = [], + ManagedHeapSectionBytes = [], + ManagedTypeFlags = [], + ManagedTypeNames = [], + ManagedTypeAssemblies = [], + ManagedTypeBaseOrElementTypeIndices = [], + ManagedTypeSizes = [], + ManagedTypeInfoAddresses = [], + ManagedTypeFieldIndices = [], + FieldOffsets = [], + FieldTypeIndices = [], + FieldNames = [], + FieldIsStatic = [], + }; + } + + [Fact] + public void ExtractFromDecoded_MinimalNativeRoots_ProducesMatchingRows() + { + var decoded = CreateMinimalDecoded(); + decoded.NativeRootIds = [123L]; + decoded.NativeRootAreaNames = ["Scene"]; + decoded.NativeRootObjectNames = ["Root"]; + decoded.NativeRootAccumulatedSizes = [1000UL]; + + var data = SnapshotBridge.ExtractFromDecoded(decoded, "/path/to/snap.snap"); + + var row = Assert.Single(data.NativeRoots); + Assert.Equal(0, row.RootIndex); + Assert.Equal(123L, row.RootId); + Assert.Equal("Scene", row.AreaName); + Assert.Equal("Root", row.ObjectName); + Assert.Equal(1000UL, 
# Build and zip MemorySnapshotDataTools for each RID. Run from MemorySnapshotDataTools (project root).
# Produces: artifacts/MemorySnapshotDataTools-<version>-<rid>.zip

$ErrorActionPreference = "Stop"
$Root = Split-Path -Parent $MyInvocation.MyCommand.Path
$Project = Join-Path $Root "Cli\MemorySnapshotDataTools.Cli.csproj"
$PublishDir = Join-Path $Root "publish"
$ArtifactsDir = Join-Path $Root "artifacts"
$Rids = @("win-x64", "linux-x64", "osx-x64", "osx-arm64")

# Read version from csproj. Only the first matching line is used so that $Version is always a
# single string, even if the project file happens to contain more than one <Version> element.
$versionNode = Select-String -Path $Project -Pattern '<Version>([^<]+)</Version>' | Select-Object -First 1
if (-not $versionNode) { throw "Could not read Version from $Project" }
$Version = $versionNode.Matches[0].Groups[1].Value

New-Item -ItemType Directory -Force -Path $PublishDir, $ArtifactsDir | Out-Null
Push-Location $Root

try {
    foreach ($rid in $Rids) {
        Write-Host "Publishing $rid..."
        $outDir = Join-Path $PublishDir $rid
        dotnet publish $Project -c Release -r $rid --self-contained true -p:PublishSingleFile=true -o $outDir
        # $ErrorActionPreference does not cover native executables; check the exit code explicitly
        # so a failed publish is never zipped up as if it were a good artifact.
        if ($LASTEXITCODE -ne 0) { throw "dotnet publish failed for $rid (exit code $LASTEXITCODE)" }

        $zipName = "MemorySnapshotDataTools-$Version-$rid.zip"
        $zipPath = Join-Path $ArtifactsDir $zipName
        Write-Host "Zipping $zipName"
        Compress-Archive -Path (Join-Path $outDir "*") -DestinationPath $zipPath -Force
        Remove-Item -Recurse -Force $outDir -ErrorAction SilentlyContinue
    }
    # Remove the staging directory only when every per-RID folder was cleaned up.
    if ((Get-ChildItem $PublishDir -ErrorAction SilentlyContinue).Count -eq 0) {
        Remove-Item -Force $PublishDir -ErrorAction SilentlyContinue
    }
    # ${...} is required here: "$ArtifactsDir:" would be parsed as a scope/drive-qualified
    # variable reference and is rejected by the PowerShell parser.
    Write-Host "Done. Artifacts in ${ArtifactsDir}:"
    Get-ChildItem (Join-Path $ArtifactsDir "*.zip") | Format-Table Name, Length -AutoSize
} finally {
    Pop-Location
}
Artifacts in $ARTIFACTS_DIR:" +ls -la "$ARTIFACTS_DIR"/*.zip