From d9157e3b6b3ec25367376c338e856199de998113 Mon Sep 17 00:00:00 2001 From: Zack Asofsky Date: Thu, 12 Mar 2026 12:13:10 -0400 Subject: [PATCH] Initial project setup with core functionality for exporting Unity memory snapshots to DuckDB or SQLite and generating HTML reports. Added CLI interface, project structure, and build scripts. Included .gitignore and CI configuration for automated testing. --- .../skills/memory-snapshot-report/SKILL.md | 51 ++ .github/workflows/ci.yaml | 30 + .gitignore | 25 + Cli/CliOptions.cs | 53 ++ Cli/CommandLineBuilder.cs | 163 ++++ Cli/MemorySnapshotDataTools.Cli.csproj | 19 + Cli/Program.cs | 126 +++ Core/Export/ExportErrors.cs | 19 + Core/Export/ExportPipeline.cs | 266 +++++++ .../DuckDbExportDestination.cs | 426 +++++++++++ .../ExportDestinationFactory.cs | 18 + .../IExportDestinationWriter.cs | 38 + .../SqliteExportDestination.cs | 26 + Core/ExportDestination/SqliteWriter.cs | 721 ++++++++++++++++++ Core/MemorySnapshotDataTools.Core.csproj | 18 + Core/Models/ExportPipeline.cs | 219 ++++++ Core/Models/IProgressReporter.cs | 16 + Core/Models/Options.cs | 63 ++ Core/Models/SnapshotData.cs | 49 ++ Core/Models/SnapshotRows.cs | 145 ++++ Core/Parser/ManagedSnapshotCrawler.cs | 598 +++++++++++++++ Core/Parser/SnapDataModel.cs | 239 ++++++ Core/Parser/SnapReader.cs | 529 +++++++++++++ Core/Parser/SnapSectionDecoders.cs | 423 ++++++++++ Core/Parser/SnapshotBridge.cs | 261 +++++++ Core/Report/Queries/DuckDbReportQueries.cs | 58 ++ Core/Report/Queries/IReportQueryBackend.cs | 32 + Core/Report/Queries/ReportQueryFactory.cs | 34 + Core/Report/Queries/ReportSql.cs | 449 +++++++++++ Core/Report/Queries/SqliteReportQueries.cs | 61 ++ Core/Report/ReportBuilder.cs | 365 +++++++++ Core/Report/ReportHtmlHelper.cs | 199 +++++ Core/Report/ReportModel.cs | 69 ++ Core/Report/ReportRenderer.cs | 125 +++ Core/Report/ReportRunner.cs | 82 ++ MemorySnapshotDataTools.sln | 34 + README.md | 108 ++- README_UNITY.md | 4 + 
Tests/MemorySnapshotDataTools.Tests.csproj | 17 + Tests/SnapshotBridgeTests.cs | 101 +++ publish.ps1 | 37 + publish.sh | 32 + 42 files changed, 6344 insertions(+), 4 deletions(-) create mode 100644 .cursor/skills/memory-snapshot-report/SKILL.md create mode 100644 .github/workflows/ci.yaml create mode 100644 .gitignore create mode 100644 Cli/CliOptions.cs create mode 100644 Cli/CommandLineBuilder.cs create mode 100644 Cli/MemorySnapshotDataTools.Cli.csproj create mode 100644 Cli/Program.cs create mode 100644 Core/Export/ExportErrors.cs create mode 100644 Core/Export/ExportPipeline.cs create mode 100644 Core/ExportDestination/DuckDbExportDestination.cs create mode 100644 Core/ExportDestination/ExportDestinationFactory.cs create mode 100644 Core/ExportDestination/IExportDestinationWriter.cs create mode 100644 Core/ExportDestination/SqliteExportDestination.cs create mode 100644 Core/ExportDestination/SqliteWriter.cs create mode 100644 Core/MemorySnapshotDataTools.Core.csproj create mode 100644 Core/Models/ExportPipeline.cs create mode 100644 Core/Models/IProgressReporter.cs create mode 100644 Core/Models/Options.cs create mode 100644 Core/Models/SnapshotData.cs create mode 100644 Core/Models/SnapshotRows.cs create mode 100644 Core/Parser/ManagedSnapshotCrawler.cs create mode 100644 Core/Parser/SnapDataModel.cs create mode 100644 Core/Parser/SnapReader.cs create mode 100644 Core/Parser/SnapSectionDecoders.cs create mode 100644 Core/Parser/SnapshotBridge.cs create mode 100644 Core/Report/Queries/DuckDbReportQueries.cs create mode 100644 Core/Report/Queries/IReportQueryBackend.cs create mode 100644 Core/Report/Queries/ReportQueryFactory.cs create mode 100644 Core/Report/Queries/ReportSql.cs create mode 100644 Core/Report/Queries/SqliteReportQueries.cs create mode 100644 Core/Report/ReportBuilder.cs create mode 100644 Core/Report/ReportHtmlHelper.cs create mode 100644 Core/Report/ReportModel.cs create mode 100644 Core/Report/ReportRenderer.cs create mode 100644 
Core/Report/ReportRunner.cs create mode 100644 MemorySnapshotDataTools.sln create mode 100644 README_UNITY.md create mode 100644 Tests/MemorySnapshotDataTools.Tests.csproj create mode 100644 Tests/SnapshotBridgeTests.cs create mode 100644 publish.ps1 create mode 100755 publish.sh diff --git a/.cursor/skills/memory-snapshot-report/SKILL.md b/.cursor/skills/memory-snapshot-report/SKILL.md new file mode 100644 index 0000000..d6dfd37 --- /dev/null +++ b/.cursor/skills/memory-snapshot-report/SKILL.md @@ -0,0 +1,51 @@ +--- +name: memory-snapshot-report +description: Generate and view Unity memory snapshot reports. Use when the user wants to analyze a Unity memory snapshot, export it to a database, or generate/view an HTML report. +--- + +# Memory Snapshot Report + +## When to use + +- User wants to analyze a Unity memory snapshot (`.snap` file). +- User wants to export a snapshot to a DuckDB or SQLite database. +- User wants to generate or view an HTML report from an exported snapshot database. + +## Prerequisites + +- .NET 10 SDK. +- Project path: **MemorySnapshotDataTools** is the project root; run commands from that directory. + +## Steps + +### 1. Export snapshot to database + +From the MemorySnapshotDataTools directory: + +```bash +dotnet run --project Cli/MemorySnapshotDataTools.Cli.csproj -- export path/to/snapshot.snap path/to/output.duckdb --validate minimal --verbose +``` + +- Use `.duckdb` for DuckDB (recommended) or `.db` for SQLite. +- For SQLite add `--destination sqlite`. +- `--verbose` prints progress and timings (parse+extract vs. write). + +### 2. Generate HTML report + +```bash +dotnet run --project Cli/MemorySnapshotDataTools.Cli.csproj -- report path/to/output.duckdb --out report.html --verbose +``` + +- Omit `--out` to write to a temp file and open in the browser. +- Use `--title "My Report"` to set the report title. +- Report works with either DuckDB or SQLite databases produced by the export command. + +### 3. Optional + +- Open the generated HTML file or DB in the user’s preferred viewer. 
+- For ad-hoc SQL, use the same DB path; tables include `snapshot_info`, `native_objects`, `managed_objects`, `connections`, `native_roots`, `memory_regions`, `native_allocations`. + +## Domain + +- The tool supports **DuckDB** (default) and **SQLite**; report can be generated from either. +- The CLI reports **timings**: export shows parse+extract vs. write; report shows query vs. render vs. write. Use `--verbose` to see them. diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000..c57fe07 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,30 @@ +# Build and run unit tests on PRs targeting main and on pushes to main. + +name: CI + +on: + pull_request: + branches: [main] + push: + branches: [main] + +jobs: + build-and-test: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup .NET + uses: actions/setup-dotnet@v4 + with: + dotnet-version: '10.0.x' + + - name: Restore + run: dotnet restore MemorySnapshotDataTools.sln + + - name: Build + run: dotnet build MemorySnapshotDataTools.sln -c Release --no-restore + + - name: Test + run: dotnet test MemorySnapshotDataTools.sln -c Release --no-build -v normal diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e5cb2f1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,25 @@ +# Build / output +bin/ +obj/ +artifacts/ +publish/ + +# IDE +*.user +*.suo +.vs/ +.idea/ + +# OS +.DS_Store +Thumbs.db + +# Report artifacts +*.log +*.duckdb +*.db +*.html + +# Snapshots +*.snap +MemoryCaptures/ diff --git a/Cli/CliOptions.cs b/Cli/CliOptions.cs new file mode 100644 index 0000000..fe2a436 --- /dev/null +++ b/Cli/CliOptions.cs @@ -0,0 +1,53 @@ +using MemorySnapshotDataTools; + +namespace MemorySnapshotDataTools.Cli; + +internal enum CommandKind +{ + Export, + Report, +} + +/// +/// Parsed CLI options passed from System.CommandLine handlers to RunExport/RunReport. 
+/// +internal sealed class CliOptions +{ + public CommandKind Command { get; set; } = CommandKind.Export; + public string SnapshotPath { get; set; } = string.Empty; + public string OutputDbPath { get; set; } = string.Empty; + public string ReportDbPath { get; set; } = string.Empty; + public string? ReportOutputPath { get; set; } + public string ReportTitle { get; set; } = "Memory Snapshot Report"; + public int BatchSize { get; set; } = 2048; + public int QueueCapacity { get; set; } = 256; + public ValidationMode Validate { get; set; } = ValidationMode.Minimal; + public DestinationKind Destination { get; set; } = DestinationKind.DuckDb; + public bool Verbose { get; set; } +} + +internal sealed class ConsoleProgress : IProgressReporter +{ + private readonly bool _verbose; + private readonly object _lock = new(); + private DateTime _lastWrite = DateTime.MinValue; + + public ConsoleProgress(bool verbose) + { + _verbose = verbose; + } + + public void Report(string message, bool force = false) + { + if (!_verbose && !force) + return; + + lock (_lock) + { + if (!force && DateTime.UtcNow - _lastWrite < TimeSpan.FromMilliseconds(250)) + return; + _lastWrite = DateTime.UtcNow; + Console.WriteLine($"[{DateTime.UtcNow:O}] {message}"); + } + } +} diff --git a/Cli/CommandLineBuilder.cs b/Cli/CommandLineBuilder.cs new file mode 100644 index 0000000..5f0b9bf --- /dev/null +++ b/Cli/CommandLineBuilder.cs @@ -0,0 +1,163 @@ +using System.CommandLine; +using MemorySnapshotDataTools; + +namespace MemorySnapshotDataTools.Cli; + +/// +/// Builds the root command and subcommands (export, report) using System.CommandLine. 
+/// +internal static class CommandLineBuilder +{ + public static RootCommand Build(Func runExport, Func runReport) + { + var root = new RootCommand("Export Unity memory snapshots to DuckDB or SQLite and generate HTML reports."); + + // ---- export ---- + var exportCmd = new Command("export", "Export a .snap file to a DuckDB or SQLite database."); + var snapshotArg = new Argument("snapshot") + { + Description = "Path to the Unity memory snapshot (.snap) file.", + Arity = ArgumentArity.ExactlyOne, + }; + var outputArg = new Argument("output") + { + Description = "Path to the output database (.duckdb or .db).", + Arity = ArgumentArity.ExactlyOne, + }; + exportCmd.Add(snapshotArg); + exportCmd.Add(outputArg); + + var batchSizeOpt = new Option("--batch-size") + { + Description = "Rows per produced batch.", + DefaultValueFactory = _ => 2048, + }; + var queueCapacityOpt = new Option("--queue-capacity") + { + Description = "Max queued batches.", + DefaultValueFactory = _ => 256, + }; + var validateOpt = new Option("--validate") + { + Description = "Validation mode: none, minimal, or full.", + DefaultValueFactory = _ => "minimal", + }; + validateOpt.AcceptOnlyFromAmong("none", "minimal", "full"); + var destinationOpt = new Option("--destination") + { + Description = "Export backend: duckdb or sqlite.", + DefaultValueFactory = _ => "duckdb", + }; + destinationOpt.AcceptOnlyFromAmong("duckdb", "sqlite"); + var verboseOpt = new Option("--verbose") + { + Description = "Print progress updates.", + }; + + exportCmd.Add(batchSizeOpt); + exportCmd.Add(queueCapacityOpt); + exportCmd.Add(validateOpt); + exportCmd.Add(destinationOpt); + exportCmd.Add(verboseOpt); + + exportCmd.SetAction((ParseResult parseResult) => + { + var snapshotPath = ExpandPath(parseResult.GetValue(snapshotArg)!); + var outputDbPath = ExpandPath(parseResult.GetValue(outputArg)!); + if (!File.Exists(snapshotPath)) + { + Console.Error.WriteLine($"Snapshot file not found: {snapshotPath}"); + return 1; + } + var 
options = new CliOptions + { + Command = CommandKind.Export, + SnapshotPath = snapshotPath, + OutputDbPath = outputDbPath, + BatchSize = parseResult.GetValue(batchSizeOpt), + QueueCapacity = parseResult.GetValue(queueCapacityOpt), + Validate = ParseValidationMode(parseResult.GetValue(validateOpt)!), + Destination = parseResult.GetValue(destinationOpt)!.ToLowerInvariant() == "sqlite" ? DestinationKind.Sqlite : DestinationKind.DuckDb, + Verbose = parseResult.GetValue(verboseOpt), + }; + return runExport(options); + }); + + // ---- report ---- + var reportCmd = new Command("report", "Generate an HTML report from an exported database."); + var databaseArg = new Argument("database") + { + Description = "Path to the exported database (.duckdb or .db).", + Arity = ArgumentArity.ExactlyOne, + }; + reportCmd.Add(databaseArg); + + var outOpt = new Option("--out") + { + Description = "Output HTML file path (default: temp file + open in browser).", + }; + var titleOpt = new Option("--title") + { + Description = "Report title.", + DefaultValueFactory = _ => "Memory Snapshot Report", + }; + var reportVerboseOpt = new Option("--verbose") + { + Description = "Print progress and timings.", + }; + + reportCmd.Add(outOpt); + reportCmd.Add(titleOpt); + reportCmd.Add(reportVerboseOpt); + + reportCmd.SetAction((ParseResult parseResult) => + { + var reportDbPath = ExpandPath(parseResult.GetValue(databaseArg)!); + if (!File.Exists(reportDbPath)) + { + Console.Error.WriteLine($"Database file not found: {reportDbPath}"); + return 1; + } + var outPath = parseResult.GetValue(outOpt); + var options = new CliOptions + { + Command = CommandKind.Report, + ReportDbPath = reportDbPath, + ReportOutputPath = string.IsNullOrWhiteSpace(outPath) ? 
null : ExpandPath(outPath), + ReportTitle = parseResult.GetValue(titleOpt)!, + Verbose = parseResult.GetValue(reportVerboseOpt), + }; + return runReport(options); + }); + + root.Add(exportCmd); + root.Add(reportCmd); + return root; + } + + private static ValidationMode ParseValidationMode(string value) + { + return value.ToLowerInvariant() switch + { + "none" => ValidationMode.None, + "minimal" => ValidationMode.Minimal, + "full" => ValidationMode.Full, + _ => ValidationMode.Minimal, + }; + } + + private static string ExpandPath(string value) + { + if (string.IsNullOrWhiteSpace(value)) + return value; + + var expanded = Environment.ExpandEnvironmentVariables(value); + if (expanded.StartsWith("~/", StringComparison.Ordinal) || expanded == "~") + { + var home = Environment.GetFolderPath(Environment.SpecialFolder.UserProfile); + var suffix = expanded.Length > 2 ? expanded[2..] : string.Empty; + expanded = Path.Combine(home, suffix); + } + return Path.GetFullPath(expanded); + } +} diff --git a/Cli/MemorySnapshotDataTools.Cli.csproj b/Cli/MemorySnapshotDataTools.Cli.csproj new file mode 100644 index 0000000..8950491 --- /dev/null +++ b/Cli/MemorySnapshotDataTools.Cli.csproj @@ -0,0 +1,19 @@ + + + Exe + net10.0 + enable + enable + latest + MemorySnapshotDataTools.Cli + MemorySnapshotDataTools + 0.1.0 + true + true + + + + + + + diff --git a/Cli/Program.cs b/Cli/Program.cs new file mode 100644 index 0000000..86fdc75 --- /dev/null +++ b/Cli/Program.cs @@ -0,0 +1,126 @@ +using System.Diagnostics; +using MemorySnapshotDataTools; +using MemorySnapshotDataTools.Export; +using MemorySnapshotDataTools.ExportDestination; +using MemorySnapshotDataTools.Parser; +using MemorySnapshotDataTools.Report; + +namespace MemorySnapshotDataTools.Cli; + +internal static class Program +{ + private static int Main(string[] args) + { + var root = CommandLineBuilder.Build(RunExport, RunReport); + return root.Parse(args).Invoke(); + } + + private static int RunExport(CliOptions options) + { + var 
destination = ExportDestinationFactory.Create(options.Destination); + var progress = new ConsoleProgress(options.Verbose); + progress.Report($"Backend: {destination.DestinationName}", force: true); + + using var cts = new CancellationTokenSource(); + Console.CancelKeyPress += (_, e) => + { + e.Cancel = true; + cts.Cancel(); + }; + + try + { + var sw = Stopwatch.StartNew(); + + var exportOptions = new ExportRunOptions + { + OutputDbPath = options.OutputDbPath, + BatchSize = options.BatchSize, + QueueCapacity = options.QueueCapacity, + Validate = options.Validate, + }; + + var extractSw = Stopwatch.StartNew(); + var rawData = RunStage("snapshot-extract", progress, () => SnapshotBridge.ExtractRawData(options.SnapshotPath, progress, cts.Token)); + extractSw.Stop(); + + var pipelineSw = Stopwatch.StartNew(); + var counts = RunStage("pipeline-write", progress, () => ExportPipeline.Run(exportOptions, rawData, destination, progress, cts.Token)); + pipelineSw.Stop(); + + var validationSw = Stopwatch.StartNew(); + RunStage("validation", progress, () => destination.Validate(options.OutputDbPath, rawData, options.Validate)); + validationSw.Stop(); + + counts.TotalMs = sw.ElapsedMilliseconds; + var pipelineRps = pipelineSw.ElapsedMilliseconds > 0 + ? rawData.TotalRows * 1000.0 / pipelineSw.ElapsedMilliseconds + : 0.0; + + progress.Report( + $"Done. 
backend={destination.DestinationName}, native_objects={counts.NativeObjects}, managed_objects={counts.ManagedObjects}, connections={counts.Connections}, native_roots={counts.NativeRoots}, " + + $"memory_regions={counts.MemoryRegions}, native_allocations={counts.NativeAllocations}, " + + $"extract_ms={extractSw.ElapsedMilliseconds}, pipeline_ms={pipelineSw.ElapsedMilliseconds}, validation_ms={validationSw.ElapsedMilliseconds}, total_ms={counts.TotalMs}, " + + $"pipeline_rps={pipelineRps:N0}, backend_insert_ms={counts.BackendInsertMs}, backend_commit_ms={counts.BackendCommitMs}, backend_index_ms={counts.BackendIndexBuildMs}, " + + $"insert_ms_by_table(native={counts.NativeObjectInsertMs}, managed={counts.ManagedObjectInsertMs}, connections={counts.ConnectionInsertMs}, roots={counts.NativeRootInsertMs}, regions={counts.MemoryRegionInsertMs}, allocations={counts.NativeAllocationInsertMs})"); + return 0; + } + catch (OperationCanceledException) + { + Console.Error.WriteLine("Export cancelled."); + return 2; + } + catch (Exception ex) + { + Console.Error.WriteLine("Export failed."); + if (ex is ExportStageException stageEx) + { + Console.Error.WriteLine($"Failure stage: {stageEx.Stage}"); + Console.Error.WriteLine(stageEx.InnerException ?? 
stageEx); + } + else + { + Console.Error.WriteLine(ex); + } + return 3; + } + } + + private static int RunReport(CliOptions options) + { + var reportOptions = new ReportRunOptions + { + ReportDbPath = options.ReportDbPath, + ReportOutputPath = options.ReportOutputPath, + ReportTitle = options.ReportTitle, + }; + var progress = new ConsoleProgress(options.Verbose); + return ReportRunner.Run(reportOptions, progress); + } + + private static void RunStage(string stage, ConsoleProgress progress, Action action) + { + progress.Report($"[{stage}] start", force: true); + try + { + action(); + } + catch (Exception ex) when (ex is not ExportStageException) + { + throw new ExportStageException(stage, ex); + } + } + + private static T RunStage(string stage, ConsoleProgress progress, Func action) + { + progress.Report($"[{stage}] start", force: true); + try + { + return action(); + } + catch (Exception ex) when (ex is not ExportStageException) + { + throw new ExportStageException(stage, ex); + } + } +} diff --git a/Core/Export/ExportErrors.cs b/Core/Export/ExportErrors.cs new file mode 100644 index 0000000..d70d434 --- /dev/null +++ b/Core/Export/ExportErrors.cs @@ -0,0 +1,19 @@ +namespace MemorySnapshotDataTools.Export; + +/// +/// Exception thrown when an export stage (e.g. extract, write, validate) fails. Wraps the inner exception and records the stage name. +/// +public sealed class ExportStageException : Exception +{ + /// Creates an exception for a failed export stage. + /// Name of the stage that failed (e.g. "extract", "write"). + /// The underlying exception. + public ExportStageException(string stage, Exception innerException) + : base($"Stage '{stage}' failed.", innerException) + { + Stage = stage; + } + + /// Name of the export stage that failed. 
+ public string Stage { get; } +} diff --git a/Core/Export/ExportPipeline.cs b/Core/Export/ExportPipeline.cs new file mode 100644 index 0000000..cc01aa0 --- /dev/null +++ b/Core/Export/ExportPipeline.cs @@ -0,0 +1,266 @@ +using System.Collections.Concurrent; +using System.Diagnostics; +using System.Runtime.ExceptionServices; +using MemorySnapshotDataTools; +using MemorySnapshotDataTools.ExportDestination; + +namespace MemorySnapshotDataTools.Export; + +/// +/// Orchestrates parallel export of to a database: producers materialize batches per table, +/// a single writer consumes from a bounded queue and writes via . +/// Reports progress and respects cancellation. +/// +public static class ExportPipeline +{ + /// Minimum interval (ms) between progress reports during materialize+write to avoid flooding the console. + private const int ProgressReportIntervalMs = 350; + + /// Sleep (ms) between monitor loop iterations when waiting on producers or writer. + private const int MonitorPollIntervalMs = 125; + + /// + /// Runs the full export: starts the destination writer and parallel producers, monitors until completion, then returns counts and timings. + /// Validates that materialized and written row counts match the raw data. + /// + /// Batch size, queue capacity, output path. + /// Extracted snapshot data to export. + /// Writer implementation (DuckDB or SQLite). + /// Progress reporter. + /// Cancellation token. + /// Row counts and timing statistics. + /// If materialized or written row counts do not match. + /// When is cancelled. 
+ public static ExportCounts Run(ExportRunOptions options, RawSnapshotData rawData, IExportDestinationWriter destination, IProgressReporter progress, CancellationToken token) + { + var counts = new ExportCounts(); + var state = new PipelineState(rawData.TotalRows); + using var cts = CancellationTokenSource.CreateLinkedTokenSource(token); + var queue = new BlockingCollection(options.QueueCapacity); + + progress.Report($"Starting {destination.DestinationName} writer with {rawData.TotalRows:N0} total rows...", force: true); + var writerTask = Task.Run( + () => destination.ConsumeAndWrite(options.OutputDbPath, rawData.SnapshotInfo, queue, state, cts.Token), + cts.Token); + + var materializeSw = Stopwatch.StartNew(); + var producerTasks = new[] + { + Task.Run(() => ProduceNativeRoots(rawData.NativeRoots, queue, state, options.BatchSize, cts.Token), cts.Token), + Task.Run(() => ProduceMemoryRegions(rawData.MemoryRegions, queue, state, options.BatchSize, cts.Token), cts.Token), + Task.Run(() => ProduceNativeAllocations(rawData.NativeAllocations, queue, state, options.BatchSize, cts.Token), cts.Token), + Task.Run(() => ProduceNativeObjects(rawData.NativeObjects, queue, state, options.BatchSize, cts.Token), cts.Token), + Task.Run(() => ProduceManagedObjects(rawData.ManagedObjects, queue, state, options.BatchSize, cts.Token), cts.Token), + Task.Run(() => ProduceConnections(rawData.Connections, queue, state, options.BatchSize, cts.Token), cts.Token), + }; + + MonitorOverlap(producerTasks, writerTask, queue, progress, state, options.QueueCapacity, cts); + materializeSw.Stop(); + + var writeSw = Stopwatch.StartNew(); + MonitorWriter(writerTask, progress, state, options.QueueCapacity, cts); + writeSw.Stop(); + var writeStats = writerTask.GetAwaiter().GetResult(); + + counts.NativeObjects = rawData.NativeObjects.Count; + counts.ManagedObjects = rawData.ManagedObjects.Count; + counts.Connections = rawData.Connections.Count; + counts.NativeRoots = rawData.NativeRoots.Count; + 
counts.MemoryRegions = rawData.MemoryRegions.Count; + counts.NativeAllocations = rawData.NativeAllocations.Count; + counts.MaterializeMs = materializeSw.ElapsedMilliseconds; + counts.WriteMs = writeSw.ElapsedMilliseconds; + counts.BackendInsertMs = writeStats.TotalInsertMs; + counts.BackendCommitMs = writeStats.CommitMs; + counts.BackendIndexBuildMs = writeStats.IndexBuildMs; + counts.NativeObjectInsertMs = writeStats.NativeObjectInsertMs; + counts.ManagedObjectInsertMs = writeStats.ManagedObjectInsertMs; + counts.ConnectionInsertMs = writeStats.ConnectionInsertMs; + counts.NativeRootInsertMs = writeStats.NativeRootInsertMs; + counts.MemoryRegionInsertMs = writeStats.MemoryRegionInsertMs; + counts.NativeAllocationInsertMs = writeStats.NativeAllocationInsertMs; + + if (state.MaterializedRows != rawData.TotalRows) + throw new InvalidOperationException($"Materialized rows mismatch. expected={rawData.TotalRows}, actual={state.MaterializedRows}"); + if (state.WrittenRows != rawData.TotalRows + 1) + throw new InvalidOperationException($"Written rows mismatch. 
expected={rawData.TotalRows + 1}, actual={state.WrittenRows}"); + + return counts; + } + + private static void MonitorOverlap( + Task[] producerTasks, + Task writerTask, + BlockingCollection queue, + IProgressReporter progress, + PipelineState state, + int queueCapacity, + CancellationTokenSource cts) + { + var lastWrite = DateTime.MinValue; + while (producerTasks.Any(t => !t.IsCompleted)) + { + ThrowIfFaulted(producerTasks, writerTask); + var produced = state.MaterializedRows; + var written = Math.Max(0, state.WrittenRows - 1); + if (DateTime.UtcNow - lastWrite > TimeSpan.FromMilliseconds(ProgressReportIntervalMs)) + { + progress.Report( + $"parallel materialize+write: produced={produced:N0}/{state.TotalRows:N0}, written={written:N0}/{state.TotalRows:N0}, queued={state.QueuedBatchCount:N0}/{queueCapacity:N0}"); + lastWrite = DateTime.UtcNow; + } + + Thread.Sleep(MonitorPollIntervalMs); + } + + Task.WaitAll(producerTasks); + queue.CompleteAdding(); + progress.Report($"Materialization complete ({state.MaterializedRows:N0}/{state.TotalRows:N0}).", force: true); + } + + private static void MonitorWriter(Task writerTask, IProgressReporter progress, PipelineState state, int queueCapacity, CancellationTokenSource cts) + { + var lastWrite = DateTime.MinValue; + while (!writerTask.IsCompleted) + { + if (writerTask.IsFaulted) + RethrowTaskException(writerTask, "Writer task failed."); + if (writerTask.IsCanceled) + throw new OperationCanceledException(); + + if (DateTime.UtcNow - lastWrite > TimeSpan.FromMilliseconds(ProgressReportIntervalMs)) + { + progress.Report($"writing: written={Math.Max(0, state.WrittenRows - 1):N0}/{state.TotalRows:N0}, queued={state.QueuedBatchCount:N0}/{queueCapacity:N0}"); + lastWrite = DateTime.UtcNow; + } + Thread.Sleep(MonitorPollIntervalMs); + } + + progress.Report($"Write complete ({Math.Max(0, state.WrittenRows - 1):N0}/{state.TotalRows:N0}).", force: true); + } + + private static void ProduceNativeRoots(List rows, BlockingCollection queue, 
PipelineState state, int batchSize, CancellationToken token) + { + ProduceBatches(rows.Count, batchSize, token, start => + { + var end = Math.Min(start + batchSize, rows.Count); + var buffer = new NativeRootRow[end - start]; + rows.CopyTo(start, buffer, 0, buffer.Length); + queue.Add(WriteBatch.ForNativeRoots(buffer), token); + state.IncrementQueuedBatches(); + state.AddMaterialized(buffer.Length); + }); + } + + private static void ProduceNativeObjects(List rows, BlockingCollection queue, PipelineState state, int batchSize, CancellationToken token) + { + ProduceBatches(rows.Count, batchSize, token, start => + { + var end = Math.Min(start + batchSize, rows.Count); + var buffer = new NativeObjectRow[end - start]; + rows.CopyTo(start, buffer, 0, buffer.Length); + queue.Add(WriteBatch.ForNativeObjects(buffer), token); + state.IncrementQueuedBatches(); + state.AddMaterialized(buffer.Length); + }); + } + + private static void ProduceMemoryRegions(List rows, BlockingCollection queue, PipelineState state, int batchSize, CancellationToken token) + { + ProduceBatches(rows.Count, batchSize, token, start => + { + var end = Math.Min(start + batchSize, rows.Count); + var buffer = new MemoryRegionRow[end - start]; + rows.CopyTo(start, buffer, 0, buffer.Length); + queue.Add(WriteBatch.ForMemoryRegions(buffer), token); + state.IncrementQueuedBatches(); + state.AddMaterialized(buffer.Length); + }); + } + + private static void ProduceNativeAllocations(List rows, BlockingCollection queue, PipelineState state, int batchSize, CancellationToken token) + { + ProduceBatches(rows.Count, batchSize, token, start => + { + var end = Math.Min(start + batchSize, rows.Count); + var buffer = new NativeAllocationRow[end - start]; + rows.CopyTo(start, buffer, 0, buffer.Length); + queue.Add(WriteBatch.ForNativeAllocations(buffer), token); + state.IncrementQueuedBatches(); + state.AddMaterialized(buffer.Length); + }); + } + + private static void ProduceManagedObjects(List rows, BlockingCollection 
queue, PipelineState state, int batchSize, CancellationToken token) + { + ProduceBatches(rows.Count, batchSize, token, start => + { + var end = Math.Min(start + batchSize, rows.Count); + var buffer = new ManagedObjectRow[end - start]; + rows.CopyTo(start, buffer, 0, buffer.Length); + queue.Add(WriteBatch.ForManagedObjects(buffer), token); + state.IncrementQueuedBatches(); + state.AddMaterialized(buffer.Length); + }); + } + + private static void ProduceConnections(List rows, BlockingCollection queue, PipelineState state, int batchSize, CancellationToken token) + { + ProduceBatches(rows.Count, batchSize, token, start => + { + var end = Math.Min(start + batchSize, rows.Count); + var buffer = new ConnectionRow[end - start]; + rows.CopyTo(start, buffer, 0, buffer.Length); + queue.Add(WriteBatch.ForConnections(buffer), token); + state.IncrementQueuedBatches(); + state.AddMaterialized(buffer.Length); + }); + } + + private static void ProduceBatches(int totalCount, int batchSize, CancellationToken token, Action processBatch) + { + if (totalCount <= 0) + return; + + var batchCount = (totalCount + batchSize - 1) / batchSize; + var starts = new int[batchCount]; + for (var i = 0; i < batchCount; i++) + starts[i] = i * batchSize; + Parallel.ForEach(starts, new ParallelOptions + { + CancellationToken = token, + MaxDegreeOfParallelism = Math.Max(1, Environment.ProcessorCount), + }, start => + { + token.ThrowIfCancellationRequested(); + processBatch(start); + }); + } + + private static void ThrowIfFaulted(Task[] producerTasks, Task writerTask) + { + foreach (var task in producerTasks) + { + if (task.IsFaulted) + RethrowTaskException(task, "Producer task failed."); + if (task.IsCanceled) + throw new OperationCanceledException(); + } + + if (writerTask.IsFaulted) + RethrowTaskException(writerTask, "Writer task failed."); + if (writerTask.IsCanceled) + throw new OperationCanceledException(); + } + + private static void RethrowTaskException(Task task, string fallbackMessage) + { + var 
aggregate = task.Exception; + if (aggregate == null) + throw new InvalidOperationException(fallbackMessage); + + var inner = aggregate.InnerException ?? aggregate; + ExceptionDispatchInfo.Capture(inner).Throw(); + throw new InvalidOperationException(fallbackMessage); + } +} diff --git a/Core/ExportDestination/DuckDbExportDestination.cs b/Core/ExportDestination/DuckDbExportDestination.cs new file mode 100644 index 0000000..84661d1 --- /dev/null +++ b/Core/ExportDestination/DuckDbExportDestination.cs @@ -0,0 +1,426 @@ +using System.Collections.Concurrent; +using System.Diagnostics; +using DuckDB.NET.Data; + +namespace MemorySnapshotDataTools.ExportDestination; + +/// +/// DuckDB implementation of . Writes snapshot tables to a .duckdb file using DuckDB appenders, +/// then builds indexes. Supports validation via row counts and optional referential checks. +/// +internal sealed class DuckDbExportDestination : IExportDestinationWriter +{ + /// + public string DestinationName => "duckdb"; + + #region ConsumeAndWrite + + /// + public WriteStats ConsumeAndWrite( + string dbPath, + SnapshotInfo snapshotInfo, + BlockingCollection queue, + PipelineState state, + CancellationToken token) + { + var directory = Path.GetDirectoryName(dbPath); + if (!string.IsNullOrEmpty(directory)) + Directory.CreateDirectory(directory); + + // Remove any existing DuckDB files so we start fresh. + // DuckDB creates a WAL alongside the main file; both must be deleted to avoid replay. + foreach (var suffix in new[] { "", ".wal" }) + { + var f = dbPath + suffix; + if (File.Exists(f)) + File.Delete(f); + } + + using var connection = new DuckDBConnection($"Data Source={dbPath}"); + connection.Open(); + + var stats = new WriteStats(); + + // Create schema + Exec(connection, SchemaTablesScript); + + // Insert snapshot_info using positional parameters (DuckDB uses ? 
placeholders) + using (var cmd = connection.CreateCommand()) + { + cmd.CommandText = "INSERT INTO snapshot_info(snapshot_path, exported_at_utc, unity_version) VALUES (?, ?, ?);"; + cmd.Parameters.Add(new DuckDBParameter { Value = snapshotInfo.SnapshotPath }); + cmd.Parameters.Add(new DuckDBParameter { Value = snapshotInfo.ExportedAtUtc }); + cmd.Parameters.Add(new DuckDBParameter { Value = snapshotInfo.UnityVersion ?? (object)DBNull.Value }); + cmd.ExecuteNonQuery(); + } + state.AddWritten(1); + + var insertSw = Stopwatch.StartNew(); + + // Appenders are scoped so disposal (= flush+commit) is timed separately. + using (var nativeAppender = connection.CreateAppender("native_objects")) + using (var managedAppender = connection.CreateAppender("managed_objects")) + using (var connectionAppender = connection.CreateAppender("connections")) + using (var rootAppender = connection.CreateAppender("native_roots")) + using (var regionAppender = connection.CreateAppender("memory_regions")) + using (var allocationAppender = connection.CreateAppender("native_allocations")) + { + foreach (var batch in queue.GetConsumingEnumerable(token)) + { + token.ThrowIfCancellationRequested(); + state.DecrementQueuedBatches(); + switch (batch.Kind) + { + case WriteBatchKind.NativeObjects: + var nativeSw = Stopwatch.StartNew(); + foreach (var row in batch.NativeObjects) + { + // INTEGER columns get int, BIGINT columns get long (type must match exactly) + nativeAppender.CreateRow() + .AppendValue(row.NativeObjectIndex) // int → INTEGER + .AppendValue(row.InstanceId ?? string.Empty) // string → VARCHAR + .AppendValue(row.Name ?? string.Empty) // string → VARCHAR + .AppendValue(unchecked((long)row.SizeBytes)) // ulong → BIGINT + .AppendValue(row.TypeIndex) // int → INTEGER + .AppendValue(row.NativeTypeName ?? 
string.Empty) // string → VARCHAR + .AppendValue(row.IsDestroyed) // bool → BOOLEAN + .EndRow(); + } + nativeSw.Stop(); + stats.NativeObjectRows += batch.NativeObjects.Length; + stats.NativeObjectInsertMs += nativeSw.ElapsedMilliseconds; + state.AddWritten(batch.NativeObjects.Length); + break; + + case WriteBatchKind.ManagedObjects: + var managedSw = Stopwatch.StartNew(); + foreach (var row in batch.ManagedObjects) + { + var r = managedAppender.CreateRow() + .AppendValue(row.ManagedObjectIndex) // int → INTEGER + .AppendValue(unchecked((long)row.Address)) // ulong → BIGINT + .AppendValue(row.SizeBytes) // long → BIGINT + .AppendValue(row.TypeIndex) // int → INTEGER + .AppendValue(row.ManagedTypeName ?? string.Empty); // VARCHAR + if (row.NativeObjectIndex >= 0) + r.AppendValue(row.NativeObjectIndex); // long → BIGINT + else + r.AppendNullValue(); + r.EndRow(); + } + managedSw.Stop(); + stats.ManagedObjectRows += batch.ManagedObjects.Length; + stats.ManagedObjectInsertMs += managedSw.ElapsedMilliseconds; + state.AddWritten(batch.ManagedObjects.Length); + break; + + case WriteBatchKind.Connections: + var connSw = Stopwatch.StartNew(); + foreach (var row in batch.Connections) + { + connectionAppender.CreateRow() + .AppendValue(row.FromKind ?? string.Empty) + .AppendValue(row.FromIndex) + .AppendValue(row.ToKind ?? string.Empty) + .AppendValue(row.ToIndex) + .AppendValue(row.ConnectionType ?? string.Empty) + .EndRow(); + } + connSw.Stop(); + stats.ConnectionRows += batch.Connections.Length; + stats.ConnectionInsertMs += connSw.ElapsedMilliseconds; + state.AddWritten(batch.Connections.Length); + break; + + case WriteBatchKind.NativeRoots: + var rootSw = Stopwatch.StartNew(); + foreach (var row in batch.NativeRoots) + { + rootAppender.CreateRow() + .AppendValue(row.RootIndex) // int → INTEGER + .AppendValue(row.RootId) // long → BIGINT + .AppendValue(row.AreaName ?? string.Empty) // VARCHAR + .AppendValue(row.ObjectName ?? 
string.Empty) // VARCHAR + .AppendValue(unchecked((long)row.AccumulatedSizeBytes)) // ulong → BIGINT + .EndRow(); + } + rootSw.Stop(); + stats.NativeRootRows += batch.NativeRoots.Length; + stats.NativeRootInsertMs += rootSw.ElapsedMilliseconds; + state.AddWritten(batch.NativeRoots.Length); + break; + + case WriteBatchKind.MemoryRegions: + var regionSw = Stopwatch.StartNew(); + foreach (var row in batch.MemoryRegions) + { + var r = regionAppender.CreateRow() + .AppendValue(row.RegionIndex) // int → INTEGER + .AppendValue(unchecked((long)row.AddressBase)) // ulong → BIGINT + .AppendValue(unchecked((long)row.AddressSize)) // ulong → BIGINT + .AppendValue(row.Name ?? string.Empty); // VARCHAR + if (row.ParentRegionIndex >= 0) + r.AppendValue(row.ParentRegionIndex); // int → INTEGER + else + r.AppendNullValue(); + if (row.FirstAllocationIndex >= 0) + r.AppendValue(row.FirstAllocationIndex); // int → INTEGER + else + r.AppendNullValue(); + r.AppendValue(row.NumAllocations).EndRow(); // int → INTEGER + } + regionSw.Stop(); + stats.MemoryRegionRows += batch.MemoryRegions.Length; + stats.MemoryRegionInsertMs += regionSw.ElapsedMilliseconds; + state.AddWritten(batch.MemoryRegions.Length); + break; + + case WriteBatchKind.NativeAllocations: + var allocSw = Stopwatch.StartNew(); + foreach (var row in batch.NativeAllocations) + { + var r = allocationAppender.CreateRow() + .AppendValue(row.AllocationIndex) // int → INTEGER + .AppendValue(unchecked((long)row.Address)) // ulong → BIGINT + .AppendValue(unchecked((long)row.SizeBytes)) // ulong → BIGINT + .AppendValue(unchecked((long)row.OverheadSizeBytes)) // ulong → BIGINT + .AppendValue(unchecked((long)row.PaddingSizeBytes)); // ulong → BIGINT + if (row.MemoryRegionIndex >= 0) + r.AppendValue(row.MemoryRegionIndex); // int → INTEGER + else + r.AppendNullValue(); + r.EndRow(); + } + allocSw.Stop(); + stats.NativeAllocationRows += batch.NativeAllocations.Length; + stats.NativeAllocationInsertMs += allocSw.ElapsedMilliseconds; + 
state.AddWritten(batch.NativeAllocations.Length); + break; + } + } + } // appenders disposed (flushed + committed) here + + insertSw.Stop(); + stats.TotalInsertMs = insertSw.ElapsedMilliseconds; + // CommitMs is included in TotalInsertMs since disposal happens inside the timed scope. + stats.CommitMs = 0; + + var indexSw = Stopwatch.StartNew(); + Exec(connection, CreateIndexesScript); + indexSw.Stop(); + stats.IndexBuildMs = indexSw.ElapsedMilliseconds; + + return stats; + } + + #endregion + + #region Validation + + /// + public void Validate(string dbPath, RawSnapshotData rawData, ValidationMode mode) + { + if (mode == ValidationMode.None) + return; + + using var connection = new DuckDBConnection($"Data Source={dbPath}"); + connection.Open(); + + var nativeCount = QueryCount(connection, "SELECT COUNT(*) FROM native_objects;"); + var managedCount = QueryCount(connection, "SELECT COUNT(*) FROM managed_objects;"); + var connectionCount = QueryCount(connection, "SELECT COUNT(*) FROM connections;"); + var rootCount = QueryCount(connection, "SELECT COUNT(*) FROM native_roots;"); + var regionCount = QueryCount(connection, "SELECT COUNT(*) FROM memory_regions;"); + var allocationCount = QueryCount(connection, "SELECT COUNT(*) FROM native_allocations;"); + + if (nativeCount != rawData.NativeObjects.Count || + managedCount != rawData.ManagedObjects.Count || + connectionCount != rawData.Connections.Count || + rootCount != rawData.NativeRoots.Count || + regionCount != rawData.MemoryRegions.Count || + allocationCount != rawData.NativeAllocations.Count) + { + throw new InvalidOperationException( + $"DuckDB validation count mismatch. 
" + + $"expected=(native={rawData.NativeObjects.Count}, managed={rawData.ManagedObjects.Count}, " + + $"connections={rawData.Connections.Count}, roots={rawData.NativeRoots.Count}, " + + $"regions={rawData.MemoryRegions.Count}, allocations={rawData.NativeAllocations.Count}) " + + $"actual=(native={nativeCount}, managed={managedCount}, connections={connectionCount}, " + + $"roots={rootCount}, regions={regionCount}, allocations={allocationCount})"); + } + + if (mode == ValidationMode.Full) + { + var duplicateNativeKeys = QueryCount(connection, "SELECT COUNT(*) FROM (SELECT native_object_index, COUNT(*) c FROM native_objects GROUP BY native_object_index HAVING c > 1);"); + var duplicateManagedKeys = QueryCount(connection, "SELECT COUNT(*) FROM (SELECT managed_object_index, COUNT(*) c FROM managed_objects GROUP BY managed_object_index HAVING c > 1);"); + var duplicateRegionKeys = QueryCount(connection, "SELECT COUNT(*) FROM (SELECT region_index, COUNT(*) c FROM memory_regions GROUP BY region_index HAVING c > 1);"); + var duplicateAllocationKeys = QueryCount(connection, "SELECT COUNT(*) FROM (SELECT allocation_index, COUNT(*) c FROM native_allocations GROUP BY allocation_index HAVING c > 1);"); + if (duplicateNativeKeys > 0 || duplicateManagedKeys > 0 || duplicateRegionKeys > 0 || duplicateAllocationKeys > 0) + throw new InvalidOperationException("DuckDB validation failed: duplicate primary key rows found."); + + var orphanFromManaged = QueryCount(connection, """ + SELECT COUNT(*) FROM connections c + WHERE c.from_kind = 'managed_object' + AND NOT EXISTS ( + SELECT 1 FROM managed_objects m WHERE m.managed_object_index = c.from_index + ); + """); + var orphanFromNative = QueryCount(connection, """ + SELECT COUNT(*) FROM connections c + WHERE c.from_kind = 'native_object' + AND NOT EXISTS ( + SELECT 1 FROM native_objects n WHERE n.native_object_index = c.from_index + ); + """); + var orphanToManaged = QueryCount(connection, """ + SELECT COUNT(*) FROM connections c + WHERE 
c.to_kind = 'managed_object' + AND NOT EXISTS ( + SELECT 1 FROM managed_objects m WHERE m.managed_object_index = c.to_index + ); + """); + var orphanToNative = QueryCount(connection, """ + SELECT COUNT(*) FROM connections c + WHERE c.to_kind = 'native_object' + AND NOT EXISTS ( + SELECT 1 FROM native_objects n WHERE n.native_object_index = c.to_index + ); + """); + var unknownKinds = QueryCount(connection, """ + SELECT COUNT(*) FROM connections + WHERE from_kind NOT IN ('managed_object','native_object') + OR to_kind NOT IN ('managed_object','native_object'); + """); + var orphanAllocationRegionRefs = QueryCount(connection, """ + SELECT COUNT(*) FROM native_allocations a + WHERE a.memory_region_index IS NOT NULL + AND NOT EXISTS ( + SELECT 1 FROM memory_regions r WHERE r.region_index = a.memory_region_index + ); + """); + var orphanRegionFirstAllocationRefs = QueryCount(connection, """ + SELECT COUNT(*) FROM memory_regions r + WHERE r.first_allocation_index IS NOT NULL + AND NOT EXISTS ( + SELECT 1 FROM native_allocations a WHERE a.allocation_index = r.first_allocation_index + ); + """); + + if (orphanFromManaged > 0 || orphanFromNative > 0 || orphanToManaged > 0 || orphanToNative > 0 || unknownKinds > 0 || + orphanAllocationRegionRefs > 0 || orphanRegionFirstAllocationRefs > 0) + { + throw new InvalidOperationException( + $"DuckDB validation failed: invalid graph or memory-map references. 
" + + $"orphan_from_managed={orphanFromManaged}, orphan_from_native={orphanFromNative}, " + + $"orphan_to_managed={orphanToManaged}, orphan_to_native={orphanToNative}, unknown_kinds={unknownKinds}, " + + $"orphan_allocation_region_refs={orphanAllocationRegionRefs}, orphan_region_first_allocation_refs={orphanRegionFirstAllocationRefs}"); + } + } + } + + #endregion + + #region Helpers + + private static void Exec(DuckDBConnection connection, string sql) + { + // DuckDB doesn't support multiple statements in one ExecuteNonQuery call; + // split on semicolons and run each statement individually. + foreach (var stmt in sql.Split(';', StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries)) + { + using var cmd = connection.CreateCommand(); + cmd.CommandText = stmt; + cmd.ExecuteNonQuery(); + } + } + + private static long QueryCount(DuckDBConnection connection, string sql) + { + using var cmd = connection.CreateCommand(); + cmd.CommandText = sql; + var result = cmd.ExecuteScalar(); + return Convert.ToInt64(result); + } + + #endregion + + #region Schema + + // Column types must match C# value types passed to the Appender exactly + // (DuckDB Appender reads raw bytes; passing int to BIGINT column corrupts data). + // int → INTEGER (32-bit), long/ulong-cast → BIGINT (64-bit). 
+    private const string SchemaTablesScript = """
+CREATE OR REPLACE TABLE snapshot_info (
+    snapshot_path VARCHAR NOT NULL,
+    exported_at_utc VARCHAR NOT NULL,
+    unity_version VARCHAR
+);
+
+CREATE OR REPLACE TABLE native_objects (
+    native_object_index INTEGER PRIMARY KEY,
+    instance_id VARCHAR,
+    name VARCHAR,
+    size_bytes BIGINT NOT NULL,
+    type_index INTEGER,
+    native_type_name VARCHAR,
+    is_destroyed BOOLEAN NOT NULL
+);
+
+CREATE OR REPLACE TABLE managed_objects (
+    managed_object_index INTEGER PRIMARY KEY,
+    address BIGINT NOT NULL,
+    size_bytes BIGINT NOT NULL,
+    type_index INTEGER,
+    managed_type_name VARCHAR,
+    native_object_index BIGINT
+);
+
+CREATE OR REPLACE TABLE connections (
+    from_kind VARCHAR NOT NULL,
+    from_index BIGINT NOT NULL,
+    to_kind VARCHAR NOT NULL,
+    to_index BIGINT NOT NULL,
+    connection_type VARCHAR NOT NULL
+);
+
+CREATE OR REPLACE TABLE native_roots (
+    root_index INTEGER PRIMARY KEY,
+    root_id BIGINT NOT NULL,
+    area_name VARCHAR,
+    object_name VARCHAR,
+    accumulated_size_bytes BIGINT NOT NULL
+);
+
+CREATE OR REPLACE TABLE memory_regions (
+    region_index INTEGER PRIMARY KEY,
+    address_base BIGINT NOT NULL,
+    address_size BIGINT NOT NULL,
+    name VARCHAR,
+    parent_region_index INTEGER,
+    first_allocation_index INTEGER,
+    num_allocations INTEGER NOT NULL
+);
+
+CREATE OR REPLACE TABLE native_allocations (
+    allocation_index INTEGER PRIMARY KEY,
+    address BIGINT NOT NULL,
+    size_bytes BIGINT NOT NULL,
+    overhead_size_bytes BIGINT NOT NULL,
+    padding_size_bytes BIGINT NOT NULL,
+    memory_region_index INTEGER
+);
+""";
+
+    // Secondary indexes. ConsumeAndWrite executes this script only after the appender scope has
+    // closed, so the indexes do not exist while rows are being appended. Exec splits scripts on
+    // ';', so no statement in either script may contain a semicolon inside its body.
+    private const string CreateIndexesScript = """
+CREATE INDEX idx_connections_from ON connections(from_kind, from_index);
+CREATE INDEX idx_connections_to ON connections(to_kind, to_index);
+CREATE INDEX idx_native_objects_instance_id ON native_objects(instance_id);
+CREATE INDEX idx_native_objects_is_destroyed ON native_objects(is_destroyed);
+CREATE INDEX idx_managed_objects_address ON managed_objects(address);
+CREATE INDEX idx_memory_regions_address_base ON memory_regions(address_base);
+CREATE INDEX idx_native_allocations_address ON native_allocations(address);
+CREATE INDEX idx_native_allocations_region ON native_allocations(memory_region_index);
+""";
+
+    #endregion
+}
diff --git a/Core/ExportDestination/ExportDestinationFactory.cs b/Core/ExportDestination/ExportDestinationFactory.cs
new file mode 100644
index 0000000..6d89c4f
--- /dev/null
+++ b/Core/ExportDestination/ExportDestinationFactory.cs
@@ -0,0 +1,18 @@
+namespace MemorySnapshotDataTools.ExportDestination;
+
+/// <summary>
+/// Factory for creating the appropriate <see cref="IExportDestinationWriter"/> based on <see cref="DestinationKind"/>.
+/// </summary>
+public static class ExportDestinationFactory
+{
+    /// <summary>Creates a writer for the specified database backend.</summary>
+    /// <param name="kind">DuckDB or SQLite.</param>
+    /// <returns>An implementation of <see cref="IExportDestinationWriter"/>.</returns>
+    /// <exception cref="ArgumentOutOfRangeException">If <paramref name="kind"/> is not a known value.</exception>
+    public static IExportDestinationWriter Create(DestinationKind kind) => kind switch
+    {
+        DestinationKind.DuckDb => new DuckDbExportDestination(),
+        DestinationKind.Sqlite => new SqliteExportDestination(),
+        _ => throw new ArgumentOutOfRangeException(nameof(kind), kind, null),
+    };
+}
diff --git a/Core/ExportDestination/IExportDestinationWriter.cs b/Core/ExportDestination/IExportDestinationWriter.cs
new file mode 100644
index 0000000..37dec7e
--- /dev/null
+++ b/Core/ExportDestination/IExportDestinationWriter.cs
@@ -0,0 +1,38 @@
+using System.Collections.Concurrent;
+
+namespace MemorySnapshotDataTools.ExportDestination;
+
+/// <summary>
+/// Abstraction for writing snapshot data to a database. Implementations (e.g. DuckDB, SQLite) consume from a queue,
+/// write to the given path, update <see cref="PipelineState"/>, and optionally support post-write validation.
+/// </summary>
+public interface IExportDestinationWriter
+{
+    /// <summary>Identifier of the destination (e.g. "duckdb", "sqlite") for progress and errors.</summary>
+    string DestinationName { get; }
+
+    /// <summary>
+    /// Consumes batches from the queue until <see cref="BlockingCollection{T}.IsCompleted"/> is true, writes all tables to the database,
+    /// and returns per-table row counts and timings. Updates <paramref name="state"/> as batches are written.
+    /// </summary>
+    /// <param name="dbPath">Output database file path.</param>
+    /// <param name="snapshotInfo">Metadata to write (e.g. to snapshot_info table).</param>
+    /// <param name="queue">Bounded queue of write batches; adding is completed by the pipeline when producers finish.</param>
+    /// <param name="state">Shared state to update (written rows, queued batch count).</param>
+    /// <param name="token">Cancellation token.</param>
+    /// <returns>Per-table row counts and insert/commit/index timings.</returns>
+    WriteStats ConsumeAndWrite(
+        string dbPath,
+        SnapshotInfo snapshotInfo,
+        BlockingCollection<WriteBatch> queue,
+        PipelineState state,
+        CancellationToken token);
+
+    /// <summary>
+    /// Runs optional validation on the written database (e.g. row count checks, referential integrity) according to <paramref name="mode"/>.
+    /// </summary>
+    /// <param name="dbPath">Path to the database file.</param>
+    /// <param name="rawData">Original snapshot data used for expected counts.</param>
+    /// <param name="mode">Validation level (none, minimal, full).</param>
+    void Validate(string dbPath, RawSnapshotData rawData, ValidationMode mode);
+}
diff --git a/Core/ExportDestination/SqliteExportDestination.cs b/Core/ExportDestination/SqliteExportDestination.cs
new file mode 100644
index 0000000..dfa2b64
--- /dev/null
+++ b/Core/ExportDestination/SqliteExportDestination.cs
@@ -0,0 +1,26 @@
+using System.Collections.Concurrent;
+
+namespace MemorySnapshotDataTools.ExportDestination;
+
+/// <summary>
+/// SQLite implementation of <see cref="IExportDestinationWriter"/>. Delegates to <see cref="SqliteWriter"/> for writing and validation.
+/// Writes snapshot tables to a .db file with WAL mode and bulk inserts.
+/// </summary>
+internal sealed class SqliteExportDestination : IExportDestinationWriter
+{
+    /// <inheritdoc />
+    public string DestinationName => "sqlite";
+
+    /// <inheritdoc />
+    public WriteStats ConsumeAndWrite(
+        string dbPath,
+        SnapshotInfo snapshotInfo,
+        BlockingCollection<WriteBatch> queue,
+        PipelineState state,
+        CancellationToken token)
+        => SqliteWriter.ConsumeAndWrite(dbPath, snapshotInfo, queue, state, token);
+
+    /// <inheritdoc />
+    public void Validate(string dbPath, RawSnapshotData rawData, ValidationMode mode)
+        => SqliteWriter.Validate(dbPath, rawData, mode);
+}
diff --git a/Core/ExportDestination/SqliteWriter.cs b/Core/ExportDestination/SqliteWriter.cs
new file mode 100644
index 0000000..7a01266
--- /dev/null
+++ b/Core/ExportDestination/SqliteWriter.cs
@@ -0,0 +1,721 @@
+using System.Collections.Concurrent;
+using System.Diagnostics;
+using System.Text;
+using Microsoft.Data.Sqlite;
+using MemorySnapshotDataTools;
+
+namespace MemorySnapshotDataTools.ExportDestination;
+
+/// <summary>
+/// Static helper for writing snapshot data to SQLite: schema creation, bulk inserts from queue,
+/// and optional validation (row counts and referential integrity).
+/// Used by <see cref="SqliteExportDestination"/>.
+/// </summary>
+internal static class SqliteWriter
+{
+    // Upper bound on bound parameters in one multi-row INSERT; RowsPerStatement divides this by
+    // the column count. NOTE(review): presumably chosen to stay under SQLite's historical default
+    // limit of 999 bound variables per statement — confirm.
+    private const int MaxSqlParametersPerStatement = 900;
+
+    // Preferred number of rows per multi-row INSERT before the parameter cap above is applied.
+    private const int DefaultRowsPerBulkInsert = 128;
+
+    #region Validation
+
+    /// <summary>
+    /// Validates the database at <paramref name="dbPath"/>: for minimal mode checks row counts against <paramref name="rawData"/>;
+    /// for full mode also checks primary key uniqueness and connection/region/allocation referential integrity.
+    /// </summary>
+    /// <param name="dbPath">Path to the SQLite database file.</param>
+    /// <param name="rawData">Expected snapshot data for count comparison.</param>
+    /// <param name="mode">Validation level (none, minimal, full).</param>
+    /// <exception cref="InvalidOperationException">If counts or referential checks fail.</exception>
+    public static void Validate(string dbPath, RawSnapshotData rawData, ValidationMode mode)
+    {
+        // ValidationMode.None skips validation entirely; the database is not even opened.
+        if (mode == ValidationMode.None)
+            return;
+
+        using var connection = new SqliteConnection($"Data Source={dbPath}");
+        connection.Open();
+
+        // Row-count check (runs for every mode other than None): each table must hold exactly
+        // as many rows as the in-memory snapshot it was exported from.
+        var nativeCount = QueryCount(connection, "SELECT COUNT(*) FROM native_objects;");
+        var managedCount = QueryCount(connection, "SELECT COUNT(*) FROM managed_objects;");
+        var connectionCount = QueryCount(connection, "SELECT COUNT(*) FROM connections;");
+        var rootCount = QueryCount(connection, "SELECT COUNT(*) FROM native_roots;");
+        var regionCount = QueryCount(connection, "SELECT COUNT(*) FROM memory_regions;");
+        var allocationCount = QueryCount(connection, "SELECT COUNT(*) FROM native_allocations;");
+
+        if (nativeCount != rawData.NativeObjects.Count ||
+            managedCount != rawData.ManagedObjects.Count ||
+            connectionCount != rawData.Connections.Count ||
+            rootCount != rawData.NativeRoots.Count ||
+            regionCount != rawData.MemoryRegions.Count ||
+            allocationCount != rawData.NativeAllocations.Count)
+        {
+            // The original message is kept as the prefix; the per-table expected/actual counts
+            // follow it in the same layout the DuckDB validator reports.
+            throw new InvalidOperationException(
+                "SQLite validation count mismatch between extracted rows and persisted rows. " +
+                $"expected=(native={rawData.NativeObjects.Count}, managed={rawData.ManagedObjects.Count}, " +
+                $"connections={rawData.Connections.Count}, roots={rawData.NativeRoots.Count}, " +
+                $"regions={rawData.MemoryRegions.Count}, allocations={rawData.NativeAllocations.Count}) " +
+                $"actual=(native={nativeCount}, managed={managedCount}, connections={connectionCount}, " +
+                $"roots={rootCount}, regions={regionCount}, allocations={allocationCount})");
+        }
+
+        if (mode == ValidationMode.Full)
+        {
+            // Key uniqueness: each query counts key values that occur more than once in its table.
+            var duplicateNativeKeys = QueryCount(connection, "SELECT COUNT(*) FROM (SELECT native_object_index, COUNT(*) c FROM native_objects GROUP BY native_object_index HAVING c > 1);");
+            var duplicateManagedKeys = QueryCount(connection, "SELECT COUNT(*) FROM (SELECT managed_object_index, COUNT(*) c FROM managed_objects GROUP BY managed_object_index HAVING c > 1);");
+            var duplicateRegionKeys = QueryCount(connection, "SELECT COUNT(*) FROM (SELECT region_index, COUNT(*) c FROM memory_regions GROUP BY region_index HAVING c > 1);");
+            var duplicateAllocationKeys = QueryCount(connection, "SELECT COUNT(*) FROM (SELECT allocation_index, COUNT(*) c FROM native_allocations GROUP BY allocation_index HAVING c > 1);");
+            if (duplicateNativeKeys > 0 || duplicateManagedKeys > 0 || duplicateRegionKeys > 0 || duplicateAllocationKeys > 0)
+                throw new InvalidOperationException("SQLite validation failed: duplicate primary key rows found.");
+
+            // Referential checks: every connection endpoint must resolve to a row in the table
+            // named by its *_kind column.
+            var orphanFromManaged = QueryCount(connection, """
+                SELECT COUNT(*) FROM connections c
+                WHERE c.from_kind = 'managed_object'
+                  AND NOT EXISTS (
+                      SELECT 1
+                      FROM managed_objects m
+                      WHERE m.managed_object_index = c.from_index
+                  );
+                """);
+            var orphanFromNative = QueryCount(connection, """
+                SELECT COUNT(*) FROM connections c
+                WHERE c.from_kind = 'native_object'
+                  AND NOT EXISTS (
+                      SELECT 1
+                      FROM native_objects n
+                      WHERE n.native_object_index = c.from_index
+                  );
+                """);
+            var orphanToManaged = QueryCount(connection, """
+                SELECT COUNT(*) FROM connections c
+                WHERE c.to_kind = 'managed_object'
+                  AND NOT EXISTS (
+                      SELECT 1
+                      FROM managed_objects m
+                      WHERE m.managed_object_index = c.to_index
+                  );
+                """);
+            var orphanToNative = QueryCount(connection, """
+                SELECT COUNT(*) FROM connections c
+                WHERE c.to_kind = 'native_object'
+                  AND NOT EXISTS (
+                      SELECT 1
+                      FROM native_objects n
+                      WHERE n.native_object_index = c.to_index
+                  );
+                """);
+            // Any kind other than the two known values would bypass the four checks above.
+            var unknownKinds = QueryCount(connection, """
+                SELECT COUNT(*) FROM connections
+                WHERE from_kind NOT IN ('managed_object','native_object')
+                   OR to_kind NOT IN ('managed_object','native_object');
+                """);
+            // Memory map: non-NULL region/allocation references must point at existing rows.
+            var orphanAllocationRegionRefs = QueryCount(connection, """
+                SELECT COUNT(*) FROM native_allocations a
+                WHERE a.memory_region_index IS NOT NULL
+                  AND NOT EXISTS (
+                      SELECT 1
+                      FROM memory_regions r
+                      WHERE r.region_index = a.memory_region_index
+                  );
+                """);
+            var orphanRegionFirstAllocationRefs = QueryCount(connection, """
+                SELECT COUNT(*) FROM memory_regions r
+                WHERE r.first_allocation_index IS NOT NULL
+                  AND NOT EXISTS (
+                      SELECT 1
+                      FROM native_allocations a
+                      WHERE a.allocation_index = r.first_allocation_index
+                  );
+                """);
+
+            if (orphanFromManaged > 0 || orphanFromNative > 0 || orphanToManaged > 0 || orphanToNative > 0 || unknownKinds > 0 ||
+                orphanAllocationRegionRefs > 0 || orphanRegionFirstAllocationRefs > 0)
+            {
+                throw new InvalidOperationException(
+                    $"SQLite validation failed: invalid graph or memory-map references. " +
+                    $"orphan_from_managed={orphanFromManaged}, orphan_from_native={orphanFromNative}, " +
+                    $"orphan_to_managed={orphanToManaged}, orphan_to_native={orphanToNative}, unknown_kinds={unknownKinds}, " +
+                    $"orphan_allocation_region_refs={orphanAllocationRegionRefs}, orphan_region_first_allocation_refs={orphanRegionFirstAllocationRefs}");
+            }
+        }
+    }
+
+    #endregion
+
+    #region ConsumeAndWrite
+
+    /// <summary>
+    /// Consumes batches from the queue, writes all tables to the SQLite database, and returns per-table row counts and timings.
+    /// Creates the directory for <paramref name="dbPath"/> if needed, enables WAL mode, and runs schema creation and bulk inserts inside a transaction.
+    /// </summary>
+    /// <param name="dbPath">Output database file path.</param>
+    /// <param name="snapshotInfo">Metadata to insert into snapshot_info.</param>
+    /// <param name="queue">Bounded queue of write batches.</param>
+    /// <param name="state">Shared pipeline state to update.</param>
+    /// <param name="token">Cancellation token.</param>
+    /// <returns>Per-table row counts and insert/commit/index timings.</returns>
+ public static WriteStats ConsumeAndWrite( + string dbPath, + SnapshotInfo snapshotInfo, + BlockingCollection queue, + PipelineState state, + CancellationToken token) + { + var directory = Path.GetDirectoryName(dbPath); + if (!string.IsNullOrEmpty(directory)) + Directory.CreateDirectory(directory); + + using var connection = new SqliteConnection($"Data Source={dbPath}"); + connection.Open(); + Exec(connection, null, "PRAGMA journal_mode=WAL;"); + Exec(connection, null, "PRAGMA synchronous=NORMAL;"); + Exec(connection, null, "PRAGMA temp_store=MEMORY;"); + Exec(connection, null, "PRAGMA cache_size=-200000;"); + + var stats = new WriteStats(); + + using var transaction = connection.BeginTransaction(); + try + { + ExecScript(connection, transaction, SchemaTablesScript); + + using var snapshotCmd = connection.CreateCommand(); + snapshotCmd.Transaction = transaction; + snapshotCmd.CommandText = "INSERT INTO snapshot_info(snapshot_path, exported_at_utc, unity_version) VALUES ($p, $e, $u);"; + snapshotCmd.Parameters.AddWithValue("$p", snapshotInfo.SnapshotPath); + snapshotCmd.Parameters.AddWithValue("$e", snapshotInfo.ExportedAtUtc); + snapshotCmd.Parameters.AddWithValue("$u", snapshotInfo.UnityVersion); + snapshotCmd.ExecuteNonQuery(); + state.AddWritten(1); + var insertSw = Stopwatch.StartNew(); + using var nativeCmd = PrepareNativeInsert(connection, transaction); + using var managedCmd = PrepareManagedInsert(connection, transaction); + using var connectionCmd = PrepareConnectionInsert(connection, transaction); + using var rootCmd = PrepareRootInsert(connection, transaction); + using var regionCmd = PrepareRegionInsert(connection, transaction); + using var allocationCmd = PrepareAllocationInsert(connection, transaction); + + foreach (var batch in queue.GetConsumingEnumerable(token)) + { + token.ThrowIfCancellationRequested(); + state.DecrementQueuedBatches(); + switch (batch.Kind) + { + case WriteBatchKind.NativeObjects: + var nativeSw = Stopwatch.StartNew(); + foreach 
(var row in batch.NativeObjects) + { + nativeCmd.Parameters[0].Value = row.NativeObjectIndex; + nativeCmd.Parameters[1].Value = row.InstanceId ?? string.Empty; + nativeCmd.Parameters[2].Value = row.Name ?? string.Empty; + nativeCmd.Parameters[3].Value = unchecked((long)row.SizeBytes); + nativeCmd.Parameters[4].Value = row.TypeIndex; + nativeCmd.Parameters[5].Value = row.NativeTypeName ?? string.Empty; + nativeCmd.Parameters[6].Value = row.IsDestroyed ? 1 : 0; + nativeCmd.ExecuteNonQuery(); + } + nativeSw.Stop(); + stats.NativeObjectRows += batch.NativeObjects.Length; + stats.NativeObjectInsertMs += nativeSw.ElapsedMilliseconds; + state.AddWritten(batch.NativeObjects.Length); + break; + + case WriteBatchKind.ManagedObjects: + var managedSw = Stopwatch.StartNew(); + foreach (var row in batch.ManagedObjects) + { + managedCmd.Parameters[0].Value = row.ManagedObjectIndex; + managedCmd.Parameters[1].Value = unchecked((long)row.Address); + managedCmd.Parameters[2].Value = row.SizeBytes; + managedCmd.Parameters[3].Value = row.TypeIndex; + managedCmd.Parameters[4].Value = row.ManagedTypeName ?? string.Empty; + managedCmd.Parameters[5].Value = row.NativeObjectIndex >= 0 ? row.NativeObjectIndex : DBNull.Value; + managedCmd.ExecuteNonQuery(); + } + managedSw.Stop(); + stats.ManagedObjectRows += batch.ManagedObjects.Length; + stats.ManagedObjectInsertMs += managedSw.ElapsedMilliseconds; + state.AddWritten(batch.ManagedObjects.Length); + break; + + case WriteBatchKind.Connections: + var connectionSw = Stopwatch.StartNew(); + foreach (var row in batch.Connections) + { + connectionCmd.Parameters[0].Value = row.FromKind ?? string.Empty; + connectionCmd.Parameters[1].Value = row.FromIndex; + connectionCmd.Parameters[2].Value = row.ToKind ?? string.Empty; + connectionCmd.Parameters[3].Value = row.ToIndex; + connectionCmd.Parameters[4].Value = row.ConnectionType ?? 
string.Empty; + connectionCmd.ExecuteNonQuery(); + } + connectionSw.Stop(); + stats.ConnectionRows += batch.Connections.Length; + stats.ConnectionInsertMs += connectionSw.ElapsedMilliseconds; + state.AddWritten(batch.Connections.Length); + break; + + case WriteBatchKind.NativeRoots: + var rootSw = Stopwatch.StartNew(); + foreach (var row in batch.NativeRoots) + { + rootCmd.Parameters[0].Value = row.RootIndex; + rootCmd.Parameters[1].Value = row.RootId; + rootCmd.Parameters[2].Value = row.AreaName ?? string.Empty; + rootCmd.Parameters[3].Value = row.ObjectName ?? string.Empty; + rootCmd.Parameters[4].Value = unchecked((long)row.AccumulatedSizeBytes); + rootCmd.ExecuteNonQuery(); + } + rootSw.Stop(); + stats.NativeRootRows += batch.NativeRoots.Length; + stats.NativeRootInsertMs += rootSw.ElapsedMilliseconds; + state.AddWritten(batch.NativeRoots.Length); + break; + + case WriteBatchKind.MemoryRegions: + var regionSw = Stopwatch.StartNew(); + foreach (var row in batch.MemoryRegions) + { + regionCmd.Parameters[0].Value = row.RegionIndex; + regionCmd.Parameters[1].Value = unchecked((long)row.AddressBase); + regionCmd.Parameters[2].Value = unchecked((long)row.AddressSize); + regionCmd.Parameters[3].Value = row.Name ?? string.Empty; + regionCmd.Parameters[4].Value = row.ParentRegionIndex >= 0 ? row.ParentRegionIndex : DBNull.Value; + regionCmd.Parameters[5].Value = row.FirstAllocationIndex >= 0 ? 
row.FirstAllocationIndex : DBNull.Value; + regionCmd.Parameters[6].Value = row.NumAllocations; + regionCmd.ExecuteNonQuery(); + } + regionSw.Stop(); + stats.MemoryRegionRows += batch.MemoryRegions.Length; + stats.MemoryRegionInsertMs += regionSw.ElapsedMilliseconds; + state.AddWritten(batch.MemoryRegions.Length); + break; + + case WriteBatchKind.NativeAllocations: + var allocationSw = Stopwatch.StartNew(); + foreach (var row in batch.NativeAllocations) + { + allocationCmd.Parameters[0].Value = row.AllocationIndex; + allocationCmd.Parameters[1].Value = unchecked((long)row.Address); + allocationCmd.Parameters[2].Value = unchecked((long)row.SizeBytes); + allocationCmd.Parameters[3].Value = unchecked((long)row.OverheadSizeBytes); + allocationCmd.Parameters[4].Value = unchecked((long)row.PaddingSizeBytes); + allocationCmd.Parameters[5].Value = row.MemoryRegionIndex >= 0 ? row.MemoryRegionIndex : DBNull.Value; + allocationCmd.ExecuteNonQuery(); + } + allocationSw.Stop(); + stats.NativeAllocationRows += batch.NativeAllocations.Length; + stats.NativeAllocationInsertMs += allocationSw.ElapsedMilliseconds; + state.AddWritten(batch.NativeAllocations.Length); + break; + } + } + insertSw.Stop(); + stats.TotalInsertMs = insertSw.ElapsedMilliseconds; + + var commitSw = Stopwatch.StartNew(); + transaction.Commit(); + commitSw.Stop(); + stats.CommitMs = commitSw.ElapsedMilliseconds; + + var indexSw = Stopwatch.StartNew(); + using (var indexTransaction = connection.BeginTransaction()) + { + ExecScript(connection, indexTransaction, CreateIndexesScript); + indexTransaction.Commit(); + } + indexSw.Stop(); + stats.IndexBuildMs = indexSw.ElapsedMilliseconds; + return stats; + } + catch + { + try + { + transaction.Rollback(); + } + catch + { + // Keep original failure. 
+ } + throw; + } + } + + #endregion + + #region Schema + + private static int RowsPerStatement(int columnCount) + { + var byParams = Math.Max(1, MaxSqlParametersPerStatement / Math.Max(1, columnCount)); + return Math.Max(1, Math.Min(DefaultRowsPerBulkInsert, byParams)); + } + + private static SqliteCommand PrepareNativeInsert(SqliteConnection connection, SqliteTransaction tx) + { + var command = connection.CreateCommand(); + command.Transaction = tx; + command.CommandText = "INSERT INTO native_objects(native_object_index, instance_id, name, size_bytes, type_index, native_type_name, is_destroyed) VALUES ($i, $id, $n, $s, $t, $tn, $d);"; + _ = command.Parameters.Add("$i", SqliteType.Integer); + _ = command.Parameters.Add("$id", SqliteType.Text); + _ = command.Parameters.Add("$n", SqliteType.Text); + _ = command.Parameters.Add("$s", SqliteType.Integer); + _ = command.Parameters.Add("$t", SqliteType.Integer); + _ = command.Parameters.Add("$tn", SqliteType.Text); + _ = command.Parameters.Add("$d", SqliteType.Integer); + return command; + } + + private static SqliteCommand PrepareManagedInsert(SqliteConnection connection, SqliteTransaction tx) + { + var command = connection.CreateCommand(); + command.Transaction = tx; + command.CommandText = "INSERT INTO managed_objects(managed_object_index, address, size_bytes, type_index, managed_type_name, native_object_index) VALUES ($i, $a, $s, $t, $tn, $ni);"; + _ = command.Parameters.Add("$i", SqliteType.Integer); + _ = command.Parameters.Add("$a", SqliteType.Integer); + _ = command.Parameters.Add("$s", SqliteType.Integer); + _ = command.Parameters.Add("$t", SqliteType.Integer); + _ = command.Parameters.Add("$tn", SqliteType.Text); + _ = command.Parameters.Add("$ni", SqliteType.Integer); + return command; + } + + private static SqliteCommand PrepareConnectionInsert(SqliteConnection connection, SqliteTransaction tx) + { + var command = connection.CreateCommand(); + command.Transaction = tx; + command.CommandText = "INSERT INTO 
/// <summary>
/// Builds the single-row INSERT command for <c>native_roots</c>, bound to <paramref name="tx"/>.
/// </summary>
/// <returns>A command with five typed parameters added in column order; the caller owns and disposes it.</returns>
private static SqliteCommand PrepareRootInsert(SqliteConnection connection, SqliteTransaction tx)
{
    var insert = connection.CreateCommand();
    insert.Transaction = tx;
    insert.CommandText = "INSERT INTO native_roots(root_index, root_id, area_name, object_name, accumulated_size_bytes) VALUES ($i, $rid, $a, $o, $s);";

    // Order matters: callers assign values by parameter position.
    (string Name, SqliteType Type)[] layout =
    [
        ("$i", SqliteType.Integer),
        ("$rid", SqliteType.Integer),
        ("$a", SqliteType.Text),
        ("$o", SqliteType.Text),
        ("$s", SqliteType.Integer),
    ];
    foreach (var (name, type) in layout)
        insert.Parameters.Add(name, type);

    return insert;
}

/// <summary>
/// Builds the single-row INSERT command for <c>memory_regions</c>, bound to <paramref name="tx"/>.
/// </summary>
/// <returns>A command with seven typed parameters added in column order; the caller owns and disposes it.</returns>
private static SqliteCommand PrepareRegionInsert(SqliteConnection connection, SqliteTransaction tx)
{
    var insert = connection.CreateCommand();
    insert.Transaction = tx;
    insert.CommandText = "INSERT INTO memory_regions(region_index, address_base, address_size, name, parent_region_index, first_allocation_index, num_allocations) VALUES ($i, $ab, $as, $n, $p, $f, $c);";

    // Order matters: callers assign values by parameter position.
    (string Name, SqliteType Type)[] layout =
    [
        ("$i", SqliteType.Integer),
        ("$ab", SqliteType.Integer),
        ("$as", SqliteType.Integer),
        ("$n", SqliteType.Text),
        ("$p", SqliteType.Integer),
        ("$f", SqliteType.Integer),
        ("$c", SqliteType.Integer),
    ];
    foreach (var (name, type) in layout)
        insert.Parameters.Add(name, type);

    return insert;
}
/// <summary>
/// Creates a command whose text is <paramref name="insertPrefix"/> followed by
/// <paramref name="rowCount"/> parenthesised value tuples of <paramref name="columnCount"/> placeholders each.
/// </summary>
/// <remarks>
/// Placeholders are named <c>$p0</c>, <c>$p1</c>, ... in row-major order, so the placeholder for
/// row r, column c is <c>$p{r * columnCount + c}</c>. No parameter objects are added here;
/// the caller adds them under the same names.
/// </remarks>
/// <returns>The command, bound to <paramref name="tx"/>; the caller owns and disposes it.</returns>
private static SqliteCommand CreateBulkInsertCommand(
    SqliteConnection connection,
    SqliteTransaction tx,
    string insertPrefix,
    int rowCount,
    int columnCount)
{
    var bulk = connection.CreateCommand();
    bulk.Transaction = tx;

    // Capacity is only an estimate; the builder grows if placeholder numbers get long.
    var text = new StringBuilder(insertPrefix.Length + rowCount * (columnCount * 6 + 3));
    text.Append(insertPrefix);

    var ordinal = 0; // running counter == r * columnCount + c
    for (var r = 0; r < rowCount; r++)
    {
        text.Append(r == 0 ? "(" : ",(");
        for (var c = 0; c < columnCount; c++)
        {
            if (c != 0)
                text.Append(',');
            text.Append("$p").Append(ordinal);
            ordinal++;
        }
        text.Append(')');
    }

    bulk.CommandText = text.ToString();
    return bulk;
}
command.Parameters.AddWithValue($"$p{p}", row.NativeObjectIndex); + command.Parameters.AddWithValue($"$p{p + 1}", row.InstanceId ?? string.Empty); + command.Parameters.AddWithValue($"$p{p + 2}", row.Name ?? string.Empty); + command.Parameters.AddWithValue($"$p{p + 3}", unchecked((long)row.SizeBytes)); + command.Parameters.AddWithValue($"$p{p + 4}", row.TypeIndex); + command.Parameters.AddWithValue($"$p{p + 5}", row.NativeTypeName ?? string.Empty); + command.Parameters.AddWithValue($"$p{p + 6}", row.IsDestroyed ? 1 : 0); + } + command.ExecuteNonQuery(); + } + } + + private static void WriteManagedObjectRows(SqliteConnection connection, SqliteTransaction tx, ManagedObjectRow[] rows) + { + const int cols = 6; + const string insertPrefix = "INSERT INTO managed_objects(managed_object_index, address, size_bytes, type_index, managed_type_name, native_object_index) VALUES "; + var rowsPerStatement = RowsPerStatement(cols); + for (var start = 0; start < rows.Length; start += rowsPerStatement) + { + var count = Math.Min(rowsPerStatement, rows.Length - start); + using var command = CreateBulkInsertCommand(connection, tx, insertPrefix, count, cols); + for (var i = 0; i < count; i++) + { + var row = rows[start + i]; + var p = i * cols; + command.Parameters.AddWithValue($"$p{p}", row.ManagedObjectIndex); + command.Parameters.AddWithValue($"$p{p + 1}", unchecked((long)row.Address)); + command.Parameters.AddWithValue($"$p{p + 2}", row.SizeBytes); + command.Parameters.AddWithValue($"$p{p + 3}", row.TypeIndex); + command.Parameters.AddWithValue($"$p{p + 4}", row.ManagedTypeName ?? string.Empty); + command.Parameters.AddWithValue($"$p{p + 5}", row.NativeObjectIndex >= 0 ? 
/// <summary>
/// Inserts <paramref name="rows"/> into <c>connections</c> using multi-row INSERT statements.
/// </summary>
/// <remarks>
/// Rows are sent in chunks of <c>RowsPerStatement(5)</c>. Each chunk gets its own command from
/// <c>CreateBulkInsertCommand</c>, which is disposed after it executes. Null strings are written as empty strings.
/// </remarks>
private static void WriteConnectionRows(SqliteConnection connection, SqliteTransaction tx, ConnectionRow[] rows)
{
    const int columnsPerRow = 5;
    const string sqlHead = "INSERT INTO connections(from_kind, from_index, to_kind, to_index, connection_type) VALUES ";

    var chunkSize = RowsPerStatement(columnsPerRow);
    var offset = 0;
    while (offset < rows.Length)
    {
        var taken = Math.Min(chunkSize, rows.Length - offset);
        using (var insert = CreateBulkInsertCommand(connection, tx, sqlHead, taken, columnsPerRow))
        {
            // Placeholder names are row-major, so a running counter matches the command text.
            var ordinal = 0;
            for (var k = 0; k < taken; k++)
            {
                var edge = rows[offset + k];
                insert.Parameters.AddWithValue($"$p{ordinal++}", edge.FromKind ?? string.Empty);
                insert.Parameters.AddWithValue($"$p{ordinal++}", edge.FromIndex);
                insert.Parameters.AddWithValue($"$p{ordinal++}", edge.ToKind ?? string.Empty);
                insert.Parameters.AddWithValue($"$p{ordinal++}", edge.ToIndex);
                insert.Parameters.AddWithValue($"$p{ordinal++}", edge.ConnectionType ?? string.Empty);
            }
            insert.ExecuteNonQuery();
        }
        offset += chunkSize;
    }
}
string.Empty); + command.Parameters.AddWithValue($"$p{p + 3}", row.ObjectName ?? string.Empty); + command.Parameters.AddWithValue($"$p{p + 4}", unchecked((long)row.AccumulatedSizeBytes)); + } + command.ExecuteNonQuery(); + } + } + + private static void WriteMemoryRegionRows(SqliteConnection connection, SqliteTransaction tx, MemoryRegionRow[] rows) + { + const int cols = 7; + const string insertPrefix = "INSERT INTO memory_regions(region_index, address_base, address_size, name, parent_region_index, first_allocation_index, num_allocations) VALUES "; + var rowsPerStatement = RowsPerStatement(cols); + for (var start = 0; start < rows.Length; start += rowsPerStatement) + { + var count = Math.Min(rowsPerStatement, rows.Length - start); + using var command = CreateBulkInsertCommand(connection, tx, insertPrefix, count, cols); + for (var i = 0; i < count; i++) + { + var row = rows[start + i]; + var p = i * cols; + command.Parameters.AddWithValue($"$p{p}", row.RegionIndex); + command.Parameters.AddWithValue($"$p{p + 1}", unchecked((long)row.AddressBase)); + command.Parameters.AddWithValue($"$p{p + 2}", unchecked((long)row.AddressSize)); + command.Parameters.AddWithValue($"$p{p + 3}", row.Name ?? string.Empty); + command.Parameters.AddWithValue($"$p{p + 4}", row.ParentRegionIndex >= 0 ? row.ParentRegionIndex : DBNull.Value); + command.Parameters.AddWithValue($"$p{p + 5}", row.FirstAllocationIndex >= 0 ? 
/// <summary>
/// Inserts <paramref name="rows"/> into <c>native_allocations</c> using multi-row INSERT statements.
/// </summary>
/// <remarks>
/// Rows are sent in chunks of <c>RowsPerStatement(6)</c>. Unsigned 64-bit fields are reinterpreted as
/// signed 64-bit values without overflow checking, and a negative <c>MemoryRegionIndex</c> is written as NULL.
/// </remarks>
private static void WriteNativeAllocationRows(SqliteConnection connection, SqliteTransaction tx, NativeAllocationRow[] rows)
{
    const int columnsPerRow = 6;
    const string sqlHead = "INSERT INTO native_allocations(allocation_index, address, size_bytes, overhead_size_bytes, padding_size_bytes, memory_region_index) VALUES ";

    var chunkSize = RowsPerStatement(columnsPerRow);
    var offset = 0;
    while (offset < rows.Length)
    {
        var taken = Math.Min(chunkSize, rows.Length - offset);
        using (var insert = CreateBulkInsertCommand(connection, tx, sqlHead, taken, columnsPerRow))
        {
            // Placeholder names are row-major, so a running counter matches the command text.
            var ordinal = 0;
            for (var k = 0; k < taken; k++)
            {
                var allocation = rows[offset + k];
                object regionRef = allocation.MemoryRegionIndex < 0 ? DBNull.Value : allocation.MemoryRegionIndex;

                insert.Parameters.AddWithValue($"$p{ordinal++}", allocation.AllocationIndex);
                insert.Parameters.AddWithValue($"$p{ordinal++}", unchecked((long)allocation.Address));
                insert.Parameters.AddWithValue($"$p{ordinal++}", unchecked((long)allocation.SizeBytes));
                insert.Parameters.AddWithValue($"$p{ordinal++}", unchecked((long)allocation.OverheadSizeBytes));
                insert.Parameters.AddWithValue($"$p{ordinal++}", unchecked((long)allocation.PaddingSizeBytes));
                insert.Parameters.AddWithValue($"$p{ordinal++}", regionRef);
            }
            insert.ExecuteNonQuery();
        }
        offset += chunkSize;
    }
}
/// <summary>
/// Runs <paramref name="sql"/> on <paramref name="connection"/> inside the transaction <paramref name="tx"/>.
/// Used for multi-statement scripts such as the index-creation script.
/// </summary>
/// <remarks>
/// Delegates to <c>Exec</c>. The two methods previously carried identical bodies; keeping one
/// implementation means a future change (timeouts, logging) cannot drift between them.
/// </remarks>
private static void ExecScript(SqliteConnection connection, SqliteTransaction tx, string sql)
    => Exec(connection, tx, sql);

/// <summary>
/// Executes <paramref name="sql"/> as a scalar query and converts the result to a 64-bit integer.
/// </summary>
/// <remarks>No transaction is assigned to the command created here.</remarks>
/// <returns>The scalar result passed through <see cref="Convert.ToInt64(object)"/>.</returns>
private static long QueryCount(SqliteConnection connection, string sql)
{
    using var cmd = connection.CreateCommand();
    cmd.CommandText = sql;
    return Convert.ToInt64(cmd.ExecuteScalar());
}
    // Secondary indexes for the exported tables. The writer runs this script through ExecScript
    // in its own transaction after the bulk-insert transaction has been committed, and records
    // the elapsed time as the index-build time.
    // The statements are plain CREATE INDEX (no IF NOT EXISTS).
    private const string CreateIndexesScript = """
CREATE INDEX idx_connections_from ON connections(from_kind, from_index);
CREATE INDEX idx_connections_to ON connections(to_kind, to_index);
CREATE INDEX idx_native_objects_instance_id ON native_objects(instance_id);
CREATE INDEX idx_native_objects_is_destroyed ON native_objects(is_destroyed);
CREATE INDEX idx_managed_objects_address ON managed_objects(address);
CREATE INDEX idx_memory_regions_address_base ON memory_regions(address_base);
CREATE INDEX idx_native_allocations_address ON native_allocations(address);
CREATE INDEX idx_native_allocations_region ON native_allocations(memory_region_index);
""";
/// <summary>
/// Identifies which table a pipeline batch belongs to; every batch carries rows for exactly one table.
/// </summary>
public enum WriteBatchKind
{
    /// <summary>Rows for the native objects table.</summary>
    NativeObjects = 0,

    /// <summary>Rows for the managed objects table.</summary>
    ManagedObjects = 1,

    /// <summary>Rows for the connections table.</summary>
    Connections = 2,

    /// <summary>Rows for the native roots table.</summary>
    NativeRoots = 3,

    /// <summary>Rows for the memory regions table.</summary>
    MemoryRegions = 4,

    /// <summary>Rows for the native allocations table.</summary>
    NativeAllocations = 5,
}
/// <summary>
/// Counters shared between the producers and the writer of the export pipeline:
/// the expected total, rows materialized, rows written, and batches currently queued.
/// All mutable counters are read and updated through <see cref="Interlocked"/> / <see cref="Volatile"/>.
/// </summary>
public sealed class PipelineState
{
    private long _materialized;
    private long _written;
    private int _queued;

    /// <summary>
    /// Creates state for one run.
    /// </summary>
    /// <param name="totalRows">Expected total row count; negative values are clamped to 0.</param>
    public PipelineState(long totalRows)
    {
        TotalRows = Math.Max(0, totalRows);
    }

    /// <summary>Total rows to process, as given at construction (never negative).</summary>
    public long TotalRows { get; }

    /// <summary>Rows materialized so far.</summary>
    public long MaterializedRows
    {
        get { return Interlocked.Read(ref _materialized); }
    }

    /// <summary>Rows written so far.</summary>
    public long WrittenRows
    {
        get { return Interlocked.Read(ref _written); }
    }

    /// <summary>Number of batches currently counted as queued.</summary>
    public int QueuedBatchCount
    {
        get { return Volatile.Read(ref _queued); }
    }

    /// <summary>Adds <paramref name="count"/> to the materialized-row counter.</summary>
    public void AddMaterialized(int count)
    {
        Interlocked.Add(ref _materialized, count);
    }

    /// <summary>Adds <paramref name="count"/> to the written-row counter.</summary>
    public void AddWritten(int count)
    {
        Interlocked.Add(ref _written, count);
    }

    /// <summary>Increments the queued-batch counter by one.</summary>
    public void IncrementQueuedBatches()
    {
        Interlocked.Increment(ref _queued);
    }

    /// <summary>Decrements the queued-batch counter by one.</summary>
    public void DecrementQueuedBatches()
    {
        Interlocked.Decrement(ref _queued);
    }
}
+ public int NativeObjects; + + /// Number of managed objects written. + public int ManagedObjects; + + /// Number of connections written. + public int Connections; + + /// Number of native roots written. + public int NativeRoots; + + /// Number of memory regions written. + public int MemoryRegions; + + /// Number of native allocations written. + public int NativeAllocations; + + /// Time spent materializing batches (ms). + public long MaterializeMs; + + /// Time spent in the writer (ms). + public long WriteMs; + + /// Total export time (ms); typically set by the CLI after the run. + public long TotalMs; + + /// Backend total insert time (ms). + public long BackendInsertMs; + + /// Backend commit time (ms). + public long BackendCommitMs; + + /// Backend index build time (ms). + public long BackendIndexBuildMs; + + /// Per-table insert times (ms). + public long NativeObjectInsertMs; + + /// Per-table insert times (ms). + public long ManagedObjectInsertMs; + + /// Per-table insert times (ms). + public long ConnectionInsertMs; + + /// Per-table insert times (ms). + public long NativeRootInsertMs; + + /// Per-table insert times (ms). + public long MemoryRegionInsertMs; + + /// Per-table insert times (ms). + public long NativeAllocationInsertMs; +} + +/// +/// Per-run statistics returned by : +/// row counts and timings for inserts, commit, and index build. +/// +public sealed class WriteStats +{ + /// Rows written per table. + public long NativeObjectRows; + + /// Rows written per table. + public long ManagedObjectRows; + + /// Rows written per table. + public long ConnectionRows; + + /// Rows written per table. + public long NativeRootRows; + + /// Rows written per table. + public long MemoryRegionRows; + + /// Rows written per table. + public long NativeAllocationRows; + + /// Insert time per table (ms). + public long NativeObjectInsertMs; + + /// Insert time per table (ms). + public long ManagedObjectInsertMs; + + /// Insert time per table (ms). 
/// <summary>
/// Abstraction for progress and status reporting during long-running operations.
/// Implemented by the CLI (e.g. ConsoleProgress) and passed into Core APIs
/// so that extraction, export, and report steps can report progress without depending on the host.
/// </summary>
public interface IProgressReporter
{
    /// <summary>
    /// Report a progress or status message.
    /// </summary>
    /// <param name="message">Message to report (e.g. "Extracting...", "Written 10000 rows").</param>
    /// <param name="force">If true, report immediately; otherwise the implementation may throttle (e.g. by time).</param>
    void Report(string message, bool force = false);
}
/// <summary>
/// In-memory container for all data extracted from a Unity memory snapshot (.snap).
/// Consumed by the export pipeline and validation.
/// </summary>
/// <remarks>
/// NOTE(review): the generic type arguments below were missing from this copy of the file and have
/// been restored from the row types and the per-table arrays on <c>WriteBatch</c>; confirm against the
/// original source.
/// </remarks>
public sealed class RawSnapshotData
{
    /// <summary>Metadata about the snapshot (path, export time, Unity version).</summary>
    public SnapshotInfo SnapshotInfo { get; set; } = new();

    /// <summary>Native Unity objects (e.g. textures, GameObjects).</summary>
    public List<NativeObjectRow> NativeObjects { get; } = [];

    /// <summary>Managed heap objects.</summary>
    public List<ManagedObjectRow> ManagedObjects { get; } = [];

    /// <summary>Edges between objects (from_kind/from_index → to_kind/to_index).</summary>
    public List<ConnectionRow> Connections { get; } = [];

    /// <summary>Native root references (e.g. Scene, DontDestroyOnLoad) with accumulated sizes.</summary>
    public List<NativeRootRow> NativeRoots { get; } = [];

    /// <summary>Native memory regions (hierarchy and address ranges).</summary>
    public List<MemoryRegionRow> MemoryRegions { get; } = [];

    /// <summary>Allocations within native memory regions.</summary>
    public List<NativeAllocationRow> NativeAllocations { get; } = [];

    /// <summary>
    /// Total number of data rows (all lists combined); used for pipeline progress.
    /// </summary>
    /// <remarks>
    /// The first operand is widened to <see cref="long"/> so the whole sum is computed in 64 bits;
    /// summing the <c>int</c> counts directly could wrap past <see cref="int.MaxValue"/>.
    /// </remarks>
    public long TotalRows =>
        (long)NativeObjects.Count
        + ManagedObjects.Count
        + Connections.Count
        + NativeRoots.Count
        + MemoryRegions.Count
        + NativeAllocations.Count;
}
"Texture2D", "GameObject"). + public string NativeTypeName; + + /// Whether the object is marked destroyed. + public bool IsDestroyed; +} + +/// +/// One row from the managed_objects table: a managed heap object. +/// +public struct ManagedObjectRow +{ + /// Zero-based index in the managed objects list. + public int ManagedObjectIndex; + + /// Address on the managed heap. + public ulong Address; + + /// Size in bytes. + public long SizeBytes; + + /// Index into the managed type descriptions. + public int TypeIndex; + + /// Resolved managed type name. + public string ManagedTypeName; + + /// Linked native object index, or -1 if none. + public long NativeObjectIndex; +} + +/// +/// One row from the connections table: an edge between two objects (e.g. reference, field). +/// +public struct ConnectionRow +{ + /// Source kind: "native_object" or "managed_object". + public string FromKind; + + /// Source object index (native_object_index or managed_object_index). + public long FromIndex; + + /// Target kind: "native_object" or "managed_object". + public string ToKind; + + /// Target object index. + public long ToIndex; + + /// Connection type label (e.g. "GCHandle", "Field"). + public string ConnectionType; +} + +/// +/// One row from the native_roots table: a root reference (e.g. Scene, DontDestroyOnLoad) with accumulated size. +/// +public struct NativeRootRow +{ + /// Zero-based root index. + public int RootIndex; + + /// Root ID from the snapshot. + public long RootId; + + /// Area name (e.g. "Scene", "DontDestroyOnLoad"). + public string AreaName; + + /// Object name for the root. + public string ObjectName; + + /// Accumulated size in bytes for this root. + public ulong AccumulatedSizeBytes; +} + +/// +/// One row from the memory_regions table: a native memory region (address range, hierarchy). +/// +public struct MemoryRegionRow +{ + /// Zero-based region index. + public int RegionIndex; + + /// Base address of the region. 
/// <summary>
/// Result of crawling the managed heap: discovered managed objects, connections between them
/// (and to native objects), and an address-to-index map.
/// </summary>
/// <remarks>
/// NOTE(review): the generic type arguments were missing from this copy of the file. They are restored
/// from how the crawler uses the members: it adds <c>ManagedObjectRow</c> values to
/// <see cref="ManagedObjects"/> and looks up an <c>int</c> index by <c>ulong</c> address in
/// <see cref="ManagedIndexByAddress"/>. The element type of <see cref="ManagedConnections"/> is
/// presumed to be <c>ConnectionRow</c> — confirm against the original source.
/// </remarks>
internal sealed class ManagedCrawlResult
{
    /// <summary>Discovered managed heap objects (index, address, size, type, native link).</summary>
    public List<ManagedObjectRow> ManagedObjects { get; } = [];

    /// <summary>Edges from the crawl: managed-to-managed, managed-to-native, native-to-managed.</summary>
    public List<ConnectionRow> ManagedConnections { get; } = [];

    /// <summary>Map from managed heap address to the object's index in <see cref="ManagedObjects"/>.</summary>
    public Dictionary<ulong, int> ManagedIndexByAddress { get; } = [];
}
/// <summary>
/// Builds the crawler for the given decoded snapshot: checks the VM pointer size, then builds the
/// managed heap sections, the type-info index and the managed-address-to-native-object map.
/// </summary>
/// <param name="snapshot">Decoded snapshot to crawl.</param>
/// <exception cref="InvalidOperationException">The VM pointer size is neither 4 nor 8.</exception>
public ManagedSnapshotCrawler(DecodedSnapshot snapshot)
{
    _snapshot = snapshot;
    _vm = snapshot.VirtualMachineInformation;

    // Only 32-bit and 64-bit layouts are supported; fail before building any lookup tables.
    var pointerSizeOk = _vm.PointerSize is 4 or 8;
    if (!pointerSizeOk)
    {
        throw new InvalidOperationException($"Unsupported VM pointer size: {_vm.PointerSize}");
    }

    _sections = BuildManagedHeapSections(snapshot);
    _typeInfoToIndex = BuildTypeInfoIndex(snapshot);
    _managedAddressToNativeObjectIndex = BuildManagedAddressToNativeMap(snapshot);
}
/// <summary>
/// Crawls the managed heap of <paramref name="snapshot"/> and returns the discovered objects,
/// connections and address-to-index map.
/// </summary>
/// <param name="snapshot">Decoded snapshot to crawl.</param>
/// <returns>The crawl result produced by a freshly constructed crawler.</returns>
public static ManagedCrawlResult Crawl(DecodedSnapshot snapshot)
    => new ManagedSnapshotCrawler(snapshot).CrawlInternal();

/// <summary>
/// Seeds the work queue from the non-zero GC handle targets, then drains it breadth-first:
/// every outgoing reference of a dequeued object is registered (which enqueues newly seen objects)
/// and recorded as a "managed_reference" edge; objects linked to a native object also get the two
/// bridge edges.
/// </summary>
private ManagedCrawlResult CrawlInternal()
{
    // Roots: each non-null GC handle target.
    var handleTargets = _snapshot.GcHandleTargets;
    for (var handle = 0; handle < handleTargets.Length; handle++)
    {
        var rootAddress = handleTargets[handle];
        if (rootAddress != 0)
            TryEnsureManagedObject(rootAddress, $"gc-handle[{handle}]");
    }

    while (_crawlQueue.Count > 0)
    {
        var currentAddress = _crawlQueue.Dequeue();
        var currentIndex = _result.ManagedIndexByAddress[currentAddress];
        var current = _result.ManagedObjects[currentIndex];

        foreach (var referenced in EnumerateOutgoingManagedReferences(currentAddress, current.TypeIndex))
        {
            if (referenced == 0)
                continue;

            // A null result means the target could not be parsed; no edge is recorded for it.
            var referencedIndex = TryEnsureManagedObject(referenced, $"reference from managed index {currentIndex}");
            if (referencedIndex.HasValue)
                AddManagedEdge(currentIndex, referencedIndex.Value, "managed_reference");
        }

        if (current.NativeObjectIndex < 0)
            continue;

        AddManagedToNativeEdge(currentIndex, current.NativeObjectIndex, "managed_native_bridge");
        AddNativeToManagedEdge(current.NativeObjectIndex, currentIndex, "native_gc_handle_bridge");
    }

    return _result;
}
TryEnsureManagedObject(ulong address, string reason) + { + if (_result.ManagedIndexByAddress.TryGetValue(address, out var existing)) + return existing; + + var parsed = ParseManagedObjectHeader(address, reason); + if (parsed is null) + return null; + + var managedIndex = _result.ManagedObjects.Count; + _result.ManagedIndexByAddress[address] = managedIndex; + _result.ManagedObjects.Add(new ManagedObjectRow + { + ManagedObjectIndex = managedIndex, + Address = address, + SizeBytes = parsed.Value.SizeBytes, + TypeIndex = parsed.Value.TypeIndex, + ManagedTypeName = _snapshot.ManagedTypeNames[parsed.Value.TypeIndex] ?? string.Empty, + NativeObjectIndex = _managedAddressToNativeObjectIndex.TryGetValue(address, out var nativeObjectIndex) ? nativeObjectIndex : -1, + }); + _crawlQueue.Enqueue(address); + return managedIndex; + } + + private ParsedManagedObject? ParseManagedObjectHeader(ulong address, string reason) + { + if (!TryReadPointer(address, out var ptrIdentity)) + return null; + if (!TryResolveTypeIndex(ptrIdentity, reason, out var typeIndex)) + return null; + var sizeBytes = ComputeObjectSizeBytes(address, typeIndex, reason); + if (sizeBytes <= 0) + return null; + if (!TryGetReadableWindow(address, checked((ulong)sizeBytes), out _, out _)) + return null; + return new ParsedManagedObject(typeIndex, sizeBytes); + } + + private bool TryResolveTypeIndex(ulong ptrIdentity, string reason, out int typeIndex) + { + typeIndex = 0; + if (_typeInfoToIndex.TryGetValue(ptrIdentity, out var direct)) + { + typeIndex = direct; + return true; + } + + if (!TryReadPointer(ptrIdentity, out var typeInfoPtr)) + return false; + + if (_typeInfoToIndex.TryGetValue(typeInfoPtr, out var indirect)) + { + typeIndex = indirect; + return true; + } + return false; + } + + private long ComputeObjectSizeBytes(ulong address, int typeIndex, string reason) + { + EnsureValidTypeIndex(typeIndex, reason); + if (IsArrayType(typeIndex)) + { + var length = ReadArrayLength(address, typeIndex, reason); + var 
elementTypeIndex = _snapshot.ManagedTypeBaseOrElementTypeIndices[typeIndex]; + if (elementTypeIndex < 0) + elementTypeIndex = typeIndex; + EnsureValidTypeIndex(elementTypeIndex, reason); + + var elementSize = IsValueType(elementTypeIndex) + ? _snapshot.ManagedTypeSizes[elementTypeIndex] + : checked((int)_vm.PointerSize); + if (elementSize < 0) + throw new InvalidOperationException($"Negative array element size for type '{GetTypeName(elementTypeIndex)}'. reason={reason}"); + + return checked((long)_vm.ArrayHeaderSize + checked((long)elementSize * length)); + } + + if (IsStringType(typeIndex)) + { + var length = ReadInt32Strict(address + _vm.ObjectHeaderSize, $"string length for {reason}"); + if (length < 0) + throw new InvalidOperationException($"Negative string length {length} at 0x{address:X16}. reason={reason}"); + return checked((long)_vm.ObjectHeaderSize + 4L + checked((long)length * 2L) + 2L); + } + + var typeSize = _snapshot.ManagedTypeSizes[typeIndex]; + if (typeSize < 0) + throw new InvalidOperationException($"Negative type size {typeSize} for '{GetTypeName(typeIndex)}'. reason={reason}"); + return IsValueType(typeIndex) + ? checked(typeSize + (long)_vm.ObjectHeaderSize) + : typeSize; + } + + private long ReadArrayLength(ulong address, int arrayTypeIndex, string reason) + { + var bounds = ReadPointerStrict(address + _vm.ArrayBoundsOffsetInHeader, $"array bounds for {reason}"); + if (bounds == 0) + return ReadInt32Strict(address + _vm.ArraySizeOffsetInHeader, $"array size for {reason}"); + + var rank = (_snapshot.ManagedTypeFlags[arrayTypeIndex] & TypeFlagArrayRankMask) >> 16; + if (rank <= 0) + throw new InvalidOperationException($"Invalid array rank {rank} for '{GetTypeName(arrayTypeIndex)}'. 
reason={reason}"); + + long length = 1; + for (var i = 0; i < rank; i++) + { + var dimensionLength = ReadInt32Strict(bounds + (ulong)(i * 8), $"array rank[{i}] length for {reason}"); + if (dimensionLength < 0) + throw new InvalidOperationException($"Negative array dimension length {dimensionLength} for '{GetTypeName(arrayTypeIndex)}'. reason={reason}"); + length = checked(length * dimensionLength); + } + + return length; + } + + private IEnumerable EnumerateOutgoingManagedReferences(ulong objectAddress, int objectTypeIndex) + { + if (IsStringType(objectTypeIndex)) + yield break; + + if (IsArrayType(objectTypeIndex)) + { + foreach (var reference in EnumerateArrayReferences(objectAddress, objectTypeIndex)) + yield return reference; + yield break; + } + + foreach (var reference in EnumerateReferenceTypeFieldReferences(objectAddress, objectTypeIndex)) + yield return reference; + } + + private IEnumerable EnumerateArrayReferences(ulong arrayAddress, int arrayTypeIndex) + { + var length = ReadArrayLength(arrayAddress, arrayTypeIndex, $"array refs for '{GetTypeName(arrayTypeIndex)}'"); + if (length == 0) + yield break; + + var elementTypeIndex = _snapshot.ManagedTypeBaseOrElementTypeIndices[arrayTypeIndex]; + if (elementTypeIndex < 0) + elementTypeIndex = arrayTypeIndex; + EnsureValidTypeIndex(elementTypeIndex, $"array element of {GetTypeName(arrayTypeIndex)}"); + + var arrayDataAddress = checked(arrayAddress + _vm.ArrayHeaderSize); + if (IsValueType(elementTypeIndex)) + { + var elementSize = _snapshot.ManagedTypeSizes[elementTypeIndex]; + if (elementSize < 0) + throw new InvalidOperationException($"Negative value-type array element size for '{GetTypeName(elementTypeIndex)}'."); + + for (long i = 0; i < length; i++) + { + var elementAddress = checked(arrayDataAddress + checked((ulong)(i * elementSize))); + foreach (var reference in EnumerateValueTypeReferences(elementAddress, elementTypeIndex, recursionDepth: 0)) + yield return reference; + } + } + else + { + for (long i 
= 0; i < length; i++) + { + var ptrAddress = checked(arrayDataAddress + checked((ulong)(i * (long)_vm.PointerSize))); + var targetAddress = ReadPointerStrict(ptrAddress, $"array element pointer for '{GetTypeName(arrayTypeIndex)}'"); + if (targetAddress != 0) + yield return targetAddress; + } + } + } + + private IEnumerable EnumerateReferenceTypeFieldReferences(ulong objectAddress, int typeIndex) + { + var instanceFields = GetInstanceFieldIndices(typeIndex); + for (var instanceFieldIdx = 0; instanceFieldIdx < instanceFields.Length; instanceFieldIdx++) + { + var fieldIndex = instanceFields[instanceFieldIdx]; + if ((uint)fieldIndex >= (uint)_snapshot.FieldTypeIndices.Length) + throw new InvalidOperationException($"Field index out of range: {fieldIndex} for type '{GetTypeName(typeIndex)}'."); + if (_snapshot.FieldIsStatic[fieldIndex] != 0) + continue; + + var fieldOffset = _snapshot.FieldOffsets[fieldIndex]; + if (fieldOffset < 0) + continue; + + var fieldTypeIndex = _snapshot.FieldTypeIndices[fieldIndex]; + EnsureValidTypeIndex(fieldTypeIndex, $"field '{_snapshot.FieldNames[fieldIndex]}' on '{GetTypeName(typeIndex)}'"); + + var fieldAddress = checked(objectAddress + (ulong)fieldOffset); + if (IsValueType(fieldTypeIndex)) + { + foreach (var reference in EnumerateValueTypeReferences(fieldAddress, fieldTypeIndex, recursionDepth: 0)) + yield return reference; + } + else + { + var targetAddress = ReadPointerStrict(fieldAddress, $"field '{_snapshot.FieldNames[fieldIndex]}' on '{GetTypeName(typeIndex)}'"); + if (targetAddress != 0) + yield return targetAddress; + } + } + } + + private IEnumerable EnumerateValueTypeReferences(ulong valueBaseAddress, int valueTypeIndex, int recursionDepth) + { + if (recursionDepth > 24) + throw new InvalidOperationException($"Value-type recursion depth exceeded for '{GetTypeName(valueTypeIndex)}'."); + + var instanceFields = GetInstanceFieldIndices(valueTypeIndex); + for (var instanceFieldIdx = 0; instanceFieldIdx < instanceFields.Length; 
instanceFieldIdx++) + { + var fieldIndex = instanceFields[instanceFieldIdx]; + if ((uint)fieldIndex >= (uint)_snapshot.FieldTypeIndices.Length) + throw new InvalidOperationException($"Value-type field index out of range: {fieldIndex} for '{GetTypeName(valueTypeIndex)}'."); + if (_snapshot.FieldIsStatic[fieldIndex] != 0) + continue; + + var adjustedOffset = _snapshot.FieldOffsets[fieldIndex] - (int)_vm.ObjectHeaderSize; + if (adjustedOffset < 0) + continue; + + var fieldTypeIndex = _snapshot.FieldTypeIndices[fieldIndex]; + EnsureValidTypeIndex(fieldTypeIndex, $"value-type field '{_snapshot.FieldNames[fieldIndex]}' on '{GetTypeName(valueTypeIndex)}'"); + + var fieldAddress = checked(valueBaseAddress + (ulong)adjustedOffset); + if (IsValueType(fieldTypeIndex)) + { + if (fieldTypeIndex == valueTypeIndex) + continue; + foreach (var nested in EnumerateValueTypeReferences(fieldAddress, fieldTypeIndex, recursionDepth + 1)) + yield return nested; + } + else + { + var targetAddress = ReadPointerStrict(fieldAddress, $"value-type field '{_snapshot.FieldNames[fieldIndex]}'"); + if (targetAddress != 0) + yield return targetAddress; + } + } + } + + private int[] GetInstanceFieldIndices(int typeIndex) + { + EnsureValidTypeIndex(typeIndex, "enumerate fields"); + if (_instanceFieldIndexCache.TryGetValue(typeIndex, out var cached)) + return cached; + + var chain = new List(8); + var visited = new HashSet(); + var current = typeIndex; + while (current >= 0) + { + if (!visited.Add(current)) + throw new InvalidOperationException($"Cyclic managed base-type chain detected at type index {current}."); + chain.Add(current); + current = _snapshot.ManagedTypeBaseOrElementTypeIndices[current]; + } + + var fields = new List(16); + for (var i = chain.Count - 1; i >= 0; i--) + { + var chainTypeIndex = chain[i]; + var fieldIndices = _snapshot.ManagedTypeFieldIndices[chainTypeIndex]; + for (var fieldIndex = 0; fieldIndex < fieldIndices.Length; fieldIndex++) + fields.Add(fieldIndices[fieldIndex]); + 
} + + cached = fields.ToArray(); + _instanceFieldIndexCache[typeIndex] = cached; + return cached; + } + + private void AddManagedEdge(long fromManagedIndex, long toManagedIndex, string type) + { + if (_edgeDedup.Add(new EdgeKey(fromManagedIndex, toManagedIndex, EdgeType.ManagedToManaged))) + { + _result.ManagedConnections.Add(new ConnectionRow + { + FromKind = "managed_object", + FromIndex = fromManagedIndex, + ToKind = "managed_object", + ToIndex = toManagedIndex, + ConnectionType = type, + }); + } + } + + private void AddManagedToNativeEdge(long fromManagedIndex, long toNativeIndex, string type) + { + if (_edgeDedup.Add(new EdgeKey(fromManagedIndex, toNativeIndex, EdgeType.ManagedToNative))) + { + _result.ManagedConnections.Add(new ConnectionRow + { + FromKind = "managed_object", + FromIndex = fromManagedIndex, + ToKind = "native_object", + ToIndex = toNativeIndex, + ConnectionType = type, + }); + } + } + + private void AddNativeToManagedEdge(long fromNativeIndex, long toManagedIndex, string type) + { + if (_edgeDedup.Add(new EdgeKey(fromNativeIndex, toManagedIndex, EdgeType.NativeToManaged))) + { + _result.ManagedConnections.Add(new ConnectionRow + { + FromKind = "native_object", + FromIndex = fromNativeIndex, + ToKind = "managed_object", + ToIndex = toManagedIndex, + ConnectionType = type, + }); + } + } + + private bool IsArrayType(int typeIndex) => (_snapshot.ManagedTypeFlags[typeIndex] & TypeFlagArray) != 0; + + private bool IsValueType(int typeIndex) => (_snapshot.ManagedTypeFlags[typeIndex] & TypeFlagValueType) != 0; + + private bool IsStringType(int typeIndex) + => string.Equals(_snapshot.ManagedTypeNames[typeIndex], "System.String", StringComparison.Ordinal); + + private string GetTypeName(int typeIndex) + => typeIndex >= 0 && typeIndex < _snapshot.ManagedTypeNames.Length + ? _snapshot.ManagedTypeNames[typeIndex] ?? 
string.Empty + : $"type#{typeIndex}"; + + private void EnsureValidTypeIndex(int typeIndex, string reason) + { + if (typeIndex < 0 || typeIndex >= _snapshot.ManagedTypeNames.Length) + throw new InvalidOperationException($"Invalid managed type index {typeIndex}. reason={reason}"); + } + + private ulong ReadPointerStrict(ulong address, string reason) + { + if (!TryReadPointer(address, out var value)) + throw new InvalidOperationException($"Unable to read pointer at 0x{address:X16}. reason={reason}"); + return value; + } + + private int ReadInt32Strict(ulong address, string reason) + { + if (!TryReadInt32(address, out var value)) + throw new InvalidOperationException($"Unable to read int32 at 0x{address:X16}. reason={reason}"); + return value; + } + + private void EnsureReadable(ulong address, long byteCount, string reason) + { + if (byteCount < 0) + throw new InvalidOperationException($"Negative readability check size {byteCount}. reason={reason}"); + if (!TryGetReadableWindow(address, checked((ulong)byteCount), out _, out _)) + throw new InvalidOperationException($"Managed heap read out of range at 0x{address:X16} len={byteCount}. 
reason={reason}"); + } + + private bool TryReadPointer(ulong address, out ulong value) + { + value = 0; + if (!TryGetReadableWindow(address, _vm.PointerSize, out var section, out var offset)) + return false; + + if (_vm.PointerSize == 8) + { + value = BinaryPrimitives.ReadUInt64LittleEndian(section.Bytes.AsSpan(offset, 8)); + return true; + } + + value = BinaryPrimitives.ReadUInt32LittleEndian(section.Bytes.AsSpan(offset, 4)); + return true; + } + + private bool TryReadInt32(ulong address, out int value) + { + value = 0; + if (!TryGetReadableWindow(address, 4, out var section, out var offset)) + return false; + value = BinaryPrimitives.ReadInt32LittleEndian(section.Bytes.AsSpan(offset, 4)); + return true; + } + + private bool TryGetReadableWindow(ulong address, ulong byteCount, out ManagedHeapSection section, out int offsetInSection) + { + section = default; + offsetInSection = 0; + if (_sections.Count == 0) + return false; + + var sectionIndex = FindSectionIndex(address); + if (sectionIndex < 0) + return false; + + var candidate = _sections[sectionIndex]; + var localOffset = checked((long)(address - candidate.StartAddress)); + if (localOffset < 0 || localOffset > candidate.Bytes.Length) + return false; + + if (byteCount > 0 && checked((ulong)localOffset + byteCount) > (ulong)candidate.Bytes.Length) + return false; + + section = candidate; + offsetInSection = (int)localOffset; + return true; + } + + private int FindSectionIndex(ulong address) + { + var lo = 0; + var hi = _sections.Count - 1; + var found = -1; + while (lo <= hi) + { + var mid = lo + ((hi - lo) / 2); + var start = _sections[mid].StartAddress; + if (start <= address) + { + found = mid; + lo = mid + 1; + } + else + { + hi = mid - 1; + } + } + + if (found < 0) + return -1; + + var section = _sections[found]; + return address < section.EndAddressExclusive ? 
found : -1; + } + + private static Dictionary BuildTypeInfoIndex(DecodedSnapshot snapshot) + { + var map = new Dictionary(snapshot.ManagedTypeInfoAddresses.Length); + for (var i = 0; i < snapshot.ManagedTypeInfoAddresses.Length; i++) + { + var typeInfoAddress = snapshot.ManagedTypeInfoAddresses[i]; + if (typeInfoAddress == 0) + continue; + map.TryAdd(typeInfoAddress, i); + } + return map; + } + + private static Dictionary BuildManagedAddressToNativeMap(DecodedSnapshot snapshot) + { + var gcHandleToNativeObject = new Dictionary(snapshot.NativeObjectGcHandleIndices.Length); + for (var nativeIndex = 0; nativeIndex < snapshot.NativeObjectGcHandleIndices.Length; nativeIndex++) + { + var gcHandleIndex = snapshot.NativeObjectGcHandleIndices[nativeIndex]; + if (gcHandleIndex >= 0) + gcHandleToNativeObject.TryAdd(gcHandleIndex, nativeIndex); + } + + var map = new Dictionary(gcHandleToNativeObject.Count); + foreach (var (gcHandleIndex, nativeObjectIndex) in gcHandleToNativeObject) + { + if (gcHandleIndex < 0 || gcHandleIndex >= snapshot.GcHandleTargets.Length) + continue; + var address = snapshot.GcHandleTargets[gcHandleIndex]; + if (address != 0) + map[address] = nativeObjectIndex; + } + + return map; + } + + private static List BuildManagedHeapSections(DecodedSnapshot snapshot) + { + var sections = new List(snapshot.ManagedHeapSectionStartAddresses.Length); + for (var i = 0; i < snapshot.ManagedHeapSectionStartAddresses.Length; i++) + { + sections.Add(new ManagedHeapSection(snapshot.ManagedHeapSectionStartAddresses[i], snapshot.ManagedHeapSectionBytes[i])); + } + + sections.Sort((a, b) => a.StartAddress.CompareTo(b.StartAddress)); + return sections; + } + + private readonly record struct ParsedManagedObject(int TypeIndex, long SizeBytes); + + private readonly record struct ManagedHeapSection(ulong StartAddress, byte[] Bytes) + { + public ulong EndAddressExclusive => StartAddress + (ulong)Bytes.Length; + } + + private enum EdgeType : byte + { + ManagedToManaged = 0, + 
ManagedToNative = 1, + NativeToManaged = 2, + } + + private readonly record struct EdgeKey(long FromIndex, long ToIndex, EdgeType Type); +} diff --git a/Core/Parser/SnapDataModel.cs b/Core/Parser/SnapDataModel.cs new file mode 100644 index 0000000..3984c62 --- /dev/null +++ b/Core/Parser/SnapDataModel.cs @@ -0,0 +1,239 @@ +namespace MemorySnapshotDataTools.Parser; + +/// Format of a snapshot entry: how element count and data are stored. +internal enum SnapEntryFormat : ushort +{ + /// Entry not present. + Undefined = 0, + + /// Single value; size stored in metadata. + SingleElement = 1, + + /// Fixed-size elements; count and element size in metadata. + ConstantSizeElementArray = 2, + + /// Variable-length elements; offsets array defines boundaries. + DynamicSizeElementArray = 3, +} + +/// Identifiers for snapshot file sections (metadata, native types, connections, heap, etc.). +internal enum SnapEntryType : ushort +{ + Metadata_Version = 0, + Metadata_RecordDate = 1, + Metadata_VirtualMachineInformation = 4, + NativeTypes_Name = 5, + NativeTypes_NativeBaseTypeArrayIndex = 6, + NativeObjects_NativeTypeArrayIndex = 7, + NativeObjects_HideFlags = 8, + NativeObjects_Flags = 9, + NativeObjects_InstanceId = 10, + NativeObjects_Name = 11, + NativeObjects_Size = 13, + GCHandles_Target = 15, + Connections_From = 16, + Connections_To = 17, + ManagedHeapSections_StartAddress = 18, + ManagedHeapSections_Bytes = 19, + TypeDescriptions_Flags = 22, + TypeDescriptions_Name = 23, + TypeDescriptions_Assembly = 24, + TypeDescriptions_FieldIndices = 25, + TypeDescriptions_BaseOrElementTypeIndex = 27, + TypeDescriptions_Size = 28, + TypeDescriptions_TypeInfoAddress = 29, + FieldDescriptions_Offset = 31, + FieldDescriptions_TypeIndex = 32, + FieldDescriptions_Name = 33, + FieldDescriptions_IsStatic = 34, + NativeRootReferences_Id = 35, + NativeRootReferences_AreaName = 36, + NativeRootReferences_ObjectName = 37, + NativeRootReferences_AccumulatedSize = 38, + 
NativeAllocations_MemoryRegionIndex = 39, + NativeAllocations_Address = 42, + NativeAllocations_Size = 43, + NativeAllocations_OverheadSize = 44, + NativeAllocations_PaddingSize = 45, + NativeMemoryRegions_Name = 46, + NativeMemoryRegions_ParentIndex = 47, + NativeMemoryRegions_AddressBase = 48, + NativeMemoryRegions_AddressSize = 49, + NativeMemoryRegions_FirstAllocationIndex = 50, + NativeMemoryRegions_NumAllocations = 51, + NativeMemoryLabels_Name = 52, + NativeObjects_GCHandleIndex = 58, + NativeObjects_GCHandleIndex_Legacy = 62, +} + +/// Format version constants used when decoding snapshot entries (e.g. instance IDs, heap sections). +internal static class SnapFormatVersion +{ + /// Version at which native connections use instance IDs. + public const uint NativeConnectionsAsInstanceIdsVersion = 10; + + /// Version at which entity IDs are 8-byte structs. + public const uint EntityIDAs8ByteStructs = 18; + + /// Version for memory label size and heap ID in heap section metadata. + public const uint MemLabelSizeAndHeapIdVersion = 12; +} + +/// +/// Decoded virtual machine layout from snapshot metadata (pointer size, header layout, allocation granularity). +/// Used by to interpret managed heap layout. +/// +public sealed class DecodedVirtualMachineInfo +{ + /// Size of a pointer in bytes (4 or 8). + public uint PointerSize { get; set; } + + /// Object header size in bytes. + public uint ObjectHeaderSize { get; set; } + + /// Array object header size in bytes. + public uint ArrayHeaderSize { get; set; } + + /// Offset of array bounds in the array header. + public uint ArrayBoundsOffsetInHeader { get; set; } + + /// Offset of array length/size in the array header. + public uint ArraySizeOffsetInHeader { get; set; } + + /// Allocation granularity in bytes. + public uint AllocationGranularity { get; set; } +} + +/// +/// Fully decoded in-memory snapshot: all native and managed metadata and raw arrays as read from the .snap file. +/// Produced by and consumed by and . 
+///
+public sealed class DecodedSnapshot
+{
+    /// <summary>Snapshot format version from metadata.</summary>
+    public uint FormatVersion { get; set; }
+
+    /// <summary>Record date in .NET ticks (UTC).</summary>
+    public long RecordDateTicksUtc { get; set; }
+
+    /// <summary>Native type display names.</summary>
+    public string[] NativeTypeNames { get; set; } = [];
+
+    /// <summary>Per-native-object index into the native type table (presumably <see cref="NativeTypeNames"/>; confirm against the decoder).</summary>
+    public int[] NativeObjectTypeIndices { get; set; } = [];
+
+    /// <summary>Per-native-object instance ID.</summary>
+    public ulong[] NativeObjectInstanceIds { get; set; } = [];
+
+    /// <summary>Per-native-object name.</summary>
+    public string[] NativeObjectNames { get; set; } = [];
+
+    /// <summary>Per-native-object size in bytes.</summary>
+    public ulong[] NativeObjectSizes { get; set; } = [];
+
+    /// <summary>Per-native-object flags (e.g. destroyed).</summary>
+    public int[] NativeObjectFlags { get; set; } = [];
+
+    /// <summary>Per-native-object GC handle index, or -1.</summary>
+    public int[] NativeObjectGcHandleIndices { get; set; } = [];
+
+    /// <summary>GC handle target addresses (managed heap).</summary>
+    public ulong[] GcHandleTargets { get; set; } = [];
+
+    /// <summary>Connection source unified indices.</summary>
+    public int[] ConnectionsFrom { get; set; } = [];
+
+    /// <summary>Connection target unified indices.</summary>
+    public int[] ConnectionsTo { get; set; } = [];
+
+    /// <summary>Native root reference IDs.</summary>
+    public long[] NativeRootIds { get; set; } = [];
+
+    /// <summary>Native root area names (e.g. Scene, DontDestroyOnLoad).</summary>
+    public string[] NativeRootAreaNames { get; set; } = [];
+
+    /// <summary>Native root object names.</summary>
+    public string[] NativeRootObjectNames { get; set; } = [];
+
+    /// <summary>Native root accumulated sizes in bytes.</summary>
+    public ulong[] NativeRootAccumulatedSizes { get; set; } = [];
+
+    /// <summary>Native memory region names.</summary>
+    public string[] NativeMemoryRegionNames { get; set; } = [];
+
+    /// <summary>Parent region index per region, or -1.</summary>
+    public int[] NativeMemoryRegionParentIndices { get; set; } = [];
+
+    /// <summary>Base address per region.</summary>
+    public ulong[] NativeMemoryRegionAddressBases { get; set; } = [];
+
+    /// <summary>Size in bytes per region.</summary>
+    public ulong[] NativeMemoryRegionAddressSizes { get; set; } = [];
+
+    /// <summary>First allocation index per region, or -1.</summary>
+    public int[] NativeMemoryRegionFirstAllocationIndices { get; set; } = [];
+
+    /// <summary>Number of allocations per region.</summary>
+    public int[] NativeMemoryRegionNumAllocations { get; set; } = [];
+
+    /// <summary>Native memory label names.</summary>
+    public string[] NativeMemoryLabelNames { get; set; } = [];
+
+    /// <summary>Native allocation addresses.</summary>
+    public ulong[] NativeAllocationAddresses { get; set; } = [];
+
+    /// <summary>Native allocation sizes in bytes.</summary>
+    public ulong[] NativeAllocationSizes { get; set; } = [];
+
+    /// <summary>Native allocation overhead sizes in bytes.</summary>
+    public ulong[] NativeAllocationOverheadSizes { get; set; } = [];
+
+    /// <summary>Native allocation padding sizes in bytes.</summary>
+    public ulong[] NativeAllocationPaddingSizes { get; set; } = [];
+
+    /// <summary>Memory region index per allocation, or -1.</summary>
+    public int[] NativeAllocationMemoryRegionIndices { get; set; } = [];
+
+    /// <summary>VM layout (pointer size, header offsets).</summary>
+    public DecodedVirtualMachineInfo VirtualMachineInformation { get; set; } = new();
+
+    /// <summary>Start address of each managed heap section.</summary>
+    public ulong[] ManagedHeapSectionStartAddresses { get; set; } = [];
+
+    /// <summary>Raw bytes of each managed heap section.</summary>
+    public byte[][] ManagedHeapSectionBytes { get; set; } = [];
+
+    /// <summary>Managed type flags (value type, array, etc.).</summary>
+    public int[] ManagedTypeFlags { get; set; } = [];
+
+    /// <summary>Managed type names.</summary>
+    public string[] ManagedTypeNames { get; set; } = [];
+
+    /// <summary>Managed type assembly names.</summary>
+    public string[] ManagedTypeAssemblies { get; set; } = [];
+
+    /// <summary>Base or element type index per managed type.</summary>
+    public int[] ManagedTypeBaseOrElementTypeIndices { get; set; } = [];
+
+    /// <summary>Managed type size in bytes.</summary>
+    public int[] ManagedTypeSizes { get; set; } = [];
+
+    /// <summary>Type info address per managed type (for type resolution on heap).</summary>
+    public ulong[] ManagedTypeInfoAddresses { get; set; } = [];
+
+    /// <summary>Per-type array of field description indices.</summary>
+    public int[][] ManagedTypeFieldIndices { get; set; } = [];
+
+    /// <summary>Field offset in bytes.</summary>
+    public int[] FieldOffsets { get; set; } = [];
+
+    /// <summary>Field type index.</summary>
+    public int[] FieldTypeIndices { get; set; } = [];
+
+    /// <summary>Field name.</summary>
+    public string[] FieldNames { get; set; } = [];
+
+    /// <summary>Non-zero if field is static.</summary>
+    public byte[] FieldIsStatic { get; set; } = [];
+}
+
diff --git a/Core/Parser/SnapReader.cs b/Core/Parser/SnapReader.cs
new file mode 100644
index 0000000..3bf006e
--- /dev/null
+++ b/Core/Parser/SnapReader.cs
@@ -0,0 +1,529 @@
+using System.Buffers.Binary;
+using System.Buffers;
+using System.Runtime.InteropServices;
+using System.Text;
+
+namespace MemorySnapshotDataTools.Parser;
+
+/// <summary>
+/// Low-level reader for Unity memory snapshot (.snap) files. Parses the file header, chapter directory,
+/// block and entry metadata, and provides typed access to snapshot entries (primitive arrays, strings, dynamic arrays).
+/// Call <see cref="Open"/> to create an instance; check entry presence and count (presumably via
+/// <c>HasEntry</c> and <c>GetEntryCount</c>) before reading.
+/// </summary>
+internal sealed class SnapReader : IDisposable
+{
+    // Magic numbers compared against values read from the file during Open.
+    private const uint HeaderSignature = 0xAEABCDCD;
+    private const uint DirectorySignature = 0xCDCDAEAB;
+    private const uint FooterSignature = 0xABCDCDAE;
+    // Only these section layout versions are accepted.
+    private const uint ChapterSectionVersion = 0x20170724;
+    private const uint BlockSectionVersion = 0x20170724;
+
+    private readonly FileStream _stream;
+    private readonly BinaryReader _reader;
+    // Entry metadata, indexed by the numeric value of SnapEntryType.
+    private readonly EntryData[] _entries;
+    // Block metadata, indexed by an entry's BlockIndex.
+    private readonly BlockData[] _blocks;
+
+    // Private: instances are created through Open, which validates the file first.
+    private SnapReader(FileStream stream, BinaryReader reader, EntryData[] entries, BlockData[] blocks)
+    {
+        _stream = stream;
+        _reader = reader;
+        _entries = entries;
+        _blocks = blocks;
+    }
+
+    /// <summary>
+    /// Opens a snapshot file and initializes the reader. Validates header/footer signatures and chapter directory, then loads block and entry metadata.
+    /// </summary>
+    /// <param name="snapshotPath">Path to the .snap file.</param>
+    /// <returns>A configured <see cref="SnapReader"/> ready for entry reads.</returns>
+    /// <exception cref="InvalidOperationException">If file format is invalid or unsupported.</exception>
+    public static SnapReader Open(string snapshotPath)
+    {
+        var file = new FileStream(snapshotPath, FileMode.Open, FileAccess.Read, FileShare.Read);
+        var binary = new BinaryReader(file, Encoding.UTF8, leaveOpen: true);
+
+        // Ownership of both handles passes to the returned SnapReader; on any failure they are released here.
+        var handedOver = false;
+        try
+        {
+            ValidateSignatures(binary, file.Length, out var chapterDirectoryOffset);
+
+            // Chapter directory: signature, version, then the block section offset.
+            file.Position = chapterDirectoryOffset;
+            var signature = binary.ReadUInt32();
+            var version = binary.ReadUInt32();
+            if (signature != DirectorySignature)
+                throw new InvalidOperationException($"Invalid snapshot chapter directory signature: 0x{signature:X8}");
+            if (version != ChapterSectionVersion)
+                throw new InvalidOperationException($"Unsupported chapter section version: 0x{version:X8}");
+
+            var blockSectionOffset = binary.ReadInt64();
+
+            // The entry directory follows the three fields just read.
+            var entryDirectoryOffset = chapterDirectoryOffset + sizeof(uint) + sizeof(uint) + sizeof(long);
+
+            // These helpers read through the shared reader, so their order is kept as-is.
+            var entryOffsets = ReadEntryOffsets(binary, entryDirectoryOffset);
+            var blockOffsets = ReadBlockOffsets(binary, blockSectionOffset);
+            var blockTable = ReadBlocks(binary, blockOffsets);
+            var entryTable = ReadEntries(binary, entryOffsets);
+
+            var snapReader = new SnapReader(file, binary, entryTable, blockTable);
+            handedOver = true;
+            return snapReader;
+        }
+        finally
+        {
+            if (!handedOver)
+            {
+                binary.Dispose();
+                file.Dispose();
+            }
+        }
+    }
+
+    /// <summary>Returns whether the snapshot contains data for the given entry type.</summary>
+    /// <param name="entryType">The snapshot entry type to check.</param>
+    /// <returns>True if the entry is present and defined.</returns>
+    public bool HasEntry(SnapEntryType entryType)
+    {
+        var slot = (int)entryType;
+        if (slot >= _entries.Length)
+            return false;
+        return _entries[slot].IsDefined;
+    }
+
+    /// <summary>Returns the number of elements in the given entry (1 for single-element, array length otherwise).</summary>
+    /// <param name="entryType">The snapshot entry type.</param>
+    /// <returns>Element count for the entry.</returns>
+    /// <exception cref="InvalidOperationException">If the entry is missing or index out of range.</exception>
+    public uint GetEntryCount(SnapEntryType entryType)
+    {
+        EnsureDefined(entryType);
+        return _entries[(int)entryType].Count;
+    }
+
+    /// <summary>Reads the snapshot format version number from metadata.</summary>
+    /// <returns>Format version (e.g. 10, 18).</returns>
+    public uint ReadMetadataVersion() => ReadSingle<uint>(SnapEntryType.Metadata_Version);
+
+    /// <summary>Reads the snapshot record date as .NET ticks (UTC), or 0 if the entry is missing.</summary>
+    /// <returns>Ticks value or 0.</returns>
+    public long ReadMetadataRecordDateTicks()
+    {
+        if (!HasEntry(SnapEntryType.Metadata_RecordDate))
+            return 0;
+        return ReadSingle<long>(SnapEntryType.Metadata_RecordDate);
+    }
+
+    /// <summary>
+    /// Reads an entry as an array of unmanaged primitives. Supports single-element, constant-size, and dynamic-size entry formats.
+    /// </summary>
+    /// <typeparam name="T">Unmanaged type (e.g. int, long, ulong).</typeparam>
+    /// <param name="entryType">The entry to read.</param>
+    /// <returns>Array of values; may be empty if the entry has no data.</returns>
+    /// <exception cref="InvalidOperationException">If entry is missing, format is unsupported, or size mismatch.</exception>
+    public T[] ReadPrimitiveArray<T>(SnapEntryType entryType) where T : unmanaged
+    {
+        EnsureDefined(entryType);
+        var entry = _entries[(int)entryType];
+
+        // Managed size, matching what MemoryMarshal.AsBytes/Read use. Marshal.SizeOf<T>() reports the
+        // marshalled size instead, which differs for bool and char and throws for enum types.
+        var elementSize = System.Runtime.CompilerServices.Unsafe.SizeOf<T>();
+
+        if (entry.Format == SnapEntryFormat.SingleElement)
+        {
+            var bytes = ReadConstEntryBytes(entry, 0, 1);
+            if (bytes.Length == 0)
+                return [];
+            if (bytes.Length % elementSize != 0)
+            {
+                throw new InvalidOperationException(
+                    $"Entry '{entryType}' byte-size {bytes.Length} is not divisible by element size {elementSize}.");
+            }
+
+            // A "single element" payload may still hold several T values back to back.
+            var singleCount = bytes.Length / elementSize;
+            var output = new T[singleCount];
+            bytes.AsSpan().CopyTo(MemoryMarshal.AsBytes(output.AsSpan()));
+            return output;
+        }
+
+        var count = checked((int)entry.Count);
+        if (count == 0)
+            return [];
+
+        if (entry.Format == SnapEntryFormat.ConstantSizeElementArray)
+        {
+            var expectedBytes = checked(count * elementSize);
+            var bytes = ReadConstEntryBytes(entry, 0, count);
+            if (bytes.Length != expectedBytes)
+                throw new InvalidOperationException($"Entry '{entryType}' byte-size mismatch. expected={expectedBytes}, actual={bytes.Length}");
+
+            var output = new T[count];
+            bytes.AsSpan().CopyTo(MemoryMarshal.AsBytes(output.AsSpan()));
+            return output;
+        }
+
+        if (entry.Format == SnapEntryFormat.DynamicSizeElementArray)
+        {
+            var output = new T[count];
+            var block = _blocks[checked((int)entry.BlockIndex)];
+
+            // One scratch buffer for the whole loop: stack memory for small T, a single heap array otherwise.
+            Span<byte> smallBuffer = stackalloc byte[256];
+            Span<byte> scratch = elementSize <= smallBuffer.Length
+                ? smallBuffer[..elementSize]
+                : new byte[elementSize];
+
+            for (var i = 0; i < count; i++)
+            {
+                GetDynamicElementBounds(entry, i, out var start, out var length);
+                if (length != elementSize)
+                    throw new InvalidOperationException(
+                        $"Dynamic entry '{entryType}' element {i} has unexpected size {length}, expected {elementSize}.");
+
+                ReadBlockRange(block, start, scratch);
+                output[i] = MemoryMarshal.Read<T>(scratch);
+            }
+            return output;
+        }
+
+        throw new InvalidOperationException($"Entry '{entryType}' has unsupported format '{entry.Format}'.");
+    }
+
+    /// <summary>
+    /// Reads an entry as an array of UTF-8 strings. The entry must be in dynamic-size element array format.
+    /// </summary>
+    /// <param name="entryType">The entry to read.</param>
+    /// <returns>Array of decoded strings.</returns>
+    /// <exception cref="InvalidOperationException">If entry is missing or not a dynamic string array.</exception>
+    public string[] ReadUtf8StringArray(SnapEntryType entryType)
+    {
+        EnsureDefined(entryType);
+        var entry = _entries[(int)entryType];
+        if (entry.Format != SnapEntryFormat.DynamicSizeElementArray)
+            throw new InvalidOperationException($"Entry '{entryType}' is not a dynamic string array.");
+
+        var count = checked((int)entry.Count);
+        var output = new string[count];
+        for (var i = 0; i < count; i++)
+        {
+            GetDynamicElementBounds(entry, i, out var start, out var length);
+            if (length == 0)
+            {
+                output[i] = string.Empty;
+                continue;
+            }
+
+            // Rent may hand back a larger buffer than requested, so always slice to the exact length.
+            var rented = ArrayPool<byte>.Shared.Rent(length);
+            try
+            {
+                ReadBlockRange(_blocks[checked((int)entry.BlockIndex)], start, rented.AsSpan(0, length));
+                output[i] = Encoding.UTF8.GetString(rented, 0, length);
+            }
+            finally
+            {
+                ArrayPool<byte>.Shared.Return(rented);
+            }
+        }
+
+        return output;
+    }
+
+    /// <summary>
+    /// Reads an entry as an array of variable-length byte arrays (dynamic-size element array format).
+    /// </summary>
+    /// <param name="entryType">The entry to read.</param>
+    /// <returns>Array of byte arrays, one per element.</returns>
+    /// <exception cref="InvalidOperationException">If entry is missing or not dynamic.</exception>
+    public byte[][] ReadDynamicByteArrays(SnapEntryType entryType)
+    {
+        EnsureDefined(entryType);
+        var entry = _entries[(int)entryType];
+        if (entry.Format != SnapEntryFormat.DynamicSizeElementArray)
+            throw new InvalidOperationException($"Entry '{entryType}' is not a dynamic array.");
+
+        var count = checked((int)entry.Count);
+        var output = new byte[count][];
+        for (var i = 0; i < count; i++)
+            output[i] = ReadDynamicElementBytes(entry, i);
+        return output;
+    }
+
+    /// <summary>
+    /// Reads an entry as an array of variable-length primitive arrays. Each element is a byte array decoded into T[].
+    /// </summary>
+    /// <typeparam name="T">Unmanaged element type.</typeparam>
+    /// <param name="entryType">The entry to read.</param>
+    /// <returns>Jagged array of primitive arrays.</returns>
+    /// <exception cref="InvalidOperationException">If entry is missing or element length is not divisible by sizeof(T).</exception>
+    public T[][] ReadDynamicPrimitiveArrays<T>(SnapEntryType entryType) where T : unmanaged
+    {
+        var bytes = ReadDynamicByteArrays(entryType);
+        var output = new T[bytes.Length][];
+
+        // Managed size, matching MemoryMarshal.Cast. Marshal.SizeOf<T>() reports the marshalled size,
+        // which differs for bool and char and throws for enum types.
+        var elementSize = System.Runtime.CompilerServices.Unsafe.SizeOf<T>();
+        for (var i = 0; i < bytes.Length; i++)
+        {
+            // Cast would silently drop trailing bytes, so reject ragged payloads explicitly.
+            if (bytes[i].Length % elementSize != 0)
+            {
+                throw new InvalidOperationException(
+                    $"Dynamic entry '{entryType}' element {i} length {bytes[i].Length} is not divisible by element size {elementSize}.");
+            }
+
+            output[i] = MemoryMarshal.Cast<byte, T>(bytes[i].AsSpan()).ToArray();
+        }
+
+        return output;
+    }
+
+    /// <summary>Releases the file stream and binary reader.</summary>
+    public void Dispose()
+    {
+        _reader.Dispose();
+        _stream.Dispose();
+    }
+
+    /// <summary>Reads the entry as a primitive array and returns its first element.</summary>
+    /// <exception cref="InvalidOperationException">If the entry decodes to zero elements.</exception>
+    private T ReadSingle<T>(SnapEntryType entryType) where T : unmanaged
+    {
+        var arr = ReadPrimitiveArray<T>(entryType);
+        if (arr.Length == 0)
+            throw new InvalidOperationException($"Entry '{entryType}' has no elements.");
+        return arr[0];
+    }
+
+    /// <summary>
+    /// Reads the raw bytes of a single-element entry, or of <paramref name="count"/> elements starting at
+    /// <paramref name="startIndex"/> of a constant-size array entry.
+    /// </summary>
+    /// <exception cref="InvalidOperationException">If the entry is neither of those formats.</exception>
+    private byte[] ReadConstEntryBytes(EntryData entry, int startIndex, int count)
+    {
+        // Single element: HeaderMeta is used as the offset inside the block and EntriesMeta as the byte length.
+        if (entry.Format == SnapEntryFormat.SingleElement && startIndex == 0 && count == 1)
+            return ReadBlockRange(_blocks[checked((int)entry.BlockIndex)], checked((long)entry.HeaderMeta), checked((int)entry.EntriesMeta));
+
+        if (entry.Format != SnapEntryFormat.ConstantSizeElementArray)
+            throw new InvalidOperationException($"Entry '{entry.EntryType}' is not a constant-size array.");
+
+        // Constant-size array: EntriesMeta is used as the per-element byte size.
+        var byteOffset = checked((long)entry.EntriesMeta * startIndex);
+        var byteLength = checked((int)(entry.EntriesMeta * (uint)count));
+        return ReadBlockRange(_blocks[checked((int)entry.BlockIndex)], byteOffset, byteLength);
+    }
+
+    /// <summary>Reads the raw bytes of one element of a dynamic-size entry.</summary>
+    private byte[] ReadDynamicElementBytes(EntryData entry, int elementIndex)
+    {
+        GetDynamicElementBounds(entry, elementIndex, out var start, out var length);
+        return ReadBlockRange(_blocks[checked((int)entry.BlockIndex)], start, length);
+    }
+
+    private static
void GetDynamicElementBounds(EntryData entry, int elementIndex, out long start, out int length) + { + if (entry.DynamicOffsets == null) + throw new InvalidOperationException($"Entry '{entry.EntryType}' has no dynamic offsets."); + if (elementIndex < 0 || elementIndex >= entry.DynamicOffsets.Length) + throw new ArgumentOutOfRangeException(nameof(elementIndex)); + + start = entry.DynamicOffsets[elementIndex]; + var end = elementIndex == entry.DynamicOffsets.Length - 1 + ? checked((long)entry.HeaderMeta) + : entry.DynamicOffsets[elementIndex + 1]; + length = checked((int)(end - start)); + if (length < 0) + throw new InvalidOperationException($"Entry '{entry.EntryType}' has invalid dynamic offsets."); + } + + private byte[] ReadBlockRange(BlockData block, long blockRelativeOffset, int byteLength) + { + var output = new byte[byteLength]; + if (byteLength == 0) + return output; + + ReadBlockRange(block, blockRelativeOffset, output); + return output; + } + + private void ReadBlockRange(BlockData block, long blockRelativeOffset, Span destination) + { + if (destination.Length == 0) + return; + + var chunkSize = checked((long)block.ChunkSize); + var readCursor = 0; + var offset = blockRelativeOffset; + while (readCursor < destination.Length) + { + var chunkIndex = checked((int)(offset / chunkSize)); + if (chunkIndex < 0 || chunkIndex >= block.ChunkOffsets.Length) + throw new InvalidOperationException("Chunk index out of range while reading snapshot block."); + + var offsetInChunk = offset % chunkSize; + var availableInChunk = chunkSize - offsetInChunk; + var toRead = (int)Math.Min(availableInChunk, destination.Length - readCursor); + var absoluteFileOffset = checked(block.ChunkOffsets[chunkIndex] + offsetInChunk); + + _stream.Position = absoluteFileOffset; + var read = _stream.Read(destination.Slice(readCursor, toRead)); + if (read != toRead) + throw new InvalidOperationException("Unexpected EOF while reading snapshot block."); + + readCursor += toRead; + offset += toRead; + 
} + } + + private void EnsureDefined(SnapEntryType entryType) + { + var idx = (int)entryType; + if (idx < 0 || idx >= _entries.Length) + throw new InvalidOperationException($"Entry type index out of range: {entryType}"); + if (!_entries[idx].IsDefined) + throw new InvalidOperationException($"Entry '{entryType}' is missing in this snapshot."); + } + + private static void ValidateSignatures(BinaryReader reader, long fileLength, out long chapterDirectoryOffset) + { + if (fileLength < 16) + throw new InvalidOperationException("Snapshot file is too small."); + + reader.BaseStream.Position = 0; + var headerSig = reader.ReadUInt32(); + if (headerSig != HeaderSignature) + throw new InvalidOperationException($"Invalid snapshot header signature: 0x{headerSig:X8}"); + + reader.BaseStream.Position = fileLength - sizeof(uint); + var footerSig = reader.ReadUInt32(); + if (footerSig != FooterSignature) + throw new InvalidOperationException($"Invalid snapshot footer signature: 0x{footerSig:X8}"); + + reader.BaseStream.Position = fileLength - sizeof(uint) - sizeof(long); + chapterDirectoryOffset = reader.ReadInt64(); + if (chapterDirectoryOffset <= 0 || chapterDirectoryOffset >= fileLength) + throw new InvalidOperationException("Snapshot chapter directory offset is invalid."); + } + + private static long[] ReadEntryOffsets(BinaryReader reader, long entryDirectoryOffset) + { + reader.BaseStream.Position = entryDirectoryOffset; + var entryCount = reader.ReadInt32(); + if (entryCount <= 0) + return []; + + var offsets = new long[entryCount]; + for (var i = 0; i < entryCount; i++) + offsets[i] = reader.ReadInt64(); + return offsets; + } + + private static long[] ReadBlockOffsets(BinaryReader reader, long blockSectionOffset) + { + reader.BaseStream.Position = blockSectionOffset; + var blockVersion = reader.ReadUInt32(); + if (blockVersion != BlockSectionVersion) + throw new InvalidOperationException($"Unsupported block section version: 0x{blockVersion:X8}"); + + var blockCount = 
reader.ReadInt32(); + if (blockCount <= 0) + throw new InvalidOperationException("Snapshot block section has no blocks."); + + var offsets = new long[blockCount]; + for (var i = 0; i < blockCount; i++) + offsets[i] = reader.ReadInt64(); + return offsets; + } + + private static BlockData[] ReadBlocks(BinaryReader reader, long[] blockOffsets) + { + var blocks = new BlockData[blockOffsets.Length]; + for (var i = 0; i < blockOffsets.Length; i++) + { + reader.BaseStream.Position = blockOffsets[i]; + var chunkSize = reader.ReadUInt64(); + var totalBytes = reader.ReadUInt64(); + if (chunkSize == 0) + throw new InvalidOperationException($"Block {i} has zero chunk size."); + + var offsetCount = (int)(totalBytes / chunkSize + (totalBytes % chunkSize == 0 ? 0UL : 1UL)); + var chunkOffsets = new long[offsetCount]; + for (var c = 0; c < offsetCount; c++) + chunkOffsets[c] = reader.ReadInt64(); + + blocks[i] = new BlockData(chunkSize, totalBytes, chunkOffsets); + } + + return blocks; + } + + private static EntryData[] ReadEntries(BinaryReader reader, long[] entryOffsets) + { + var entries = new EntryData[entryOffsets.Length]; + for (var i = 0; i < entries.Length; i++) + entries[i] = EntryData.Undefined((SnapEntryType)i); + + for (var i = 0; i < entryOffsets.Length; i++) + { + var offset = entryOffsets[i]; + if (offset == 0) + continue; + + reader.BaseStream.Position = offset; + var format = (SnapEntryFormat)reader.ReadUInt16(); + var blockIndex = reader.ReadUInt32(); + var entriesMeta = reader.ReadUInt32(); + var headerMeta = reader.ReadUInt64(); + long[]? 
dynamicOffsets = null; + + if (format == SnapEntryFormat.DynamicSizeElementArray) + { + var count = checked((int)entriesMeta); + dynamicOffsets = new long[count]; + for (var d = 0; d < count; d++) + dynamicOffsets[d] = reader.ReadInt64(); + + if (count > 0) + { + var totalSize = dynamicOffsets[count - 1]; + for (var d = count - 1; d >= 1; d--) + dynamicOffsets[d] = dynamicOffsets[d - 1]; + dynamicOffsets[0] = checked((long)headerMeta); + headerMeta = checked((ulong)totalSize); + } + } + + entries[i] = new EntryData( + (SnapEntryType)i, + true, + format, + blockIndex, + entriesMeta, + headerMeta, + dynamicOffsets); + } + + return entries; + } + + private sealed class BlockData + { + public BlockData(ulong chunkSize, ulong totalBytes, long[] chunkOffsets) + { + ChunkSize = chunkSize; + TotalBytes = totalBytes; + ChunkOffsets = chunkOffsets; + } + + public ulong ChunkSize { get; } + public ulong TotalBytes { get; } + public long[] ChunkOffsets { get; } + } + + private sealed class EntryData + { + public EntryData( + SnapEntryType entryType, + bool isDefined, + SnapEntryFormat format, + uint blockIndex, + uint entriesMeta, + ulong headerMeta, + long[]? dynamicOffsets) + { + EntryType = entryType; + IsDefined = isDefined; + Format = format; + BlockIndex = blockIndex; + EntriesMeta = entriesMeta; + HeaderMeta = headerMeta; + DynamicOffsets = dynamicOffsets; + } + + public static EntryData Undefined(SnapEntryType type) => new(type, false, SnapEntryFormat.Undefined, 0, 0, 0, null); + + public SnapEntryType EntryType { get; } + public bool IsDefined { get; } + public SnapEntryFormat Format { get; } + public uint BlockIndex { get; } + public uint EntriesMeta { get; } + public ulong HeaderMeta { get; } + public long[]? 
DynamicOffsets { get; } + public uint Count => Format switch + { + SnapEntryFormat.SingleElement => 1, + SnapEntryFormat.ConstantSizeElementArray => (uint)HeaderMeta, + SnapEntryFormat.DynamicSizeElementArray => EntriesMeta, + _ => 0 + }; + } +} + diff --git a/Core/Parser/SnapSectionDecoders.cs b/Core/Parser/SnapSectionDecoders.cs new file mode 100644 index 0000000..86afbd8 --- /dev/null +++ b/Core/Parser/SnapSectionDecoders.cs @@ -0,0 +1,423 @@ +namespace MemorySnapshotDataTools.Parser; + +/// +/// Decodes all snapshot sections from a into a single . +/// Reads metadata, native types/objects, connections, roots, memory regions, allocations, managed heap sections, type descriptions, and fields. +/// Validates array length consistency before returning. +/// +internal static class SnapSectionDecoders +{ + private const ulong HeapSectionTypeFlagMask = 1UL << 63; + + /// + /// Reads every required and optional entry from the snapshot and populates a . + /// + /// Open snapshot reader (e.g. from ). + /// A fully populated decoded snapshot; throws if required entries are missing or lengths are inconsistent. + /// If a required entry is missing or array lengths do not match. 
+    public static DecodedSnapshot DecodeAll(SnapReader reader)
+    {
+        var formatVersion = reader.ReadMetadataVersion();
+        var nativeObjectTypeIndices = ReadInts(reader, SnapEntryType.NativeObjects_NativeTypeArrayIndex);
+        var nativeObjectCount = nativeObjectTypeIndices.Length;
+        var nativeObjectInstanceIds = ReadInstanceIds(reader, formatVersion);
+        var nativeObjectGcHandleIndices = ReadNativeObjectGcHandleIndices(reader, formatVersion, nativeObjectCount);
+        var gcHandleTargets = ReadOptionalULongs(reader, SnapEntryType.GCHandles_Target);
+        var (connectionsFrom, connectionsTo) = ReadConnections(
+            reader,
+            formatVersion,
+            nativeObjectInstanceIds,
+            nativeObjectGcHandleIndices,
+            gcHandleTargets.Length);
+        var nativeMemoryRegionAddressBases = ReadULongs(reader, SnapEntryType.NativeMemoryRegions_AddressBase);
+        var nativeMemoryRegionCount = nativeMemoryRegionAddressBases.Length;
+        var nativeAllocationAddresses = ReadULongs(reader, SnapEntryType.NativeAllocations_Address);
+        var nativeAllocationCount = nativeAllocationAddresses.Length;
+
+        var snapshot = new DecodedSnapshot
+        {
+            FormatVersion = formatVersion,
+            RecordDateTicksUtc = reader.ReadMetadataRecordDateTicks(),
+            NativeObjectTypeIndices = nativeObjectTypeIndices,
+            NativeObjectInstanceIds = nativeObjectInstanceIds,
+            NativeObjectSizes = ReadULongs(reader, SnapEntryType.NativeObjects_Size),
+            NativeObjectFlags = ReadIntsWithCount(reader, SnapEntryType.NativeObjects_Flags, nativeObjectCount, 0),
+            NativeObjectGcHandleIndices = nativeObjectGcHandleIndices,
+            GcHandleTargets = gcHandleTargets,
+            ConnectionsFrom = connectionsFrom,
+            ConnectionsTo = connectionsTo,
+            NativeRootIds = ReadLongs(reader, SnapEntryType.NativeRootReferences_Id),
+            NativeRootAccumulatedSizes = ReadULongs(reader, SnapEntryType.NativeRootReferences_AccumulatedSize),
+            NativeMemoryRegionAddressBases = nativeMemoryRegionAddressBases,
+            NativeMemoryRegionAddressSizes = ReadULongsWithCount(reader, SnapEntryType.NativeMemoryRegions_AddressSize, nativeMemoryRegionCount),
+            NativeMemoryRegionParentIndices = ReadIntsWithCount(reader, SnapEntryType.NativeMemoryRegions_ParentIndex, nativeMemoryRegionCount, -1),
+            NativeMemoryRegionFirstAllocationIndices = ReadIntsWithCount(reader, SnapEntryType.NativeMemoryRegions_FirstAllocationIndex, nativeMemoryRegionCount, -1),
+            NativeMemoryRegionNumAllocations = ReadIntsWithCount(reader, SnapEntryType.NativeMemoryRegions_NumAllocations, nativeMemoryRegionCount, 0),
+            NativeAllocationAddresses = nativeAllocationAddresses,
+            NativeAllocationSizes = ReadULongsWithCount(reader, SnapEntryType.NativeAllocations_Size, nativeAllocationCount),
+            NativeAllocationOverheadSizes = ReadULongsWithCount(reader, SnapEntryType.NativeAllocations_OverheadSize, nativeAllocationCount),
+            NativeAllocationPaddingSizes = ReadULongsWithCount(reader, SnapEntryType.NativeAllocations_PaddingSize, nativeAllocationCount),
+            NativeAllocationMemoryRegionIndices = ReadIntsWithCount(reader, SnapEntryType.NativeAllocations_MemoryRegionIndex, nativeAllocationCount, -1),
+            VirtualMachineInformation = ReadVirtualMachineInfo(reader),
+            ManagedHeapSectionStartAddresses = ReadManagedHeapSectionStartAddresses(reader, formatVersion),
+            ManagedHeapSectionBytes = ReadRequiredDynamicBytes(reader, SnapEntryType.ManagedHeapSections_Bytes),
+            ManagedTypeFlags = ReadRequiredInts(reader, SnapEntryType.TypeDescriptions_Flags),
+            ManagedTypeNames = ReadRequiredStrings(reader, SnapEntryType.TypeDescriptions_Name),
+            ManagedTypeAssemblies = ReadRequiredStrings(reader, SnapEntryType.TypeDescriptions_Assembly),
+            ManagedTypeFieldIndices = ReadRequiredDynamicInts(reader, SnapEntryType.TypeDescriptions_FieldIndices),
+            ManagedTypeBaseOrElementTypeIndices = ReadRequiredInts(reader, SnapEntryType.TypeDescriptions_BaseOrElementTypeIndex),
+            ManagedTypeSizes = ReadRequiredInts(reader, SnapEntryType.TypeDescriptions_Size),
+            ManagedTypeInfoAddresses = ReadRequiredULongs(reader, SnapEntryType.TypeDescriptions_TypeInfoAddress),
+            FieldOffsets = ReadRequiredInts(reader, SnapEntryType.FieldDescriptions_Offset),
+            FieldTypeIndices = ReadRequiredInts(reader, SnapEntryType.FieldDescriptions_TypeIndex),
+            FieldNames = ReadRequiredStrings(reader, SnapEntryType.FieldDescriptions_Name),
+            FieldIsStatic = ReadRequiredBytes(reader, SnapEntryType.FieldDescriptions_IsStatic),
+        };
+
+        // Name arrays are read last; when an entry is absent or unreadable they are padded with
+        // empty strings up to the length of the matching index array.
+        snapshot.NativeTypeNames = ReadStringsWithCount(reader, SnapEntryType.NativeTypes_Name, 0);
+        snapshot.NativeObjectNames = ReadStringsWithCount(reader, SnapEntryType.NativeObjects_Name, snapshot.NativeObjectTypeIndices.Length);
+        snapshot.NativeRootAreaNames = ReadStringsWithCount(reader, SnapEntryType.NativeRootReferences_AreaName, snapshot.NativeRootIds.Length);
+        snapshot.NativeRootObjectNames = ReadStringsWithCount(reader, SnapEntryType.NativeRootReferences_ObjectName, snapshot.NativeRootIds.Length);
+        snapshot.NativeMemoryRegionNames = ReadStringsWithCount(reader, SnapEntryType.NativeMemoryRegions_Name, snapshot.NativeMemoryRegionAddressBases.Length);
+        snapshot.NativeMemoryLabelNames = ReadStrings(reader, SnapEntryType.NativeMemoryLabels_Name);
+
+        ValidateLengths(snapshot);
+        return snapshot;
+    }
+
+    /// <summary>Verifies that every group of parallel arrays in the decoded snapshot has a consistent length.</summary>
+    /// <exception cref="InvalidOperationException">Two arrays that must be parallel differ in length.</exception>
+    private static void ValidateLengths(DecodedSnapshot snapshot)
+    {
+        var nativeCount = snapshot.NativeObjectNames.Length;
+        if (nativeCount > 0)
+        {
+            EnsureArrayLength(nativeCount, snapshot.NativeObjectTypeIndices.Length, "NativeObjects_NativeTypeArrayIndex");
+            EnsureArrayLength(nativeCount, snapshot.NativeObjectInstanceIds.Length, "NativeObjects_InstanceId");
+            EnsureArrayLength(nativeCount, snapshot.NativeObjectSizes.Length, "NativeObjects_Size");
+            EnsureArrayLength(nativeCount, snapshot.NativeObjectGcHandleIndices.Length, "NativeObjects_GCHandleIndex");
+            if (snapshot.NativeObjectFlags.Length > 0)
+                EnsureArrayLength(nativeCount, snapshot.NativeObjectFlags.Length, "NativeObjects_Flags");
+        }
+
+        // Root and region names are indexed once per root/region during extraction, so an empty
+        // name array next to a non-empty id array must be rejected here rather than surfacing
+        // later as an IndexOutOfRangeException. Missing entries are already padded by DecodeAll.
+        var rootsCount = snapshot.NativeRootIds.Length;
+        EnsureArrayLength(rootsCount, snapshot.NativeRootAreaNames.Length, "NativeRootReferences_AreaName");
+        EnsureArrayLength(rootsCount, snapshot.NativeRootObjectNames.Length, "NativeRootReferences_ObjectName");
+        EnsureArrayLength(rootsCount, snapshot.NativeRootAccumulatedSizes.Length, "NativeRootReferences_AccumulatedSize");
+
+        EnsureArrayLength(snapshot.ConnectionsFrom.Length, snapshot.ConnectionsTo.Length, "Connections_To");
+
+        var regionCount = snapshot.NativeMemoryRegionAddressBases.Length;
+        EnsureArrayLength(regionCount, snapshot.NativeMemoryRegionAddressSizes.Length, "NativeMemoryRegions_AddressSize");
+        EnsureArrayLength(regionCount, snapshot.NativeMemoryRegionParentIndices.Length, "NativeMemoryRegions_ParentIndex");
+        EnsureArrayLength(regionCount, snapshot.NativeMemoryRegionFirstAllocationIndices.Length, "NativeMemoryRegions_FirstAllocationIndex");
+        EnsureArrayLength(regionCount, snapshot.NativeMemoryRegionNumAllocations.Length, "NativeMemoryRegions_NumAllocations");
+        EnsureArrayLength(regionCount, snapshot.NativeMemoryRegionNames.Length, "NativeMemoryRegions_Name");
+
+        var allocationCount = snapshot.NativeAllocationAddresses.Length;
+        EnsureArrayLength(allocationCount, snapshot.NativeAllocationSizes.Length, "NativeAllocations_Size");
+        EnsureArrayLength(allocationCount, snapshot.NativeAllocationOverheadSizes.Length, "NativeAllocations_OverheadSize");
+        EnsureArrayLength(allocationCount, snapshot.NativeAllocationPaddingSizes.Length, "NativeAllocations_PaddingSize");
+        EnsureArrayLength(allocationCount, snapshot.NativeAllocationMemoryRegionIndices.Length, "NativeAllocations_MemoryRegionIndex");
+
+        EnsureArrayLength(snapshot.ManagedHeapSectionStartAddresses.Length, snapshot.ManagedHeapSectionBytes.Length, "ManagedHeapSections_Bytes");
+
+        var managedTypeCount = snapshot.ManagedTypeNames.Length;
+        EnsureArrayLength(managedTypeCount, snapshot.ManagedTypeFlags.Length, "TypeDescriptions_Flags");
+        EnsureArrayLength(managedTypeCount, snapshot.ManagedTypeAssemblies.Length, "TypeDescriptions_Assembly");
+        EnsureArrayLength(managedTypeCount, snapshot.ManagedTypeFieldIndices.Length, "TypeDescriptions_FieldIndices");
+        EnsureArrayLength(managedTypeCount, snapshot.ManagedTypeBaseOrElementTypeIndices.Length, "TypeDescriptions_BaseOrElementTypeIndex");
+        EnsureArrayLength(managedTypeCount, snapshot.ManagedTypeSizes.Length, "TypeDescriptions_Size");
+        EnsureArrayLength(managedTypeCount, snapshot.ManagedTypeInfoAddresses.Length, "TypeDescriptions_TypeInfoAddress");
+
+        var fieldCount = snapshot.FieldNames.Length;
+        EnsureArrayLength(fieldCount, snapshot.FieldOffsets.Length, "FieldDescriptions_Offset");
+        EnsureArrayLength(fieldCount, snapshot.FieldTypeIndices.Length, "FieldDescriptions_TypeIndex");
+        EnsureArrayLength(fieldCount, snapshot.FieldIsStatic.Length, "FieldDescriptions_IsStatic");
+    }
+
+    /// <summary>Throws when <paramref name="actual"/> differs from <paramref name="expected"/>.</summary>
+    private static void EnsureArrayLength(int expected, int actual, string name)
+    {
+        if (expected != actual)
+            throw new InvalidOperationException($"Array length mismatch for {name}. expected={expected}, actual={actual}");
+    }
+
+    /// <summary>Reads a string array, or returns an empty array when the entry is absent.</summary>
+    private static string[] ReadStrings(SnapReader reader, SnapEntryType type)
+        => reader.HasEntry(type) ? reader.ReadUtf8StringArray(type) : [];
+
+    /// <summary>
+    /// Best-effort string read. When the entry is absent or cannot be read, returns
+    /// <paramref name="fallbackCount"/> empty strings; if that count is not positive, the entry's own
+    /// element count is tried instead, and an empty array is returned as the last resort.
+    /// </summary>
+    private static string[] ReadStringsWithCount(SnapReader reader, SnapEntryType type, int fallbackCount)
+    {
+        if (!reader.HasEntry(type))
+            return fallbackCount > 0 ? Enumerable.Repeat(string.Empty, fallbackCount).ToArray() : [];
+
+        try
+        {
+            return reader.ReadUtf8StringArray(type);
+        }
+        catch
+        {
+            // Deliberate best effort: names are optional decoration, so a failed read degrades to blanks.
+            var count = fallbackCount;
+            if (count <= 0)
+            {
+                try
+                {
+                    count = checked((int)reader.GetEntryCount(type));
+                }
+                catch
+                {
+                    count = 0;
+                }
+            }
+
+            return count > 0 ? Enumerable.Repeat(string.Empty, count).ToArray() : [];
+        }
+    }
+
+    /// <summary>Reads an int array, or returns an empty array when the entry is absent.</summary>
+    private static int[] ReadInts(SnapReader reader, SnapEntryType type)
+        => reader.HasEntry(type) ? reader.ReadPrimitiveArray<int>(type) : [];
+
+    /// <summary>Reads an int array from an entry that must exist.</summary>
+    private static int[] ReadRequiredInts(SnapReader reader, SnapEntryType type)
+    {
+        EnsureEntryExists(reader, type);
+        return reader.ReadPrimitiveArray<int>(type);
+    }
+
+    /// <summary>
+    /// Reads an optional int array; when it is absent, unreadable, or empty, returns
+    /// <paramref name="fallbackCount"/> copies of <paramref name="fallbackValue"/> instead.
+    /// </summary>
+    private static int[] ReadIntsWithCount(SnapReader reader, SnapEntryType type, int fallbackCount, int fallbackValue = 0)
+    {
+        var values = ReadOptionalInts(reader, type);
+        if (values.Length > 0)
+            return values;
+
+        return fallbackCount > 0 ? Enumerable.Repeat(fallbackValue, fallbackCount).ToArray() : [];
+    }
+
+    /// <summary>Reads a long array from an entry that must exist.</summary>
+    private static long[] ReadRequiredLongs(SnapReader reader, SnapEntryType type)
+    {
+        EnsureEntryExists(reader, type);
+        return reader.ReadPrimitiveArray<long>(type);
+    }
+
+    /// <summary>Reads a ulong array from an entry that must exist.</summary>
+    private static ulong[] ReadRequiredULongs(SnapReader reader, SnapEntryType type)
+    {
+        EnsureEntryExists(reader, type);
+        return reader.ReadPrimitiveArray<ulong>(type);
+    }
+
+    /// <summary>
+    /// Reads an optional ulong array; when it is absent, unreadable, or empty, returns
+    /// <paramref name="fallbackCount"/> zeros instead.
+    /// </summary>
+    private static ulong[] ReadULongsWithCount(SnapReader reader, SnapEntryType type, int fallbackCount)
+    {
+        var values = ReadOptionalULongs(reader, type);
+        if (values.Length > 0)
+            return values;
+
+        return fallbackCount > 0 ? new ulong[fallbackCount] : [];
+    }
+
+    /// <summary>Reads a byte array from an entry that must exist.</summary>
+    private static byte[] ReadRequiredBytes(SnapReader reader, SnapEntryType type)
+    {
+        EnsureEntryExists(reader, type);
+        return reader.ReadPrimitiveArray<byte>(type);
+    }
+
+    /// <summary>Reads a string array from an entry that must exist.</summary>
+    private static string[] ReadRequiredStrings(SnapReader reader, SnapEntryType type)
+    {
+        EnsureEntryExists(reader, type);
+        return reader.ReadUtf8StringArray(type);
+    }
+
+    /// <summary>Reads a jagged int array from a dynamic entry that must exist.</summary>
+    private static int[][] ReadRequiredDynamicInts(SnapReader reader, SnapEntryType type)
+    {
+        EnsureEntryExists(reader, type);
+        return reader.ReadDynamicPrimitiveArrays<int>(type);
+    }
+
+    /// <summary>Reads a jagged byte array from a dynamic entry that must exist.</summary>
+    private static byte[][] ReadRequiredDynamicBytes(SnapReader reader, SnapEntryType type)
+    {
+        EnsureEntryExists(reader, type);
+        return reader.ReadDynamicByteArrays(type);
+    }
+
+    /// <summary>Reads the virtual machine information entry and maps its first six values onto named fields.</summary>
+    /// <exception cref="InvalidOperationException">The entry is missing or holds fewer than six values.</exception>
+    private static DecodedVirtualMachineInfo ReadVirtualMachineInfo(SnapReader reader)
+    {
+        EnsureEntryExists(reader, SnapEntryType.Metadata_VirtualMachineInformation);
+        // NOTE(review): element type taken from the error message wording ("uints") — confirm it
+        // matches the field types declared on DecodedVirtualMachineInfo.
+        var values = reader.ReadPrimitiveArray<uint>(SnapEntryType.Metadata_VirtualMachineInformation);
+        if (values.Length < 6)
+        {
+            throw new InvalidOperationException(
+                $"Metadata_VirtualMachineInformation expected at least 6 uints, found {values.Length}.");
+        }
+
+        return new DecodedVirtualMachineInfo
+        {
+            PointerSize = values[0],
+            ObjectHeaderSize = values[1],
+            ArrayHeaderSize = values[2],
+            ArrayBoundsOffsetInHeader = values[3],
+            ArraySizeOffsetInHeader = values[4],
+            AllocationGranularity = values[5],
+        };
+    }
+
+    /// <summary>
+    /// Reads managed heap section start addresses. From <c>MemLabelSizeAndHeapIdVersion</c> onwards the
+    /// top bit of each address is cleared before the value is returned.
+    /// </summary>
+    private static ulong[] ReadManagedHeapSectionStartAddresses(SnapReader reader, uint formatVersion)
+    {
+        var starts = ReadRequiredULongs(reader, SnapEntryType.ManagedHeapSections_StartAddress);
+        if (formatVersion < SnapFormatVersion.MemLabelSizeAndHeapIdVersion)
+            return starts;
+
+        var unmasked = new ulong[starts.Length];
+        for (var i = 0; i < starts.Length; i++)
+            unmasked[i] = starts[i] & ~HeapSectionTypeFlagMask;
+        return unmasked;
+    }
+
+    /// <summary>
+    /// Reads native object instance ids as 64-bit values. Snapshots older than
+    /// <c>EntityIDAs8ByteStructs</c> store 32-bit ids, which are zero-extended.
+    /// </summary>
+    private static ulong[] ReadInstanceIds(SnapReader reader, uint formatVersion)
+    {
+        if (!reader.HasEntry(SnapEntryType.NativeObjects_InstanceId))
+            return [];
+
+        if (formatVersion >= SnapFormatVersion.EntityIDAs8ByteStructs)
+            return reader.ReadPrimitiveArray<ulong>(SnapEntryType.NativeObjects_InstanceId);
+
+        var ids32 = reader.ReadPrimitiveArray<int>(SnapEntryType.NativeObjects_InstanceId);
+        var ids = new ulong[ids32.Length];
+        for (var i = 0; i < ids32.Length; i++)
+            ids[i] = unchecked((uint)ids32[i]);
+        return ids;
+    }
+
+    /// <summary>
+    /// Returns one GC handle index per native object, using -1 where none is known. Falls back to the
+    /// legacy entry when the current one is absent or empty, and pads or truncates to
+    /// <paramref name="nativeObjectCount"/> when the stored length differs.
+    /// </summary>
+    private static int[] ReadNativeObjectGcHandleIndices(SnapReader reader, uint formatVersion, int nativeObjectCount)
+    {
+        if (formatVersion < SnapFormatVersion.NativeConnectionsAsInstanceIdsVersion)
+            return Enumerable.Repeat(-1, nativeObjectCount).ToArray();
+
+        var gcHandleIndices = ReadOptionalInts(reader, SnapEntryType.NativeObjects_GCHandleIndex);
+        if (gcHandleIndices.Length == 0)
+            gcHandleIndices = ReadOptionalInts(reader, SnapEntryType.NativeObjects_GCHandleIndex_Legacy);
+        if (gcHandleIndices.Length == nativeObjectCount)
+            return gcHandleIndices;
+
+        var fallback = Enumerable.Repeat(-1, nativeObjectCount).ToArray();
+        if (gcHandleIndices.Length == 0)
+            return fallback;
+
+        Array.Copy(gcHandleIndices, fallback, Math.Min(gcHandleIndices.Length, fallback.Length));
+        return fallback;
+    }
+
+    /// <summary>
+    /// Reads the connection arrays and returns them as unified indices, where indices below
+    /// <paramref name="gcHandleCount"/> address GC handles and the rest address native objects
+    /// (offset by <paramref name="gcHandleCount"/>). Newer snapshots store instance ids, which are
+    /// remapped here; one extra native-object-to-GC-handle edge is appended per linked object.
+    /// </summary>
+    /// <exception cref="InvalidOperationException">The from/to arrays differ in length.</exception>
+    private static (int[] from, int[] to) ReadConnections(
+        SnapReader reader,
+        uint formatVersion,
+        ulong[] nativeObjectInstanceIds,
+        int[] nativeObjectGcHandleIndices,
+        int gcHandleCount)
+    {
+        if (!reader.HasEntry(SnapEntryType.Connections_From) || !reader.HasEntry(SnapEntryType.Connections_To))
+            return ([], []);
+
+        if (formatVersion < SnapFormatVersion.NativeConnectionsAsInstanceIdsVersion)
+        {
+            // Older snapshots already store unified indices.
+            var fromUnified = reader.ReadPrimitiveArray<int>(SnapEntryType.Connections_From);
+            var toUnified = reader.ReadPrimitiveArray<int>(SnapEntryType.Connections_To);
+            if (fromUnified.Length != toUnified.Length)
+                throw new InvalidOperationException($"Array length mismatch for Connections_To. expected={fromUnified.Length}, actual={toUnified.Length}");
+            return (fromUnified, toUnified);
+        }
+
+        ulong[] fromInstanceIds;
+        ulong[] toInstanceIds;
+        if (formatVersion >= SnapFormatVersion.EntityIDAs8ByteStructs)
+        {
+            fromInstanceIds = reader.ReadPrimitiveArray<ulong>(SnapEntryType.Connections_From);
+            toInstanceIds = reader.ReadPrimitiveArray<ulong>(SnapEntryType.Connections_To);
+        }
+        else
+        {
+            // 32-bit ids are zero-extended so they compare equal to the ids from ReadInstanceIds.
+            var from32 = reader.ReadPrimitiveArray<int>(SnapEntryType.Connections_From);
+            var to32 = reader.ReadPrimitiveArray<int>(SnapEntryType.Connections_To);
+            fromInstanceIds = new ulong[from32.Length];
+            toInstanceIds = new ulong[to32.Length];
+            for (var i = 0; i < from32.Length; i++)
+                fromInstanceIds[i] = unchecked((uint)from32[i]);
+            for (var i = 0; i < to32.Length; i++)
+                toInstanceIds[i] = unchecked((uint)to32[i]);
+        }
+
+        if (fromInstanceIds.Length != toInstanceIds.Length)
+            throw new InvalidOperationException($"Array length mismatch for Connections_To. expected={fromInstanceIds.Length}, actual={toInstanceIds.Length}");
+
+        var instanceIdToUnifiedIndex = new Dictionary<ulong, int>(nativeObjectInstanceIds.Length);
+        var instanceIdToGcHandleIndex = new Dictionary<ulong, int>(nativeObjectInstanceIds.Length);
+        for (var i = 0; i < nativeObjectInstanceIds.Length; i++)
+        {
+            var instanceId = nativeObjectInstanceIds[i];
+            instanceIdToUnifiedIndex[instanceId] = gcHandleCount + i;
+            var gcHandleIndex = i < nativeObjectGcHandleIndices.Length ? nativeObjectGcHandleIndices[i] : -1;
+
+            // A handle index at or beyond gcHandleCount would land in the native-object half of the
+            // unified index space and be decoded as an unrelated native object, so it is ignored.
+            if (gcHandleIndex >= 0 && gcHandleIndex < gcHandleCount)
+                instanceIdToGcHandleIndex[instanceId] = gcHandleIndex;
+        }
+
+        var remappedFrom = new List<int>(fromInstanceIds.Length + instanceIdToGcHandleIndex.Count);
+        var remappedTo = new List<int>(toInstanceIds.Length + instanceIdToGcHandleIndex.Count);
+        for (var i = 0; i < fromInstanceIds.Length; i++)
+        {
+            // Connections whose endpoints are not known native objects are dropped.
+            if (!instanceIdToUnifiedIndex.TryGetValue(fromInstanceIds[i], out var fromUnified))
+                continue;
+            if (!instanceIdToUnifiedIndex.TryGetValue(toInstanceIds[i], out var toUnified))
+                continue;
+            remappedFrom.Add(fromUnified);
+            remappedTo.Add(toUnified);
+        }
+
+        foreach (var (instanceId, gcHandleIndex) in instanceIdToGcHandleIndex)
+        {
+            if (!instanceIdToUnifiedIndex.TryGetValue(instanceId, out var fromUnified))
+                continue;
+            remappedFrom.Add(fromUnified);
+            remappedTo.Add(gcHandleIndex);
+        }
+
+        return (remappedFrom.ToArray(), remappedTo.ToArray());
+    }
+
+    /// <summary>Reads an int array, returning an empty array when the entry is absent or the read throws.</summary>
+    private static int[] ReadOptionalInts(SnapReader reader, SnapEntryType type)
+    {
+        if (!reader.HasEntry(type))
+            return [];
+
+        try
+        {
+            return reader.ReadPrimitiveArray<int>(type);
+        }
+        catch
+        {
+            // Deliberate best effort: optional data that cannot be read is treated as absent.
+            return [];
+        }
+    }
+
+    /// <summary>Reads a long array, or returns an empty array when the entry is absent.</summary>
+    private static long[] ReadLongs(SnapReader reader, SnapEntryType type)
+        => reader.HasEntry(type) ? reader.ReadPrimitiveArray<long>(type) : [];
+
+    /// <summary>Reads a ulong array, or returns an empty array when the entry is absent.</summary>
+    private static ulong[] ReadULongs(SnapReader reader, SnapEntryType type)
+        => reader.HasEntry(type) ? reader.ReadPrimitiveArray<ulong>(type) : [];
+
+    /// <summary>Reads a ulong array, returning an empty array when the entry is absent or the read throws.</summary>
+    private static ulong[] ReadOptionalULongs(SnapReader reader, SnapEntryType type)
+    {
+        if (!reader.HasEntry(type))
+            return [];
+        try
+        {
+            return reader.ReadPrimitiveArray<ulong>(type);
+        }
+        catch
+        {
+            // Deliberate best effort: optional data that cannot be read is treated as absent.
+            return [];
+        }
+    }
+
+    /// <summary>Throws when the snapshot does not contain <paramref name="type"/>.</summary>
+    private static void EnsureEntryExists(SnapReader reader, SnapEntryType type)
+    {
+        if (!reader.HasEntry(type))
+            throw new InvalidOperationException($"Required snapshot entry '{type}' is missing.");
+    }
+}
+
diff --git a/Core/Parser/SnapshotBridge.cs b/Core/Parser/SnapshotBridge.cs
new file mode 100644
index 0000000..be50aa6
--- /dev/null
+++ b/Core/Parser/SnapshotBridge.cs
@@ -0,0 +1,261 @@
+using System.Globalization;
+using MemorySnapshotDataTools.Parser;
+
+namespace MemorySnapshotDataTools.Parser;
+
+/// <summary>
+/// Bridge between raw Unity .snap file format and <see cref="RawSnapshotData"/>.
+/// Reads a snapshot via <see cref="SnapReader"/>, decodes sections with <see cref="SnapSectionDecoders"/>,
+/// then extracts native objects, managed heap objects, connections, roots, memory regions, and allocations.
+/// </summary>
+public static class SnapshotBridge
+{
+    /// <summary>
+    /// Reads the snapshot from disk, decodes all sections, and extracts raw data into a <see cref="RawSnapshotData"/> instance.
+    /// Reports progress via <paramref name="progress"/> and respects <paramref name="token"/> for cancellation.
+    /// </summary>
+    /// <param name="snapshotPath">Full path to the .snap file.</param>
+    /// <param name="progress">Reporter for status messages during extraction.</param>
+    /// <param name="token">Cancellation token.</param>
+    /// <returns>Fully populated raw snapshot data, validated in memory.</returns>
+    /// <exception cref="OperationCanceledException">When <paramref name="token"/> is cancelled.</exception>
+    public static RawSnapshotData ExtractRawData(string snapshotPath, IProgressReporter progress, CancellationToken token)
+    {
+        // Honour a token that is already cancelled before opening and decoding the whole file.
+        token.ThrowIfCancellationRequested();
+        progress.Report("Reading snapshot sections...");
+        using var reader = SnapReader.Open(snapshotPath);
+        var decoded = SnapSectionDecoders.DecodeAll(reader);
+        token.ThrowIfCancellationRequested();
+        return ExtractFromDecoded(decoded, snapshotPath);
+    }
+
+    ///
+    /// Extracts raw snapshot data from an already-decoded snapshot. Used by tests and by ExtractRawData after decoding.
+ /// Populates native roots, memory regions, allocations, native objects, managed objects (via crawler), and connections, then validates. + /// + /// Decoded snapshot from . + /// Path to the source .snap file (stored in ). + /// Validated . + public static RawSnapshotData ExtractFromDecoded(DecodedSnapshot decoded, string snapshotPath) + { + var data = new RawSnapshotData + { + SnapshotInfo = new SnapshotInfo + { + SnapshotPath = snapshotPath, + ExportedAtUtc = DateTime.UtcNow.ToString("O", CultureInfo.InvariantCulture), + UnityVersion = $"format:{decoded.FormatVersion}", + } + }; + + ExtractNativeRoots(decoded, data.NativeRoots); + ExtractMemoryRegions(decoded, data.MemoryRegions); + ExtractNativeAllocations(decoded, data.NativeAllocations); + ExtractNativeObjects(decoded, data.NativeObjects); + var managedCrawl = ManagedSnapshotCrawler.Crawl(decoded); + data.ManagedObjects.AddRange(managedCrawl.ManagedObjects); + ExtractConnections(decoded, managedCrawl.ManagedConnections, data.Connections); + ValidateStrictInMemory(data); + return data; + } + + private static void ExtractNativeRoots(DecodedSnapshot decoded, List output) + { + output.Capacity = decoded.NativeRootIds.Length; + for (var i = 0; i < decoded.NativeRootIds.Length; i++) + { + output.Add(new NativeRootRow + { + RootIndex = i, + RootId = decoded.NativeRootIds[i], + AreaName = decoded.NativeRootAreaNames[i] ?? string.Empty, + ObjectName = decoded.NativeRootObjectNames[i] ?? 
string.Empty, + AccumulatedSizeBytes = decoded.NativeRootAccumulatedSizes[i], + }); + } + } + + private static void ExtractNativeObjects(DecodedSnapshot decoded, List output) + { + output.Capacity = decoded.NativeObjectNames.Length; + for (var i = 0; i < decoded.NativeObjectNames.Length; i++) + { + var typeIndex = decoded.NativeObjectTypeIndices[i]; + output.Add(new NativeObjectRow + { + NativeObjectIndex = i, + InstanceId = decoded.NativeObjectInstanceIds[i].ToString(CultureInfo.InvariantCulture), + Name = decoded.NativeObjectNames[i] ?? string.Empty, + SizeBytes = decoded.NativeObjectSizes[i], + TypeIndex = typeIndex, + NativeTypeName = typeIndex >= 0 && typeIndex < decoded.NativeTypeNames.Length + ? decoded.NativeTypeNames[typeIndex] ?? string.Empty + : string.Empty, + IsDestroyed = i < decoded.NativeObjectFlags.Length && (decoded.NativeObjectFlags[i] & 0x8) != 0, + }); + } + } + + private static void ExtractMemoryRegions(DecodedSnapshot decoded, List output) + { + output.Capacity = decoded.NativeMemoryRegionAddressBases.Length; + for (var i = 0; i < decoded.NativeMemoryRegionAddressBases.Length; i++) + { + output.Add(new MemoryRegionRow + { + RegionIndex = i, + AddressBase = decoded.NativeMemoryRegionAddressBases[i], + AddressSize = decoded.NativeMemoryRegionAddressSizes[i], + Name = decoded.NativeMemoryRegionNames[i] ?? 
string.Empty, + ParentRegionIndex = decoded.NativeMemoryRegionParentIndices[i], + FirstAllocationIndex = decoded.NativeMemoryRegionFirstAllocationIndices[i], + NumAllocations = decoded.NativeMemoryRegionNumAllocations[i], + }); + } + } + + private static void ExtractNativeAllocations(DecodedSnapshot decoded, List output) + { + output.Capacity = decoded.NativeAllocationAddresses.Length; + for (var i = 0; i < decoded.NativeAllocationAddresses.Length; i++) + { + output.Add(new NativeAllocationRow + { + AllocationIndex = i, + Address = decoded.NativeAllocationAddresses[i], + SizeBytes = decoded.NativeAllocationSizes[i], + OverheadSizeBytes = decoded.NativeAllocationOverheadSizes[i], + PaddingSizeBytes = decoded.NativeAllocationPaddingSizes[i], + MemoryRegionIndex = decoded.NativeAllocationMemoryRegionIndices[i], + }); + } + } + + private static void ExtractConnections(DecodedSnapshot decoded, List managedConnections, List output) + { + var dedupe = new HashSet(); + var gcHandleUniqueCount = decoded.GcHandleTargets.Length; + var count = decoded.ConnectionsFrom.Length; + output.Capacity = count + managedConnections.Count; + for (var i = 0; i < count; i++) + { + var fromSource = MapUnifiedIndexToSource(decoded.ConnectionsFrom[i], gcHandleUniqueCount); + var toSource = MapUnifiedIndexToSource(decoded.ConnectionsTo[i], gcHandleUniqueCount); + + var row = new ConnectionRow + { + FromKind = fromSource.Kind, + FromIndex = fromSource.Index, + ToKind = toSource.Kind, + ToIndex = toSource.Index, + ConnectionType = "native_connection", + }; + AddConnectionIfNew(output, dedupe, row); + } + + for (var i = 0; i < managedConnections.Count; i++) + AddConnectionIfNew(output, dedupe, managedConnections[i]); + } + + private static SourceRef MapUnifiedIndexToSource(int unifiedIndex, int gcHandleUniqueCount) + => unifiedIndex < 0 + ? new SourceRef("unknown", unifiedIndex) + : unifiedIndex < gcHandleUniqueCount + ? 
new SourceRef("managed_object", unifiedIndex)
+                : new SourceRef("native_object", unifiedIndex - gcHandleUniqueCount);
+
+    /// <summary>Connection endpoint: a kind label (<c>"managed_object"</c>, <c>"native_object"</c> or <c>"unknown"</c>) plus an index.</summary>
+    private readonly struct SourceRef(string kind, long index)
+    {
+        public string Kind { get; } = kind;
+        public long Index { get; } = index;
+    }
+
+    /// <summary>
+    /// Adds <paramref name="row"/> to <paramref name="output"/> unless a row with the same endpoints and
+    /// connection type has already been recorded in <paramref name="dedupe"/>.
+    /// </summary>
+    /// <param name="output">List receiving rows that have not been seen before.</param>
+    /// <param name="dedupe">Keys of the rows added so far; updated by this call.</param>
+    /// <param name="row">Candidate connection.</param>
+    private static void AddConnectionIfNew(
+        List<ConnectionRow> output,
+        HashSet<ConnectionKey> dedupe,
+        ConnectionRow row)
+    {
+        var key = new ConnectionKey(row.FromKind, row.FromIndex, row.ToKind, row.ToIndex, row.ConnectionType);
+        // HashSet.Add returns false for a key that is already present, which is the duplicate test.
+        if (dedupe.Add(key))
+            output.Add(row);
+    }
+
+    /// <summary>Value-equality key identifying a connection by both endpoints and its connection type.</summary>
+    private readonly record struct ConnectionKey(
+        string FromKind,
+        long FromIndex,
+        string ToKind,
+        long ToIndex,
+        string ConnectionType);
+
+    /// <summary>
+    /// Checks the extracted tables for internal consistency and throws on the first violation found.
+    /// Tables are checked in this order: managed objects, native objects, memory regions, native allocations, connections.
+    /// </summary>
+    /// <param name="data">Extracted snapshot rows to validate.</param>
+    /// <exception cref="InvalidOperationException">A row's stored index does not match its position, or a row references an index that is out of range.</exception>
+    private static void ValidateStrictInMemory(RawSnapshotData data)
+    {
+        for (var i = 0; i < data.ManagedObjects.Count; i++)
+        {
+            var row = data.ManagedObjects[i];
+            if (row.ManagedObjectIndex != i)
+                throw new InvalidOperationException($"Managed object index mismatch. expected={i}, actual={row.ManagedObjectIndex}");
+            if (row.Address == 0)
+                throw new InvalidOperationException($"Managed object {i} has null address.");
+            if (row.SizeBytes <= 0)
+                throw new InvalidOperationException($"Managed object {i} has non-positive size {row.SizeBytes}.");
+            if (row.TypeIndex < 0 || string.IsNullOrWhiteSpace(row.ManagedTypeName))
+                throw new InvalidOperationException($"Managed object {i} has unresolved managed type metadata.");
+            // -1 is accepted here (presumably "no native counterpart" - confirm against the producer); anything lower is rejected.
+            if (row.NativeObjectIndex < -1 || row.NativeObjectIndex >= data.NativeObjects.Count)
+                throw new InvalidOperationException($"Managed object {i} has invalid native_object_index {row.NativeObjectIndex}.");
+        }
+
+        for (var i = 0; i < data.NativeObjects.Count; i++)
+        {
+            var row = data.NativeObjects[i];
+            if (row.NativeObjectIndex != i)
+                throw new InvalidOperationException($"Native object index mismatch. expected={i}, actual={row.NativeObjectIndex}");
+        }
+
+        for (var i = 0; i < data.MemoryRegions.Count; i++)
+        {
+            var row = data.MemoryRegions[i];
+            if (row.RegionIndex != i)
+                throw new InvalidOperationException($"Memory region index mismatch. expected={i}, actual={row.RegionIndex}");
+            // NOTE(review): only the upper bound is checked for the two indices below, so any negative value passes.
+            // Confirm whether values below -1 should be rejected as they are for native_object_index above.
+            if (row.ParentRegionIndex >= data.MemoryRegions.Count)
+                throw new InvalidOperationException($"Memory region {i} has invalid parent_region_index {row.ParentRegionIndex}.");
+            if (row.FirstAllocationIndex >= data.NativeAllocations.Count)
+                throw new InvalidOperationException($"Memory region {i} has invalid first_allocation_index {row.FirstAllocationIndex}.");
+            if (row.NumAllocations < 0)
+                throw new InvalidOperationException($"Memory region {i} has negative num_allocations {row.NumAllocations}.");
+        }
+
+        for (var i = 0; i < data.NativeAllocations.Count; i++)
+        {
+            var row = data.NativeAllocations[i];
+            if (row.AllocationIndex != i)
+                throw new InvalidOperationException($"Native allocation index mismatch. expected={i}, actual={row.AllocationIndex}");
+            // NOTE(review): upper bound only, as for the region indices above.
+            if (row.MemoryRegionIndex >= data.MemoryRegions.Count)
+                throw new InvalidOperationException($"Native allocation {i} has invalid memory_region_index {row.MemoryRegionIndex}.");
+        }
+
+        for (var i = 0; i < data.Connections.Count; i++)
+        {
+            var c = data.Connections[i];
+            ValidateEndpoint(c.FromKind, c.FromIndex, data, $"connections[{i}].from");
+            ValidateEndpoint(c.ToKind, c.ToIndex, data, $"connections[{i}].to");
+        }
+    }
+
+    /// <summary>
+    /// Verifies that a connection endpoint names a supported kind and an index inside the matching table.
+    /// </summary>
+    /// <param name="kind">Endpoint kind; only <c>"managed_object"</c> and <c>"native_object"</c> are accepted.</param>
+    /// <param name="index">Row index within the table selected by <paramref name="kind"/>.</param>
+    /// <param name="data">Tables the index is checked against.</param>
+    /// <param name="label">Prefix used in the exception message to identify the endpoint.</param>
+    /// <exception cref="InvalidOperationException">The index is out of range or the kind is not supported (this includes <c>"unknown"</c>).</exception>
+    private static void ValidateEndpoint(string kind, long index, RawSnapshotData data, string label)
+    {
+        if (kind == "managed_object")
+        {
+            if (index < 0 || index >= data.ManagedObjects.Count)
+                throw new InvalidOperationException($"{label} points to out-of-range managed object index {index}.");
+            return;
+        }
+
+        if (kind == "native_object")
+        {
+            if (index < 0 || index >= data.NativeObjects.Count)
+                throw new InvalidOperationException($"{label} points to out-of-range native object index {index}.");
+            return;
+        }
+
+        throw new InvalidOperationException($"{label} has unsupported endpoint kind '{kind}'.");
+    }
+}
diff --git a/Core/Report/Queries/DuckDbReportQueries.cs b/Core/Report/Queries/DuckDbReportQueries.cs
new file mode 100644
index 0000000..80651da
--- /dev/null
+++ b/Core/Report/Queries/DuckDbReportQueries.cs
@@ -0,0 +1,58 @@
+using DuckDB.NET.Data;
+
+namespace MemorySnapshotDataTools.Report.Queries;
+
+/// <summary>DuckDB implementation of <see cref="IReportQueryBackend"/>. Opens the database at construction and executes report SQL via DuckDB.NET.</summary>
+internal sealed class DuckDbReportQueries : IReportQueryBackend
+{
+    private readonly DuckDBConnection _connection;
+
+    /// <summary>Opens a connection to the DuckDB database at the given path.</summary>
+    /// <param name="dbPath">Path to the .duckdb file.</param>
+    public DuckDbReportQueries(string dbPath)
+    {
+        var connection = new DuckDBConnection($"Data Source={dbPath}");
+        try
+        {
+            connection.Open();
+        }
+        catch
+        {
+            // When the constructor throws, the caller never receives this instance and so cannot dispose it.
+            // Release the connection here before propagating the original failure.
+            connection.Dispose();
+            throw;
+        }
+
+        _connection = connection;
+    }
+
+    /// <inheritdoc />
+    public ReportBackendDialect Dialect => ReportBackendDialect.DuckDb;
+
+    /// <inheritdoc />
+    public (string[] Columns, List<object?[]> Rows) ExecuteQuery(string sql)
+    {
+        using var cmd = _connection.CreateCommand();
+        cmd.CommandText = sql;
+        using var reader = cmd.ExecuteReader();
+        var columns = new string[reader.FieldCount];
+        for (var i = 0; i < reader.FieldCount; i++)
+            columns[i] = reader.GetName(i);
+        // Every row is copied into memory so the reader and command can be disposed before returning.
+        var rows = new List<object?[]>();
+        while (reader.Read())
+        {
+            var row = new object?[reader.FieldCount];
+            for (var i = 0; i < reader.FieldCount; i++)
+                row[i] = reader.IsDBNull(i) ? null : reader.GetValue(i);
+            rows.Add(row);
+        }
+        return (columns, rows);
+    }
+
+    /// <inheritdoc />
+    public bool HasColumn(string tableName, string columnName)
+    {
+        try
+        {
+            // Both names are embedded as SQL string literals; single quotes are doubled so they stay inside the literal.
+            var (_, rows) = ExecuteQuery(
+                $"SELECT 1 FROM information_schema.columns WHERE table_schema = 'main' AND table_name = '{tableName.Replace("'", "''")}' AND column_name = '{columnName.Replace("'", "''")}' LIMIT 1");
+            return rows.Count > 0;
+        }
+        catch
+        {
+            // Deliberate best effort: a failed lookup is reported as "column not present".
+            return false;
+        }
+    }
+
+    /// <inheritdoc />
+    public void Dispose() => _connection.Dispose();
+}
diff --git a/Core/Report/Queries/IReportQueryBackend.cs b/Core/Report/Queries/IReportQueryBackend.cs
new file mode 100644
index 0000000..6fafafc
--- /dev/null
+++ b/Core/Report/Queries/IReportQueryBackend.cs
@@ -0,0 +1,32 @@
+namespace MemorySnapshotDataTools.Report.Queries;
+
+/// <summary>Database dialect used for report queries (affects SQL for e.g. LOG/rounding).</summary>
+internal enum ReportBackendDialect
+{
+    /// <summary>DuckDB backend.</summary>
+    DuckDb,
+
+    /// <summary>SQLite backend.</summary>
+    Sqlite,
+}
+
+/// <summary>
+/// Abstraction for running report queries against an exported snapshot database.
+/// Implementations exist for DuckDB and SQLite so the report generator is backend-agnostic.
+/// </summary>
+internal interface IReportQueryBackend : IDisposable
+{
+    /// <summary>Dialect of the connected database (used to choose dialect-specific SQL).</summary>
+    ReportBackendDialect Dialect { get; }
+
+    /// <summary>Executes the given SQL and returns column names and rows (null for missing values).</summary>
+    /// <param name="sql">SQL query (single statement).</param>
+    /// <returns>Column names and list of row arrays.</returns>
+    (string[] Columns, List<object?[]> Rows) ExecuteQuery(string sql);
+
+    /// <summary>Returns whether the table has a column with the given name.</summary>
+    /// <param name="tableName">Table name.</param>
+    /// <param name="columnName">Column name.</param>
+    /// <returns>True if the column exists.</returns>
+    bool HasColumn(string tableName, string columnName);
+}
diff --git a/Core/Report/Queries/ReportQueryFactory.cs b/Core/Report/Queries/ReportQueryFactory.cs
new file mode 100644
index 0000000..9dc8b67
--- /dev/null
+++ b/Core/Report/Queries/ReportQueryFactory.cs
@@ -0,0 +1,34 @@
+namespace MemorySnapshotDataTools.Report.Queries;
+
+/// <summary>
+/// Creates an <see cref="IReportQueryBackend"/> based on the database file extension (.duckdb, .db, .sqlite, .sqlite3).
+/// If extension is unknown, tries DuckDB first, then falls back to SQLite.
+/// </summary>
+internal static class ReportQueryFactory
+{
+    /// <summary>Opens the database at the given path and returns the appropriate query backend.</summary>
+    /// <param name="dbPath">Path to the exported database file.</param>
+    /// <returns>A backend connected to the database; caller must dispose.</returns>
+    public static IReportQueryBackend Create(string dbPath)
+    {
+        // Dispatch on the lower-cased extension; anything unrecognised is probed by TryOpenAsDuckDb.
+        var extension = Path.GetExtension(dbPath).ToLowerInvariant();
+        switch (extension)
+        {
+            case ".duckdb":
+                return new DuckDbReportQueries(dbPath);
+            case ".db":
+            case ".sqlite":
+            case ".sqlite3":
+                return new SqliteReportQueries(dbPath);
+            default:
+                return TryOpenAsDuckDb(dbPath);
+        }
+    }
+
+    /// <summary>
+    /// Opens <paramref name="dbPath"/> with the DuckDB backend; if that throws for any reason, opens it with the
+    /// SQLite backend instead. A failure of the SQLite attempt propagates to the caller.
+    /// </summary>
+    private static IReportQueryBackend TryOpenAsDuckDb(string dbPath)
+    {
+        IReportQueryBackend backend;
+        try
+        {
+            backend = new DuckDbReportQueries(dbPath);
+        }
+        catch
+        {
+            backend = new SqliteReportQueries(dbPath);
+        }
+
+        return backend;
+    }
+}
diff --git a/Core/Report/Queries/ReportSql.cs b/Core/Report/Queries/ReportSql.cs
new file mode 100644
index 0000000..cac5145
--- /dev/null
+++ b/Core/Report/Queries/ReportSql.cs
@@ -0,0 +1,449 @@
+namespace MemorySnapshotDataTools.Report.Queries;
+
+/// <summary>
+/// SQL strings and helpers for report queries. Constants are used by <see cref="ReportBuilder"/>; dialect-specific methods (e.g. <see cref="SizeBucketDistribution"/>) take <see cref="ReportBackendDialect"/>.
+/// </summary>
+internal static class ReportSql
+{
+    /// <summary>Query for snapshot_info row (path, exported_at_utc, unity_version).</summary>
+    public const string SnapshotInfo = "SELECT snapshot_path, exported_at_utc, unity_version FROM snapshot_info;";
+
+    /// <summary>Row count of each exported table, one result row per table, ordered by table name.</summary>
+    public const string TableCounts = """
+        SELECT 'native_objects' AS table_name, COUNT(*) AS row_count FROM native_objects
+        UNION ALL SELECT 'managed_objects', COUNT(*) FROM managed_objects
+        UNION ALL SELECT 'connections', COUNT(*) FROM connections
+        UNION ALL SELECT 'native_roots', COUNT(*) FROM native_roots
+        UNION ALL SELECT 'memory_regions', COUNT(*) FROM memory_regions
+        UNION ALL SELECT 'native_allocations', COUNT(*) FROM native_allocations
+        ORDER BY 1;
+        """;
+
+    /// <summary>Single-row native object summary: count, total MB/GB, average KB, largest object MB, distinct type count.</summary>
+    public const string NativeOverview = """
+        SELECT
+        COUNT(*) AS total_objects,
+        ROUND(SUM(size_bytes) / 1024.0 / 1024, 2) AS total_native_mb,
+        ROUND(SUM(size_bytes) / 1024.0 / 1024 / 1024, 3) AS total_native_gb,
+        ROUND(AVG(size_bytes) / 1024.0, 2) AS avg_size_kb,
+        ROUND(MAX(size_bytes) / 1024.0 / 1024, 2) AS max_single_object_mb,
+        COUNT(DISTINCT native_type_name) AS distinct_types
+        FROM native_objects;
+        """;
+
+
/// <summary>Top 40 native types by total size: object count, total MB and share of all native bytes.</summary>
+    public const string NativeTypes = """
+        SELECT
+        COALESCE(native_type_name, '(unknown)') AS type_name,
+        COUNT(*) AS obj_count,
+        ROUND(SUM(size_bytes) / 1024.0 / 1024, 2) AS total_mb,
+        ROUND(100.0 * SUM(size_bytes) / NULLIF(SUM(SUM(size_bytes)) OVER (), 0), 2) AS pct_of_total
+        FROM native_objects
+        GROUP BY native_type_name
+        ORDER BY total_mb DESC
+        LIMIT 40;
+        """;
+
+    /// <summary>Returns SQL for native object size distribution by log4 bucket. DuckDB uses LOG(4,x); SQLite uses log(x)/log(4).</summary>
+    /// <param name="dialect">Backend dialect for LOG function.</param>
+    /// <returns>SQL string for the size bucket query.</returns>
+    /// <exception cref="ArgumentOutOfRangeException"><paramref name="dialect"/> is neither DuckDb nor Sqlite.</exception>
+    public static string SizeBucketDistribution(ReportBackendDialect dialect) => dialect switch
+    {
+        ReportBackendDialect.DuckDb => """
+            SELECT
+            CAST(FLOOR(LOG(4, NULLIF(size_bytes, 0))) AS INTEGER) AS log4_bucket,
+            ROUND(POWER(4.0, CAST(FLOOR(LOG(4, NULLIF(size_bytes, 0))) AS INTEGER)) / 1024.0 / 1024, 3) AS bucket_floor_mb,
+            COUNT(*) AS obj_count,
+            ROUND(SUM(size_bytes) / 1024.0 / 1024, 2) AS total_mb
+            FROM native_objects
+            WHERE size_bytes > 0
+            GROUP BY log4_bucket
+            ORDER BY log4_bucket DESC;
+            """,
+        ReportBackendDialect.Sqlite => """
+            SELECT
+            CAST(FLOOR(CAST(log(NULLIF(size_bytes, 0)) / log(4) AS REAL)) AS INTEGER) AS log4_bucket,
+            ROUND(POWER(4.0, CAST(FLOOR(CAST(log(NULLIF(size_bytes, 0)) / log(4) AS REAL)) AS INTEGER)) / 1024.0 / 1024, 3) AS bucket_floor_mb,
+            COUNT(*) AS obj_count,
+            ROUND(SUM(size_bytes) / 1024.0 / 1024, 2) AS total_mb
+            FROM native_objects
+            WHERE size_bytes > 0
+            GROUP BY log4_bucket
+            ORDER BY log4_bucket DESC;
+            """,
+        _ => throw new ArgumentOutOfRangeException(nameof(dialect)),
+    };
+
+    /// <summary>The 50 largest native objects by size_bytes, with name, type and size in MB.</summary>
+    public const string TopNativeObjects = """
+        SELECT
+        native_object_index,
+        COALESCE(name, '(unnamed)') AS name,
+        COALESCE(native_type_name, '(unknown)') AS type_name,
+        ROUND(size_bytes / 1024.0 / 1024, 3) AS size_mb
+        FROM native_objects
+        ORDER BY size_bytes DESC
+        LIMIT 50;
+        """;
+
+    /// <summary>Combined percentage of native bytes held by the five largest native types.</summary>
+    public const string NativeTypesTop5Pct = """
+        SELECT ROUND(SUM(pct), 1) AS top5_pct
+        FROM (
+        SELECT ROUND(100.0 * SUM(size_bytes) / NULLIF(SUM(SUM(size_bytes)) OVER (), 0), 2) AS pct
+        FROM native_objects
+        GROUP BY native_type_name
+        ORDER BY SUM(size_bytes) DESC
+        LIMIT 5
+        ) t;
+        """;
+
+    /// <summary>
+    /// Named native objects that share both name and type with at least one other object, top 50 groups by wasted_mb.
+    /// wasted_mb is (count - 1) x average size of the group.
+    /// </summary>
+    public const string DuplicateAssets = """
+        SELECT
+        COALESCE(name, '(unnamed)') AS name,
+        COALESCE(native_type_name, '(unknown)') AS type_name,
+        COUNT(*) AS duplicate_count,
+        ROUND(MIN(size_bytes) / 1024.0 / 1024, 3) AS min_size_mb,
+        ROUND(MAX(size_bytes) / 1024.0 / 1024, 3) AS max_size_mb,
+        ROUND(SUM(size_bytes) / 1024.0 / 1024, 3) AS total_mb,
+        ROUND((COUNT(*) - 1) * AVG(size_bytes) / 1024.0 / 1024, 3) AS wasted_mb
+        FROM native_objects
+        WHERE name IS NOT NULL
+        GROUP BY name, native_type_name
+        HAVING COUNT(*) > 1
+        ORDER BY wasted_mb DESC
+        LIMIT 50;
+        """;
+
+    /// <summary>Single-row totals over all duplicate groups: group count, extra instances, wasted MB and its share of native bytes.</summary>
+    public const string DuplicateSummary = """
+        SELECT
+        COUNT(*) AS duplicate_groups,
+        SUM(dup_count) - COUNT(*) AS extra_instances,
+        ROUND(SUM(wasted_bytes) / 1024.0 / 1024, 2) AS total_wasted_mb,
+        ROUND(100.0 * SUM(wasted_bytes) / NULLIF((SELECT SUM(size_bytes) FROM native_objects), 0), 1) AS pct_of_native_total
+        FROM (
+        SELECT COUNT(*) AS dup_count, (COUNT(*) - 1) * AVG(size_bytes) AS wasted_bytes
+        FROM native_objects
+        WHERE name IS NOT NULL
+        GROUP BY name, native_type_name
+        HAVING COUNT(*) > 1
+        ) t;
+        """;
+
+    /// <summary>Single-row managed object summary: count, total MB, average bytes, distinct types, and rows with a non-null native_object_index.</summary>
+    public const string ManagedOverview = """
+        SELECT
+        COUNT(*) AS total_objects,
+        ROUND(SUM(size_bytes) / 1024.0 / 1024, 2) AS total_managed_mb,
+        ROUND(AVG(size_bytes), 0) AS avg_size_bytes,
+        COUNT(DISTINCT managed_type_name) AS distinct_types,
+        COUNT(native_object_index) AS objects_with_native_ref
+        FROM managed_objects;
+        """;
+
+    /// <summary>Top 40 managed types by total size: object count, total MB and share of all managed bytes.</summary>
+    public const string ManagedTypes = """
+        SELECT
+        COALESCE(managed_type_name, '(unknown)') AS type_name,
+        COUNT(*) AS obj_count,
+        ROUND(SUM(size_bytes) / 1024.0 / 1024, 2) AS total_mb,
+        ROUND(100.0 * SUM(size_bytes) / NULLIF(SUM(SUM(size_bytes)) OVER (), 0), 2) AS pct_of_total
+        FROM managed_objects
+        GROUP BY managed_type_name
+        ORDER BY total_mb DESC
+        LIMIT 40;
+        """;
+
+    /// <summary>Native roots grouped by area_name: root count and total accumulated MB, largest first.</summary>
+    public const string NativeRootsByArea = """
+        SELECT
+        COALESCE(area_name, '(unknown)') AS area_name,
+        COUNT(*) AS root_count,
+        ROUND(SUM(accumulated_size_bytes) / 1024.0 / 1024, 2) AS total_accumulated_mb
+        FROM native_roots
+        GROUP BY area_name
+        ORDER BY total_accumulated_mb DESC;
+        """;
+
+    /// <summary>The 30 native roots with the largest accumulated_size_bytes.</summary>
+    public const string NativeRootsTop = """
+        SELECT
+        root_id,
+        COALESCE(area_name, '(unknown)') AS area_name,
+        COALESCE(object_name, '(unnamed)') AS object_name,
+        ROUND(accumulated_size_bytes / 1024.0 / 1024, 3) AS accumulated_mb
+        FROM native_roots
+        ORDER BY accumulated_size_bytes DESC
+        LIMIT 30;
+        """;
+
+    /// <summary>The 40 memory regions with the largest address_size, each with its parent region's name.</summary>
+    public const string MemoryRegions = """
+        SELECT
+        r.region_index,
+        COALESCE(r.name, '(unnamed)') AS region_name,
+        COALESCE(p.name, '—') AS parent_name,
+        ROUND(r.address_size / 1024.0 / 1024, 2) AS size_mb,
+        r.num_allocations
+        FROM memory_regions r
+        LEFT JOIN memory_regions p ON p.region_index = r.parent_region_index
+        ORDER BY r.address_size DESC
+        LIMIT 40;
+        """;
+
+    /// <summary>
+    /// Per-region payload, overhead and padding MB plus payload as a percentage of the region size, top 30 by payload.
+    /// Regions with no joined allocations are excluded by the HAVING clause.
+    /// </summary>
+    public const string AllocationEfficiency = """
+        SELECT
+        COALESCE(r.name, '(unnamed)') AS region_name,
+        r.num_allocations,
+        ROUND(r.address_size / 1024.0 / 1024, 2) AS region_size_mb,
+        ROUND(SUM(a.size_bytes) / 1024.0 / 1024, 2) AS payload_mb,
+        ROUND(SUM(a.overhead_size_bytes) / 1024.0 / 1024, 2) AS overhead_mb,
+        ROUND(SUM(a.padding_size_bytes) / 1024.0 / 1024, 2) AS padding_mb,
+        ROUND(100.0 * SUM(a.size_bytes) / NULLIF(r.address_size, 0), 1) AS utilization_pct
+        FROM memory_regions r
+        LEFT JOIN native_allocations a ON a.memory_region_index = r.region_index
+        GROUP BY r.region_index, r.name, r.address_size, r.num_allocations
+        HAVING SUM(a.size_bytes) IS NOT NULL
+        ORDER BY payload_mb DESC NULLS LAST
+        LIMIT 30;
+        """;
+
+    /// <summary>Number of connection rows per connection_type, most frequent first.</summary>
+    public const string ConnectionTypes = """
+        SELECT connection_type, COUNT(*) AS edge_count
+        FROM connections
+        GROUP BY connection_type
+        ORDER BY edge_count DESC;
+        """;
+
+    /// <summary>The 20 native objects that are the target of the most connections.</summary>
+    public const string MostReferenced = """
+        SELECT
+        n.native_object_index,
+        COALESCE(n.name, '(unnamed)') AS name,
+        COALESCE(n.native_type_name, '(unknown)') AS type_name,
+        ROUND(n.size_bytes / 1024.0 / 1024, 2) AS size_mb,
+        COUNT(c.from_index) AS inbound_refs
+        FROM connections c
+        JOIN native_objects n ON n.native_object_index = c.to_index
+        WHERE c.to_kind = 'native_object'
+        GROUP BY n.native_object_index, n.name, n.native_type_name, n.size_bytes
+        ORDER BY inbound_refs DESC
+        LIMIT 20;
+        """;
+
+    /// <summary>Same as <see cref="MostReferenced"/> but omitting objects whose native type is 'MonoScript'.</summary>
+    public const string MostReferencedExclMonoScript = """
+        SELECT
+        n.native_object_index,
+        COALESCE(n.name, '(unnamed)') AS name,
+        COALESCE(n.native_type_name, '(unknown)') AS type_name,
+        ROUND(n.size_bytes / 1024.0 / 1024, 2) AS size_mb,
+        COUNT(c.from_index) AS inbound_refs
+        FROM connections c
+        JOIN native_objects n ON n.native_object_index = c.to_index
+        WHERE c.to_kind = 'native_object' AND n.native_type_name != 'MonoScript'
+        GROUP BY n.native_object_index, n.name, n.native_type_name, n.size_bytes
+        ORDER BY inbound_refs DESC
+        LIMIT 20;
+        """;
+
+    /// <summary>The 20 native objects that are the source of the most connections.</summary>
+    public const string MostOutbound = """
+        SELECT
+        n.native_object_index,
+        COALESCE(n.name, '(unnamed)') AS name,
+        COALESCE(n.native_type_name, '(unknown)') AS type_name,
+        ROUND(n.size_bytes / 1024.0 / 1024, 2) AS size_mb,
+        COUNT(c.to_index) AS outbound_refs
+        FROM connections c
+        JOIN native_objects n ON n.native_object_index = c.from_index
+        WHERE c.from_kind = 'native_object'
+        GROUP BY n.native_object_index, n.name, n.native_type_name, n.size_bytes
+        ORDER BY outbound_refs DESC
+        LIMIT 20;
+        """;
+
+    /// <summary>Single-row summary of the 50 largest native objects: count, total MB and share of all native bytes.</summary>
+    public const string Top50Summary = """
+        SELECT
+        COUNT(*) AS obj_count,
+        ROUND(SUM(size_bytes) / 1024.0 / 1024, 2) AS total_mb,
+        ROUND(100.0 * SUM(size_bytes) / NULLIF((SELECT SUM(size_bytes) FROM native_objects), 0), 1) AS pct_of_native_total
+        FROM (SELECT size_bytes FROM native_objects ORDER BY size_bytes DESC LIMIT 50) t;
+        """;
+
+    // ---------------------------------------------------------------------------
+    // Leaked Shell analysis
+    // Pattern A: native object still in memory but is_destroyed=true
+    // Pattern B: native object freed; managed C# wrapper is orphaned (native_object_index IS NULL)
+    // ---------------------------------------------------------------------------
+
+    /// <summary>
+    /// Pattern B by managed type: managed objects with a NULL native_object_index whose type has at least one
+    /// other instance with a non-NULL native_object_index.
+    /// </summary>
+    public const string LeakedBByType = """
+        SELECT
+        COALESCE(m.managed_type_name, '(unknown)') AS managed_type,
+        COUNT(1) AS leaked_count
+        FROM managed_objects m
+        WHERE m.native_object_index IS NULL
+        AND EXISTS (
+        SELECT 1 FROM managed_objects m2
+        WHERE m2.managed_type_name = m.managed_type_name
+        AND m2.native_object_index IS NOT NULL
+        )
+        GROUP BY 1
+        ORDER BY leaked_count DESC;
+        """;
+
+    /// <summary>Total number of managed objects matching the Pattern B filter used by <see cref="LeakedBByType"/>.</summary>
+    public const string LeakedBStats = """
+        SELECT COUNT(1) AS total_orphaned
+        FROM managed_objects m
+        WHERE m.native_object_index IS NULL
+        AND EXISTS (
+        SELECT 1 FROM managed_objects m2
+        WHERE m2.managed_type_name = m.managed_type_name
+        AND m2.native_object_index IS NOT NULL
+        );
+        """;
+
+    /// <summary>
+    /// Pattern A and Pattern B rows in one result, labelled by the pattern column and ordered by leaked_count.
+    /// Pattern B rows report 0.0 retained native MB. Reads native_objects.is_destroyed.
+    /// </summary>
+    public const string LeakedCombined = """
+        SELECT
+        pattern,
+        COALESCE(native_type_name, 'unknown (freed)') AS native_type_name,
+        managed_type_name,
+        leaked_count,
+        ROUND(native_mb_retained, 2) AS native_mb_retained
+        FROM (
+        SELECT
+        'A: Destroyed (native still in memory)' AS pattern,
+        n.native_type_name,
+        m.managed_type_name,
+        COUNT(1) AS leaked_count,
+        SUM(n.size_bytes) / 1024.0 / 1024 AS native_mb_retained
+        FROM managed_objects m
+        JOIN native_objects n ON m.native_object_index = n.native_object_index
+        WHERE n.is_destroyed = true
+        GROUP BY 2, 3
+
+        UNION ALL
+
+        SELECT
+        'B: Orphaned (native freed)',
+        'unknown (freed)',
+        m.managed_type_name,
+        COUNT(1),
+        0.0
+        FROM managed_objects m
+        WHERE m.native_object_index IS NULL
+        AND EXISTS (
+        SELECT 1 FROM managed_objects m2
+        WHERE m2.managed_type_name = m.managed_type_name
+        AND m2.native_object_index IS NOT NULL
+        )
+        GROUP BY 3
+        ) combined
+        ORDER BY leaked_count DESC;
+        """;
+
+    /// <summary>Pattern A totals: managed objects joined to destroyed native objects, retained native MB and its share of native bytes.</summary>
+    public const string LeakedAStats = """
+        SELECT
+        COUNT(*) AS total_leaked_count,
+        ROUND(SUM(n.size_bytes) / 1024.0 / 1024, 2) AS native_mb_retained,
+        ROUND(
+        100.0 * SUM(n.size_bytes) / NULLIF((SELECT SUM(size_bytes) FROM native_objects), 0),
+        1
+        ) AS pct_of_native_total
+        FROM managed_objects m
+        JOIN native_objects n ON m.native_object_index = n.native_object_index
+        WHERE n.is_destroyed = true;
+        """;
+
+    /// <summary>Pattern A grouped by native and managed type, ordered by retained native MB.</summary>
+    public const string LeakedAByType = """
+        SELECT
+        COALESCE(n.native_type_name, '(unknown)') AS native_type,
+        COALESCE(m.managed_type_name, '(unknown)') AS managed_type,
+        COUNT(1) AS leaked_count,
+        ROUND(SUM(n.size_bytes) / 1024.0 / 1024, 2) AS native_mb_retained
+        FROM managed_objects m
+        JOIN native_objects n ON m.native_object_index = n.native_object_index
+        WHERE n.is_destroyed = true
+        GROUP BY 1, 2
+        ORDER BY native_mb_retained DESC;
+        """;
+
+    /// <summary>The 20 largest destroyed native objects that still have a managed object pointing at them.</summary>
+    public const string LeakedATopObjects = """
+        SELECT
+        n.native_object_index,
+        COALESCE(n.name, '(unnamed)') AS name,
+        COALESCE(n.native_type_name, '(unknown)') AS native_type,
+        COALESCE(m.managed_type_name, '(unknown)') AS managed_type,
+        ROUND(n.size_bytes / 1024.0 / 1024, 2) AS own_size_mb
+        FROM managed_objects m
+        JOIN native_objects n ON m.native_object_index = n.native_object_index
+        WHERE n.is_destroyed = true
+        ORDER BY n.size_bytes DESC
+        LIMIT 20;
+        """;
+
+    /// <summary>All destroyed native objects grouped by native type (no join to managed objects), ordered by retained MB.</summary>
+    public const string AllDestroyedNatives = """
+        SELECT
+        COALESCE(native_type_name, '(unknown)') AS native_type,
+        COUNT(1) AS destroyed_count,
+        ROUND(SUM(size_bytes) / 1024.0 / 1024, 2) AS native_mb_retained
+        FROM native_objects
+        WHERE is_destroyed = true
+        GROUP BY 1
+        ORDER BY native_mb_retained DESC;
+        """;
+
+    /// <summary>Single-row totals for all destroyed native objects: count, retained MB and share of native bytes.</summary>
+    public const string AllDestroyedStats = """
+        SELECT
+        COUNT(*) AS total_destroyed,
+        ROUND(SUM(size_bytes) / 1024.0 / 1024, 2) AS native_mb_retained,
+        ROUND(
+        100.0 * SUM(size_bytes) / NULLIF((SELECT SUM(size_bytes) FROM native_objects), 0),
+        1
+        ) AS pct_of_native_total
+        FROM native_objects
+        WHERE is_destroyed = true;
+        """;
+
+    /// <summary>
+    /// Returns SQL for downstream_mb and exclusive_mb for a single native root.
+    /// rootIdx must be from our own query result (safe to interpolate).
+    /// </summary>
+    /// <param name="rootIdx">Value interpolated into the SQL as the starting <c>connections.from_index</c>; the query only follows native_object to native_object rows of type 'native_connection'.</param>
+    /// <returns>SQL producing one row: downstream_mb sums every reachable object, exclusive_mb sums only reachable objects with no inbound connection from outside the reachable set (the root itself does not count as outside).</returns>
+    public static string DownstreamStats(long rootIdx)
+    {
+        return $"""
+            WITH RECURSIVE
+            reachable(node_index) AS (
+            SELECT c.to_index
+            FROM connections c
+            WHERE c.from_index = {rootIdx}
+            AND c.from_kind = 'native_object'
+            AND c.to_kind = 'native_object'
+            AND c.connection_type = 'native_connection'
+            UNION
+            SELECT c.to_index
+            FROM reachable r
+            JOIN connections c ON c.from_index = r.node_index
+            WHERE c.from_kind = 'native_object'
+            AND c.to_kind = 'native_object'
+            AND c.connection_type = 'native_connection'
+            ),
+            reachable_set AS (SELECT DISTINCT node_index FROM reachable),
+            externally_referenced AS (
+            SELECT DISTINCT c.to_index AS node_index
+            FROM connections c
+            JOIN reachable_set rs_to ON rs_to.node_index = c.to_index
+            LEFT JOIN reachable_set rs_from ON rs_from.node_index = c.from_index
+            WHERE c.from_kind = 'native_object'
+            AND c.to_kind = 'native_object'
+            AND c.connection_type = 'native_connection'
+            AND c.from_index != {rootIdx}
+            AND rs_from.node_index IS NULL
+            ),
+            exclusive_set AS (
+            SELECT rs.node_index
+            FROM reachable_set rs
+            LEFT JOIN externally_referenced ext ON ext.node_index = rs.node_index
+            WHERE ext.node_index IS NULL
+            )
+            SELECT
+            COALESCE(
+            (SELECT ROUND(SUM(n.size_bytes) / 1024.0 / 1024, 2)
+            FROM reachable_set rs
+            JOIN native_objects n ON n.native_object_index = rs.node_index),
+            0.0) AS downstream_mb,
+            COALESCE(
+            (SELECT ROUND(SUM(n.size_bytes) / 1024.0 / 1024, 2)
+            FROM exclusive_set es
+            JOIN native_objects n ON n.native_object_index = es.node_index),
+            0.0) AS exclusive_mb;
+            """;
+    }
+}
diff --git a/Core/Report/Queries/SqliteReportQueries.cs b/Core/Report/Queries/SqliteReportQueries.cs
new file mode 100644
index 0000000..d3b8240
--- /dev/null
+++ b/Core/Report/Queries/SqliteReportQueries.cs
@@ -0,0 +1,61 @@
+using Microsoft.Data.Sqlite;
+
+namespace MemorySnapshotDataTools.Report.Queries;
+
+/// SQLite implementation of .
Opens the database at construction and executes report SQL via Microsoft.Data.Sqlite.
+internal sealed class SqliteReportQueries : IReportQueryBackend
+{
+    private readonly SqliteConnection _connection;
+
+    /// <summary>Opens a connection to the SQLite database at the given path.</summary>
+    /// <param name="dbPath">Path to the .db or .sqlite file.</param>
+    public SqliteReportQueries(string dbPath)
+    {
+        // The builder quotes the path, so characters such as ';' or '=' in it are not parsed as extra keywords.
+        var connectionString = new SqliteConnectionStringBuilder { DataSource = dbPath }.ToString();
+        var connection = new SqliteConnection(connectionString);
+        try
+        {
+            connection.Open();
+        }
+        catch
+        {
+            // When the constructor throws, the caller never receives this instance and so cannot dispose it.
+            // Release the connection here before propagating the original failure.
+            connection.Dispose();
+            throw;
+        }
+
+        _connection = connection;
+    }
+
+    /// <inheritdoc />
+    public ReportBackendDialect Dialect => ReportBackendDialect.Sqlite;
+
+    /// <inheritdoc />
+    public (string[] Columns, List<object?[]> Rows) ExecuteQuery(string sql)
+    {
+        using var cmd = _connection.CreateCommand();
+        cmd.CommandText = sql;
+        using var reader = cmd.ExecuteReader();
+        var columns = new string[reader.FieldCount];
+        for (var i = 0; i < reader.FieldCount; i++)
+            columns[i] = reader.GetName(i);
+        // Every row is copied into memory so the reader and command can be disposed before returning.
+        var rows = new List<object?[]>();
+        while (reader.Read())
+        {
+            var row = new object?[reader.FieldCount];
+            for (var i = 0; i < reader.FieldCount; i++)
+                row[i] = reader.IsDBNull(i) ? null : reader.GetValue(i);
+            rows.Add(row);
+        }
+        return (columns, rows);
+    }
+
+    /// <inheritdoc />
+    public bool HasColumn(string tableName, string columnName)
+    {
+        try
+        {
+            using var cmd = _connection.CreateCommand();
+            // Table and column names are bound as parameters rather than concatenated into the SQL text.
+            cmd.CommandText = "SELECT name FROM pragma_table_info($t) WHERE name = $c";
+            cmd.Parameters.AddWithValue("$t", tableName);
+            cmd.Parameters.AddWithValue("$c", columnName);
+            using var reader = cmd.ExecuteReader();
+            return reader.Read();
+        }
+        catch
+        {
+            // Deliberate best effort: a failed lookup is reported as "column not present".
+            return false;
+        }
+    }
+
+    /// <inheritdoc />
+    public void Dispose() => _connection.Dispose();
+}
diff --git a/Core/Report/ReportBuilder.cs b/Core/Report/ReportBuilder.cs
new file mode 100644
index 0000000..9997c2d
--- /dev/null
+++ b/Core/Report/ReportBuilder.cs
@@ -0,0 +1,365 @@
+using System.Globalization;
+using System.Linq;
+using MemorySnapshotDataTools.Report.Queries;
+
+namespace MemorySnapshotDataTools.Report;
+
+///
+/// Builds a by executing report SQL via and assembling groups/sections
+/// (snapshot info, native objects, managed heap, connections, roots, memory regions). Uses for table and insight HTML.
+///
+internal static class ReportBuilder
+{
+    ///
+    /// Runs all report queries against the backend, maps results into sections and groups, and returns a fully populated report model.
+    ///
+    /// Query backend (DuckDB or SQLite) connected to the report database.
+    /// Report title.
+    /// Database path (for display).
+    /// Generated timestamp string (UTC).
+    /// Populated ready for .
+    public static ReportModel Build(IReportQueryBackend backend, string title, string dbPath, string generatedAtUtc)
+    {
+        var model = new ReportModel
+        {
+            Title = title,
+            DbPath = dbPath,
+            GeneratedAtUtc = generatedAtUtc,
+        };
+
+        var (infoCols, infoRows) = backend.ExecuteQuery(ReportSql.SnapshotInfo);
+        var (countCols, countRows) = backend.ExecuteQuery(ReportSql.TableCounts);
+
+        var kv = new Dictionary();
+        if (infoRows.Count > 0)
+        {
+            var r = infoRows[0];
+            kv["Snapshot Path"] = r.Length > 0 ?
r[0] : null; + kv["Exported At (UTC)"] = r.Length > 1 ? r[1] : null; + kv["Unity Version"] = r.Length > 2 ? r[2] : null; + } + kv["Report Generated"] = generatedAtUtc; + + var totalRows = countRows.Sum(row => row.Length > 1 && row[1] != null ? Convert.ToInt64(row[1]) : 0); + var insightSnap = ReportHtmlHelper.RenderInsight( + $"Snapshot captured from Unity {ReportHtmlHelper.Escape(kv.GetValueOrDefault("Unity Version") ?? "—")} " + + $"containing {ReportHtmlHelper.FmtNum(totalRows)} rows across {ReportHtmlHelper.FmtNum(countRows.Count)} tables. " + + "If table counts appear unexpectedly low, verify the snapshot was captured with Capture All Objects and native memory collection enabled."); + + var snapContent = insightSnap + ReportHtmlHelper.RenderKv(kv) + ReportHtmlHelper.RenderTable(countCols, countRows); + var snapSection = new ReportSection + { + Anchor = "snapshot-info", + SectionTitle = "📋 Source & Table Counts", + ContentHtml = ReportHtmlHelper.Section("snapshot-info", "📋 Source & Table Counts", snapContent, null), + }; + var snapGroup = new ReportGroup + { + GroupTitle = "📸 Snapshot Info", + GroupDesc = "", + }; + snapGroup.Sections.Add(snapSection); + AddNav(model, snapGroup); + model.Groups.Add(snapGroup); + + // Native Objects group + var (natOvCols, natOvRows) = backend.ExecuteQuery(ReportSql.NativeOverview); + var (natTyCols, natTyRows) = backend.ExecuteQuery(ReportSql.NativeTypes); + var (bktCols, bktRows) = backend.ExecuteQuery(ReportSql.SizeBucketDistribution(backend.Dialect)); + var (top50Cols, top50Rows) = backend.ExecuteQuery(ReportSql.TopNativeObjects); + var (dupCols, dupRows) = backend.ExecuteQuery(ReportSql.DuplicateAssets); + var (dsCols, dsRows) = backend.ExecuteQuery(ReportSql.DuplicateSummary); + var (t50Cols, t50Rows) = backend.ExecuteQuery(ReportSql.Top50Summary); + var (_, top5PctRows) = natTyRows.Count > 0 ? 
backend.ExecuteQuery(ReportSql.NativeTypesTop5Pct) : (Array.Empty(), new List()); + var top5PctVal = top5PctRows.Count > 0 && top5PctRows[0].Length > 0 ? top5PctRows[0][0] : null; + + var natTotalObjects = natOvRows.Count > 0 && natOvRows[0].Length > 0 ? natOvRows[0][0] : 0; + var natTotalMb = natOvRows.Count > 0 && natOvRows[0].Length > 1 ? natOvRows[0][1] : 0; + var natAvgKb = natOvRows.Count > 0 && natOvRows[0].Length > 3 ? natOvRows[0][3] : 0; + var natMaxMb = natOvRows.Count > 0 && natOvRows[0].Length > 4 ? natOvRows[0][4] : 0; + var natDistinctTypes = natOvRows.Count > 0 && natOvRows[0].Length > 5 ? natOvRows[0][5] : 0; + + var insightNatOv = ReportHtmlHelper.RenderInsight( + $"{ReportHtmlHelper.FmtNum(natTotalObjects)} native objects across " + + $"{ReportHtmlHelper.FmtNum(natDistinctTypes)} types occupy {ReportHtmlHelper.FmtNum(natTotalMb)} MB total " + + $"(avg {ReportHtmlHelper.FmtNum(natAvgKb)} KB; largest single object {ReportHtmlHelper.FmtNum(natMaxMb)} MB). " + + "This is your native memory baseline — compare it against your platform budget to gauge whether a reduction pass is needed."); + + var top5Names = natTyRows.Count > 0 ? string.Join(", ", natTyRows.Take(5).Select(r => r[0]?.ToString() ?? "—")) : "—"; + var insightTypes = ReportHtmlHelper.RenderInsight( + $"The top 5 types — {ReportHtmlHelper.Escape(top5Names)} — account for {ReportHtmlHelper.FmtNum(top5PctVal)}% of all native memory. " + + "These types are your highest-leverage optimization targets."); + + var t50Mb = t50Rows.Count > 0 && t50Rows[0].Length > 1 ? t50Rows[0][1] : 0; + var t50Pct = t50Rows.Count > 0 && t50Rows[0].Length > 2 ? t50Rows[0][2] : 0; + var insightTop50 = ReportHtmlHelper.RenderInsight( + $"The 50 largest individual objects account for {ReportHtmlHelper.FmtNum(t50Mb)} MB{ReportHtmlHelper.FmtNum(t50Pct)}% of all native memory. 
" + + "A small number of objects driving a large share of memory means optimizing even one large asset can have measurable impact.", + pills: [("Objects shown", "50", ""), ("Combined size", $"{t50Mb}", ""), ("% of native total", $"{t50Pct}%", "")]); + + var dsGroups = dsRows.Count > 0 && dsRows[0].Length > 0 ? dsRows[0][0] : 0; + var dsWastedMb = dsRows.Count > 0 && dsRows[0].Length > 2 ? dsRows[0][2] : 0; + var dsPct = dsRows.Count > 0 && dsRows[0].Length > 3 ? dsRows[0][3] : 0; + var insightDups = ReportHtmlHelper.RenderInsight( + $"{ReportHtmlHelper.FmtNum(dsGroups)} asset name-collision groups were found, " + + $"with an upper-bound waste estimate of {ReportHtmlHelper.FmtNum(dsWastedMb)} MB ({ReportHtmlHelper.FmtNum(dsPct)}% of native memory). " + + "True asset duplication wastes memory proportional to its count.", + pills: [ + ("Name-collision groups", ReportHtmlHelper.FmtNum(dsGroups), dsGroups is int i && i > 0 ? "warn" : "good"), + ("Est. wasted memory", $"{dsWastedMb} MB", "warn"), + ]); + + var nativeGroup = new ReportGroup { GroupTitle = "🧱 Native Objects", GroupDesc = "Native Unity objects — types, sizes, and duplication" }; + nativeGroup.Sections.Add(new ReportSection { Anchor = "native-overview", SectionTitle = "📊 Overview", ContentHtml = ReportHtmlHelper.Section("native-overview", "📊 Overview", insightNatOv + ReportHtmlHelper.RenderTable(natOvCols, natOvRows), null) }); + nativeGroup.Sections.Add(new ReportSection { Anchor = "native-types", SectionTitle = "🏆 Top Types by Size", ContentHtml = ReportHtmlHelper.Section("native-types", "🏆 Top Types by Size", insightTypes + ReportHtmlHelper.RenderTable(natTyCols, natTyRows, truncateCols: new HashSet { "type_name" }), natTyRows.Count), RowCount = natTyRows.Count }); + nativeGroup.Sections.Add(new ReportSection { Anchor = "size-buckets", SectionTitle = "📐 Size Distribution (log₄)", ContentHtml = ReportHtmlHelper.Section("size-buckets", "📐 Size Distribution (log₄)", ReportHtmlHelper.RenderTable(bktCols, 
bktRows), bktRows.Count), RowCount = bktRows.Count }); + nativeGroup.Sections.Add(new ReportSection { Anchor = "top-objects", SectionTitle = "🔝 Top 50 Largest Objects", ContentHtml = ReportHtmlHelper.Section("top-objects", "🔝 Top 50 Largest Objects", insightTop50 + ReportHtmlHelper.RenderTable(top50Cols, top50Rows, truncateCols: new HashSet { "name" }), top50Rows.Count), RowCount = top50Rows.Count }); + nativeGroup.Sections.Add(new ReportSection { Anchor = "duplicates", SectionTitle = "⚠️ Duplicate Assets", ContentHtml = ReportHtmlHelper.Section("duplicates", "⚠️ Duplicate Assets", insightDups + ReportHtmlHelper.RenderTable(dupCols, dupRows, warnCol: "wasted_mb", truncateCols: new HashSet { "name" }), dupRows.Count), RowCount = dupRows.Count }); + AddNav(model, nativeGroup); + model.Groups.Add(nativeGroup); + + // Managed Heap + var (mgOvCols, mgOvRows) = backend.ExecuteQuery(ReportSql.ManagedOverview); + var (mgTyCols, mgTyRows) = backend.ExecuteQuery(ReportSql.ManagedTypes); + var mgTotal = mgOvRows.Count > 0 ? mgOvRows[0][0] : 0; + var mgMb = mgOvRows.Count > 0 ? mgOvRows[0][1] : 0; + var mgTypes = mgOvRows.Count > 0 && mgOvRows[0].Length > 3 ? mgOvRows[0][3] : 0; + var mgBridged = mgOvRows.Count > 0 && mgOvRows[0].Length > 4 ? mgOvRows[0][4] : 0; + var insightMgOv = ReportHtmlHelper.RenderInsight( + $"{ReportHtmlHelper.FmtNum(mgTotal)} managed objects across {ReportHtmlHelper.FmtNum(mgTypes)} types occupy {ReportHtmlHelper.FmtNum(mgMb)} MB; " + + $"{ReportHtmlHelper.FmtNum(mgBridged)} have a corresponding native object. " + + "Large managed heaps increase GC pressure."); + var topMg = mgTyRows.Count > 0 ? mgTyRows[0][0]?.ToString() ?? "—" : "—"; + var topMgMb = mgTyRows.Count > 0 && mgTyRows[0].Length > 2 ? mgTyRows[0][2] : 0; + var insightMgTy = ReportHtmlHelper.RenderInsight( + $"{ReportHtmlHelper.Escape(topMg)} is the largest managed allocator at {ReportHtmlHelper.FmtNum(topMgMb)} MB. 
" + + "This type is the primary driver of managed heap size and therefore GC pause duration."); + + var managedGroup = new ReportGroup { GroupTitle = "🧠 Managed Heap", GroupDesc = "GC-managed objects and type allocations" }; + managedGroup.Sections.Add(new ReportSection { Anchor = "managed-overview", SectionTitle = "📊 Overview", ContentHtml = ReportHtmlHelper.Section("managed-overview", "📊 Overview", insightMgOv + ReportHtmlHelper.RenderTable(mgOvCols, mgOvRows), null) }); + managedGroup.Sections.Add(new ReportSection { Anchor = "managed-types", SectionTitle = "🏆 Top Types by Size", ContentHtml = ReportHtmlHelper.Section("managed-types", "🏆 Top Types by Size", insightMgTy + ReportHtmlHelper.RenderTable(mgTyCols, mgTyRows, truncateCols: new HashSet { "type_name" }), mgTyRows.Count), RowCount = mgTyRows.Count }); + AddNav(model, managedGroup); + model.Groups.Add(managedGroup); + + // Leaked Shells + var hasIsDestroyed = backend.HasColumn("native_objects", "is_destroyed"); + var (lbCols, lbRows) = backend.ExecuteQuery(ReportSql.LeakedBByType); + var (lbsCols, lbsRows) = backend.ExecuteQuery(ReportSql.LeakedBStats); + var bTotal = lbsRows.Count > 0 && lbsRows[0].Length > 0 ? Convert.ToInt64(lbsRows[0][0] ?? 0) : 0L; + var topBType = lbRows.Count > 0 && lbRows[0].Length > 0 ? lbRows[0][0]?.ToString() ?? "—" : "—"; + var topBCount = lbRows.Count > 0 && lbRows[0].Length > 1 ? lbRows[0][1] : (object?)0; + var insightLb = ReportHtmlHelper.RenderInsight( + $"{ReportHtmlHelper.FmtNum(bTotal)} orphaned managed wrappers detected — " + + "C# objects whose native counterpart was completely freed but whose GC references were never cleared. " + + $"The most common type is {ReportHtmlHelper.Escape(topBType)} " + + $"with {ReportHtmlHelper.FmtNum(topBCount)} instances. " + + "These objects waste managed heap space and GC scan time despite having no functional native backing. 
" + + "Fix by hooking OnDestroy (or equivalent) and nulling all strong C# references so the GC " + + "can collect them."); + + var leakedSections = new List(); + if (hasIsDestroyed) + { + var (lcCols, lcRows) = backend.ExecuteQuery(ReportSql.LeakedCombined); + var (lasCols, lasRows) = backend.ExecuteQuery(ReportSql.LeakedAStats); + var laTotal = lasRows.Count > 0 && lasRows[0].Length > 0 ? Convert.ToInt64(lasRows[0][0] ?? 0) : 0L; + var laMb = lasRows.Count > 0 && lasRows[0].Length > 1 ? ToDouble(lasRows[0][1]) : 0.0; + var laPct = lasRows.Count > 0 && lasRows[0].Length > 2 ? ToDouble(lasRows[0][2]) : 0.0; + var combinedTotal = laTotal + bTotal; + var insightLc = ReportHtmlHelper.RenderInsight( + $"{ReportHtmlHelper.FmtNum(combinedTotal)} leaked C# shell objects detected: " + + $"{ReportHtmlHelper.FmtNum(laTotal)} Pattern A (native destroyed but still occupying " + + $"{laMb:N2} MB of native memory) and " + + $"{ReportHtmlHelper.FmtNum(bTotal)} Pattern B (native fully freed, managed wrapper orphaned). " + + "Leaked shells waste memory and can cause MissingReferenceException crashes at runtime. " + + "Prioritise Pattern A by native_mb_retained — each MB is real engine memory the runtime " + + "cannot reclaim until the C# reference chain is broken.", + pills: [ + ("Pattern A (destroyed)", ReportHtmlHelper.FmtNum(laTotal), laTotal > 0 ? "warn" : "good"), + ("Native MB retained", $"{laMb:N2} MB", laMb > 0 ? "warn" : "good"), + ("Pattern B (orphaned)", ReportHtmlHelper.FmtNum(bTotal), bTotal > 0 ? 
"warn" : "good"), + ]); + leakedSections.Add(new ReportSection + { + Anchor = "leaked-summary", + SectionTitle = "📊 Summary (Both Patterns)", + ContentHtml = ReportHtmlHelper.Section("leaked-summary", "📊 Summary (Both Patterns)", insightLc + ReportHtmlHelper.RenderTable(lcCols, lcRows, truncateCols: new HashSet { "managed_type_name" }), lcRows.Count), + RowCount = lcRows.Count, + }); + + var (latCols, latRows) = backend.ExecuteQuery(ReportSql.LeakedAByType); + var topLatNative = latRows.Count > 0 && latRows[0].Length > 0 ? latRows[0][0]?.ToString() ?? "—" : "—"; + var topLatMb = latRows.Count > 0 && latRows[0].Length > 3 ? ToDouble(latRows[0][3]) : 0.0; + var insightLat = ReportHtmlHelper.RenderInsight( + $"{ReportHtmlHelper.FmtNum(latRows.Count)} native/managed type pair(s) show Pattern A leaks. " + + "The worst offender by retained memory is " + + $"{ReportHtmlHelper.Escape(topLatNative)} holding " + + $"{topLatMb:N2} MB despite being destroyed. " + + "These native objects remain alive because managed C# references block GC collection. " + + "Track down the code paths that hold a reference to these types after Destroy() — " + + "common culprits: static caches, event listener captures, and async/coroutine closures."); + leakedSections.Add(new ReportSection + { + Anchor = "leaked-a-types", + SectionTitle = "💥 Pattern A: Destroyed-but-Retained (by Type)", + ContentHtml = ReportHtmlHelper.Section("leaked-a-types", "💥 Pattern A: Destroyed-but-Retained (by Type)", insightLat + ReportHtmlHelper.RenderTable(latCols, latRows, truncateCols: new HashSet { "managed_type" }), latRows.Count), + RowCount = latRows.Count, + }); + + var (laoColsRaw, laoRows) = backend.ExecuteQuery(ReportSql.LeakedATopObjects); + var augLaoCols = laoColsRaw.Concat(new[] { "downstream_mb", "exclusive_mb", "total_freed_mb" }).ToArray(); + var augLaoRows = new List(); + foreach (var row in laoRows) + { + var rootIdx = row.Length > 0 ? Convert.ToInt64(row[0] ?? 0) : 0L; + var ownMb = row.Length > 4 ? 
ToDouble(row[4]) : 0.0; + var (downstreamCols, downstreamRows) = backend.ExecuteQuery(ReportSql.DownstreamStats(rootIdx)); + var dsMb = downstreamRows.Count > 0 && downstreamRows[0].Length > 0 ? ToDouble(downstreamRows[0][0]) : 0.0; + var exclMb = downstreamRows.Count > 0 && downstreamRows[0].Length > 1 ? ToDouble(downstreamRows[0][1]) : 0.0; + var totalFreed = Math.Round(ownMb + exclMb, 2); + augLaoRows.Add(row.Concat(new object?[] { Math.Round(dsMb, 2), Math.Round(exclMb, 2), totalFreed }).ToArray()); + } + augLaoRows = augLaoRows.OrderByDescending(r => r.Length > 0 ? ToDouble(r[^1]) : 0.0).ToList(); + + var topLaoName = augLaoRows.Count > 0 && augLaoRows[0].Length > 1 ? augLaoRows[0][1]?.ToString() ?? "—" : "—"; + var topLaoOwn = augLaoRows.Count > 0 && augLaoRows[0].Length > 4 ? ToDouble(augLaoRows[0][4]) : 0.0; + var topLaoExcl = augLaoRows.Count > 0 && augLaoRows[0].Length >= 2 ? ToDouble(augLaoRows[0][^2]) : 0.0; + var topLaoFreed = augLaoRows.Count > 0 && augLaoRows[0].Length > 0 ? ToDouble(augLaoRows[0][^1]) : 0.0; + var topLaoNameTrunc = topLaoName.Length > 30 ? topLaoName[..30] : topLaoName; + var insightLao = ReportHtmlHelper.RenderInsight( + "Top Pattern A leaked objects ranked by total_freed_mb " + + "(own size + exclusively-owned downstream memory). " + + "The highest-impact leak is " + + $"{ReportHtmlHelper.Escape(topLaoName)}: " + + $"fixing it would free {topLaoFreed:N2} MB total " + + $"({topLaoOwn:N2} MB own + {topLaoExcl:N2} MB exclusive downstream). " + + "exclusive_mb counts only downstream objects reachable solely through this object " + + "— assets shared with other live objects are excluded, so this is a conservative lower bound. 
" + + "Prioritise objects with large total_freed_mb: ensure Destroy() is always " + + "paired with reference nulling, and that no event listeners or coroutines capture a reference " + + "past the object's intended lifetime.", + pills: [ + ("Top leak", topLaoNameTrunc, "warn"), + ("Own size", $"{topLaoOwn:N2} MB", "warn"), + ("Excl. downstream", $"{topLaoExcl:N2} MB", "warn"), + ("Total freed", $"{topLaoFreed:N2} MB", "warn"), + ]); + leakedSections.Add(new ReportSection + { + Anchor = "leaked-a-objects", + SectionTitle = "🔬 Pattern A: Top Individual Leaks + Exclusive Cost", + ContentHtml = ReportHtmlHelper.Section("leaked-a-objects", "🔬 Pattern A: Top Individual Leaks + Exclusive Cost", insightLao + ReportHtmlHelper.RenderTable(augLaoCols, augLaoRows, warnCol: "total_freed_mb", truncateCols: new HashSet { "name" }), augLaoRows.Count), + RowCount = augLaoRows.Count, + }); + + var (adnCols, adnRows) = backend.ExecuteQuery(ReportSql.AllDestroyedNatives); + var (adnsCols, adnsRows) = backend.ExecuteQuery(ReportSql.AllDestroyedStats); + var adnTotal = adnsRows.Count > 0 && adnsRows[0].Length > 0 ? Convert.ToInt64(adnsRows[0][0] ?? 0) : 0L; + var adnMb = adnsRows.Count > 0 && adnsRows[0].Length > 1 ? ToDouble(adnsRows[0][1]) : 0.0; + var adnPct = adnsRows.Count > 0 && adnsRows[0].Length > 2 ? ToDouble(adnsRows[0][2]) : 0.0; + var insightAdn = ReportHtmlHelper.RenderInsight( + $"{ReportHtmlHelper.FmtNum(adnTotal)} native objects across " + + $"{ReportHtmlHelper.FmtNum(adnRows.Count)} type(s) carry is_destroyed=true, " + + $"retaining {adnMb:N2} MB ({adnPct:N1}% of total native memory). " + + "This is the full native cost of pending destructions — Pattern A leaks are a subset of this " + + "(only those with a surviving managed wrapper). 
" + + "A high count here that drops significantly after calling " + + "Resources.UnloadUnusedAssets() + GC.Collect() indicates the allocator " + + "is cleaning up but GC hasn't run yet; a persistently high count across snapshots points to " + + "genuine managed-side leaks blocking reclaim."); + leakedSections.Add(new ReportSection + { + Anchor = "all-destroyed", + SectionTitle = "🗑️ All Destroyed Natives (by Type)", + ContentHtml = ReportHtmlHelper.Section("all-destroyed", "🗑️ All Destroyed Natives (by Type)", insightAdn + ReportHtmlHelper.RenderTable(adnCols, adnRows), adnRows.Count), + RowCount = adnRows.Count, + }); + } + else + { + var schemaNoticeContent = ReportHtmlHelper.RenderInsight( + "Pattern A analysis (destroyed-but-retained natives) requires the " + + "is_destroyed column which is not present in this database. " + + "Re-export the snapshot with the latest version of the exporter to enable this analysis. " + + "Pattern B (orphaned managed wrappers) below is available without it."); + leakedSections.Add(new ReportSection + { + Anchor = "leaked-schema-notice", + SectionTitle = "⚠️ Schema Notice", + ContentHtml = ReportHtmlHelper.Section("leaked-schema-notice", "⚠️ Schema Notice", schemaNoticeContent, null), + }); + } + + leakedSections.Add(new ReportSection + { + Anchor = "leaked-b", + SectionTitle = "👻 Pattern B: Orphaned Managed Wrappers", + ContentHtml = ReportHtmlHelper.Section("leaked-b", "👻 Pattern B: Orphaned Managed Wrappers", insightLb + ReportHtmlHelper.RenderTable(lbCols, lbRows), lbRows.Count), + RowCount = lbRows.Count, + }); + + var leakedGroup = new ReportGroup + { + GroupTitle = "🧟 Leaked Shells", + GroupDesc = "C# managed wrappers alive past their native object's destruction", + }; + foreach (var sec in leakedSections) + leakedGroup.Sections.Add(sec); + AddNav(model, leakedGroup); + model.Groups.Add(leakedGroup); + + // Native Roots + var (nrAreaCols, nrAreaRows) = backend.ExecuteQuery(ReportSql.NativeRootsByArea); + var (nrTopCols, 
nrTopRows) = backend.ExecuteQuery(ReportSql.NativeRootsTop); + var insightRoots = ReportHtmlHelper.RenderInsight("Native roots by area and top 30 by retained size."); + var rootsGroup = new ReportGroup { GroupTitle = "📍 Native Roots", GroupDesc = "Root references and retained size" }; + rootsGroup.Sections.Add(new ReportSection { Anchor = "roots-area", SectionTitle = "📍 By Area", ContentHtml = ReportHtmlHelper.Section("roots-area", "📍 By Area", insightRoots + ReportHtmlHelper.RenderTable(nrAreaCols, nrAreaRows), nrAreaRows.Count), RowCount = nrAreaRows.Count }); + rootsGroup.Sections.Add(new ReportSection { Anchor = "roots-top", SectionTitle = "🥇 Top 30 by Retained Size", ContentHtml = ReportHtmlHelper.Section("roots-top", "🥇 Top 30 by Retained Size", ReportHtmlHelper.RenderTable(nrTopCols, nrTopRows), nrTopRows.Count), RowCount = nrTopRows.Count }); + AddNav(model, rootsGroup); + model.Groups.Add(rootsGroup); + + // Memory Regions & Allocation Efficiency + var (regCols, regRows) = backend.ExecuteQuery(ReportSql.MemoryRegions); + var (aeCols, aeRows) = backend.ExecuteQuery(ReportSql.AllocationEfficiency); + var regionsGroup = new ReportGroup { GroupTitle = "🗂️ Memory & Allocations", GroupDesc = "Memory regions and allocation efficiency" }; + regionsGroup.Sections.Add(new ReportSection { Anchor = "regions", SectionTitle = "🗂️ Memory Regions", ContentHtml = ReportHtmlHelper.Section("regions", "🗂️ Memory Regions", ReportHtmlHelper.RenderTable(regCols, regRows), regRows.Count), RowCount = regRows.Count }); + regionsGroup.Sections.Add(new ReportSection { Anchor = "alloc-efficiency", SectionTitle = "⚡ Allocation Efficiency", ContentHtml = ReportHtmlHelper.Section("alloc-efficiency", "⚡ Allocation Efficiency", ReportHtmlHelper.RenderTable(aeCols, aeRows), aeRows.Count), RowCount = aeRows.Count }); + AddNav(model, regionsGroup); + model.Groups.Add(regionsGroup); + + // Connections + var (ctCols, ctRows) = backend.ExecuteQuery(ReportSql.ConnectionTypes); + var (mrCols, 
mrRows) = backend.ExecuteQuery(ReportSql.MostReferenced); + var (mrExCols, mrExRows) = backend.ExecuteQuery(ReportSql.MostReferencedExclMonoScript); + var (obCols, obRows) = backend.ExecuteQuery(ReportSql.MostOutbound); + var insightConn = ReportHtmlHelper.RenderInsight("Connection types and most-referenced / most-outbound native objects."); + var connGroup = new ReportGroup { GroupTitle = "🔗 Connections", GroupDesc = "Reference graph and connection types" }; + connGroup.Sections.Add(new ReportSection { Anchor = "connection-types", SectionTitle = "Connection Types", ContentHtml = ReportHtmlHelper.Section("connection-types", "Connection Types", insightConn + ReportHtmlHelper.RenderTable(ctCols, ctRows), ctRows.Count), RowCount = ctRows.Count }); + connGroup.Sections.Add(new ReportSection { Anchor = "most-referenced", SectionTitle = "Most Referenced (incl. MonoScript)", ContentHtml = ReportHtmlHelper.Section("most-referenced", "Most Referenced (incl. MonoScript)", ReportHtmlHelper.RenderTable(mrCols, mrRows, truncateCols: new HashSet { "name" }), mrRows.Count), RowCount = mrRows.Count }); + connGroup.Sections.Add(new ReportSection { Anchor = "most-referenced-excl", SectionTitle = "Most Referenced (excl. MonoScript)", ContentHtml = ReportHtmlHelper.Section("most-referenced-excl", "Most Referenced (excl. 
MonoScript)", ReportHtmlHelper.RenderTable(mrExCols, mrExRows, truncateCols: new HashSet { "name" }), mrExRows.Count), RowCount = mrExRows.Count }); + connGroup.Sections.Add(new ReportSection { Anchor = "most-outbound", SectionTitle = "Most Outbound", ContentHtml = ReportHtmlHelper.Section("most-outbound", "Most Outbound", ReportHtmlHelper.RenderTable(obCols, obRows, truncateCols: new HashSet { "name" }), obRows.Count), RowCount = obRows.Count }); + AddNav(model, connGroup); + model.Groups.Add(connGroup); + + return model; + }
+
+    /// <summary>
+    /// Registers a sidebar entry for <paramref name="group"/>: one nav group titled like the
+    /// content group, holding one link per section (anchor + section title).
+    /// </summary>
+    /// <param name="model">Report model whose <c>NavGroups</c> list receives the new entry.</param>
+    /// <param name="group">Content group to mirror; its sections must already be added.</param>
+    private static void AddNav(ReportModel model, ReportGroup group)
+    {
+        var entry = new NavGroup { GroupTitle = group.GroupTitle };
+        entry.Items.AddRange(group.Sections.Select(s => new NavItem { Anchor = s.Anchor, Title = s.SectionTitle }));
+        model.NavGroups.Add(entry);
+    }
+
+    /// <summary>
+    /// Best-effort conversion of a query cell to <see cref="double"/>. Never throws.
+    /// </summary>
+    /// <param name="o">Cell value as returned by the query backend; may be null.</param>
+    /// <returns>
+    /// The numeric value for double/float/decimal/int/long cells; for any other type the
+    /// invariant-culture parse of its string form; 0.0 for null or unparsable input.
+    /// </returns>
+    private static double ToDouble(object? o) => o switch
+    {
+        null => 0.0,
+        double d => d,
+        float f => f,
+        decimal m => (double)m,
+        int i => i,
+        long l => l,
+        // Anything else (other numeric types, strings) goes through its text representation.
+        _ => double.TryParse(o.ToString(), NumberStyles.Any, CultureInfo.InvariantCulture, out var parsed) ? parsed : 0.0,
+    };
+}
diff --git a/Core/Report/ReportHtmlHelper.cs b/Core/Report/ReportHtmlHelper.cs new file mode 100644 index 0000000..e333c6a --- /dev/null +++ b/Core/Report/ReportHtmlHelper.cs @@ -0,0 +1,199 @@ +using System.Globalization; +using System.Text; + +namespace MemorySnapshotDataTools.Report; + +/// +/// Helpers for building report HTML: escaping, number/percent formatting, table and KV rendering, insight blocks, and section/group wrappers. 
+///
+internal static class ReportHtmlHelper
+{
+    // Column names (lower-case) that are always rendered as numbers. Lookups lower-case the
+    // incoming name first, so entries here must stay lower-case.
+    private static readonly HashSet<string> NumericCols = [
+        "obj_count", "edge_count", "root_count", "num_allocations", "inbound_refs", "outbound_refs",
+        "duplicate_count", "duplicate_groups", "extra_instances", "total_objects",
+        "distinct_types", "objects_with_native_ref", "region_count", "row_count", "log4_bucket",
+        "total_orphaned", "total_destroyed", "destroyed_count", "leaked_count", "total_leaked_count"
+    ];
+
+    // Column names (lower-case) that are rendered as percentages.
+    private static readonly HashSet<string> PctCols = [
+        "pct_of_total", "pct_of_native_total", "utilization_pct", "overall_utilization_pct"
+    ];
+
+    /// <summary>HTML-encodes the string form of a value so it can be embedded in report markup.</summary>
+    /// <param name="val">Value to encode (null allowed).</param>
+    /// <returns>The encoded text, or the placeholder <c>"null"</c> when <paramref name="val"/> is null.</returns>
+    public static string Escape(object? val) =>
+        val is null ? "null" : System.Net.WebUtility.HtmlEncode(val.ToString() ?? "");
+
+    /// <summary>
+    /// Formats a value as a number using the invariant culture: N0 (thousands separators, no
+    /// decimals) for every built-in integer type and for whole-valued doubles, N2 otherwise.
+    /// </summary>
+    /// <param name="val">Value to format (null allowed).</param>
+    /// <returns>
+    /// The formatted number; for null, NaN/infinity, and non-numeric values the result of
+    /// <see cref="Escape"/>.
+    /// </returns>
+    public static string FmtNum(object? val)
+    {
+        switch (val)
+        {
+            // All built-in integer widths, not only int/long: a backend may surface unsigned or
+            // narrow integer columns, which previously lost their thousands separators.
+            case sbyte or byte or short or ushort or int or uint or long or ulong:
+                return ((IFormattable)val).ToString("N0", CultureInfo.InvariantCulture);
+
+            case double d when double.IsNaN(d) || double.IsInfinity(d):
+                return Escape(val);
+            // Whole-valued doubles print like integers; the magnitude guard keeps the long cast exact.
+            case double d when d == Math.Truncate(d) && Math.Abs(d) < 1e15:
+                return ((long)d).ToString("N0", CultureInfo.InvariantCulture);
+            case double d:
+                return d.ToString("N2", CultureInfo.InvariantCulture);
+
+            case float f when float.IsNaN(f) || float.IsInfinity(f):
+                return Escape(val);
+            case float f:
+                return f.ToString("N2", CultureInfo.InvariantCulture);
+
+            case decimal m:
+                return m.ToString("N2", CultureInfo.InvariantCulture);
+
+            default:
+                // null and anything non-numeric (strings, dates, ...) are shown as escaped text.
+                return Escape(val);
+        }
+    }
+
+    /// Returns true if the column name is treated as numeric (right-aligned, N0/N2 formatting).
+    /// <param name="col">Column name (case-insensitive).</param>
+    /// <returns>True if numeric.</returns>
+    public static bool IsNumericCol(string col)
+    {
+        var lower = col.ToLowerInvariant();
+        // Percentage columns are numeric too. Deferring to IsPctCol keeps the "_pct" suffix rule
+        // in one place, so a column that FmtCell renders as "12.3%" also gets the "num" class
+        // (right-aligned, numeric sort) instead of being treated as text.
+        if (NumericCols.Contains(lower) || IsPctCol(lower)) return true;
+        return lower.EndsWith("_mb", StringComparison.Ordinal)
+            || lower.EndsWith("_gb", StringComparison.Ordinal)
+            || lower.EndsWith("_kb", StringComparison.Ordinal)
+            || lower.EndsWith("_count", StringComparison.Ordinal);
+    }
+
+    /// <summary>Returns true if the column is displayed as a percentage (suffix %).</summary>
+    /// <param name="col">Column name (case-insensitive).</param>
+    /// <returns>True for the known percentage columns and for any name ending in <c>_pct</c>.</returns>
+    public static bool IsPctCol(string col)
+    {
+        var lower = col.ToLowerInvariant();
+        return PctCols.Contains(lower) || lower.EndsWith("_pct", StringComparison.Ordinal);
+    }
+
+    /// <summary>Formats a cell value for the given column (percent, number, or escaped text).</summary>
+    /// <param name="col">Column name (determines format).</param>
+    /// <param name="val">Cell value.</param>
+    /// <returns>HTML-safe formatted string; the placeholder <c>"null"</c> for a null cell.</returns>
+    public static string FmtCell(string col, object? val)
+    {
+        if (val is null) return "null";
+        // A percentage column whose cell is not convertible to a number falls through and is
+        // shown as plain (escaped) text rather than failing.
+        if (IsPctCol(col) && TryDouble(val, out var pct))
+            return pct.ToString("N1", CultureInfo.InvariantCulture) + "%";
+        return IsNumericCol(col) ? FmtNum(val) : Escape(val);
+    }
+
+    /// <summary>Tries to read a cell as a double without throwing.</summary>
+    /// <param name="o">Cell value (null allowed).</param>
+    /// <param name="d">The converted value, or 0 when conversion fails.</param>
+    /// <returns>
+    /// True for double/float/decimal/int/long cells and for any other value whose string form
+    /// parses under the invariant culture; false for null or unparsable input.
+    /// </returns>
+    private static bool TryDouble(object? o, out double d)
+    {
+        switch (o)
+        {
+            case null: d = 0; return false;
+            case double x: d = x; return true;
+            case float f: d = f; return true;
+            case decimal m: d = (double)m; return true;
+            case int i: d = i; return true;
+            case long l: d = l; return true;
+            default:
+                return double.TryParse(o.ToString(), NumberStyles.Any, CultureInfo.InvariantCulture, out d);
+        }
+    }
+
+ /// Renders a sortable HTML table from column names and row arrays; optional warn column and truncation set. + /// Column headers. + /// Rows of cell values (length may vary per row). + /// If set, cells in this column with value > 0 get a warning style. + /// Column names to truncate with ellipsis and title=full value. 
+ /// HTML fragment (table wrapped in div). + public static string RenderTable(string[] columns, List rows, string? warnCol = null, IReadOnlySet? truncateCols = null) + { + if (rows.Count == 0) + return "

No data available for this section.

"; + + var sb = new StringBuilder(); + sb.Append("
"); + foreach (var c in columns) + { + var numClass = IsNumericCol(c) ? " num" : ""; + sb.Append($""); + } + sb.Append(""); + + foreach (var row in rows) + { + sb.Append(""); + for (var i = 0; i < columns.Length; i++) + { + var col = columns[i]; + var val = i < row.Length ? row[i] : null; + var isNum = IsNumericCol(col); + var isTrunc = truncateCols != null && truncateCols.Contains(col); + var isWarn = warnCol == col && val != null && TryDouble(val, out var v) && v > 0; + var classes = new List(); + if (isNum) classes.Add("num"); + if (isTrunc) classes.Add("trunc"); + if (isWarn) classes.Add("warn"); + var cls = classes.Count > 0 ? " class=\"" + string.Join(" ", classes) + "\"" : ""; + var title = isTrunc && val != null ? " title=\"" + Escape(val) + "\"" : ""; + sb.Append($"{FmtCell(col, val)}"); + } + sb.Append(""); + } + sb.Append("
{Escape(c)}
"); + return sb.ToString(); + } + + /// Renders a key-value grid (e.g. snapshot path, version, generated date). + /// Label-to-value map. + /// HTML fragment (kv-grid div). + public static string RenderKv(IReadOnlyDictionary kv) + { + var sb = new StringBuilder(); + sb.Append("
"); + foreach (var (label, value) in kv) + { + var mono = label.Contains("path", StringComparison.OrdinalIgnoreCase) || label.Contains("version", StringComparison.OrdinalIgnoreCase) || label.Contains("date", StringComparison.OrdinalIgnoreCase); + var cls = mono ? "kv-value mono" : "kv-value"; + var display = value is int or long or double or float or decimal ? FmtNum(value) : Escape(value); + sb.Append($"
{Escape(label)}
{display}
"); + } + sb.Append("
"); + return sb.ToString(); + } + + /// Renders an insight block: paragraph plus optional stat pills (label, value, tone class e.g. "warn" or "good"). + /// Main text (may contain HTML). + /// Optional list of (label, value, tone) for pill display. + /// HTML fragment (insight div). + public static string RenderInsight(string text, List<(string Label, string Value, string Tone)>? pills = null) + { + var sb = new StringBuilder(); + sb.Append("

").Append(text).Append("

"); + if (pills != null && pills.Count > 0) + { + sb.Append("
"); + foreach (var (label, value, tone) in pills) + { + var toneClass = string.IsNullOrEmpty(tone) ? "" : " " + tone; + sb.Append($"
{Escape(label)}
{Escape(value)}
"); + } + sb.Append("
"); + } + sb.Append("
"); + return sb.ToString(); + } + + /// Wraps content in a section div with id, title, optional row-count badge. + /// Id for the section. + /// Section title. + /// Inner HTML. + /// If set, shows "N rows" badge. + /// HTML fragment. + public static string Section(string anchor, string title, string content, int? rowCount = null) + { + var badge = rowCount.HasValue ? $"{FmtNum(rowCount.Value)} rows" : ""; + return $"

{Escape(title)}

{badge}
{content}
"; + } + + /// Wraps inner HTML in a group div with title and description. + /// Group heading. + /// Optional description. + /// Inner HTML (sections). + /// HTML fragment. + public static string Group(string groupTitle, string groupDesc, string innerHtml) => + $"

{Escape(groupTitle)}

{Escape(groupDesc)}
{innerHtml}
"; +} diff --git a/Core/Report/ReportModel.cs b/Core/Report/ReportModel.cs new file mode 100644 index 0000000..a5ae390 --- /dev/null +++ b/Core/Report/ReportModel.cs @@ -0,0 +1,69 @@ +namespace MemorySnapshotDataTools.Report;
+
+/// <summary>Root model for the HTML report: title, db path, generated timestamp, and ordered groups with nav.</summary>
+internal sealed class ReportModel
+{
+    /// <summary>Report title (e.g. "Memory Snapshot Report").</summary>
+    public string Title { get; set; } = string.Empty;
+
+    /// <summary>Database path shown in the subtitle.</summary>
+    public string DbPath { get; set; } = string.Empty;
+
+    /// <summary>When the report was generated (UTC string).</summary>
+    public string GeneratedAtUtc { get; set; } = string.Empty;
+
+    /// <summary>Content groups (Snapshot Info, Native Objects, Managed Heap, etc.).</summary>
+    public List<ReportGroup> Groups { get; } = [];
+
+    /// <summary>Navigation groups for the sidebar (mirrors group/section structure).</summary>
+    public List<NavGroup> NavGroups { get; } = [];
+}
+
+/// <summary>Logical group of sections (e.g. "Native Objects") with a title and optional description.</summary>
+internal sealed class ReportGroup
+{
+    /// <summary>Group heading (e.g. "Native Objects").</summary>
+    public string GroupTitle { get; set; } = string.Empty;
+
+    /// <summary>Optional short description.</summary>
+    public string GroupDesc { get; set; } = string.Empty;
+
+    /// <summary>Sections within this group.</summary>
+    public List<ReportSection> Sections { get; } = [];
+}
+
+/// <summary>Single report section: anchor id, title, HTML content, and optional row count badge.</summary>
+internal sealed class ReportSection
+{
+    /// <summary>Fragment id for nav links (e.g. "native-overview").</summary>
+    public string Anchor { get; set; } = string.Empty;
+
+    /// <summary>Section heading.</summary>
+    public string SectionTitle { get; set; } = string.Empty;
+
+    /// <summary>
+    /// Rendered HTML for the section body. The renderer appends this string as-is, so it must
+    /// already be encoded/safe markup.
+    /// </summary>
+    public string ContentHtml { get; set; } = string.Empty;
+
+    /// <summary>Optional row count for badge display.</summary>
+    public int? RowCount { get; set; }
+}
+
+/// <summary>Single navigation link (anchor + display title).</summary>
+internal sealed class NavItem
+{
+    /// <summary>Fragment id matching a section anchor.</summary>
+    public string Anchor { get; set; } = string.Empty;
+
+    /// <summary>Link text.</summary>
+    public string Title { get; set; } = string.Empty;
+}
+
+/// <summary>Navigation group: label and list of links.</summary>
+internal sealed class NavGroup
+{
+    /// <summary>Group label in the nav (e.g. "Native Objects").</summary>
+    public string GroupTitle { get; set; } = string.Empty;
+
+    /// <summary>Links in this nav group.</summary>
+    public List<NavItem> Items { get; } = [];
+}
diff --git a/Core/Report/ReportRenderer.cs b/Core/Report/ReportRenderer.cs new file mode 100644 index 0000000..072e11c --- /dev/null +++ b/Core/Report/ReportRenderer.cs @@ -0,0 +1,125 @@ +using System.Text; + +namespace MemorySnapshotDataTools.Report; + +/// +/// Renders a to a single self-contained HTML string with embedded CSS and sortable-table script. +/// Produces a fixed nav, main content with groups and sections, and consistent styling. +/// +internal static class ReportRenderer +{ + private const string SortableScript = """ + document.querySelectorAll('table.sortable thead th').forEach(function(th) { + th.style.cursor = 'pointer'; + th.addEventListener('click', function() { + var table = th.closest('table'); + var tbody = table.querySelector('tbody'); + var rows = Array.from(tbody.querySelectorAll('tr')); + var headerCells = table.querySelectorAll('thead th'); + var col = Array.prototype.indexOf.call(headerCells, th); + var isNum = th.classList.contains('num'); + var dir = table.dataset.sortDir === 'asc' ? -1 : 1; + table.dataset.sortDir = table.dataset.sortDir === 'asc' ? 'desc' : 'asc'; + rows.sort(function(a, b) { + var ac = a.cells[col]; + var bc = b.cells[col]; + var av = ac ? ac.textContent.trim() : ''; + var bv = bc ? 
bc.textContent.trim() : ''; + if (isNum) { + var an = parseFloat(av.replace(/,/g, '')) || 0; + var bn = parseFloat(bv.replace(/,/g, '')) || 0; + return dir * (an - bn); + } + return dir * (av.localeCompare(bv)); + }); + rows.forEach(function(r) { tbody.appendChild(r); }); + }); + }); + """; + + private static readonly string Css = """ + *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; } + body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; font-size: 13px; background: #f0f2f5; color: #1a1a2e; padding: 24px; line-height: 1.5; } + h1 { font-size: 22px; font-weight: 700; margin-bottom: 4px; color: #1a1a2e; } + .subtitle { font-size: 12px; color: #666; margin-bottom: 32px; font-family: "SF Mono", "Fira Code", Consolas, monospace; } + nav { position: fixed; top: 24px; right: 24px; width: 210px; background: #fff; border-radius: 8px; box-shadow: 0 1px 4px rgba(0,0,0,.1); padding: 12px 0; z-index: 100; max-height: calc(100vh - 48px); overflow-y: auto; } + nav > h3 { font-size: 10px; text-transform: uppercase; letter-spacing: .06em; color: #aaa; padding: 0 14px 8px; } + .nav-group-label { font-size: 10px; font-weight: 700; text-transform: uppercase; letter-spacing: .06em; color: #1a1a2e; padding: 8px 14px 4px; border-top: 1px solid #f0f2f5; margin-top: 4px; } + .nav-group:first-child .nav-group-label { border-top: none; margin-top: 0; } + nav a { display: block; font-size: 11px; color: #555; text-decoration: none; padding: 3px 14px 3px 20px; border-left: 2px solid transparent; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; } + nav a:hover { background: #f0f2f5; border-left-color: #1a73e8; color: #1a73e8; } + main { max-width: 1100px; } + .group { margin-bottom: 8px; } + .group-header { display: flex; align-items: baseline; gap: 10px; padding: 20px 4px 10px; } + .group-header h2 { font-size: 17px; font-weight: 700; color: #1a1a2e; letter-spacing: -.01em; } + .group-header .group-desc { font-size: 
12px; color: #888; font-style: italic; } + .section { background: #fff; border-radius: 8px; box-shadow: 0 1px 4px rgba(0,0,0,.08); margin-bottom: 16px; overflow: hidden; } + .section-header { display: flex; align-items: baseline; gap: 10px; padding: 14px 18px 10px; border-bottom: 1px solid #e8eaed; } + h3.section-title { font-size: 14px; font-weight: 600; color: #1a1a2e; } + .badge { font-size: 11px; font-weight: 500; background: #e8f0fe; color: #1a73e8; border-radius: 12px; padding: 2px 8px; } + .insight { padding: 10px 18px; background: #f8f9fb; border-bottom: 1px solid #e8eaed; font-size: 12px; color: #444; line-height: 1.6; } + .insight strong { color: #1a1a2e; } + .insight .stat-pills { display: flex; flex-wrap: wrap; gap: 8px; margin-top: 6px; } + .insight .pill { background: #fff; border: 1px solid #dde1e9; border-radius: 6px; padding: 4px 10px; font-size: 12px; line-height: 1.3; } + .insight .pill-label { color: #888; font-size: 10px; text-transform: uppercase; letter-spacing: .04em; } + .insight .pill-value { font-weight: 600; color: #1a1a2e; } + .insight .pill.warn .pill-value { color: #c0392b; } + .insight .pill.good .pill-value { color: #27ae60; } + .table-wrap { overflow-x: auto; } + table { width: 100%; border-collapse: collapse; } + thead th { background: #1a1a2e; color: #fff; font-size: 11px; font-weight: 600; text-transform: uppercase; letter-spacing: .04em; padding: 8px 12px; text-align: left; position: sticky; top: 0; white-space: nowrap; } + thead th.num { text-align: right; } + tbody tr:nth-child(even) { background: #f8f9fb; } + tbody tr:hover { background: #eef2ff; } + td { padding: 6px 12px; border-bottom: 1px solid #f0f2f5; white-space: nowrap; } + td.num { text-align: right; font-variant-numeric: tabular-nums; font-family: "SF Mono", "Fira Code", Consolas, monospace; font-size: 12px; color: #333; } + td.warn { color: #c0392b; font-weight: 600; } + td.trunc { max-width: 320px; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; 
cursor: default; } + .empty { padding: 18px; color: #999; font-style: italic; } + .kv-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(220px, 1fr)); gap: 12px; padding: 16px 18px; border-bottom: 1px solid #f0f2f5; } + .kv-label { font-size: 10px; color: #888; text-transform: uppercase; letter-spacing: .05em; } + .kv-value { font-size: 15px; font-weight: 600; color: #1a1a2e; margin-top: 2px; } + .kv-value.mono { font-family: "SF Mono", "Fira Code", Consolas, monospace; font-size: 11px; font-weight: 400; color: #444; word-break: break-all; white-space: normal; } + """; + + /// Builds the full HTML document from the report model (nav, title, groups, sections). + /// Populated report model from . + /// Complete HTML string (UTF-8). + public static string Render(ReportModel model) + { + var titleEsc = System.Net.WebUtility.HtmlEncode(model.Title); + var dbPathEsc = System.Net.WebUtility.HtmlEncode(model.DbPath); + + var sb = new StringBuilder(); + sb.Append("\n\n\n\n\n"); + sb.Append(titleEsc); + sb.Append("\n\n\n\n\n
\n

").Append(titleEsc).Append("

\n

").Append(dbPathEsc).Append("

\n"); + + foreach (var group in model.Groups) + { + sb.Append("
\n

").Append(System.Net.WebUtility.HtmlEncode(group.GroupTitle)).Append("

").Append(System.Net.WebUtility.HtmlEncode(group.GroupDesc)).Append("
\n"); + foreach (var section in group.Sections) + { + sb.Append(section.ContentHtml); + } + sb.Append("
\n"); + } + + sb.Append("
\n\n\n"); + return sb.ToString(); + } +} diff --git a/Core/Report/ReportRunner.cs b/Core/Report/ReportRunner.cs new file mode 100644 index 0000000..a93f0fa --- /dev/null +++ b/Core/Report/ReportRunner.cs @@ -0,0 +1,82 @@ +using System.Diagnostics; +using MemorySnapshotDataTools.Report.Queries; + +namespace MemorySnapshotDataTools.Report; + +/// <summary> +/// Entry point for report generation: builds a <see cref="ReportModel"/> from the exported database via <see cref="ReportBuilder"/>, +/// renders HTML with <see cref="ReportRenderer"/>, writes to file (or temp + browser), and optionally opens the report in the default browser. +/// </summary> +public static class ReportRunner +{ + /// <summary> + /// Generates the memory snapshot report: queries the database, builds the model, renders HTML, writes to <see cref="ReportRunOptions.ReportOutputPath"/> (or a temp file), and optionally opens it in the browser. + /// </summary> + /// <param name="options">Database path, output path (null or empty = temp file + open browser), and report title.</param> + /// <param name="progress">Progress reporter for status messages.</param> + /// <returns>Exit code 0 on success.</returns> + public static int Run(ReportRunOptions options, IProgressReporter progress) + { + var generatedAt = DateTime.UtcNow.ToString("yyyy-MM-dd HH:mm:ss", System.Globalization.CultureInfo.InvariantCulture) + " UTC"; + + progress.Report($"Report: {options.ReportDbPath} -> {options.ReportOutputPath ?? 
"(temp + browser)"}", force: true); + + using var backend = ReportQueryFactory.Create(options.ReportDbPath); + progress.Report($"Backend: {backend.Dialect}", force: true); + + var swTotal = Stopwatch.StartNew(); + ReportModel model; + + var swQuery = Stopwatch.StartNew(); + try + { + model = ReportBuilder.Build(backend, options.ReportTitle, options.ReportDbPath, generatedAt); + } + finally + { + swQuery.Stop(); + } + + var swRender = Stopwatch.StartNew(); + var html = ReportRenderer.Render(model); + swRender.Stop(); + + var outPath = options.ReportOutputPath; + var openBrowser = string.IsNullOrEmpty(outPath); + if (string.IsNullOrEmpty(outPath)) + { + outPath = Path.Combine(Path.GetTempPath(), "memsnapshot_report_" + Guid.NewGuid().ToString("N")[..8] + ".html"); + } + else + { + var dir = Path.GetDirectoryName(outPath); + if (!string.IsNullOrEmpty(dir)) + Directory.CreateDirectory(dir); + } + + var swWrite = Stopwatch.StartNew(); + File.WriteAllText(outPath, html, System.Text.Encoding.UTF8); + swWrite.Stop(); + swTotal.Stop(); + + progress.Report($"Report written → {outPath}", force: true); + progress.Report( + $"Timings: query_ms={swQuery.ElapsedMilliseconds}, render_ms={swRender.ElapsedMilliseconds}, write_ms={swWrite.ElapsedMilliseconds}, total_ms={swTotal.ElapsedMilliseconds}", force: true); + progress.Report($"Report completed in {swTotal.Elapsed.TotalSeconds:F1}s (query {swQuery.Elapsed.TotalSeconds:F1}s, render {swRender.Elapsed.TotalSeconds:F1}s, write {swWrite.Elapsed.TotalSeconds:F1}s)", force: true); + + if (openBrowser) + { + try + { + var uri = new Uri(outPath); + Process.Start(new ProcessStartInfo { FileName = uri.AbsoluteUri, UseShellExecute = true }); + } + catch + { + progress.Report($"Could not open browser. 
Open manually: {outPath}", force: true); + } + } + + return 0; + } +} diff --git a/MemorySnapshotDataTools.sln b/MemorySnapshotDataTools.sln new file mode 100644 index 0000000..72008c3 --- /dev/null +++ b/MemorySnapshotDataTools.sln @@ -0,0 +1,34 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.0.31903.59 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MemorySnapshotDataTools.Core", "Core\MemorySnapshotDataTools.Core.csproj", "{A1B2C3D4-E5F6-7890-ABCD-EF1234567890}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MemorySnapshotDataTools.Cli", "Cli\MemorySnapshotDataTools.Cli.csproj", "{C3D4E5F6-A7B8-9012-CDEF-123456789012}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MemorySnapshotDataTools.Tests", "Tests\MemorySnapshotDataTools.Tests.csproj", "{B2C3D4E5-F6A7-8901-BCDE-F12345678901}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Debug|Any CPU.Build.0 = Debug|Any CPU + {A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Release|Any CPU.ActiveCfg = Release|Any CPU + {A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Release|Any CPU.Build.0 = Release|Any CPU + {C3D4E5F6-A7B8-9012-CDEF-123456789012}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {C3D4E5F6-A7B8-9012-CDEF-123456789012}.Debug|Any CPU.Build.0 = Debug|Any CPU + {C3D4E5F6-A7B8-9012-CDEF-123456789012}.Release|Any CPU.ActiveCfg = Release|Any CPU + {C3D4E5F6-A7B8-9012-CDEF-123456789012}.Release|Any CPU.Build.0 = Release|Any CPU + {B2C3D4E5-F6A7-8901-BCDE-F12345678901}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {B2C3D4E5-F6A7-8901-BCDE-F12345678901}.Debug|Any CPU.Build.0 = Debug|Any 
CPU + {B2C3D4E5-F6A7-8901-BCDE-F12345678901}.Release|Any CPU.ActiveCfg = Release|Any CPU + {B2C3D4E5-F6A7-8901-BCDE-F12345678901}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/README.md b/README.md index ee75514..ae0a7f8 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,104 @@ -# cse-memory-snapshot-data-tool -[View this project in Unity Internal Developer Portal](https://developer.portal.internal.unity.com/catalog/default/component/cse-memory-snapshot-data-tool)
-# Converting to public repository -Any and all Unity software of any description (including components) (1) whose source is to be made available other than under a Unity source code license or (2) in respect of which a public announcement is to be made concerning its inner workings, may be licensed and released only upon the prior approval of Legal. +# Memory Snapshot Data Tools + +Single CLI to **export** Unity memory snapshots (`.snap`) to DuckDB or SQLite and **generate** HTML reports from those databases. + +## What it does + +- **Export:** Reads a `.snap` file, parses and extracts snapshot data, and writes it to a DuckDB (default) or SQLite file. +- **Report:** Connects to an exported database (DuckDB or SQLite), runs report queries, and produces a self-contained HTML report with sortable tables. + +## Prerequisites + +- [.NET 10 SDK](https://dotnet.microsoft.com/download/dotnet/10.0) + +## How it works + +- **Cli** (exe): entry point and options; **Core** (library): Parser (extraction), Export + ExportDestination (write DBs), Report (query + render). Shared data lives in Core (Models). +- **Export:** reads `.snap` via Parser, extracts rows (SnapshotBridge), writes to DuckDB or SQLite via a producer/consumer pipeline. +- **Report:** opens the DB with Report/Queries backend, runs SQL, builds ReportModel, renders HTML (ReportRenderer + ReportHtmlHelper). + +## How to use + +Use the **MemorySnapshotDataTools** directory as the project root. Run the CLI with the Cli project: + +```bash +dotnet run --project Cli/MemorySnapshotDataTools.Cli.csproj -- <command> [args...] +``` + +Or from the `Cli` directory: `dotnet run -- <command> [args...]`. + +### Export a snapshot to a database + +```bash +dotnet run --project Cli/MemorySnapshotDataTools.Cli.csproj -- export <snapshot.snap> <output-db> [options] +``` + +- Use a `.duckdb` extension for DuckDB (default) or `.db` for SQLite. +- **Options:** `--destination duckdb|sqlite`, `--validate none|minimal|full`, `--verbose` (progress and timings). 
+ +**Example (DuckDB):** + +```bash +dotnet run --project Cli/MemorySnapshotDataTools.Cli.csproj -- export ./memory.snap ./out.duckdb --validate minimal --verbose +``` + +**Example (SQLite):** + +```bash +dotnet run --project Cli/MemorySnapshotDataTools.Cli.csproj -- export ./memory.snap ./out.db --destination sqlite --validate minimal --verbose +``` + +### Generate a report from a database + +```bash +dotnet run --project Cli/MemorySnapshotDataTools.Cli.csproj -- report <database> [--out report.html] [options] +``` + +- **`--out <path>`:** where to write the HTML file. If omitted, writes to a temp file and opens it in the browser. +- **`--title "Title"`:** report title (default: "Memory Snapshot Report"). +- **`--verbose`:** print timings (query, render, write). + +**Example:** + +```bash +dotnet run --project Cli/MemorySnapshotDataTools.Cli.csproj -- report ./out.duckdb --out report.html --verbose +``` + +## Output + +- **Export:** Creates a `.duckdb` or `.db` file with tables: `snapshot_info`, `native_objects`, `managed_objects`, `connections`, `native_roots`, `memory_regions`, `native_allocations`. +- **Report:** Produces one HTML file with navigation, sections (Snapshot Info, Native Objects, Managed Heap, Roots, Regions, Connections), and sortable tables. +- **Timings:** With `--verbose`, export prints parse+extract vs. write; report prints query vs. render vs. write and a one-line summary (e.g. `Report completed in 2.3s (query 1.1s, render 0.5s, write 0.1s)`). 
+ +## Schema (for ad-hoc queries) + +| Table | Description | +|---------------------|--------------------------------------------------| +| `snapshot_info` | Snapshot path, export timestamp, Unity version | +| `native_objects` | Native Unity objects (size, type, name) | +| `managed_objects` | Managed heap objects (address, type, size) | +| `connections` | Edges: from_kind/from_index → to_kind/to_index | +| `native_roots` | Root references and accumulated size | +| `memory_regions` | Native memory regions (address, size, hierarchy) | +| `native_allocations`| Allocations within regions | + +Use any DuckDB or SQLite client to query these tables. + +## Build and test + +From the project root: + +```bash +dotnet build +dotnet test +``` + +To run the CLI: `dotnet run --project Cli/MemorySnapshotDataTools.Cli.csproj -- <command>` or publish the Cli project (see below). + +## Publish (versioned artifacts) + +From the project root, run `./publish.sh` (macOS/Linux) or `./publish.ps1` (Windows). These publish the **Cli** project and produce `artifacts/MemorySnapshotDataTools-<version>-<rid>.zip` for each runtime (win-x64, linux-x64, osx-x64, osx-arm64). + +## AI IDE integration + +A project skill for Cursor (and similar AI IDEs) is in `.cursor/skills/memory-snapshot-report/`. It describes the export and report workflow and when to use it. diff --git a/README_UNITY.md b/README_UNITY.md new file mode 100644 index 0000000..ee75514 --- /dev/null +++ b/README_UNITY.md @@ -0,0 +1,4 @@ +# cse-memory-snapshot-data-tool +[View this project in Unity Internal Developer Portal](https://developer.portal.internal.unity.com/catalog/default/component/cse-memory-snapshot-data-tool)
+# Converting to public repository +Any and all Unity software of any description (including components) (1) whose source is to be made available other than under a Unity source code license or (2) in respect of which a public announcement is to be made concerning its inner workings, may be licensed and released only upon the prior approval of Legal. diff --git a/Tests/MemorySnapshotDataTools.Tests.csproj b/Tests/MemorySnapshotDataTools.Tests.csproj new file mode 100644 index 0000000..a43344d --- /dev/null +++ b/Tests/MemorySnapshotDataTools.Tests.csproj @@ -0,0 +1,17 @@ + + + net10.0 + enable + enable + latest + false + + + + + + + + + + diff --git a/Tests/SnapshotBridgeTests.cs b/Tests/SnapshotBridgeTests.cs new file mode 100644 index 0000000..e251d1a --- /dev/null +++ b/Tests/SnapshotBridgeTests.cs @@ -0,0 +1,101 @@ +using MemorySnapshotDataTools; +using MemorySnapshotDataTools.Parser; +using Xunit; + +namespace MemorySnapshotDataTools.Tests; + +public sealed class SnapshotBridgeTests +{ + /// + /// Builds a minimal DecodedSnapshot that passes ExtractFromDecoded validation: + /// no managed objects, no connections, no memory regions/allocations. 
+ /// + private static DecodedSnapshot CreateMinimalDecoded() + { + return new DecodedSnapshot + { + FormatVersion = 1, + NativeTypeNames = [], + NativeObjectTypeIndices = [], + NativeObjectInstanceIds = [], + NativeObjectNames = [], + NativeObjectSizes = [], + NativeObjectFlags = [], + NativeObjectGcHandleIndices = [], + GcHandleTargets = [], + ConnectionsFrom = [], + ConnectionsTo = [], + NativeRootIds = [], + NativeRootAreaNames = [], + NativeRootObjectNames = [], + NativeRootAccumulatedSizes = [], + NativeMemoryRegionNames = [], + NativeMemoryRegionParentIndices = [], + NativeMemoryRegionAddressBases = [], + NativeMemoryRegionAddressSizes = [], + NativeMemoryRegionFirstAllocationIndices = [], + NativeMemoryRegionNumAllocations = [], + NativeAllocationAddresses = [], + NativeAllocationSizes = [], + NativeAllocationOverheadSizes = [], + NativeAllocationPaddingSizes = [], + NativeAllocationMemoryRegionIndices = [], + VirtualMachineInformation = new DecodedVirtualMachineInfo { PointerSize = 8 }, + ManagedHeapSectionStartAddresses = [], + ManagedHeapSectionBytes = [], + ManagedTypeFlags = [], + ManagedTypeNames = [], + ManagedTypeAssemblies = [], + ManagedTypeBaseOrElementTypeIndices = [], + ManagedTypeSizes = [], + ManagedTypeInfoAddresses = [], + ManagedTypeFieldIndices = [], + FieldOffsets = [], + FieldTypeIndices = [], + FieldNames = [], + FieldIsStatic = [], + }; + } + + [Fact] + public void ExtractFromDecoded_MinimalNativeRoots_ProducesMatchingRows() + { + var decoded = CreateMinimalDecoded(); + decoded.NativeRootIds = [123L]; + decoded.NativeRootAreaNames = ["Scene"]; + decoded.NativeRootObjectNames = ["Root"]; + decoded.NativeRootAccumulatedSizes = [1000UL]; + + var data = SnapshotBridge.ExtractFromDecoded(decoded, "/path/to/snap.snap"); + + var row = Assert.Single(data.NativeRoots); + Assert.Equal(0, row.RootIndex); + Assert.Equal(123L, row.RootId); + Assert.Equal("Scene", row.AreaName); + Assert.Equal("Root", row.ObjectName); + Assert.Equal(1000UL, 
row.AccumulatedSizeBytes); + } + + [Fact] + public void ExtractFromDecoded_MinimalNativeObjects_ProducesMatchingRows() + { + var decoded = CreateMinimalDecoded(); + decoded.NativeTypeNames = ["GameObject"]; + decoded.NativeObjectTypeIndices = [0]; + decoded.NativeObjectInstanceIds = [42UL]; + decoded.NativeObjectNames = ["MyGo"]; + decoded.NativeObjectSizes = [64UL]; + decoded.NativeObjectFlags = [0]; + + var data = SnapshotBridge.ExtractFromDecoded(decoded, "/path/to/snap.snap"); + + var row = Assert.Single(data.NativeObjects); + Assert.Equal(0, row.NativeObjectIndex); + Assert.Equal("42", row.InstanceId); + Assert.Equal("MyGo", row.Name); + Assert.Equal(64UL, row.SizeBytes); + Assert.Equal(0, row.TypeIndex); + Assert.Equal("GameObject", row.NativeTypeName); + Assert.False(row.IsDestroyed); + } +} diff --git a/publish.ps1 b/publish.ps1 new file mode 100644 index 0000000..05c332d --- /dev/null +++ b/publish.ps1 @@ -0,0 +1,37 @@ +# Build and zip MemorySnapshotDataTools for each RID. Run from MemorySnapshotDataTools (project root). +# Produces: artifacts/MemorySnapshotDataTools-<version>-<rid>.zip + +$ErrorActionPreference = "Stop" +$Root = Split-Path -Parent $MyInvocation.MyCommand.Path +$Project = Join-Path $Root "Cli\MemorySnapshotDataTools.Cli.csproj" +$PublishDir = Join-Path $Root "publish" +$ArtifactsDir = Join-Path $Root "artifacts" +$Rids = @("win-x64", "linux-x64", "osx-x64", "osx-arm64") + +# Read version from csproj: capture group 1 is the text inside the <Version> element +$versionNode = Select-String -Path $Project -Pattern '<Version>([^<]+)</Version>' -AllMatches +if (-not $versionNode) { throw "Could not read Version from $Project" } +$Version = $versionNode.Matches.Groups[1].Value + +New-Item -ItemType Directory -Force -Path $PublishDir, $ArtifactsDir | Out-Null +Push-Location $Root + +try { + foreach ($rid in $Rids) { + Write-Host "Publishing $rid..." 
+ $outDir = Join-Path $PublishDir $rid + dotnet publish $Project -c Release -r $rid --self-contained true -p:PublishSingleFile=true -o $outDir + $zipName = "MemorySnapshotDataTools-$Version-$rid.zip" + $zipPath = Join-Path $ArtifactsDir $zipName + Write-Host "Zipping $zipName" + Compress-Archive -Path (Join-Path $outDir "*") -DestinationPath $zipPath -Force + Remove-Item -Recurse -Force $outDir -ErrorAction SilentlyContinue + } + if ((Get-ChildItem $PublishDir -ErrorAction SilentlyContinue).Count -eq 0) { + Remove-Item -Force $PublishDir -ErrorAction SilentlyContinue + } + Write-Host "Done. Artifacts in $ArtifactsDir:" + Get-ChildItem (Join-Path $ArtifactsDir "*.zip") | Format-Table Name, Length -AutoSize +} finally { + Pop-Location +} diff --git a/publish.sh b/publish.sh new file mode 100755 index 0000000..063d2a7 --- /dev/null +++ b/publish.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +# Build and zip MemorySnapshotDataTools for each RID. Run from MemorySnapshotDataTools (project root). +# Produces: artifacts/MemorySnapshotDataTools--.zip + +set -euo pipefail +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT="$ROOT/Cli/MemorySnapshotDataTools.Cli.csproj" +PUBLISH_DIR="$ROOT/publish" +ARTIFACTS_DIR="$ROOT/artifacts" +RIDS=(win-x64 linux-x64 osx-x64 osx-arm64) + +# Read version from csproj (e.g. 0.1.0) +VERSION=$(grep -oE '[^<]+' "$PROJECT" | sed 's/<[^>]*>//g') +if [[ -z "$VERSION" ]]; then + echo "Could not read Version from $PROJECT" + exit 1 +fi + +cd "$ROOT" +mkdir -p "$PUBLISH_DIR" "$ARTIFACTS_DIR" + +for RID in "${RIDS[@]}"; do + echo "Publishing $RID..." + dotnet publish "$PROJECT" -c Release -r "$RID" --self-contained true -p:PublishSingleFile=true -o "$PUBLISH_DIR/$RID" + echo "Zipping MemorySnapshotDataTools-$VERSION-$RID.zip" + (cd "$PUBLISH_DIR/$RID" && zip -rq "$ARTIFACTS_DIR/MemorySnapshotDataTools-$VERSION-$RID.zip" .) + rm -rf "$PUBLISH_DIR/$RID" +done + +rmdir "$PUBLISH_DIR" 2>/dev/null || true +echo "Done. 
Artifacts in $ARTIFACTS_DIR:" +ls -la "$ARTIFACTS_DIR"/*.zip