From 7c0a83df326831ac14ca1308a552f4bc71661fc2 Mon Sep 17 00:00:00 2001 From: "Victor M. Varela" Date: Thu, 7 May 2026 14:59:17 +0200 Subject: [PATCH 1/2] feat: add XML input/output format support (#99) - New src/xml.zig: row-based XML parser and writer - writeXmlHeader/writeXmlRow/writeXmlFooter for output - XmlParser struct for input (line/col error tracking, entity decoding) - loadXmlInput, getXmlColumnNames, summarizeXml for all three modes - main.zig: xml added to InputFormat and OutputFormat enums - --xml-root and --xml-row flags to customise element names (defaults: results, row) - XML dispatch in run(), runColumns(), runValidate(), runSample() (fatal) - build.zig: tests 57/58 updated to use parquet as unknown format; 6 new XML integration tests (99-104) - docs and README: updated format lists, new --xml-root/--xml-row flag docs, XML usage example --- README.md | 23 +- build.zig | 69 +++- docs/sql-pipe.1.scd | 40 ++- src/main.zig | 117 ++++++- src/xml.zig | 816 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 1044 insertions(+), 21 deletions(-) create mode 100644 src/xml.zig diff --git a/README.md b/README.md index b87f5bb..97e18c3 100644 --- a/README.md +++ b/README.md @@ -194,6 +194,19 @@ $ printf 'name,age\nAlice,30\nBob,25' | sql-pipe --json 'SELECT * FROM t' `--json` is mutually exclusive with `-H`/`--header`. It can be combined with `-d`/`--delimiter` and `--tsv` to read non-comma-separated input. +For XML input and output, use `-I xml` / `-O xml`. By default the root element is `` and each row is ``. Override with `--xml-root` and `--xml-row`: + +```sh +$ printf 'name,age\nAlice,30\nBob,25' | sql-pipe -O xml 'SELECT * FROM t' + + +Alice30 +Bob25 + + +$ cat data.xml | sql-pipe -I xml 'SELECT name FROM t WHERE age > 25' +``` + Chain queries by piping back in — useful for two-pass aggregations. Pass `-H` to the first call so the second one sees column names: ```sh @@ -208,15 +221,17 @@ $ cat events.csv \ |------|-------------| | `-d`, `--delimiter ` | Input field delimiter (single character, default `,`) | | `--tsv` | Alias for `--delimiter '\t'` | -| `-I`, `--input-format ` | Input format: `csv` (default), `tsv`, `json`, `ndjson` | -| `-O`, `--output-format ` | Output format: `csv` (default), `tsv`, `json`, `ndjson` | +| `-I`, `--input-format ` | Input format: `csv` (default), `tsv`, `json`, `ndjson`, `xml` | +| `-O`, `--output-format ` | Output format: `csv` (default), `tsv`, `json`, `ndjson`, `xml` | | `--no-type-inference` | Treat all columns as TEXT (skip auto-detection) | | `-H`, `--header` | Print column names as the first output row | | `--json` | Alias for `--output-format json` (mutually exclusive with `-H`) | | `--max-rows ` | Stop if more than `n` data rows are read (exit 1) | -| `--validate` | Parse the entire input and print a summary (`OK: rows, columns (col TYPE, ...)`) to stdout. Exit 0 on success, exit 2 on parse error. No query required. Compatible with `--delimiter`, `--tsv`, `--no-type-inference`, `-I`/`--input-format` (csv, tsv, json, ndjson). JSON/NDJSON columns are reported as TEXT. | -| `--columns` | Read the CSV header row, print each column name on its own line, and exit 0. With `-v`/`--verbose`, also shows the inferred type per column (`name INTEGER`). Respects `--delimiter` and `--tsv`. Mutually exclusive with a query argument. | +| `--validate` | Parse the entire input and print a summary (`OK: rows, columns (col TYPE, ...)`) to stdout. Exit 0 on success, exit 2 on parse error. No query required. Compatible with `--delimiter`, `--tsv`, `--no-type-inference`, `-I`/`--input-format` (csv, tsv, json, ndjson, xml). JSON/NDJSON/XML columns are reported as TEXT. | +| `--columns` | Read the input header, print each column name on its own line, and exit 0. Supports CSV, TSV, JSON, NDJSON, and XML input. With `-v`/`--verbose`, also shows the inferred type per column (`name INTEGER`). Respects `--delimiter` and `--tsv`. Mutually exclusive with a query argument. | | `--sample []` | Print a schema comment block to stderr and the first `` data rows to stdout as CSV (default: `n=10`). The schema block lists each column name and its inferred type, prefixed with `#`. Implies `--header`. Compatible with `--delimiter` and `--tsv`. Mutually exclusive with `--json` and a query argument. No query required. | +| `--xml-root ` | Root element name for XML I/O (default: `results`) | +| `--xml-row ` | Row element name for XML I/O (default: `row`) | | `--output ` | Write results to the given file instead of stdout. Creates or overwrites the file. Exits 1 if the file cannot be created. | | `-v`, `--verbose` | Print `Loaded rows in s` to stderr after loading (always on TTY; forced with flag) | | `-s`, `--silent` | Suppress `Loaded rows in s` and the progress counter from stderr unconditionally. Cannot be combined with `-v`/`--verbose` | diff --git a/build.zig b/build.zig index 1e497f6..d7d621a 100644 --- a/build.zig +++ b/build.zig @@ -593,7 +593,7 @@ pub fn build(b: *std.Build) void { // Integration test 57: unknown input format → error exit 1 const test_bad_input_format = b.addSystemCommand(&.{ "bash", "-c", - \\msg=$(printf '' | ./zig-out/bin/sql-pipe --input-format xml 'SELECT 1' 2>&1 >/dev/null; echo "EXIT:$?") + \\msg=$(printf '' | ./zig-out/bin/sql-pipe --input-format parquet 'SELECT 1' 2>&1 >/dev/null; echo "EXIT:$?") \\echo "$msg" | grep -q 'unknown input format' && echo "$msg" | grep -q 'EXIT:1' }); test_bad_input_format.step.dependOn(b.getInstallStep()); @@ -602,7 +602,7 @@ pub fn build(b: *std.Build) void { // Integration test 58: unknown output format → error exit 1 const test_bad_output_format = b.addSystemCommand(&.{ "bash", "-c", - \\msg=$(printf 'a\n1\n' | ./zig-out/bin/sql-pipe --output-format xml 'SELECT * FROM t' 2>&1 >/dev/null; echo "EXIT:$?") + \\msg=$(printf 'a\n1\n' | ./zig-out/bin/sql-pipe --output-format parquet 'SELECT * FROM t' 2>&1 >/dev/null; echo "EXIT:$?") \\echo "$msg" | grep -q 'unknown output format' && echo "$msg" | grep -q 'EXIT:1' }); test_bad_output_format.step.dependOn(b.getInstallStep()); @@ -1011,6 +1011,71 @@ pub fn build(b: *std.Build) void { test_delimiter_too_long_error.step.dependOn(b.getInstallStep()); test_step.dependOn(&test_delimiter_too_long_error.step); + // ─── XML input/output integration tests ───────────────────────────────── + + // Integration test 99: XML output format emits correct structure + const test_xml_output = b.addSystemCommand(&.{ + "bash", "-c", + \\result=$(printf 'name,age\nAlice,30\nBob,25\n' \ + \\ | ./zig-out/bin/sql-pipe --output-format xml 'SELECT * FROM t ORDER BY name') + \\expected=$(printf '\n\nAlice30\nBob25\n') + \\[ "$result" = "$expected" ] + }); + test_xml_output.step.dependOn(b.getInstallStep()); + test_step.dependOn(&test_xml_output.step); + + // Integration test 100: XML input can be queried + const test_xml_input = b.addSystemCommand(&.{ + "bash", "-c", + \\result=$(printf '\n\nAlice30\nBob25\n\n' \ + \\ | ./zig-out/bin/sql-pipe --input-format xml 'SELECT name FROM t ORDER BY name') + \\expected=$(printf 'Alice\nBob') + \\[ "$result" = "$expected" ] + }); + test_xml_input.step.dependOn(b.getInstallStep()); + test_step.dependOn(&test_xml_input.step); + + // Integration test 101: XML roundtrip (xml in → xml out) + const test_xml_roundtrip = b.addSystemCommand(&.{ + "bash", "-c", + \\result=$(printf '\n\nAlice30\n\n' \ + \\ | ./zig-out/bin/sql-pipe -I xml -O xml 'SELECT * FROM t') + \\echo "$result" | grep -q 'Alice' && echo "$result" | grep -q '30' + }); + test_xml_roundtrip.step.dependOn(b.getInstallStep()); + test_step.dependOn(&test_xml_roundtrip.step); + + // Integration test 102: --columns with XML input lists column names + const test_xml_columns = b.addSystemCommand(&.{ + "bash", "-c", + \\result=$(printf '\n\nAlice30\n\n' \ + \\ | ./zig-out/bin/sql-pipe -I xml --columns) + \\expected=$(printf 'name\nage') + \\[ "$result" = "$expected" ] + }); + test_xml_columns.step.dependOn(b.getInstallStep()); + test_step.dependOn(&test_xml_columns.step); + + // Integration test 103: --validate with XML input prints summary + const test_xml_validate = b.addSystemCommand(&.{ + "bash", "-c", + \\result=$(printf '\n\nAlice30\nBob25\n\n' \ + \\ | ./zig-out/bin/sql-pipe -I xml --validate) + \\echo "$result" | grep -q 'OK: 2 rows' + }); + test_xml_validate.step.dependOn(b.getInstallStep()); + test_step.dependOn(&test_xml_validate.step); + + // Integration test 104: --xml-root and --xml-row customize element names + const test_xml_custom_elements = b.addSystemCommand(&.{ + "bash", "-c", + \\result=$(printf 'name,age\nAlice,30\n' \ + \\ | ./zig-out/bin/sql-pipe -O xml --xml-root data --xml-row record 'SELECT * FROM t') + \\echo "$result" | grep -q '' && echo "$result" | grep -q '' && echo "$result" | grep -q '' + }); + test_xml_custom_elements.step.dependOn(b.getInstallStep()); + test_step.dependOn(&test_xml_custom_elements.step); + // Unit tests for the RFC 4180 CSV parser (src/csv.zig) const unit_tests = b.addTest(.{ .root_module = b.createModule(.{ diff --git a/docs/sql-pipe.1.scd b/docs/sql-pipe.1.scd index ecb88bc..07197cb 100644 --- a/docs/sql-pipe.1.scd +++ b/docs/sql-pipe.1.scd @@ -72,22 +72,33 @@ OPTIONS stderr is a TTY. Useful for producing clean stderr in interactive terminals. Cannot be combined with *-v* / *--verbose*. + *--xml-root* + Root element name used when reading or writing XML (default: *results*). + The output document is wrapped in *...*. Also used as the + expected root tag when parsing XML input. + + *--xml-row* + Row element name used when reading or writing XML (default: *row*). + Each result row is emitted as *value...*. + *--validate* Parse the entire input without executing a SQL query. On success, prints a one-line summary to standard output: *OK: rows, columns ( , ...)* and exits 0. On parse error, prints the error message and exits 2. Compatible with *--delimiter*, *--tsv*, *--no-type-inference*, and - *-I* / *--input-format* (csv, tsv, json, ndjson). JSON and NDJSON - columns are reported as TEXT. Mutually exclusive with a query + *-I* / *--input-format* (csv, tsv, json, ndjson, xml). JSON, NDJSON, + and XML columns are reported as TEXT. Mutually exclusive with a query argument. *--columns* - Read the CSV header row, print each column name on its own line to - standard output, and exit with code 0. When combined with *-v* / - *--verbose*, also shows the inferred type (INTEGER, REAL, or TEXT) - for each column, using the first 100 data rows for inference. Respects - *--delimiter* and *--tsv*. Mutually exclusive with a query argument. + Read the input header, print each column name on its own line to + standard output, and exit with code 0. Supported for CSV, TSV, + JSON, NDJSON, and XML input. When combined with *-v* / *--verbose*, + also shows the inferred type (INTEGER, REAL, or TEXT) for each column + (CSV/TSV only; other formats always show TEXT), using the first 100 + data rows for inference. Respects *--delimiter* and *--tsv*. + Mutually exclusive with a query argument. *--sample* [] Print a schema comment block to standard error and the first data @@ -157,6 +168,21 @@ EXAMPLES Output:++ [{"name":"Alice","age":30},{"name":"Bob","age":25}] + Convert CSV to XML: + + $ printf 'name,age\nAlice,30\nBob,25' | sql-pipe -O xml 'SELECT \* FROM t' + + Output:++ + ++ + ++ + Alice30++ + Bob25++ + + + Query XML input: + + $ cat data.xml | sql-pipe -I xml 'SELECT name FROM t WHERE age > 25' + Preview schema and first 3 rows of a CSV file: $ cat sales.csv | sql-pipe --sample 3 diff --git a/src/main.zig b/src/main.zig index 8996bd2..8340cdb 100644 --- a/src/main.zig +++ b/src/main.zig @@ -2,6 +2,7 @@ const std = @import("std"); const c = @import("c"); const csv = @import("csv.zig"); const json = @import("json.zig"); +const xml = @import("xml.zig"); const build_options = @import("build_options"); const VERSION: []const u8 = build_options.version; @@ -70,10 +71,10 @@ const ExitCode = enum(u8) { }; /// Supported input formats. -const InputFormat = enum { csv, tsv, json, ndjson }; +const InputFormat = enum { csv, tsv, json, ndjson, xml }; /// Supported output formats. -const OutputFormat = enum { csv, tsv, json, ndjson }; +const OutputFormat = enum { csv, tsv, json, ndjson, xml }; /// Parsed command-line arguments. const ParsedArgs = struct { @@ -98,6 +99,10 @@ const ParsedArgs = struct { silent: bool, /// Write results to this file path instead of stdout; null = write to stdout. output: ?[]const u8, + /// Root element name for XML output (default: "results"). + xml_root: []const u8, + /// Row element name for XML output (default: "row"). + xml_row: []const u8, }; /// Arguments for `--columns` mode. @@ -163,8 +168,8 @@ fn printUsage(writer: *std.Io.Writer) !void { \\Options: \\ -d, --delimiter Input field delimiter for CSV: 1–8 chars (default: ,) \\ --tsv Alias for --delimiter '\t' - \\ -I, --input-format Input format: csv (default), tsv, json, ndjson - \\ -O, --output-format Output format: csv (default), tsv, json, ndjson + \\ -I, --input-format Input format: csv (default), tsv, json, ndjson, xml + \\ -O, --output-format Output format: csv (default), tsv, json, ndjson, xml \\ --json Alias for --output-format json \\ --no-type-inference Treat all columns as TEXT (CSV input only) \\ -H, --header Print column names as the first output row (CSV/TSV output only) @@ -185,6 +190,8 @@ fn printUsage(writer: *std.Io.Writer) !void { \\ Implies --header. Compatible with --delimiter and --tsv. \\ Incompatible with --json and with a query argument. \\ --output Write results to file instead of stdout + \\ --xml-root Root element name for XML I/O (default: results) + \\ --xml-row Row element name for XML I/O (default: row) \\ -h, --help Show this help message and exit \\ -V, --version Show version and exit \\ @@ -227,6 +234,7 @@ fn parseInputFormat(s: []const u8) SqlPipeError!InputFormat { if (std.mem.eql(u8, s, "tsv")) return .tsv; if (std.mem.eql(u8, s, "json")) return .json; if (std.mem.eql(u8, s, "ndjson")) return .ndjson; + if (std.mem.eql(u8, s, "xml")) return .xml; return error.InvalidInputFormat; } @@ -239,6 +247,7 @@ fn parseOutputFormat(s: []const u8) SqlPipeError!OutputFormat { if (std.mem.eql(u8, s, "tsv")) return .tsv; if (std.mem.eql(u8, s, "json")) return .json; if (std.mem.eql(u8, s, "ndjson")) return .ndjson; + if (std.mem.eql(u8, s, "xml")) return .xml; return error.InvalidOutputFormat; } @@ -265,6 +274,8 @@ fn parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult { var list_columns = false; var validate = false; var output: ?[]const u8 = null; + var xml_root: []const u8 = "results"; + var xml_row: []const u8 = "row"; var sample_mode = false; var sample_n: usize = 10; @@ -363,6 +374,18 @@ fn parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult { const trimmed = std.mem.trim(u8, arg["--output=".len..], " \t"); if (trimmed.len == 0) return error.InvalidOutputPath; output = trimmed; + } else if (std.mem.eql(u8, arg, "--xml-root")) { + i += 1; + if (i >= args.len) return error.MissingQuery; + xml_root = args[i]; + } else if (std.mem.startsWith(u8, arg, "--xml-root=")) { + xml_root = arg["--xml-root=".len..]; + } else if (std.mem.eql(u8, arg, "--xml-row")) { + i += 1; + if (i >= args.len) return error.MissingQuery; + xml_row = args[i]; + } else if (std.mem.startsWith(u8, arg, "--xml-row=")) { + xml_row = arg["--xml-row=".len..]; } else { if (query == null) query = arg; } @@ -452,6 +475,8 @@ fn parseArgs(args: []const [:0]const u8) SqlPipeError!ArgsResult { .verbose = verbose, .silent = silent, .output = output, + .xml_root = xml_root, + .xml_row = xml_row, } }; } @@ -884,6 +909,8 @@ fn execQuery( writer: *std.Io.Writer, header: bool, output_format: OutputFormat, + xml_root: []const u8, + xml_row: []const u8, ) (SqlPipeError || std.mem.Allocator.Error || std.Io.Writer.Error)!void { const query_z = try allocator.dupeZ(u8, query); defer allocator.free(query_z); @@ -943,6 +970,23 @@ fn execQuery( try printRow(stmt.?, col_count, writer, out_delim); } }, + .xml => { + // Collect column names before stepping + var col_names = try allocator.alloc([*:0]const u8, @intCast(col_count)); + defer allocator.free(col_names); + var ci: c_int = 0; + while (ci < col_count) : (ci += 1) { + col_names[@intCast(ci)] = c.sqlite3_column_name(stmt, ci); + } + + try xml.writeXmlHeader(writer, xml_root); + // Loop invariant I: all SQLITE_ROW results returned so far have been written as XML rows + // Bounding function: number of remaining rows in the result set (finite) + while (c.sqlite3_step(stmt) == c.SQLITE_ROW) { + try xml.writeXmlRow(stmt.?, col_count, col_names, writer, xml_row); + } + try xml.writeXmlFooter(writer, xml_root); + }, } } @@ -1435,6 +1479,27 @@ fn runColumns( break; } }, + .xml => { + var stdin_buf: [4096]u8 = undefined; + var stdin_file_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); + + const names = xml.getXmlColumnNames(allocator, &stdin_file_reader.interface, stderr_writer); + defer { + for (names) |name| allocator.free(name); + allocator.free(names); + } + for (names) |name| { + if (args.verbose) { + stdout_writer.print("{s} TEXT\n", .{name}) catch |err| { + std.log.err("failed to write output: {}", .{err}); + }; + } else { + stdout_writer.print("{s}\n", .{name}) catch |err| { + std.log.err("failed to write output: {}", .{err}); + }; + } + } + }, } } @@ -1704,6 +1769,37 @@ fn runValidate( std.process.exit(@intFromEnum(ExitCode.usage)); }; }, + .xml => { + var stdin_buf: [4096]u8 = undefined; + var stdin_file_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); + + const summary = xml.summarizeXml(allocator, &stdin_file_reader.interface, stderr_writer); + defer { + for (summary.col_names) |name| allocator.free(name); + allocator.free(summary.col_names); + } + + var count_buf: [32]u8 = undefined; + const count_str = fmtThousands(&count_buf, summary.row_count); + stdout_writer.print("OK: {s} rows, {d} columns (", .{ count_str, summary.col_names.len }) catch |err| { + std.log.err("failed to write output: {}", .{err}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }; + for (summary.col_names, 0..) |name, i| { + if (i > 0) stdout_writer.writeAll(", ") catch |err| { + std.log.err("failed to write output: {}", .{err}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }; + stdout_writer.print("{s} TEXT", .{name}) catch |err| { + std.log.err("failed to write output: {}", .{err}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }; + } + stdout_writer.writeAll(")\n") catch |err| { + std.log.err("failed to write output: {}", .{err}); + std.process.exit(@intFromEnum(ExitCode.usage)); + }; + }, } } @@ -1721,7 +1817,7 @@ fn runSample( stdout_writer: *std.Io.Writer, ) void { switch (args.input_format) { - .json, .ndjson => fatal( + .json, .ndjson, .xml => fatal( "--sample only supports CSV and TSV input; use -I csv (default) or --tsv", stderr_writer, .usage, @@ -1894,6 +1990,11 @@ fn run( var stdin_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); break :blk json.loadNdjsonInput(allocator, &stdin_reader.interface, db, parsed.max_rows, stderr_writer); }, + .xml => blk: { + var stdin_buf: [4096]u8 = undefined; + var stdin_reader = std.Io.File.reader(std.Io.File.stdin(), io, &stdin_buf); + break :blk xml.loadXmlInput(allocator, &stdin_reader.interface, db, parsed.max_rows, stderr_writer); + }, }; // Print row count and elapsed time to stderr when stderr is a TTY or --verbose is set. @@ -1915,7 +2016,7 @@ fn run( stderr_writer.flush() catch |err| std.log.err("failed to flush stderr: {}", .{err}); } - execQuery(allocator, db, query, stdout_writer, parsed.header, parsed.output_format) catch { + execQuery(allocator, db, query, stdout_writer, parsed.header, parsed.output_format, parsed.xml_root, parsed.xml_row) catch { stdout_writer.flush() catch |err| std.log.err("failed to flush output before fatal: {}", .{err}); fatalSqlWithContext(allocator, db, std.mem.span(c.sqlite3_errmsg(db)), stderr_writer); }; @@ -1966,14 +2067,14 @@ pub fn main(init: std.process.Init.Minimal) void { }, error.InvalidInputFormat => { stderr_writer.writeAll( - "error: unknown input format; supported: csv, tsv, json, ndjson\n", + "error: unknown input format; supported: csv, tsv, json, ndjson, xml\n", ) catch |werr| std.log.err("failed to write error message: {}", .{werr}); stderr_writer.flush() catch |ferr| std.log.err("failed to flush: {}", .{ferr}); std.process.exit(@intFromEnum(ExitCode.usage)); }, error.InvalidOutputFormat => { stderr_writer.writeAll( - "error: unknown output format; supported: csv, tsv, json, ndjson\n", + "error: unknown output format; supported: csv, tsv, json, ndjson, xml\n", ) catch |werr| std.log.err("failed to write error message: {}", .{werr}); stderr_writer.flush() catch |ferr| std.log.err("failed to flush: {}", .{ferr}); std.process.exit(@intFromEnum(ExitCode.usage)); diff --git a/src/xml.zig b/src/xml.zig new file mode 100644 index 0000000..6d1d6c8 --- /dev/null +++ b/src/xml.zig @@ -0,0 +1,816 @@ +//! XML row-based I/O — input loading and output formatting. +//! +//! Input +//! ───── +//! loadXmlInput — read row-based XML from stdin, create table `t`, insert rows. +//! getXmlColumnNames — parse XML and return column names from the first row. +//! summarizeXml — parse XML, count rows, return column names (for --validate). +//! +//! Output +//! ────── +//! writeXmlHeader — emit the XML declaration and opening root element. +//! writeXmlRow — emit one SQLite result row as a compact XML row element. +//! writeXmlFooter — emit the closing root element. +//! +//! XML format (output) +//! ─────────────────── +//! +//! +//! Alice30 +//! +//! +//! XML format (input) +//! ────────────────── +//! Row-based only: each direct child of the root element is a row. +//! Each child of a row element is a column (element name = column name, +//! text content = value). Nested elements inside a column are captured as +//! raw XML strings. Supported entities: & < > " ' +//! CDATA sections are preserved as raw markup. + +const std = @import("std"); +const c = @import("c"); + +/// SQLITE_STATIC: caller manages string lifetime; SQLite must not free it. +const sqlite_static: c.sqlite3_destructor_type = null; + +const exit_usage: u8 = 1; +const exit_parse: u8 = 2; +const exit_sql: u8 = 3; + +fn fatal(comptime fmt: []const u8, writer: *std.Io.Writer, code: u8, args: anytype) noreturn { + writer.print("error: " ++ fmt ++ "\n", args) catch |err| std.log.err("failed to write error: {}", .{err}); + writer.flush() catch |err| std.log.err("failed to flush: {}", .{err}); + std.process.exit(code); +} + +fn createAllTextTable( + allocator: std.mem.Allocator, + db: *c.sqlite3, + cols: []const []const u8, + writer: *std.Io.Writer, +) void { + var sql: std.ArrayList(u8) = .empty; + defer sql.deinit(allocator); + sql.appendSlice(allocator, "CREATE TABLE t (") catch fatal("out of memory", writer, exit_parse, .{}); + for (cols, 0..) |col, i| { + if (i > 0) sql.appendSlice(allocator, ", ") catch fatal("out of memory", writer, exit_parse, .{}); + sql.append(allocator, '"') catch fatal("out of memory", writer, exit_parse, .{}); + for (col) |ch| { + if (ch == '"') sql.append(allocator, '"') catch fatal("out of memory", writer, exit_parse, .{}); + sql.append(allocator, ch) catch fatal("out of memory", writer, exit_parse, .{}); + } + sql.appendSlice(allocator, "\" TEXT") catch fatal("out of memory", writer, exit_parse, .{}); + } + sql.appendSlice(allocator, ")") catch fatal("out of memory", writer, exit_parse, .{}); + sql.append(allocator, 0) catch fatal("out of memory", writer, exit_parse, .{}); + var errmsg: [*c]u8 = null; + if (c.sqlite3_exec(db, sql.items.ptr, null, null, &errmsg) != c.SQLITE_OK) { + const msg = if (errmsg != null) std.mem.span(errmsg) else std.mem.span(c.sqlite3_errmsg(db)); + if (errmsg != null) c.sqlite3_free(errmsg); + fatal("{s}", writer, exit_sql, .{msg}); + } +} + +fn prepareInsertStmt( + allocator: std.mem.Allocator, + db: *c.sqlite3, + n: usize, + writer: *std.Io.Writer, +) *c.sqlite3_stmt { + var sql: std.ArrayList(u8) = .empty; + defer sql.deinit(allocator); + sql.appendSlice(allocator, "INSERT INTO t VALUES (") catch fatal("out of memory", writer, exit_parse, .{}); + for (0..n) |i| { + if (i > 0) sql.append(allocator, ',') catch fatal("out of memory", writer, exit_parse, .{}); + sql.append(allocator, '?') catch fatal("out of memory", writer, exit_parse, .{}); + } + sql.appendSlice(allocator, ")") catch fatal("out of memory", writer, exit_parse, .{}); + sql.append(allocator, 0) catch fatal("out of memory", writer, exit_parse, .{}); + var stmt: ?*c.sqlite3_stmt = null; + if (c.sqlite3_prepare_v2(db, sql.items.ptr, -1, &stmt, null) != c.SQLITE_OK) + fatal("{s}", writer, exit_sql, .{std.mem.span(c.sqlite3_errmsg(db))}); + return stmt.?; +} + +fn beginTransaction(db: *c.sqlite3, writer: *std.Io.Writer) void { + var errmsg: [*c]u8 = null; + if (c.sqlite3_exec(db, "BEGIN TRANSACTION", null, null, &errmsg) != c.SQLITE_OK) { + const msg = if (errmsg != null) std.mem.span(errmsg) else std.mem.span(c.sqlite3_errmsg(db)); + if (errmsg != null) c.sqlite3_free(errmsg); + fatal("{s}", writer, exit_sql, .{msg}); + } +} + +fn commitTransaction(db: *c.sqlite3, writer: *std.Io.Writer) void { + var errmsg: [*c]u8 = null; + if (c.sqlite3_exec(db, "COMMIT", null, null, &errmsg) != c.SQLITE_OK) { + const msg = if (errmsg != null) std.mem.span(errmsg) else std.mem.span(c.sqlite3_errmsg(db)); + if (errmsg != null) c.sqlite3_free(errmsg); + fatal("{s}", writer, exit_sql, .{msg}); + } + if (errmsg != null) c.sqlite3_free(errmsg); +} + +// ─── XML escaping ───────────────────────────────────── + +/// writeXmlEscaped(writer, s) → !void +/// +/// Pre: s is a valid UTF-8 slice +/// Post: s is emitted to writer with XML character entity escaping: +/// '&' → "&", '<' → "<", '>' → ">", +/// '"' → """, '\'' → "'" +pub fn writeXmlEscaped(writer: *std.Io.Writer, s: []const u8) !void { + for (s) |ch| { + switch (ch) { + '&' => try writer.writeAll("&"), + '<' => try writer.writeAll("<"), + '>' => try writer.writeAll(">"), + '"' => try writer.writeAll("""), + '\'' => try writer.writeAll("'"), + else => try writer.writeByte(ch), + } + } +} + +/// decodeEntities(allocator, s) → ![]u8 +/// +/// Pre: s is a valid UTF-8 slice, possibly containing XML entity references +/// Post: &→&, <→<, >→>, "→", '→' +/// Returns a newly allocated slice; caller must free. +fn decodeEntities(allocator: std.mem.Allocator, s: []const u8) ![]u8 { + var out: std.ArrayList(u8) = .empty; + errdefer out.deinit(allocator); + var i: usize = 0; + // Loop invariant: out contains the decoded prefix of s[0..i] + // Bounding function: s.len - i + while (i < s.len) { + if (s[i] == '&') { + if (std.mem.startsWith(u8, s[i..], "&")) { + try out.append(allocator, '&'); + i += 5; + } else if (std.mem.startsWith(u8, s[i..], "<")) { + try out.append(allocator, '<'); + i += 4; + } else if (std.mem.startsWith(u8, s[i..], ">")) { + try out.append(allocator, '>'); + i += 4; + } else if (std.mem.startsWith(u8, s[i..], """)) { + try out.append(allocator, '"'); + i += 6; + } else if (std.mem.startsWith(u8, s[i..], "'")) { + try out.append(allocator, '\''); + i += 6; + } else { + // Unknown or numeric entity — pass through as-is + try out.append(allocator, s[i]); + i += 1; + } + } else { + try out.append(allocator, s[i]); + i += 1; + } + } + return out.toOwnedSlice(allocator); +} + +// ─── Output formatting ──────────────────────────────── + +/// writeXmlHeader(writer, root_name) → !void +/// +/// Pre: root_name is a valid XML element name +/// Post: XML declaration and opening root element written: +/// \n\n +pub fn writeXmlHeader(writer: *std.Io.Writer, root_name: []const u8) !void { + try writer.writeAll("\n"); + try writer.writeByte('<'); + try writer.writeAll(root_name); + try writer.writeAll(">\n"); +} + +/// writeXmlRow(stmt, col_count, col_names, writer, row_name) → !void +/// +/// Pre: sqlite3_step returned SQLITE_ROW for stmt +/// col_count = sqlite3_column_count(stmt) > 0 +/// col_names.len ≥ col_count; row_name is a valid XML element name +/// Post: compact row written: value...\n +/// NULL → empty element body; all text values are XML-escaped +pub fn writeXmlRow( + stmt: *c.sqlite3_stmt, + col_count: c_int, + col_names: []const [*:0]const u8, + writer: *std.Io.Writer, + row_name: []const u8, +) !void { + try writer.writeByte('<'); + try writer.writeAll(row_name); + try writer.writeByte('>'); + // Loop invariant I: columns 0..i-1 have been written + // Bounding function: col_count - i + var i: c_int = 0; + while (i < col_count) : (i += 1) { + const name = std.mem.span(col_names[@intCast(i)]); + try writer.writeByte('<'); + try writer.writeAll(name); + try writer.writeByte('>'); + switch (c.sqlite3_column_type(stmt, i)) { + c.SQLITE_NULL => {}, + c.SQLITE_INTEGER => try writer.print("{d}", .{c.sqlite3_column_int64(stmt, i)}), + c.SQLITE_FLOAT => { + const f = c.sqlite3_column_double(stmt, i); + if (f == @trunc(f) and !std.math.isInf(f) and !std.math.isNan(f)) { + try writer.print("{d}", .{@as(i64, @intFromFloat(f))}); + } else { + try writer.print("{d}", .{f}); + } + }, + else => { + const ptr = c.sqlite3_column_text(stmt, i); + if (ptr != null) { + try writeXmlEscaped(writer, std.mem.span(@as([*:0]const u8, @ptrCast(ptr)))); + } + }, + } + try writer.writeAll("'); + } + try writer.writeAll("\n"); +} + +/// writeXmlFooter(writer, root_name) → !void +/// +/// Pre: root_name is a valid XML element name +/// Post: closing root element written: \n +pub fn writeXmlFooter(writer: *std.Io.Writer, root_name: []const u8) !void { + try writer.writeAll("\n"); +} + +// ─── XML Parser ─────────────────────────────────────── + +/// Minimal row-based XML parser with line/column error reporting. +/// +/// Supported constructs: +/// XML declaration, comments, processing instructions (all skipped in prologue) +/// Root element with arbitrary attributes +/// Row elements (direct children of root) with arbitrary attributes +/// Column elements: text content (entities decoded) or nested elements (raw XML) +/// CDATA sections (treated as raw content markup) +/// +/// Usage: +/// var p = XmlParser.init(data); +/// p.skipPrologue(err_writer); +/// const root = p.readRootOpen(err_writer); +/// while (try p.nextRow(allocator, root, err_writer)) |cols| { +/// defer { for (cols) |col| { if (col.value) |v| allocator.free(v); } allocator.free(cols); } +/// // use cols[i].name and cols[i].value +/// } +pub const XmlParser = struct { + data: []const u8, + pos: usize, + line: usize, + col: usize, + + /// A single column extracted from a row element. + pub const Column = struct { + /// Element name — a slice of the parser's data buffer (not allocated). + name: []const u8, + /// Decoded text content, or raw XML for mixed/nested content. + /// Null for self-closing elements (). Owned: free with allocator. + value: ?[]u8, + }; + + pub fn init(data: []const u8) XmlParser { + return .{ .data = data, .pos = 0, .line = 1, .col = 1 }; + } + + // ─── Primitives ────────────────────────────────────── + + fn peek(self: *const XmlParser) ?u8 { + return if (self.pos < self.data.len) self.data[self.pos] else null; + } + + fn advance(self: *XmlParser) void { + if (self.pos >= self.data.len) return; + if (self.data[self.pos] == '\n') { + self.line += 1; + self.col = 1; + } else { + self.col += 1; + } + self.pos += 1; + } + + fn skipWs(self: *XmlParser) void { + while (self.peek()) |ch| switch (ch) { + ' ', '\t', '\r', '\n' => self.advance(), + else => break, + }; + } + + fn startsWith(self: *const XmlParser, s: []const u8) bool { + return self.pos + s.len <= self.data.len and + std.mem.eql(u8, self.data[self.pos .. self.pos + s.len], s); + } + + fn fatalAt(self: *const XmlParser, comptime fmt: []const u8, err_writer: *std.Io.Writer, args: anytype) noreturn { + err_writer.print("error: xml: line {d}, col {d}: ", .{ self.line, self.col }) catch |err| std.log.err("failed to write error: {}", .{err}); + err_writer.print(fmt ++ "\n", args) catch |err| std.log.err("failed to write error: {}", .{err}); + err_writer.flush() catch |err| std.log.err("failed to flush: {}", .{err}); + std.process.exit(exit_parse); + } + + // ─── Skip helpers ──────────────────────────────────── + + /// Advance past the first occurrence of `delim`; fatal if not found. + fn skipUntilStr(self: *XmlParser, comptime delim: []const u8, err_writer: *std.Io.Writer) void { + while (self.pos + delim.len <= self.data.len) { + if (std.mem.eql(u8, self.data[self.pos .. self.pos + delim.len], delim)) { + for (delim) |_| self.advance(); + return; + } + self.advance(); + } + self.fatalAt("unexpected end of input looking for '{s}'", err_writer, .{delim}); + } + + fn skipComment(self: *XmlParser, err_writer: *std.Io.Writer) void { + // Pre: positioned at "", err_writer); + } + + fn skipProcessingInstruction(self: *XmlParser, err_writer: *std.Io.Writer) void { + // Pre: positioned at "", err_writer); + } + + fn skipWsAndMisc(self: *XmlParser, err_writer: *std.Io.Writer) void { + // Loop invariant: all whitespace and misc nodes before self.pos have been consumed + // Bounding function: self.data.len - self.pos + while (true) { + self.skipWs(); + if (self.startsWith("