ci: Hard require that generated code is updated in CI#2183
ci: Hard require that generated code is updated in CI#2183cgwalters wants to merge 1 commit intobootc-dev:mainfrom
Conversation
`just validate` now verifies that generated code is updated. We got burned badly by someone editing the generated code, and not noticing until the release PR which reverted the change. While I was working on this, I wanted to preserve the property that e.g. ostree and other heavier dependencies aren't required on the host system - they're only part of the container build. So using `podman build --output`, we can build generated docs etc as part of the container build, and copy them out to the host in the Justfile. Assisted-by: OpenCode (Claude Sonnet 4.6) Signed-off-by: Colin Walters <walters@verbum.org>
There was a problem hiding this comment.
Code Review
This pull request refactors the generation and validation of project artifacts, including man pages, JSON schemas, and TMT integration plans. It introduces a containerized workflow for binary-derived files to ensure consistency and adds a --check mode to the xtask CLI for CI integration. Review feedback suggests refactoring duplicated JSON schema processing logic into a helper function, correcting a doc comment regarding disk access in TMT generation, and adopting a recursive strategy for man page discovery to improve maintainability.
| /// Update JSON schema files | ||
| #[context("Updating JSON schemas")] | ||
| fn update_json_schemas(sh: &Shell) -> Result<()> { | ||
| for (of, target) in [ | ||
| ("host", "docs/src/host-v1.schema.json"), | ||
| ("progress", "docs/src/progress-v0.schema.json"), | ||
| ] { | ||
| for (of, target) in JSON_SCHEMAS { | ||
| let schema = cmd!(sh, "cargo run -q -- internals print-json-schema --of={of}").read()?; | ||
| // Ensure a consistent trailing newline so files are stable across editors. | ||
| let schema = if schema.ends_with('\n') { | ||
| schema | ||
| } else { | ||
| format!("{schema}\n") | ||
| }; | ||
| std::fs::write(target, &schema)?; | ||
| println!("Updated {target}"); | ||
| } | ||
| Ok(()) | ||
| } | ||
|
|
||
| /// Update all generated files | ||
| /// This is the main command developers should use to update generated content. | ||
| /// It handles: | ||
| /// - Creating new man page templates for new commands | ||
| /// - Syncing CLI options to existing man pages | ||
| /// - Updating JSON schema files | ||
| #[context("Updating generated files")] | ||
| fn update_generated(sh: &Shell) -> Result<()> { | ||
| // Update man pages (create new templates + sync options) | ||
| man::update_manpages(sh)?; | ||
|
|
||
| // Update JSON schemas | ||
| update_json_schemas(sh)?; | ||
|
|
||
| // Update TMT integration.fmf | ||
| tmt::update_integration()?; | ||
|
|
||
| /// Check that JSON schema files are up to date. | ||
| /// Fails with an error if any file would change, similar to `cargo fmt --check`. | ||
| #[context("Checking JSON schemas")] | ||
| fn check_json_schemas(sh: &Shell) -> Result<()> { | ||
| for (of, target) in JSON_SCHEMAS { | ||
| let schema = cmd!(sh, "cargo run -q -- internals print-json-schema --of={of}").read()?; | ||
| // Apply the same normalization as update_json_schemas so we compare | ||
| // exactly what would be written to disk. | ||
| let generated = if schema.ends_with('\n') { | ||
| schema | ||
| } else { | ||
| format!("{schema}\n") | ||
| }; | ||
| let on_disk = | ||
| std::fs::read_to_string(target).with_context(|| format!("Reading {target}"))?; | ||
| if generated != on_disk { | ||
| anyhow::bail!( | ||
| "{target} is out of date; run `cargo xtask update-generated` to update it" | ||
| ); | ||
| } | ||
| } | ||
| Ok(()) | ||
| } |
There was a problem hiding this comment.
The logic for generating, normalizing, and checking JSON schemas is duplicated between update_json_schemas and check_json_schemas. Refactoring this into a helper function improves maintainability and ensures consistency in how schemas are processed (e.g., the trailing newline normalization).
/// Generate and normalize a JSON schema from the binary
fn generate_normalized_json_schema(sh: &Shell, of: &str) -> Result<String> {
let schema = cmd!(sh, "cargo run -q -- internals print-json-schema --of={of}").read()?;
// Ensure a consistent trailing newline so files are stable across editors.
Ok(if schema.ends_with('\n') {
schema
} else {
format!("{schema}\n")
})
}
/// Update JSON schema files
#[context("Updating JSON schemas")]
fn update_json_schemas(sh: &Shell) -> Result<()> {
for (of, target) in JSON_SCHEMAS {
let schema = generate_normalized_json_schema(sh, of)?;
std::fs::write(target, &schema)?;
println!("Updated {target}");
}
Ok(())
}
/// Check that JSON schema files are up to date.
/// Fails with an error if any file would change, similar to `cargo fmt --check`.
#[context("Checking JSON schemas")]
fn check_json_schemas(sh: &Shell) -> Result<()> {
for (of, target) in JSON_SCHEMAS {
let generated = generate_normalized_json_schema(sh, of)?;
let on_disk =
std::fs::read_to_string(target).with_context(|| format!("Reading {target}"))?;
if generated != on_disk {
anyhow::bail!(
"{target} is out of date; run `cargo xtask update-generated` to update it"
);
}
}
Ok(())
}| /// Pure function: compute the content of tests.fmf and integration.fmf from | ||
| /// the test file metadata in tmt/tests/booted/, without touching disk. |
There was a problem hiding this comment.
The doc comment for generate_integration states that it does not touch the disk, but the implementation performs several read operations (e.g., std::fs::read_dir and std::fs::read_to_string). It would be more accurate to state that it does not write to the disk or that it is side-effect free.
| /// Pure function: compute the content of tests.fmf and integration.fmf from | |
| /// the test file metadata in tmt/tests/booted/, without touching disk. | |
| /// Pure function: compute the content of tests.fmf and integration.fmf from | |
| /// the test file metadata in tmt/tests/booted/, without writing to disk. |
| let mut commands_to_check = Vec::new(); | ||
| for cmd in &cli_structure.subcommands { | ||
| commands_to_check.push(vec![cmd.name.clone()]); | ||
| for subcmd in &cmd.subcommands { | ||
| commands_to_check.push(vec![cmd.name.clone(), subcmd.name.clone()]); | ||
| } | ||
| } | ||
| for command_parts in &commands_to_check { | ||
| let filename = if command_parts.len() == 1 { | ||
| format!("bootc-{}.8.md", command_parts[0]) | ||
| } else { | ||
| format!("bootc-{}.8.md", command_parts.join("-")) | ||
| }; |
There was a problem hiding this comment.
This logic only checks for man pages up to two levels of subcommands. While the current CLI structure is shallow, using a recursive approach would be more robust and future-proof as the CLI grows. Additionally, the filename generation logic can be simplified by using command_parts.join("-") directly.
let mut commands_to_check = Vec::new();
fn collect_commands(cmd: &CliCommand, path: Vec<String>, acc: &mut Vec<Vec<String>>) {
for sub in &cmd.subcommands {
let mut sub_path = path.clone();
sub_path.push(sub.name.clone());
acc.push(sub_path.clone());
collect_commands(sub, sub_path, acc);
}
}
collect_commands(&cli_structure, Vec::new(), &mut commands_to_check);
for command_parts in &commands_to_check {
let filename = format!("bootc-{}.8.md", command_parts.join("-"));
just validatenow verifies that generated code is updated. We got burned badly by someone editing the generated code, and not noticing until the release PR which reverted the change.While I was working on this, I wanted to preserve the property that e.g. ostree and other heavier dependencies aren't required on the host system - they're only part of the container build.
So using
podman build --output, we can build generated docs etc as part of the container build, and copy them out to the host in the Justfile.Assisted-by: OpenCode (Claude Sonnet 4.6)