From c6407be72e8011e2a9a2030d1e11962449765c14 Mon Sep 17 00:00:00 2001 From: Christopher Tso Date: Mon, 27 Apr 2026 18:52:32 +0200 Subject: [PATCH 1/2] refactor(core): remove typed governance schema, generalize metadata merge (Phase 2 of #1172) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Delete GovernanceMetadataSchema, GovernanceMetadata type, and governance field from EvalMetadata — governance data travels via case-level metadata: Record<string, unknown> - Delete KNOWN_GOVERNANCE_FIELDS, EU_AI_ACT_RISK_TIERS, validateGovernance, isWellFormedControlId, and extractGovernanceBlock from eval-validator; lint moves to agentv-compliance skill + CI Action - Generalise mergeCaseGovernance → mergeSuiteMetadataPayload: same array-concat-dedup / scalar-case-wins rules now apply to all keys under metadata, not just governance - JSONL metadata.governance output is byte-identical to main (verified with dry-run UAT) Closes #1172 (Phase 2) Co-Authored-By: Claude Sonnet 4.6 --- packages/core/src/evaluation/metadata.ts | 69 +-------- .../evaluation/validation/eval-validator.ts | 135 ------------------ packages/core/src/evaluation/yaml-parser.ts | 69 +++++---- .../core/test/evaluation/metadata.test.ts | 45 +----- .../validation/eval-validator.test.ts | 120 ---------------- .../evaluation/yaml-parser-metadata.test.ts | 26 ---- 6 files changed, 37 insertions(+), 427 deletions(-) diff --git a/packages/core/src/evaluation/metadata.ts b/packages/core/src/evaluation/metadata.ts index bb740042..586281dd 100644 --- a/packages/core/src/evaluation/metadata.ts +++ b/packages/core/src/evaluation/metadata.ts @@ -1,49 +1,6 @@ import { z } from 'zod'; import type { JsonObject } from './types.js'; -/** - * Optional governance block on suite-level `EvalMetadata` and case-level `EvalTest.metadata`.
- * - * The schema is intentionally permissive: every field is optional, unknown fields pass through, - * and value validation is delegated to a soft-warning lint in `eval-validator.ts`. The block - * captures convergence on public AI-governance taxonomies (NIST AI RMF, ISO/IEC 42001, EU AI Act, - * OWASP LLM Top 10, MITRE ATLAS) without prescribing a workflow or hard-coding ID lists. - * - * Versioning lives in field names (`owasp_llm_top_10_2025`) so that when a standard revises and - * redefines IDs (OWASP LLM Top 10 v2025 vs v1.1), agentv ships a new field rather than - * silently changing the meaning of existing tags. - * - * To extend with a new versioned taxonomy: add an optional `string[]` field here, document it in - * the README under examples/red-team/, and propagate through the `agentv eval` JSONL output. - */ -const GovernanceMetadataSchema = z - .object({ - /** Schema version of this governance block itself (lets the block evolve). */ - schema_version: z.string().optional(), - /** OWASP LLM Top 10 v2025 IDs (LLM01..LLM10). */ - owasp_llm_top_10_2025: z.array(z.string()).optional(), - /** OWASP Top 10 for Agentic Applications v2025 (T1..T10). */ - owasp_agentic_top_10_2025: z.array(z.string()).optional(), - /** MITRE ATLAS technique IDs (e.g. AML.T0051, AML.T0075). */ - mitre_atlas: z.array(z.string()).optional(), - /** - * Cross-framework controls. String format: `-:`. - * Custom prefixes are first-class (e.g. `INTERNAL-AI-POLICY-3.2:CTRL-7`). - */ - controls: z.array(z.string()).optional(), - /** - * Risk vocabulary anchored to EU AI Act terminology by default. - * Allowed values: `prohibited | high | limited | minimal`. - * Other strings (e.g. NIST 800-30 `low | moderate | high`) are accepted with a soft warning. - */ - risk_tier: z.string().optional(), - /** Human-readable owner (team name, group). 
*/ - owner: z.string().optional(), - }) - .passthrough(); - -export type GovernanceMetadata = z.infer; - const MetadataSchema = z.object({ name: z .string() @@ -61,35 +18,12 @@ const MetadataSchema = z.object({ agentv: z.string().optional(), }) .optional(), - governance: GovernanceMetadataSchema.optional(), }); export type EvalMetadata = z.infer; -/** - * Extract the governance block from a suite-level YAML. Accepts either: - * - top-level `governance:` (consistent with `description`, `tags`, etc.) - * - nested `metadata.governance:` (matches the case-level shape) - * Top-level wins if both are present. - */ -function extractGovernance(suite: JsonObject): unknown { - if (suite.governance !== undefined) { - return suite.governance; - } - const wrapper = suite.metadata; - if (wrapper && typeof wrapper === 'object' && !Array.isArray(wrapper)) { - return (wrapper as Record).governance; - } - return undefined; -} - export function parseMetadata(suite: JsonObject): EvalMetadata | undefined { - const hasName = typeof suite.name === 'string'; - const governanceRaw = extractGovernance(suite); - - // Trigger metadata parsing when `name` is present, OR when a governance block exists - // (so authors can attach governance to suites that don't have a name). - if (!hasName && governanceRaw === undefined) { + if (typeof suite.name !== 'string') { return undefined; } @@ -101,6 +35,5 @@ export function parseMetadata(suite: JsonObject): EvalMetadata | undefined { tags: suite.tags, license: suite.license, requires: suite.requires, - governance: governanceRaw, }); } diff --git a/packages/core/src/evaluation/validation/eval-validator.ts b/packages/core/src/evaluation/validation/eval-validator.ts index be91f3b5..b72e59e3 100644 --- a/packages/core/src/evaluation/validation/eval-validator.ts +++ b/packages/core/src/evaluation/validation/eval-validator.ts @@ -197,10 +197,6 @@ export async function validateEvalFile(filePath: string): Promise-:` control string. 
Custom prefixes are first-class - * (e.g. `INTERNAL-AI-POLICY-3.2:CTRL-7`) — only the *shape* is checked. Returns true if the - * string has the required `:` separator AND the framework segment ends with a version-looking - * token (digit-or-dot suffix, e.g. `1.0`, `2024`, `3.2`). Misses on this heuristic produce - * a soft warning, never an error. - */ -function isWellFormedControlId(value: string): boolean { - const colonIdx = value.indexOf(':'); - if (colonIdx <= 0 || colonIdx === value.length - 1) { - return false; - } - const prefix = value.slice(0, colonIdx); - const lastSegment = prefix.split('-').pop() ?? ''; - // Version-looking: starts with a digit or contains a dot. - return /[0-9]/.test(lastSegment.charAt(0)) || lastSegment.includes('.'); -} - -/** Top-level `governance:` wins; falls back to nested `metadata.governance:`. */ -function extractGovernanceBlock(parsed: JsonObject): JsonValue | undefined { - if (parsed.governance !== undefined) { - return parsed.governance; - } - if (isObject(parsed.metadata)) { - return (parsed.metadata as JsonObject).governance; - } - return undefined; -} - -function validateGovernance( - block: JsonValue | undefined, - location: string, - filePath: string, - errors: ValidationError[], -): void { - if (block === undefined) return; - if (!isObject(block)) { - errors.push({ - severity: 'warning', - filePath, - location, - message: `'${location}' must be an object; got ${Array.isArray(block) ? 'array' : typeof block}.`, - }); - return; - } - - for (const key of Object.keys(block)) { - if (!KNOWN_GOVERNANCE_FIELDS.has(key)) { - errors.push({ - severity: 'warning', - filePath, - location: `${location}.${key}`, - message: `Unknown governance field '${key}'. 
Known fields: ${[...KNOWN_GOVERNANCE_FIELDS].join(', ')}.`, - }); - } - } - - const controls = block.controls; - if (controls !== undefined) { - if (!Array.isArray(controls)) { - errors.push({ - severity: 'warning', - filePath, - location: `${location}.controls`, - message: "'controls' should be an array of '-:' strings.", - }); - } else { - for (let i = 0; i < controls.length; i++) { - const entry = controls[i]; - if (typeof entry !== 'string') { - errors.push({ - severity: 'warning', - filePath, - location: `${location}.controls[${i}]`, - message: 'Control entries must be strings.', - }); - } else if (!isWellFormedControlId(entry)) { - errors.push({ - severity: 'warning', - filePath, - location: `${location}.controls[${i}]`, - message: `Malformed control '${entry}'. Expected '-:' (e.g. NIST-AI-RMF-1.0:MEASURE-2.7). Custom prefixes are allowed.`, - }); - } - } - } - } - - const riskTier = block.risk_tier; - if ( - riskTier !== undefined && - typeof riskTier === 'string' && - !EU_AI_ACT_RISK_TIERS.has(riskTier) - ) { - errors.push({ - severity: 'warning', - filePath, - location: `${location}.risk_tier`, - message: `'risk_tier: ${riskTier}' is outside EU AI Act vocabulary (prohibited | high | limited | minimal). Other vocabularies (e.g. 
NIST 800-30) are accepted but flagged.`, - }); - } -} diff --git a/packages/core/src/evaluation/yaml-parser.ts b/packages/core/src/evaluation/yaml-parser.ts index 444ec1ce..94b96c8c 100644 --- a/packages/core/src/evaluation/yaml-parser.ts +++ b/packages/core/src/evaluation/yaml-parser.ts @@ -79,7 +79,7 @@ export { } from './loaders/config-loader.js'; export type { AgentVConfig, CacheConfig, ExecutionDefaults } from './loaders/config-loader.js'; export { detectFormat } from './loaders/jsonl-parser.js'; -export type { EvalMetadata, GovernanceMetadata } from './metadata.js'; +export type { EvalMetadata } from './metadata.js'; const ANSI_YELLOW = '\u001b[33m'; const ANSI_RED = '\u001b[31m'; @@ -362,7 +362,7 @@ async function loadTestsFromYaml( const suiteWorkspace = await resolveWorkspaceConfig(suite.workspace, evalFileDir); // Suite-level governance block (top-level `governance:` wins over `metadata.governance:`). - // Per-case `metadata.governance` merges with this — arrays concatenate, scalars override. + // Merged into each case's `metadata.governance` via mergeSuiteMetadataPayload. const suiteGovernance = extractSuiteGovernance(suite); // Resolve suite-level input (prepended to each test's input messages) @@ -547,12 +547,13 @@ async function loadTestsFromYaml( const caseWorkspace = await resolveWorkspaceConfig(testCaseConfig.workspace, evalFileDir); const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace); - // Parse per-case metadata, then merge suite-level governance onto the case. - // Arrays concatenate (suite controls + case controls), scalars on the case win. + // Parse per-case metadata, then merge suite-level metadata payload. + // Arrays concatenate (suite-first, deduplicated), scalars on the case win. const rawCaseMetadata = isJsonObject(testCaseConfig.metadata) ? 
(testCaseConfig.metadata as Record) : undefined; - const metadata = mergeCaseGovernance(rawCaseMetadata, suiteGovernance); + const suitePayload = suiteGovernance !== undefined ? { governance: suiteGovernance } : undefined; + const metadata = mergeSuiteMetadataPayload(rawCaseMetadata, suitePayload); // Extract per-test targets override (matrix evaluation) const caseTargets = extractTargetsFromTestCase(testCaseConfig as JsonObject); @@ -953,44 +954,40 @@ function extractSuiteGovernance(suite: RawTestSuite): Record | } /** - * Merge suite-level governance into a case's metadata. Arrays concatenate (suite controls + - * case controls, deduplicated); scalar fields on the case override the suite. The case's - * other metadata keys are preserved verbatim. Returns undefined if neither side has anything. + * Merge a suite-level metadata payload into a case's metadata map. The same rules apply to + * every key in the payload: arrays concatenate suite-first and deduplicate; nested objects + * recurse; scalar fields on the case win; suite fills in keys the case omits. */ -function mergeCaseGovernance( +function mergeSuiteMetadataPayload( caseMetadata: Record | undefined, - suiteGovernance: Record | undefined, + suitePayload: Record | undefined, ): Record | undefined { - const caseGovernance = isJsonObject(caseMetadata?.governance) - ? (caseMetadata?.governance as Record) - : undefined; - - if (!suiteGovernance && !caseGovernance) { - return caseMetadata; - } - - const merged: Record = { ...(suiteGovernance ?? {}) }; - if (caseGovernance) { - for (const [key, caseValue] of Object.entries(caseGovernance)) { - const suiteValue = merged[key]; - if (Array.isArray(suiteValue) && Array.isArray(caseValue)) { - const seen = new Set(); - const out: unknown[] = []; - for (const v of [...suiteValue, ...caseValue]) { - const key = typeof v === 'string' ? 
v : JSON.stringify(v); - if (!seen.has(key)) { - seen.add(key); - out.push(v); - } + if (!suitePayload) return caseMetadata; + + const result: Record = { ...(caseMetadata ?? {}) }; + for (const [key, suiteVal] of Object.entries(suitePayload)) { + const caseVal = result[key]; + if (Array.isArray(suiteVal) && Array.isArray(caseVal)) { + const seen = new Set(); + const out: unknown[] = []; + for (const v of [...suiteVal, ...caseVal]) { + const k = typeof v === 'string' ? v : JSON.stringify(v); + if (!seen.has(k)) { + seen.add(k); + out.push(v); } - merged[key] = out; - } else { - merged[key] = caseValue; } + result[key] = out; + } else if (isJsonObject(suiteVal) && isJsonObject(caseVal)) { + result[key] = mergeSuiteMetadataPayload( + caseVal as Record, + suiteVal as Record, + ); + } else if (caseVal === undefined) { + result[key] = suiteVal; } } - - return { ...(caseMetadata ?? {}), governance: merged }; + return result; } function logWarning(message: string, details?: readonly string[]): void { diff --git a/packages/core/test/evaluation/metadata.test.ts b/packages/core/test/evaluation/metadata.test.ts index 96cec9ab..e444f720 100644 --- a/packages/core/test/evaluation/metadata.test.ts +++ b/packages/core/test/evaluation/metadata.test.ts @@ -53,49 +53,10 @@ describe('parseMetadata', () => { }); }); - it('parses an optional governance block at the top level', () => { + it('returns undefined when only governance is present and name is absent', () => { const result = parseMetadata({ - name: 'red-team', - governance: { - schema_version: '1.0', - owasp_llm_top_10_2025: ['LLM01'], - controls: ['NIST-AI-RMF-1.0:MEASURE-2.7'], - risk_tier: 'high', - }, + governance: { risk_tier: 'high_risk' }, }); - expect(result?.governance).toEqual({ - schema_version: '1.0', - owasp_llm_top_10_2025: ['LLM01'], - controls: ['NIST-AI-RMF-1.0:MEASURE-2.7'], - risk_tier: 'high', - }); - }); - - it('parses governance from nested metadata.governance form', () => { - const result = parseMetadata({ 
- name: 'red-team', - metadata: { - governance: { owasp_llm_top_10_2025: ['LLM06'], owner: 'security-team' }, - }, - }); - expect(result?.governance).toEqual({ - owasp_llm_top_10_2025: ['LLM06'], - owner: 'security-team', - }); - }); - - it('returns metadata when only governance is present (no name)', () => { - const result = parseMetadata({ - governance: { risk_tier: 'high' }, - }); - expect(result).toEqual({ governance: { risk_tier: 'high' } }); - }); - - it('passes unknown governance keys through (custom taxonomies extend without forking)', () => { - const result = parseMetadata({ - name: 'red-team', - governance: { custom_company_taxonomy: ['X-1'] }, - }); - expect(result?.governance).toEqual({ custom_company_taxonomy: ['X-1'] }); + expect(result).toBeUndefined(); }); }); diff --git a/packages/core/test/evaluation/validation/eval-validator.test.ts b/packages/core/test/evaluation/validation/eval-validator.test.ts index 975be6a0..7992160b 100644 --- a/packages/core/test/evaluation/validation/eval-validator.test.ts +++ b/packages/core/test/evaluation/validation/eval-validator.test.ts @@ -495,126 +495,6 @@ tests: }); }); - describe('governance metadata validation', () => { - it('passes a well-formed governance block with custom prefixes (no warnings)', async () => { - const filePath = path.join(tempDir, 'governance-ok.yaml'); - await writeFile( - filePath, - `name: red-team -governance: - schema_version: "1.0" - owasp_llm_top_10_2025: [LLM01] - controls: - - NIST-AI-RMF-1.0:MEASURE-2.7 - - INTERNAL-AI-POLICY-3.2:CTRL-7 - risk_tier: high -tests: - - id: case-1 - input: "Query" -`, - ); - - const result = await validateEvalFile(filePath); - expect(result.valid).toBe(true); - const warnings = result.errors.filter((e) => e.severity === 'warning'); - expect(warnings).toHaveLength(0); - }); - - it('warns on unknown governance fields without erroring', async () => { - const filePath = path.join(tempDir, 'governance-typo.yaml'); - await writeFile( - filePath, - `name: 
red-team -governance: - owasp_lm_top_10_2025: [LLM01] -tests: - - id: case-1 - input: "Query" -`, - ); - - const result = await validateEvalFile(filePath); - expect(result.valid).toBe(true); - const warnings = result.errors.filter((e) => e.severity === 'warning'); - expect( - warnings.some((w) => w.message.includes("Unknown governance field 'owasp_lm_top_10_2025'")), - ).toBe(true); - }); - - it('warns on malformed control strings (missing version segment)', async () => { - const filePath = path.join(tempDir, 'governance-malformed-control.yaml'); - await writeFile( - filePath, - `name: red-team -governance: - controls: ["NIST-AI-RMF:MEASURE-2.7"] -tests: - - id: case-1 - input: "Query" -`, - ); - - const result = await validateEvalFile(filePath); - expect(result.valid).toBe(true); - const warnings = result.errors.filter((e) => e.severity === 'warning'); - expect(warnings.some((w) => w.message.includes('Malformed control'))).toBe(true); - }); - - it('warns on risk_tier outside EU AI Act vocabulary', async () => { - const filePath = path.join(tempDir, 'governance-risk.yaml'); - await writeFile( - filePath, - `name: red-team -governance: - risk_tier: critical -tests: - - id: case-1 - input: "Query" -`, - ); - - const result = await validateEvalFile(filePath); - expect(result.valid).toBe(true); - const warnings = result.errors.filter((e) => e.severity === 'warning'); - expect(warnings.some((w) => w.message.includes('EU AI Act vocabulary'))).toBe(true); - }); - - it('warns on case-level governance typos', async () => { - const filePath = path.join(tempDir, 'governance-case.yaml'); - await writeFile( - filePath, - `tests: - - id: case-1 - input: "Query" - metadata: - governance: - owasp_lm_top_10_2025: [LLM01] -`, - ); - - const result = await validateEvalFile(filePath); - expect(result.valid).toBe(true); - const warnings = result.errors.filter((e) => e.severity === 'warning'); - expect(warnings.some((w) => 
w.location?.includes('tests[0].metadata.governance'))).toBe(true); - }); - - it('does not warn when no governance block is present', async () => { - const filePath = path.join(tempDir, 'governance-absent.yaml'); - await writeFile( - filePath, - `tests: - - id: case-1 - input: "Query" -`, - ); - - const result = await validateEvalFile(filePath); - expect(result.valid).toBe(true); - const warnings = result.errors.filter((e) => e.severity === 'warning'); - expect(warnings.filter((w) => w.location?.startsWith('governance'))).toHaveLength(0); - }); - }); - describe('tests as string path', () => { it('validates tests string has valid extension', async () => { const filePath = path.join(tempDir, 'tests-bad-ext.yaml'); diff --git a/packages/core/test/evaluation/yaml-parser-metadata.test.ts b/packages/core/test/evaluation/yaml-parser-metadata.test.ts index bb27f8c3..12dea2bd 100644 --- a/packages/core/test/evaluation/yaml-parser-metadata.test.ts +++ b/packages/core/test/evaluation/yaml-parser-metadata.test.ts @@ -133,32 +133,6 @@ tests: expect(suite.metadata?.tags).toEqual(['unit', 'integration', 'smoke']); }); - it('parses suite-level governance from top-level governance:', async () => { - const { filePath, dir } = createTempYaml(` -name: red-team -governance: - schema_version: "1.0" - owasp_llm_top_10_2025: [LLM01] - controls: - - NIST-AI-RMF-1.0:MEASURE-2.7 - risk_tier: high - owner: security-team -tests: - - id: case-1 - criteria: "Refuses" - input: "Query" -`); - - const suite = await loadTestSuite(filePath, dir); - expect(suite.metadata?.governance).toEqual({ - schema_version: '1.0', - owasp_llm_top_10_2025: ['LLM01'], - controls: ['NIST-AI-RMF-1.0:MEASURE-2.7'], - risk_tier: 'high', - owner: 'security-team', - }); - }); - it('merges case-level governance into suite-level (arrays concat, scalars override)', async () => { const { filePath, dir } = createTempYaml(` name: red-team From 407c0e9520c0a6da003c7c9ca2c120a227db47a5 Mon Sep 17 00:00:00 2001 From: Christopher 
Tso Date: Mon, 27 Apr 2026 19:00:10 +0200 Subject: [PATCH 2/2] style: auto-fix biome format issues Co-Authored-By: Claude Sonnet 4.6 --- packages/core/src/evaluation/validation/eval-validator.ts | 1 - packages/core/src/evaluation/yaml-parser.ts | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/core/src/evaluation/validation/eval-validator.ts b/packages/core/src/evaluation/validation/eval-validator.ts index b72e59e3..d3b71ddf 100644 --- a/packages/core/src/evaluation/validation/eval-validator.ts +++ b/packages/core/src/evaluation/validation/eval-validator.ts @@ -1008,4 +1008,3 @@ function validateConversationMode( } } } - diff --git a/packages/core/src/evaluation/yaml-parser.ts b/packages/core/src/evaluation/yaml-parser.ts index 94b96c8c..8aa397c4 100644 --- a/packages/core/src/evaluation/yaml-parser.ts +++ b/packages/core/src/evaluation/yaml-parser.ts @@ -552,7 +552,8 @@ async function loadTestsFromYaml( const rawCaseMetadata = isJsonObject(testCaseConfig.metadata) ? (testCaseConfig.metadata as Record) : undefined; - const suitePayload = suiteGovernance !== undefined ? { governance: suiteGovernance } : undefined; + const suitePayload = + suiteGovernance !== undefined ? { governance: suiteGovernance } : undefined; const metadata = mergeSuiteMetadataPayload(rawCaseMetadata, suitePayload); // Extract per-test targets override (matrix evaluation)