Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 1 addition & 68 deletions packages/core/src/evaluation/metadata.ts
Original file line number Diff line number Diff line change
@@ -1,49 +1,6 @@
import { z } from 'zod';
import type { JsonObject } from './types.js';

/**
 * Schema for the optional `governance` block carried by suite-level `EvalMetadata` and by
 * case-level `EvalTest.metadata`.
 *
 * Deliberately lenient: no field is required and keys not declared here are passed through
 * untouched. Value-level checks are left to the soft-warning lint in `eval-validator.ts`.
 * The block tags evals against public AI-governance taxonomies (NIST AI RMF, ISO/IEC 42001,
 * EU AI Act, OWASP LLM Top 10, MITRE ATLAS) without prescribing a workflow or hard-coding
 * ID lists.
 *
 * The taxonomy version is part of the field name (`owasp_llm_top_10_2025`). When a standard
 * is revised and its IDs are redefined (OWASP LLM Top 10 v2025 vs v1.1), agentv adds a new
 * field instead of silently changing what existing tags mean.
 *
 * Adding a new versioned taxonomy: declare another optional `string[]` field below, document
 * it in the README under examples/red-team/, and propagate it through the `agentv eval`
 * JSONL output.
 */
const GovernanceMetadataSchema = z
  .object({
    /** Version of the governance block's own schema, so the block itself can evolve. */
    schema_version: z.string().optional(),

    /** IDs from the OWASP LLM Top 10 v2025 (LLM01..LLM10). */
    owasp_llm_top_10_2025: z.string().array().optional(),

    /** IDs from the OWASP Top 10 for Agentic Applications v2025 (T1..T10). */
    owasp_agentic_top_10_2025: z.string().array().optional(),

    /** MITRE ATLAS technique IDs, e.g. AML.T0051 or AML.T0075. */
    mitre_atlas: z.string().array().optional(),

    /**
     * Cross-framework control references written as `<FRAMEWORK>-<VERSION>:<ID>`.
     * Custom prefixes are first-class (e.g. `INTERNAL-AI-POLICY-3.2:CTRL-7`).
     */
    controls: z.string().array().optional(),

    /**
     * Risk tier, anchored to EU AI Act terminology by default:
     * `prohibited | high | limited | minimal`.
     * Other strings (e.g. NIST 800-30 `low | moderate | high`) are accepted with a soft warning.
     */
    risk_tier: z.string().optional(),

    /** Human-readable owner, such as a team or group name. */
    owner: z.string().optional(),
  })
  .passthrough();

export type GovernanceMetadata = z.infer<typeof GovernanceMetadataSchema>;

const MetadataSchema = z.object({
name: z
.string()
Expand All @@ -61,35 +18,12 @@ const MetadataSchema = z.object({
agentv: z.string().optional(),
})
.optional(),
governance: GovernanceMetadataSchema.optional(),
});

export type EvalMetadata = z.infer<typeof MetadataSchema>;

/**
 * Locate the governance block in a suite-level YAML object.
 *
 * Two placements are recognised:
 * - a top-level `governance:` key (alongside `description`, `tags`, etc.)
 * - a nested `metadata.governance:` key (the same shape cases use)
 *
 * The top-level value is returned whenever it is not `undefined`; the nested one is only
 * consulted otherwise, and only when `metadata` is a non-null, non-array object.
 *
 * @param suite - Parsed suite object.
 * @returns The raw, unvalidated governance value, or `undefined` when neither placement exists.
 */
function extractGovernance(suite: JsonObject): unknown {
  const topLevel = suite.governance;
  if (topLevel !== undefined) return topLevel;

  const nested = suite.metadata;
  const nestedIsPlainObject =
    typeof nested === 'object' && nested !== null && !Array.isArray(nested);
  return nestedIsPlainObject ? (nested as Record<string, unknown>).governance : undefined;
}

export function parseMetadata(suite: JsonObject): EvalMetadata | undefined {
const hasName = typeof suite.name === 'string';
const governanceRaw = extractGovernance(suite);

// Trigger metadata parsing when `name` is present, OR when a governance block exists
// (so authors can attach governance to suites that don't have a name).
if (!hasName && governanceRaw === undefined) {
if (typeof suite.name !== 'string') {
return undefined;
}

Expand All @@ -101,6 +35,5 @@ export function parseMetadata(suite: JsonObject): EvalMetadata | undefined {
tags: suite.tags,
license: suite.license,
requires: suite.requires,
governance: governanceRaw,
});
}
136 changes: 0 additions & 136 deletions packages/core/src/evaluation/validation/eval-validator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -197,10 +197,6 @@ export async function validateEvalFile(filePath: string): Promise<ValidationResu
// Validate metadata fields
validateMetadata(parsed, absolutePath, errors);

// Soft-warning lint for the optional governance block (suite-level).
// Accepts both top-level `governance:` and nested `metadata.governance:`.
validateGovernance(extractGovernanceBlock(parsed), 'governance', absolutePath, errors);

// Warn on deprecated or unknown top-level fields
for (const key of Object.keys(parsed)) {
const deprecationMessage = DEPRECATED_TOP_LEVEL_FIELDS.get(key);
Expand Down Expand Up @@ -463,16 +459,6 @@ export async function validateEvalFile(filePath: string): Promise<ValidationResu
// Cross-field validation for conversation mode
validateConversationMode(evalCase, location, absolutePath, errors);

// Soft-warning lint for case-level governance block.
if (isObject(evalCase.metadata)) {
validateGovernance(
(evalCase.metadata as JsonObject).governance,
`${location}.metadata.governance`,
absolutePath,
errors,
);
}

await validateWorkspaceConfig(
evalCase.workspace,
absolutePath,
Expand Down Expand Up @@ -1022,125 +1008,3 @@ function validateConversationMode(
}
}
}

/**
 * Keys the lint recognises inside an optional `governance` block. Any key outside this set
 * earns a soft warning, so typos such as `owasp_lm_top_10_2025` get noticed. Supporting a new
 * framework (e.g. a future `iso_42001_2027`) means extending this set in the same PR. That is
 * intentional: silently accepting unknown keys would let typos rot in production evals.
 *
 * Insertion order matters to readers: it is the order the names are listed in the
 * "Known fields" part of the unknown-field warning.
 */
const KNOWN_GOVERNANCE_FIELDS = new Set<string>([
  // Versioning of the block itself
  'schema_version',
  // Taxonomy tags
  'owasp_llm_top_10_2025',
  'owasp_agentic_top_10_2025',
  'mitre_atlas',
  // Cross-framework controls, risk tier and ownership
  'controls',
  'risk_tier',
  'owner',
]);

/**
 * Default `risk_tier` vocabulary, taken from the EU AI Act. A string outside this set is
 * still accepted but produces a soft warning.
 */
const EU_AI_ACT_RISK_TIERS = new Set<string>([
  'prohibited',
  'high',
  'limited',
  'minimal',
]);

/**
 * Shape check for a `<FRAMEWORK>-<VERSION>:<ID>` control string. Only the shape is inspected,
 * so custom prefixes such as `INTERNAL-AI-POLICY-3.2:CTRL-7` pass like any built-in framework.
 *
 * A value is considered well formed when both hold:
 * 1. the first `:` is neither the first nor the last character, and
 * 2. the last `-`-separated token before that `:` looks like a version, meaning it starts
 *    with a digit or contains a dot (`1.0`, `2024`, `3.2`).
 *
 * This is a heuristic intended to drive soft warnings, not hard errors.
 *
 * @param value - Candidate control string.
 * @returns `true` when the string matches the expected shape.
 */
function isWellFormedControlId(value: string): boolean {
  const separatorAt = value.indexOf(':');
  const hasPrefix = separatorAt > 0;
  const hasId = separatorAt !== value.length - 1;
  if (!hasPrefix || !hasId) {
    return false;
  }

  const segments = value.slice(0, separatorAt).split('-');
  const versionToken = segments[segments.length - 1] ?? '';
  // Version-looking: a leading digit, or a dot anywhere in the token.
  return /^[0-9]/.test(versionToken) || versionToken.includes('.');
}

/**
 * Pick the suite-level governance value out of a parsed eval file.
 *
 * A top-level `governance:` that is not `undefined` takes precedence; otherwise the nested
 * `metadata.governance:` is used when `metadata` passes `isObject`.
 *
 * @param parsed - Parsed eval file contents.
 * @returns The raw governance value, or `undefined` when neither placement is present.
 */
function extractGovernanceBlock(parsed: JsonObject): JsonValue | undefined {
  const topLevel = parsed.governance;
  if (topLevel !== undefined) return topLevel;

  const wrapper = parsed.metadata;
  return isObject(wrapper) ? (wrapper as JsonObject).governance : undefined;
}

/**
 * Soft-warning lint for an optional `governance` block. Every finding is appended to
 * `errors` with severity `'warning'`; this function never emits a hard error and never throws.
 *
 * Checks, in order:
 * 1. the block must be an object (otherwise one warning is emitted and linting stops),
 * 2. every key must be in `KNOWN_GOVERNANCE_FIELDS`,
 * 3. `controls`, when present, must be an array of strings that satisfy
 *    `isWellFormedControlId`,
 * 4. `risk_tier`, when it is a string, should be in `EU_AI_ACT_RISK_TIERS`.
 *
 * @param block - Raw governance value; `undefined` means "absent" and is a no-op.
 * @param location - Path of the block within the eval file, used as a prefix in warnings.
 * @param filePath - Path of the file being validated, copied onto each warning.
 * @param errors - Accumulator that warnings are pushed onto (mutated in place).
 */
function validateGovernance(
  block: JsonValue | undefined,
  location: string,
  filePath: string,
  errors: ValidationError[],
): void {
  if (block === undefined) return;

  const warn = (at: string, message: string): void => {
    errors.push({ severity: 'warning', filePath, location: at, message });
  };

  if (!isObject(block)) {
    // `typeof null` is 'object', which used to produce the self-contradictory
    // "must be an object; got object" for an empty `governance:` key. Name null explicitly.
    const actual = block === null ? 'null' : Array.isArray(block) ? 'array' : typeof block;
    warn(location, `'${location}' must be an object; got ${actual}.`);
    return;
  }

  // Flag keys the lint does not recognise (most often typos of a known field).
  for (const key of Object.keys(block)) {
    if (!KNOWN_GOVERNANCE_FIELDS.has(key)) {
      warn(
        `${location}.${key}`,
        `Unknown governance field '${key}'. Known fields: ${[...KNOWN_GOVERNANCE_FIELDS].join(', ')}.`,
      );
    }
  }

  const controls = block.controls;
  if (controls !== undefined) {
    if (!Array.isArray(controls)) {
      warn(
        `${location}.controls`,
        "'controls' should be an array of '<FRAMEWORK>-<VERSION>:<ID>' strings.",
      );
    } else {
      for (const [i, entry] of controls.entries()) {
        const at = `${location}.controls[${i}]`;
        if (typeof entry !== 'string') {
          warn(at, 'Control entries must be strings.');
        } else if (!isWellFormedControlId(entry)) {
          warn(
            at,
            `Malformed control '${entry}'. Expected '<FRAMEWORK>-<VERSION>:<ID>' (e.g. NIST-AI-RMF-1.0:MEASURE-2.7). Custom prefixes are allowed.`,
          );
        }
      }
    }
  }

  // Only string tiers are compared against the vocabulary; other types are not flagged here.
  const riskTier = block.risk_tier;
  if (typeof riskTier === 'string' && !EU_AI_ACT_RISK_TIERS.has(riskTier)) {
    warn(
      `${location}.risk_tier`,
      `'risk_tier: ${riskTier}' is outside EU AI Act vocabulary (prohibited | high | limited | minimal). Other vocabularies (e.g. NIST 800-30) are accepted but flagged.`,
    );
  }
}
70 changes: 34 additions & 36 deletions packages/core/src/evaluation/yaml-parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ export {
} from './loaders/config-loader.js';
export type { AgentVConfig, CacheConfig, ExecutionDefaults } from './loaders/config-loader.js';
export { detectFormat } from './loaders/jsonl-parser.js';
export type { EvalMetadata, GovernanceMetadata } from './metadata.js';
export type { EvalMetadata } from './metadata.js';

const ANSI_YELLOW = '\u001b[33m';
const ANSI_RED = '\u001b[31m';
Expand Down Expand Up @@ -362,7 +362,7 @@ async function loadTestsFromYaml(
const suiteWorkspace = await resolveWorkspaceConfig(suite.workspace, evalFileDir);

// Suite-level governance block (top-level `governance:` wins over `metadata.governance:`).
// Per-case `metadata.governance` merges with this — arrays concatenate, scalars override.
// Merged into each case's `metadata.governance` via mergeSuiteMetadataPayload.
const suiteGovernance = extractSuiteGovernance(suite);

// Resolve suite-level input (prepended to each test's input messages)
Expand Down Expand Up @@ -547,12 +547,14 @@ async function loadTestsFromYaml(
const caseWorkspace = await resolveWorkspaceConfig(testCaseConfig.workspace, evalFileDir);
const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);

// Parse per-case metadata, then merge suite-level governance onto the case.
// Arrays concatenate (suite controls + case controls), scalars on the case win.
// Parse per-case metadata, then merge suite-level metadata payload.
// Arrays concatenate (suite-first, deduplicated), scalars on the case win.
const rawCaseMetadata = isJsonObject(testCaseConfig.metadata)
? (testCaseConfig.metadata as Record<string, unknown>)
: undefined;
const metadata = mergeCaseGovernance(rawCaseMetadata, suiteGovernance);
const suitePayload =
suiteGovernance !== undefined ? { governance: suiteGovernance } : undefined;
const metadata = mergeSuiteMetadataPayload(rawCaseMetadata, suitePayload);

// Extract per-test targets override (matrix evaluation)
const caseTargets = extractTargetsFromTestCase(testCaseConfig as JsonObject);
Expand Down Expand Up @@ -953,44 +955,40 @@ function extractSuiteGovernance(suite: RawTestSuite): Record<string, unknown> |
}

/**
* Merge suite-level governance into a case's metadata. Arrays concatenate (suite controls +
* case controls, deduplicated); scalar fields on the case override the suite. The case's
* other metadata keys are preserved verbatim. Returns undefined if neither side has anything.
* Merge a suite-level metadata payload into a case's metadata map. The same rules apply to
* every key in the payload: arrays concatenate suite-first and deduplicate; nested objects
* recurse; scalar fields on the case win; suite fills in keys the case omits.
*/
function mergeCaseGovernance(
function mergeSuiteMetadataPayload(
caseMetadata: Record<string, unknown> | undefined,
suiteGovernance: Record<string, unknown> | undefined,
suitePayload: Record<string, unknown> | undefined,
): Record<string, unknown> | undefined {
const caseGovernance = isJsonObject(caseMetadata?.governance)
? (caseMetadata?.governance as Record<string, unknown>)
: undefined;

if (!suiteGovernance && !caseGovernance) {
return caseMetadata;
}

const merged: Record<string, unknown> = { ...(suiteGovernance ?? {}) };
if (caseGovernance) {
for (const [key, caseValue] of Object.entries(caseGovernance)) {
const suiteValue = merged[key];
if (Array.isArray(suiteValue) && Array.isArray(caseValue)) {
const seen = new Set<string>();
const out: unknown[] = [];
for (const v of [...suiteValue, ...caseValue]) {
const key = typeof v === 'string' ? v : JSON.stringify(v);
if (!seen.has(key)) {
seen.add(key);
out.push(v);
}
if (!suitePayload) return caseMetadata;

const result: Record<string, unknown> = { ...(caseMetadata ?? {}) };
for (const [key, suiteVal] of Object.entries(suitePayload)) {
const caseVal = result[key];
if (Array.isArray(suiteVal) && Array.isArray(caseVal)) {
const seen = new Set<string>();
const out: unknown[] = [];
for (const v of [...suiteVal, ...caseVal]) {
const k = typeof v === 'string' ? v : JSON.stringify(v);
if (!seen.has(k)) {
seen.add(k);
out.push(v);
}
merged[key] = out;
} else {
merged[key] = caseValue;
}
result[key] = out;
} else if (isJsonObject(suiteVal) && isJsonObject(caseVal)) {
result[key] = mergeSuiteMetadataPayload(
caseVal as Record<string, unknown>,
suiteVal as Record<string, unknown>,
);
} else if (caseVal === undefined) {
result[key] = suiteVal;
}
}

return { ...(caseMetadata ?? {}), governance: merged };
return result;
}

function logWarning(message: string, details?: readonly string[]): void {
Expand Down
Loading
Loading