EntityProcess · christso · Apr 27, 2026 · Apr 27, 2026 · Apr 27, 2026
diff --git a/apps/cli/test/commands/eval/assert.test.ts b/apps/cli/test/commands/eval/assert.test.ts
@@ -61,7 +61,7 @@ describe('agentv eval assert', () => {
     } finally {
       await rm(baseDir, { recursive: true, force: true });
     }
-  });
+  }, 30_000);
 
   it('exits 1 when grader returns score 0', async () => {
     const { baseDir } = await createGraderFixture();
@@ -87,7 +87,7 @@ describe('agentv eval assert', () => {
     } finally {
       await rm(baseDir, { recursive: true, force: true });
     }
-  });
+  }, 30_000);
 
   it('exits 0 when grader returns passing score', async () => {
     const { baseDir } = await createGraderFixture();
@@ -113,7 +113,7 @@ describe('agentv eval assert', () => {
     } finally {
       await rm(baseDir, { recursive: true, force: true });
     }
-  });
+  }, 30_000);
 
   it('errors when grader name not found', async () => {
     const { baseDir } = await createGraderFixture();
@@ -137,5 +137,5 @@ describe('agentv eval assert', () => {
     } finally {
       await rm(baseDir, { recursive: true, force: true });
     }
-  });
+  }, 30_000);
 });
diff --git a/apps/cli/test/commands/eval/pipeline/bench.test.ts b/apps/cli/test/commands/eval/pipeline/bench.test.ts
@@ -76,7 +76,7 @@ describe('pipeline bench', () => {
     expect(grading.summary.pass_rate).toBeGreaterThan(0);
     expect(grading.assertions.length).toBeGreaterThan(0);
     expect(grading.graders).toHaveLength(2);
-  });
+  }, 30_000);
 
   it('writes index.jsonl with one entry per test', async () => {
     await writeFile(
@@ -98,7 +98,7 @@ describe('pipeline bench', () => {
     expect(lines).toHaveLength(1);
     expect(lines[0].test_id).toBe('test-01');
     expect(lines[0].score).toBeGreaterThan(0);
-  });
+  }, 30_000);
 
   it('writes benchmark.json with run_summary', async () => {
     await writeFile(
@@ -115,7 +115,7 @@ describe('pipeline bench', () => {
     const benchmark = JSON.parse(await readFile(join(OUT_DIR, 'benchmark.json'), 'utf8'));
     expect(benchmark.metadata.targets).toContain('test-target');
     expect(benchmark.run_summary['test-target']).toBeDefined();
-  });
+  }, 30_000);
 
   it('propagates experiment from manifest to index.jsonl and benchmark.json', async () => {
     // Overwrite manifest with experiment field
@@ -139,7 +139,7 @@ describe('pipeline bench', () => {
 
     const benchmark = JSON.parse(await readFile(join(OUT_DIR, 'benchmark.json'), 'utf8'));
     expect(benchmark.metadata.experiment).toBe('without_skills');
-  });
+  }, 30_000);
 
   it('omits experiment from output when manifest has no experiment', async () => {
     const { execa } = await import('execa');
@@ -151,5 +151,5 @@ describe('pipeline bench', () => {
 
     const benchmark = JSON.parse(await readFile(join(OUT_DIR, 'benchmark.json'), 'utf8'));
     expect(benchmark.metadata.experiment).toBeUndefined();
-  });
+  }, 30_000);
 });
diff --git a/apps/cli/test/commands/eval/pipeline/grade.test.ts b/apps/cli/test/commands/eval/pipeline/grade.test.ts
@@ -55,7 +55,7 @@ describe('pipeline grade', () => {
     );
     expect(result.score).toBe(1);
     expect(result.name).toBe('always_pass');
-  });
+  }, 30_000);
 
   it('includes assertions from code grader output', async () => {
     const { execa } = await import('execa');
@@ -66,7 +66,7 @@ describe('pipeline grade', () => {
     );
     expect(result.assertions).toHaveLength(1);
     expect(result.assertions[0].passed).toBe(true);
-  });
+  }, 30_000);
 });
 
 describe('pipeline grade — builtin assertions', () => {
@@ -144,7 +144,7 @@ describe('pipeline grade — builtin assertions', () => {
     expect(result.score).toBe(1);
     expect(result.type).toBe('contains');
     expect(result.assertions[0].passed).toBe(true);
-  });
+  }, 30_000);
 
   it('evaluates regex assertion and writes result', async () => {
     const { execa } = await import('execa');
@@ -158,7 +158,7 @@ describe('pipeline grade — builtin assertions', () => {
     );
     expect(result.score).toBe(1);
     expect(result.type).toBe('regex');
-  });
+  }, 30_000);
 
   it('scores 0 when contains assertion does not match', async () => {
     const { execa } = await import('execa');
@@ -172,7 +172,7 @@ describe('pipeline grade — builtin assertions', () => {
     );
     expect(result.score).toBe(0);
     expect(result.assertions[0].passed).toBe(false);
-  });
+  }, 30_000);
 
   it('applies negate to invert score', async () => {
     // Overwrite has_goodbye with negate: true — "not contains goodbye" should pass
@@ -198,5 +198,5 @@ describe('pipeline grade — builtin assertions', () => {
     );
     expect(result.score).toBe(1);
     expect(result.assertions[0].passed).toBe(true);
-  });
+  }, 30_000);
 });
diff --git a/apps/cli/test/commands/trend/trend.test.ts b/apps/cli/test/commands/trend/trend.test.ts
@@ -348,7 +348,7 @@ describe('trend command', () => {
     });
     expect((parsed.summary as Record<string, unknown>).direction).toBe('degrading');
     expect((parsed.summary as Record<string, unknown>).matched_test_count).toBe(2);
-  });
+  }, 30_000);
 
   it('normalizes explicit run inputs to chronological order before analysis', async () => {
     const cwd = await createTempDir();
@@ -499,7 +499,7 @@ describe('trend command', () => {
     expect(result.exitCode).toBe(1);
     expect(result.stdout).toContain('Trend Analysis');
     expect(result.stdout).toContain('degrading');
-  });
+  }, 30_000);
 
   it('errors when target filtering leaves a selected run empty in CLI mode', async () => {
     const cwd = await createTempDir();
@@ -532,5 +532,5 @@ describe('trend command', () => {
 
     expect(result.exitCode).toBe(1);
     expect(result.stderr).toContain('Run has no matching records');
-  });
+  }, 30_000);
 });
diff --git a/apps/cli/test/eval.integration.test.ts b/apps/cli/test/eval.integration.test.ts
@@ -232,7 +232,7 @@ describe('agentv eval CLI', () => {
     } finally {
       await rm(fixture.baseDir, { recursive: true, force: true });
     }
-  });
+  }, 30_000);
 
   it('loads the nearest .env first and uses parent .env only for missing keys', async () => {
     const fixture = await createNestedEnvFixture();
@@ -249,7 +249,7 @@ describe('agentv eval CLI', () => {
     } finally {
       await rm(fixture.baseDir, { recursive: true, force: true });
     }
-  });
+  }, 30_000);
 
   it('supports repeatable --test-id flags with OR matching', async () => {
     const fixture = await createFixture();
@@ -269,7 +269,7 @@ describe('agentv eval CLI', () => {
     } finally {
       await rm(fixture.baseDir, { recursive: true, force: true });
     }
-  });
+  }, 30_000);
 
   it('passes run-level budget tracking through to the evaluator', async () => {
     const fixture = await createFixture();
@@ -285,5 +285,5 @@ describe('agentv eval CLI', () => {
     } finally {
       await rm(fixture.baseDir, { recursive: true, force: true });
     }
-  });
+  }, 30_000);
 });
diff --git a/packages/core/src/evaluation/loaders/config-loader.ts b/packages/core/src/evaluation/loaders/config-loader.ts
@@ -2,7 +2,6 @@ import { readFile } from 'node:fs/promises';
 import path from 'node:path';
 
 import { interpolateEnv } from '../interpolation.js';
-import { parseYamlValue } from '../yaml-loader.js';
 import type {
   EvalTargetRef,
   FailOnError,
@@ -13,6 +12,7 @@ import type {
   WorkspaceHookConfig,
 } from '../types.js';
 import { isJsonObject } from '../types.js';
+import { parseYamlValue } from '../yaml-loader.js';
 import { buildDirectoryChain, fileExists } from './file-resolver.js';
 
 const ANSI_YELLOW = '\u001b[33m';

diff --git a/packages/core/src/evaluation/loaders/grader-parser.ts b/packages/core/src/evaluation/loaders/grader-parser.ts
@@ -3,7 +3,6 @@ import path from 'node:path';
 
 import { normalizePreprocessorType } from '../content-preprocessor.js';
 import { interpolateEnv } from '../interpolation.js';
-import { parseYamlValue } from '../yaml-loader.js';
 import type { ToolTrajectoryExpectedItem, ToolTrajectoryGraderConfig } from '../trace.js';
 import type {
   ContentPreprocessorConfig,
@@ -14,6 +13,7 @@ import type {
 } from '../types.js';
 import { isGraderKind } from '../types.js';
 import { validateCustomPromptContent } from '../validation/prompt-validator.js';
+import { parseYamlValue } from '../yaml-loader.js';
 import { resolveFileReference } from './file-resolver.js';
 
 const ANSI_YELLOW = '\u001b[33m';