diff --git a/apps/cli/test/commands/eval/assert.test.ts b/apps/cli/test/commands/eval/assert.test.ts index ca2607f56..0a661d7f9 100644 --- a/apps/cli/test/commands/eval/assert.test.ts +++ b/apps/cli/test/commands/eval/assert.test.ts @@ -61,7 +61,7 @@ describe('agentv eval assert', () => { } finally { await rm(baseDir, { recursive: true, force: true }); } - }); + }, 30_000); it('exits 1 when grader returns score 0', async () => { const { baseDir } = await createGraderFixture(); @@ -87,7 +87,7 @@ describe('agentv eval assert', () => { } finally { await rm(baseDir, { recursive: true, force: true }); } - }); + }, 30_000); it('exits 0 when grader returns passing score', async () => { const { baseDir } = await createGraderFixture(); @@ -113,7 +113,7 @@ describe('agentv eval assert', () => { } finally { await rm(baseDir, { recursive: true, force: true }); } - }); + }, 30_000); it('errors when grader name not found', async () => { const { baseDir } = await createGraderFixture(); @@ -137,5 +137,5 @@ describe('agentv eval assert', () => { } finally { await rm(baseDir, { recursive: true, force: true }); } - }); + }, 30_000); }); diff --git a/apps/cli/test/commands/eval/pipeline/bench.test.ts b/apps/cli/test/commands/eval/pipeline/bench.test.ts index 770b04ae0..1fab88432 100644 --- a/apps/cli/test/commands/eval/pipeline/bench.test.ts +++ b/apps/cli/test/commands/eval/pipeline/bench.test.ts @@ -76,7 +76,7 @@ describe('pipeline bench', () => { expect(grading.summary.pass_rate).toBeGreaterThan(0); expect(grading.assertions.length).toBeGreaterThan(0); expect(grading.graders).toHaveLength(2); - }); + }, 30_000); it('writes index.jsonl with one entry per test', async () => { await writeFile( @@ -98,7 +98,7 @@ describe('pipeline bench', () => { expect(lines).toHaveLength(1); expect(lines[0].test_id).toBe('test-01'); expect(lines[0].score).toBeGreaterThan(0); - }); + }, 30_000); it('writes benchmark.json with run_summary', async () => { await writeFile( @@ -115,7 +115,7 @@ describe('pipeline bench', () => { const benchmark = JSON.parse(await readFile(join(OUT_DIR, 'benchmark.json'), 'utf8')); expect(benchmark.metadata.targets).toContain('test-target'); expect(benchmark.run_summary['test-target']).toBeDefined(); - }); + }, 30_000); it('propagates experiment from manifest to index.jsonl and benchmark.json', async () => { // Overwrite manifest with experiment field @@ -139,7 +139,7 @@ describe('pipeline bench', () => { const benchmark = JSON.parse(await readFile(join(OUT_DIR, 'benchmark.json'), 'utf8')); expect(benchmark.metadata.experiment).toBe('without_skills'); - }); + }, 30_000); it('omits experiment from output when manifest has no experiment', async () => { const { execa } = await import('execa'); @@ -151,5 +151,5 @@ describe('pipeline bench', () => { const benchmark = JSON.parse(await readFile(join(OUT_DIR, 'benchmark.json'), 'utf8')); expect(benchmark.metadata.experiment).toBeUndefined(); - }); + }, 30_000); }); diff --git a/apps/cli/test/commands/eval/pipeline/grade.test.ts b/apps/cli/test/commands/eval/pipeline/grade.test.ts index fda2fc27a..d2cdf802a 100644 --- a/apps/cli/test/commands/eval/pipeline/grade.test.ts +++ b/apps/cli/test/commands/eval/pipeline/grade.test.ts @@ -55,7 +55,7 @@ describe('pipeline grade', () => { ); expect(result.score).toBe(1); expect(result.name).toBe('always_pass'); - }); + }, 30_000); it('includes assertions from code grader output', async () => { const { execa } = await import('execa'); @@ -66,7 +66,7 @@ describe('pipeline grade', () => { ); expect(result.assertions).toHaveLength(1); expect(result.assertions[0].passed).toBe(true); - }); + }, 30_000); }); describe('pipeline grade — builtin assertions', () => { @@ -144,7 +144,7 @@ describe('pipeline grade — builtin assertions', () => { expect(result.score).toBe(1); expect(result.type).toBe('contains'); expect(result.assertions[0].passed).toBe(true); - }); + }, 30_000); it('evaluates regex assertion and writes result', async () => { const { execa } = await import('execa'); @@ -158,7 +158,7 @@ describe('pipeline grade — builtin assertions', () => { ); expect(result.score).toBe(1); expect(result.type).toBe('regex'); - }); + }, 30_000); it('scores 0 when contains assertion does not match', async () => { const { execa } = await import('execa'); @@ -172,7 +172,7 @@ describe('pipeline grade — builtin assertions', () => { ); expect(result.score).toBe(0); expect(result.assertions[0].passed).toBe(false); - }); + }, 30_000); it('applies negate to invert score', async () => { // Overwrite has_goodbye with negate: true — "not contains goodbye" should pass @@ -198,5 +198,5 @@ describe('pipeline grade — builtin assertions', () => { ); expect(result.score).toBe(1); expect(result.assertions[0].passed).toBe(true); - }); + }, 30_000); }); diff --git a/apps/cli/test/commands/trend/trend.test.ts b/apps/cli/test/commands/trend/trend.test.ts index b29887919..0b6c229d7 100644 --- a/apps/cli/test/commands/trend/trend.test.ts +++ b/apps/cli/test/commands/trend/trend.test.ts @@ -348,7 +348,7 @@ describe('trend command', () => { }); expect((parsed.summary as Record).direction).toBe('degrading'); expect((parsed.summary as Record).matched_test_count).toBe(2); - }); + }, 30_000); it('normalizes explicit run inputs to chronological order before analysis', async () => { const cwd = await createTempDir(); @@ -499,7 +499,7 @@ describe('trend command', () => { expect(result.exitCode).toBe(1); expect(result.stdout).toContain('Trend Analysis'); expect(result.stdout).toContain('degrading'); - }); + }, 30_000); it('errors when target filtering leaves a selected run empty in CLI mode', async () => { const cwd = await createTempDir(); @@ -532,5 +532,5 @@ describe('trend command', () => { expect(result.exitCode).toBe(1); expect(result.stderr).toContain('Run has no matching records'); - }); + }, 30_000); }); diff --git a/apps/cli/test/eval.integration.test.ts b/apps/cli/test/eval.integration.test.ts index f045fab58..60fc5317b 100644 --- a/apps/cli/test/eval.integration.test.ts +++ b/apps/cli/test/eval.integration.test.ts @@ -232,7 +232,7 @@ describe('agentv eval CLI', () => { } finally { await rm(fixture.baseDir, { recursive: true, force: true }); } - }); + }, 30_000); it('loads the nearest .env first and uses parent .env only for missing keys', async () => { const fixture = await createNestedEnvFixture(); @@ -249,7 +249,7 @@ describe('agentv eval CLI', () => { } finally { await rm(fixture.baseDir, { recursive: true, force: true }); } - }); + }, 30_000); it('supports repeatable --test-id flags with OR matching', async () => { const fixture = await createFixture(); @@ -269,7 +269,7 @@ describe('agentv eval CLI', () => { } finally { await rm(fixture.baseDir, { recursive: true, force: true }); } - }); + }, 30_000); it('passes run-level budget tracking through to the evaluator', async () => { const fixture = await createFixture(); @@ -285,5 +285,5 @@ describe('agentv eval CLI', () => { } finally { await rm(fixture.baseDir, { recursive: true, force: true }); } - }); + }, 30_000); }); diff --git a/packages/core/src/evaluation/loaders/config-loader.ts b/packages/core/src/evaluation/loaders/config-loader.ts index 939afbc8c..cf819f0df 100644 --- a/packages/core/src/evaluation/loaders/config-loader.ts +++ b/packages/core/src/evaluation/loaders/config-loader.ts @@ -2,7 +2,6 @@ import { readFile } from 'node:fs/promises'; import path from 'node:path'; import { interpolateEnv } from '../interpolation.js'; -import { parseYamlValue } from '../yaml-loader.js'; import type { EvalTargetRef, FailOnError, @@ -13,6 +12,7 @@ import type { WorkspaceHookConfig, } from '../types.js'; import { isJsonObject } from '../types.js'; +import { parseYamlValue } from '../yaml-loader.js'; import { buildDirectoryChain, fileExists } from './file-resolver.js'; const ANSI_YELLOW = '\u001b[33m'; diff --git a/packages/core/src/evaluation/loaders/grader-parser.ts b/packages/core/src/evaluation/loaders/grader-parser.ts index 165de6ff5..42db53e4c 100644 --- a/packages/core/src/evaluation/loaders/grader-parser.ts +++ b/packages/core/src/evaluation/loaders/grader-parser.ts @@ -3,7 +3,6 @@ import path from 'node:path'; import { normalizePreprocessorType } from '../content-preprocessor.js'; import { interpolateEnv } from '../interpolation.js'; -import { parseYamlValue } from '../yaml-loader.js'; import type { ToolTrajectoryExpectedItem, ToolTrajectoryGraderConfig } from '../trace.js'; import type { ContentPreprocessorConfig, @@ -14,6 +13,7 @@ import type { } from '../types.js'; import { isGraderKind } from '../types.js'; import { validateCustomPromptContent } from '../validation/prompt-validator.js'; +import { parseYamlValue } from '../yaml-loader.js'; import { resolveFileReference } from './file-resolver.js'; const ANSI_YELLOW = '\u001b[33m'; diff --git a/packages/core/test/evaluation/workspace/pool-manager.test.ts b/packages/core/test/evaluation/workspace/pool-manager.test.ts index b7b3da0f5..08f6bab70 100644 --- a/packages/core/test/evaluation/workspace/pool-manager.test.ts +++ b/packages/core/test/evaluation/workspace/pool-manager.test.ts @@ -197,7 +197,7 @@ describe('WorkspacePoolManager', () => { expect(existsSync(path.join(slot.path, 'my-repo', 'hello.txt'))).toBe(true); await manager.releaseSlot(slot); - }); + }, 30_000); it('reuses existing slot when available (after release)', async () => { const repoDir = path.join(tmpDir, 'source-repo'); @@ -226,7 +226,7 @@ describe('WorkspacePoolManager', () => { expect(slot2.path).toBe(slot1.path); await manager.releaseSlot(slot2); - }); + }, 30_000); it('creates slot-1 when slot-0 is locked (concurrent access)', async () => { const repoDir = path.join(tmpDir, 'source-repo'); @@ -254,7 +254,7 @@ describe('WorkspacePoolManager', () => { await manager.releaseSlot(slot0); await manager.releaseSlot(slot1); - }); + }, 30_000); it('PID-based stale lock detection works', async () => { const repoDir = path.join(tmpDir, 'source-repo'); @@ -286,7 +286,7 @@ describe('WorkspacePoolManager', () => { expect(slot2.isExisting).toBe(true); await manager.releaseSlot(slot2); - }); + }, 30_000); it('throws when all slots are locked', async () => { const repoDir = path.join(tmpDir, 'source-repo'); @@ -314,7 +314,7 @@ describe('WorkspacePoolManager', () => { await manager.releaseSlot(slot0); await manager.releaseSlot(slot1); - }); + }, 30_000); }); describe('drift detection', () => { @@ -336,7 +336,7 @@ describe('WorkspacePoolManager', () => { expect(slot.index).toBe(0); await manager.releaseSlot(slot); - }); + }, 30_000); it('no drift when fingerprint matches', async () => { const repoDir = path.join(tmpDir, 'source-repo'); @@ -363,7 +363,7 @@ describe('WorkspacePoolManager', () => { expect(slot2.index).toBe(0); await manager.releaseSlot(slot2); - }); + }, 30_000); it('detects drift when fingerprint changes', async () => { const repoDir = path.join(tmpDir, 'source-repo'); @@ -397,7 +397,7 @@ describe('WorkspacePoolManager', () => { expect(slot2.fingerprint).toBe(fp1); await manager.releaseSlot(slot2); - }); + }, 30_000); it('removes stale slots on drift', async () => { const repoDir = path.join(tmpDir, 'source-repo'); @@ -432,7 +432,7 @@ describe('WorkspacePoolManager', () => { expect(slot2.isExisting).toBe(false); await manager.releaseSlot(slot2); - }); + }, 30_000); }); describe('full acquireWorkspace flow', () => { @@ -470,7 +470,7 @@ describe('WorkspacePoolManager', () => { expect(metadata.templatePath).toBe(templateDir); await manager.releaseSlot(slot); - }); + }, 30_000); it('reuses workspace on second run (resets repos, re-copies template)', async () => { const repoDir = path.join(tmpDir, 'source-repo'); @@ -518,7 +518,7 @@ describe('WorkspacePoolManager', () => { ); await manager.releaseSlot(slot2); - }); + }, 30_000); it('agent-created files are cleaned by git clean -fd', async () => { const repoDir = path.join(tmpDir, 'source-repo'); @@ -551,7 +551,7 @@ describe('WorkspacePoolManager', () => { expect(existsSync(path.join(slot2.path, 'my-repo', 'agent-output.txt'))).toBe(false); await manager.releaseSlot(slot2); - }); + }, 30_000); it('original repo files restored after reset', async () => { const repoDir = path.join(tmpDir, 'source-repo'); @@ -585,7 +585,7 @@ describe('WorkspacePoolManager', () => { ); await manager.releaseSlot(slot2); - }); + }, 30_000); it('template files overwritten on reuse', async () => { const repoDir = path.join(tmpDir, 'source-repo'); @@ -623,7 +623,7 @@ describe('WorkspacePoolManager', () => { expect(readFileSync(path.join(slot2.path, 'config.yaml'), 'utf-8')).toBe('version: 2'); await manager.releaseSlot(slot2); - }); + }, 30_000); it('works with template only (no repos)', async () => { const templateDir = path.join(tmpDir, 'template'); @@ -642,7 +642,7 @@ describe('WorkspacePoolManager', () => { expect(slot.isExisting).toBe(false); await manager.releaseSlot(slot); - }); + }, 30_000); it('works with repos only (no template)', async () => { const repoDir = path.join(tmpDir, 'source-repo'); @@ -658,7 +658,7 @@ describe('WorkspacePoolManager', () => { expect(existsSync(path.join(slot.path, 'my-repo', 'hello.txt'))).toBe(true); await manager.releaseSlot(slot); - }); + }, 30_000); it('handles multiple repos in a workspace', async () => { const repoA = path.join(tmpDir, 'repo-a'); @@ -680,7 +680,7 @@ describe('WorkspacePoolManager', () => { expect(readFileSync(path.join(slot.path, 'repo-b', 'b.txt'), 'utf-8')).toBe('repo-b'); await manager.releaseSlot(slot); - }); + }, 30_000); }); describe('pool reset policy', () => { @@ -720,7 +720,7 @@ describe('WorkspacePoolManager', () => { expect(existsSync(path.join(slot2.path, 'my-repo', 'build', 'output.js'))).toBe(false); await manager.releaseSlot(slot2); - }); + }, 30_000); it('default fast reset preserves gitignored files on reuse', async () => { const repoDir = path.join(tmpDir, 'source-repo'); @@ -756,7 +756,7 @@ describe('WorkspacePoolManager', () => { expect(existsSync(path.join(slot2.path, 'my-repo', 'build', 'output.js'))).toBe(true); await manager.releaseSlot(slot2); - }); + }, 30_000); }); describe('resolve: remote pool reuse', () => { @@ -806,7 +806,7 @@ describe('WorkspacePoolManager', () => { expect(slot2Head).toBe(newSha); await manager.releaseSlot(slot2); - }); + }, 30_000); it('does not fetch from remote when resolve is local', async () => { const repoDir = path.join(tmpDir, 'source-repo'); @@ -847,7 +847,7 @@ describe('WorkspacePoolManager', () => { expect(readFileSync(path.join(slot2.path, 'my-repo', 'hello.txt'), 'utf-8')).toBe('v1'); await manager.releaseSlot(slot2); - }); + }, 30_000); }); describe('releaseSlot', () => { @@ -867,7 +867,7 @@ describe('WorkspacePoolManager', () => { await manager.releaseSlot(slot); expect(existsSync(slot.lockPath)).toBe(false); - }); + }, 30_000); it('does not throw if lock file already removed', async () => { const repoDir = path.join(tmpDir, 'source-repo'); @@ -883,6 +883,6 @@ describe('WorkspacePoolManager', () => { await manager.releaseSlot(slot); // Second release should not throw await manager.releaseSlot(slot); - }); + }, 30_000); }); }); diff --git a/packages/core/test/evaluation/workspace/repo-manager.test.ts b/packages/core/test/evaluation/workspace/repo-manager.test.ts index 2171d0b6e..6d5744a47 100644 --- a/packages/core/test/evaluation/workspace/repo-manager.test.ts +++ b/packages/core/test/evaluation/workspace/repo-manager.test.ts @@ -70,7 +70,7 @@ describe('RepoManager', () => { const targetDir = path.join(workspaceDir, 'my-repo'); expect(existsSync(path.join(targetDir, 'hello.txt'))).toBe(true); - }); + }, 30_000); it('checks out specified ref', async () => { const repoDir = path.join(tmpDir, 'source-repo'); @@ -97,7 +97,7 @@ describe('RepoManager', () => { expect(headSha).toBe(secondSha); expect(existsSync(path.join(targetDir, 'second.txt'))).toBe(true); expect(existsSync(path.join(targetDir, 'third.txt'))).toBe(false); - }); + }, 30_000); it('walks ancestor commits', async () => { const repoDir = path.join(tmpDir, 'source-repo'); @@ -117,7 +117,7 @@ describe('RepoManager', () => { const targetDir = path.join(workspaceDir, 'my-repo'); const headSha = gitExec('git rev-parse HEAD', targetDir); expect(headSha).toBe(firstSha); - }); + }, 30_000); it('supports shallow clone with depth', async () => { const repoDir = path.join(tmpDir, 'source-repo'); @@ -139,7 +139,7 @@ describe('RepoManager', () => { const targetDir = path.join(workspaceDir, 'my-repo'); const logCount = gitExec('git rev-list --count HEAD', targetDir); expect(Number(logCount)).toBe(2); - }); + }, 30_000); }); describe('materializeAll', () => { @@ -159,7 +159,7 @@ describe('RepoManager', () => { expect(existsSync(path.join(workspaceDir, 'repo-a', 'a.txt'))).toBe(true); expect(existsSync(path.join(workspaceDir, 'repo-b', 'b.txt'))).toBe(true); - }); + }, 30_000); }); describe('validateLocalPaths', () => { @@ -284,6 +284,6 @@ describe('RepoManager', () => { expect(existsSync(path.join(targetDir, 'agent-created.txt'))).toBe(false); const content = readFileSync(path.join(targetDir, 'original.txt'), 'utf-8'); expect(content).toBe('original'); - }); + }, 30_000); }); });