From 577127f94e7c6a9155b1df3107b9ba9dcf152e74 Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Sat, 2 May 2026 22:14:11 -0700 Subject: [PATCH] feat: extensible profiles, data-driven pruning, and task learning (v1.4.0) Phase 0-2 of extensible categories and task learning plan: - Profile types: BrowserAgentProfile, DataDrivenPruningPolicy, ResolvedAgentProfile - ProfileRegistry with keyword/domain matching and priority resolution - Data-driven pruning adapter alongside existing category-based pruning - Fingerprint normalizer for learned target identification - Learning extractor with privacy safeguards and stale fingerprint decay - LearningStore interface + InMemoryLearningStore implementation - Zod schemas for profile JSON validation - PlannerExecutorAgent accepts resolvedProfile + onStepOutcome callback - All 1027 tests passing (87 suites) --- package.json | 2 +- src/agents/browser-agent.ts | 42 +++ .../planner-executor/category-pruner.ts | 34 ++ src/agents/planner-executor/common-hints.ts | 32 +- .../planner-executor/composable-heuristics.ts | 120 +++++++ .../planner-executor/data-driven-pruner.ts | 116 +++++++ .../fingerprint-normalizer.ts | 201 +++++++++++ src/agents/planner-executor/index.ts | 39 +++ .../planner-executor/learning-extractor.ts | 315 ++++++++++++++++++ src/agents/planner-executor/learning-store.ts | 96 ++++++ .../planner-executor-agent.ts | 33 +- .../planner-executor/profile-registry.ts | 160 +++++++++ src/agents/planner-executor/profile-schema.ts | 75 +++++ src/agents/planner-executor/profile-types.ts | 176 ++++++++++ src/agents/planner-executor/pruning-types.ts | 5 + src/browser-agent.ts | 46 +++ tests/data-driven-pruner.test.ts | 136 ++++++++ tests/fingerprint-normalizer.test.ts | 150 +++++++++ tests/learning-extractor.test.ts | 296 ++++++++++++++++ tests/profile-registry.test.ts | 170 ++++++++++ 20 files changed, 2240 insertions(+), 4 deletions(-) create mode 100644 src/agents/planner-executor/data-driven-pruner.ts create mode 100644 
src/agents/planner-executor/fingerprint-normalizer.ts create mode 100644 src/agents/planner-executor/learning-extractor.ts create mode 100644 src/agents/planner-executor/learning-store.ts create mode 100644 src/agents/planner-executor/profile-registry.ts create mode 100644 src/agents/planner-executor/profile-schema.ts create mode 100644 src/agents/planner-executor/profile-types.ts create mode 100644 tests/data-driven-pruner.test.ts create mode 100644 tests/fingerprint-normalizer.test.ts create mode 100644 tests/learning-extractor.test.ts create mode 100644 tests/profile-registry.test.ts diff --git a/package.json b/package.json index cb14e56..b3bc486 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@predicatesystems/runtime", - "version": "1.3.4", + "version": "1.4.0", "description": "TypeScript SDK for Sentience AI Agent Browser Automation", "main": "dist/index.js", "types": "dist/index.d.ts", diff --git a/src/agents/browser-agent.ts b/src/agents/browser-agent.ts index 7245af6..6ab523b 100644 --- a/src/agents/browser-agent.ts +++ b/src/agents/browser-agent.ts @@ -207,6 +207,48 @@ class TokenAccountingProvider extends LLMProvider { } } +// Re-export planner-executor profile/learning types for extension consumption +export type { + DataDrivenPruningPolicy, + BrowserAgentProfile, + ResolvedAgentProfile, + LearnedTargetFingerprint, + DomainProfile, +} from './planner-executor/profile-types'; +export { EMPTY_RESOLVED_PROFILE } from './planner-executor/profile-types'; +export { + DataDrivenPruningPolicySchema, + BrowserAgentProfileSchema, + BrowserAgentProfileArraySchema, + LearnedTargetFingerprintSchema, + DomainProfileSchema, +} from './planner-executor/profile-schema'; +export { ProfileRegistry } from './planner-executor/profile-registry'; +export { pruneWithPolicy } from './planner-executor/data-driven-pruner'; +export { + computeTaskHash, + extractDomain, + createFingerprint, + mergeFingerprint, + recordFingerprintFailure, +} from 
'./planner-executor/fingerprint-normalizer'; +export type { LearningStore } from './planner-executor/learning-store'; +export { InMemoryLearningStore } from './planner-executor/learning-store'; +export { + extractFingerprintFromOutcome, + applyFingerprintFailure, + applyFingerprintSuccess, + isFingerprintStale, + isFingerprintExpired, + fingerprintToHint, + computeTaskHash as computeAsyncTaskHash, + isSensitiveUrl as isLearningSensitiveUrl, +} from './planner-executor/learning-extractor'; +export type { + LearningExtractionOptions, + LearningExtractionResult, +} from './planner-executor/learning-extractor'; + export type StepOutcome = { stepGoal: string; ok: boolean }; export class PredicateBrowserAgent { diff --git a/src/agents/planner-executor/category-pruner.ts b/src/agents/planner-executor/category-pruner.ts index bfcb547..0a30494 100644 --- a/src/agents/planner-executor/category-pruner.ts +++ b/src/agents/planner-executor/category-pruner.ts @@ -6,6 +6,7 @@ import { type PrunedSnapshotContext, } from './pruning-types'; import { TaskCategory } from './task-category'; +import { pruneWithPolicy } from './data-driven-pruner'; function textOf(element: SnapshotElement): string { return String(element.text || element.name || '').toLowerCase(); @@ -270,6 +271,39 @@ export function pruneSnapshotForTask( options: PruneSnapshotOptions ): PrunedSnapshotContext { const relaxationLevel = Math.max(0, options.relaxationLevel || 0); + + // Data-driven path: use profile policy if provided + if (options.profilePolicy) { + const { elements, maxNodes } = pruneWithPolicy( + snapshot, + options.profilePolicy, + options.goal, + relaxationLevel, + options.category, + options.learnedFingerprints + ); + const actionableElementCount = selectContextElements(elements, elements.length || 1).length; + + return { + category: options.category, + snapshot, + elements, + promptBlock: formatPrunedContext({ + category: options.category, + elements, + relaxationLevel, + rawElementCount: 
snapshot.elements.length, + prunedElementCount: elements.length, + actionableElementCount, + }), + relaxationLevel, + rawElementCount: snapshot.elements.length, + prunedElementCount: elements.length, + actionableElementCount, + }; + } + + // Built-in category path const policy = getPolicy(options.category, relaxationLevel); const filtered = (snapshot.elements || []).filter(element => { if (policy.block(element)) { diff --git a/src/agents/planner-executor/common-hints.ts b/src/agents/planner-executor/common-hints.ts index 0ecb51f..22c354b 100644 --- a/src/agents/planner-executor/common-hints.ts +++ b/src/agents/planner-executor/common-hints.ts @@ -1,4 +1,5 @@ import { HeuristicHint } from './heuristic-hint'; +import type { HeuristicHintInput } from './heuristic-hint'; export const COMMON_HINTS = { add_to_cart: new HeuristicHint({ @@ -56,8 +57,20 @@ export const COMMON_HINTS = { }), } as const; -export function getCommonHint(intent: string): HeuristicHint | null { +/** + * Look up a heuristic hint by intent string. + * + * @param intent - The intent to look up (e.g., "add_to_cart", "book_flight") + * @param profileHints - Optional profile-provided hints to check after built-in hints + * @returns Matching HeuristicHint or null + */ +export function getCommonHint( + intent: string, + profileHints?: HeuristicHintInput[] +): HeuristicHint | null { const normalized = intent.toLowerCase().replace(/[\s-]+/g, '_'); + + // Check built-in hints first const exactMatch = COMMON_HINTS[normalized as keyof typeof COMMON_HINTS]; if (exactMatch) { return exactMatch; @@ -69,5 +82,22 @@ export function getCommonHint(intent: string): HeuristicHint | null { } } + // Check profile-provided hints + if (profileHints && profileHints.length > 0) { + for (const ph of profileHints) { + const pattern = ph.intentPattern ?? ph.intent_pattern ?? 
''; + const phNormalized = pattern.toLowerCase().replace(/[\s-]+/g, '_'); + if (normalized.includes(phNormalized) || phNormalized.includes(normalized)) { + return new HeuristicHint({ + intentPattern: pattern, + textPatterns: ph.textPatterns ?? ph.text_patterns, + roleFilter: ph.roleFilter ?? ph.role_filter, + attributePatterns: ph.attributePatterns ?? ph.attribute_patterns, + priority: ph.priority, + }); + } + } + } + return null; } diff --git a/src/agents/planner-executor/composable-heuristics.ts b/src/agents/planner-executor/composable-heuristics.ts index 71e89a2..3d3e40f 100644 --- a/src/agents/planner-executor/composable-heuristics.ts +++ b/src/agents/planner-executor/composable-heuristics.ts @@ -3,23 +3,33 @@ import type { IntentHeuristics } from './planner-executor-agent'; import { COMMON_HINTS } from './common-hints'; import { HeuristicHint, type HeuristicHintInput } from './heuristic-hint'; import { TaskCategory } from './task-category'; +import type { LearnedTargetFingerprint } from './profile-types'; export interface ComposableHeuristicsOptions { staticHeuristics?: IntentHeuristics; taskCategory?: TaskCategory | null; useCommonHints?: boolean; + /** Learned fingerprints from previous successful runs */ + learnedFingerprints?: LearnedTargetFingerprint[]; } +/** Minimum confidence for a learned fingerprint to be used as a hint */ +const MIN_FINGERPRINT_CONFIDENCE = 0.3; + export class ComposableHeuristics implements IntentHeuristics { private readonly staticHeuristics: IntentHeuristics | null; private readonly taskCategory: TaskCategory | null; private readonly useCommonHints: boolean; private currentHints: HeuristicHint[] = []; + private readonly learnedFingerprints: LearnedTargetFingerprint[]; constructor(options: ComposableHeuristicsOptions = {}) { this.staticHeuristics = options.staticHeuristics ?? null; this.taskCategory = options.taskCategory ?? null; this.useCommonHints = options.useCommonHints ?? 
true; + this.learnedFingerprints = (options.learnedFingerprints ?? []).filter( + fp => fp.confidence >= MIN_FINGERPRINT_CONFIDENCE + ); } setStepHints(hints?: Array | null): void { @@ -48,6 +58,7 @@ export class ComposableHeuristics implements IntentHeuristics { return null; } + // 1. Check current step hints (highest priority) for (const hint of this.currentHints) { if (hint.matchesIntent(intent)) { const elementId = this.matchHint(hint, elements); @@ -57,6 +68,13 @@ export class ComposableHeuristics implements IntentHeuristics { } } + // 2. Check learned fingerprints (dynamic hints from successful past runs) + const learnedMatch = this.matchLearnedFingerprint(intent, elements); + if (learnedMatch !== null) { + return learnedMatch; + } + + // 3. Check common hints (built-in) if (this.useCommonHints) { const commonHint = this.getCommonHintForIntent(intent); if (commonHint) { @@ -67,6 +85,7 @@ export class ComposableHeuristics implements IntentHeuristics { } } + // 4. Check static heuristics if (this.staticHeuristics) { try { const elementId = this.staticHeuristics.findElementForIntent(intent, elements, url, goal); @@ -78,12 +97,14 @@ export class ComposableHeuristics implements IntentHeuristics { } } + // 5. Fall back to task category defaults return this.matchTaskCategoryDefaults(elements); } priorityOrder(): string[] { const patterns = [ ...this.currentHints.map(hint => hint.intentPattern), + ...this.learnedFingerprints.map(fp => fp.intent), ...(this.useCommonHints ? Object.keys(COMMON_HINTS) : []), ]; @@ -108,6 +129,105 @@ export class ComposableHeuristics implements IntentHeuristics { return null; } + /** + * Match learned fingerprints against current snapshot elements. + * Fingerprints are sorted by confidence (descending) so the most + * reliable past success is tried first. 
+ */ + private matchLearnedFingerprint(intent: string, elements: SnapshotElement[]): number | null { + if (this.learnedFingerprints.length === 0) { + return null; + } + + const normalizedIntent = intent.toLowerCase().replace(/[\s-]+/g, '_'); + const sorted = [...this.learnedFingerprints].sort((a, b) => b.confidence - a.confidence); + + for (const fp of sorted) { + // Match intent: exact or substring match + const fpIntent = fp.intent.toLowerCase().replace(/[\s-]+/g, '_'); + if ( + fpIntent !== normalizedIntent && + !normalizedIntent.includes(fpIntent) && + !fpIntent.includes(normalizedIntent) + ) { + continue; + } + + for (const element of elements) { + if (this.fingerprintMatchesElement(fp, element)) { + return element.id; + } + } + } + + return null; + } + + /** + * Check if a learned fingerprint matches a snapshot element. + * Uses token overlap scoring with a minimum threshold. + */ + private fingerprintMatchesElement( + fp: LearnedTargetFingerprint, + element: SnapshotElement + ): boolean { + let score = 0; + let maxScore = 0; + + // Role match (weight: 2) + if (fp.role) { + maxScore += 2; + if ((element.role ?? 
'').toLowerCase() === fp.role) { + score += 2; + } + } + + // Text token overlap (weight: up to 3) + if (fp.textTokens && fp.textTokens.length > 0) { + maxScore += 3; + const elementText = [element.text, element.ariaLabel, element.name] + .filter((v): v is string => typeof v === 'string') + .join(' ') + .toLowerCase(); + const elementTokens = elementText.split(/\s+/).filter(t => t.length > 0); + const matchingTokens = fp.textTokens.filter(ft => + elementTokens.some(et => et === ft || et.includes(ft)) + ); + if (matchingTokens.length > 0) { + score += Math.min(3, Math.ceil((matchingTokens.length / fp.textTokens.length) * 3)); + } + } + + // ARIA token overlap (weight: up to 2) + if (fp.ariaTokens && fp.ariaTokens.length > 0) { + maxScore += 2; + const ariaText = [element.ariaLabel, element.name] + .filter((v): v is string => typeof v === 'string') + .join(' ') + .toLowerCase(); + const ariaTokens = ariaText.split(/\s+/).filter(t => t.length > 0); + const matchingTokens = fp.ariaTokens.filter(at => + ariaTokens.some(et => et === at || et.includes(at)) + ); + if (matchingTokens.length > 0) { + score += Math.min(2, Math.ceil((matchingTokens.length / fp.ariaTokens.length) * 2)); + } + } + + // href path pattern match (weight: 2) + if (fp.hrefPathPattern) { + maxScore += 2; + const href = element.href || ''; + if (href.toLowerCase().includes(fp.hrefPathPattern)) { + score += 2; + } + } + + // Require at least 50% of available score to consider it a match + if (maxScore === 0) return false; + return score / maxScore >= 0.5; + } + private getCommonHintForIntent(intent: string): HeuristicHint | null { const normalized = intent.toLowerCase().replace(/[\s-]+/g, '_'); if (normalized in COMMON_HINTS) { diff --git a/src/agents/planner-executor/data-driven-pruner.ts b/src/agents/planner-executor/data-driven-pruner.ts new file mode 100644 index 0000000..2f3e341 --- /dev/null +++ b/src/agents/planner-executor/data-driven-pruner.ts @@ -0,0 +1,116 @@ +/** + * Data-driven pruning 
/** Lower-cased role of an element ('' when the element has no role). */
function roleOf(element: SnapshotElement): string {
  return String(element.role || '').toLowerCase();
}

/** Lower-cased, space-joined text, name, ARIA label and href of an element. */
function textOf(element: SnapshotElement): string {
  return [element.text, element.name, element.ariaLabel, element.href]
    .filter((v): v is string => Boolean(v))
    .join(' ')
    .toLowerCase();
}

/**
 * Rank an element for inclusion in the pruned context.
 *
 * Score = importance, +15 if clickable, +20 if in the dominant group,
 * +round(confidence * 30) for every fingerprint with confidence > 0.5 that
 * matches, +10 if any goal word longer than two characters occurs in the
 * element's text.
 */
function scoreElement(
  element: SnapshotElement,
  goal: string,
  fingerprints: LearnedTargetFingerprint[]
): number {
  let score = Number(element.importance || 0);
  if (element.clickable) score += 15;
  if (element.inDominantGroup) score += 20;

  // Boost elements matching learned fingerprints
  for (const fp of fingerprints) {
    if (fp.confidence > 0.5 && matchesFingerprint(element, fp)) {
      score += Math.round(fp.confidence * 30);
    }
  }

  // Goal-term boost
  const goalTerms = goal
    .toLowerCase()
    .split(/\s+/)
    .filter(t => t.length > 2);
  const text = textOf(element);
  if (goalTerms.some(term => text.includes(term))) {
    score += 10;
  }

  return score;
}

/**
 * Check whether an element satisfies every criterion a fingerprint carries.
 *
 * - role: must be equal (case-insensitive)
 * - textTokens: at least one token must occur in the element text
 * - hrefPathPattern: the element must have an href containing the pattern
 *
 * A fingerprint that carries none of these criteria matches nothing, so it
 * cannot hand the same boost to every element on the page.
 */
function matchesFingerprint(element: SnapshotElement, fp: LearnedTargetFingerprint): boolean {
  let hasCriterion = false;

  if (fp.role) {
    hasCriterion = true;
    if (roleOf(element) !== fp.role.toLowerCase()) return false;
  }

  if (fp.textTokens && fp.textTokens.length > 0) {
    hasCriterion = true;
    const text = textOf(element);
    if (!fp.textTokens.some(token => text.includes(token.toLowerCase()))) return false;
  }

  if (fp.hrefPathPattern) {
    hasCriterion = true;
    // An element without an href cannot satisfy an href criterion.
    if (!element.href || !String(element.href).includes(fp.hrefPathPattern)) return false;
  }

  return hasCriterion;
}

/**
 * Prune a snapshot using a data-driven profile policy.
 *
 * Elements are kept when their role is allowed (an empty `allowedRoles` allows
 * every role), none of the exclude patterns occurs in their text, and, if
 * include patterns are given, at least one of them occurs. Survivors are
 * ordered by `scoreElement` (highest first, ties keep snapshot order) and
 * truncated to the node budget of the current relaxation level.
 *
 * @param snapshot - Snapshot to prune
 * @param policy - Profile pruning policy
 * @param goal - Task goal, used for the goal-term score boost
 * @param relaxationLevel - 0 = strict, 1 = relaxed, anything else = loose budget
 * @param category - Task category; not read by this function
 * @param fingerprints - Learned fingerprints used to boost matching elements
 * @returns The pruned elements and the node budget that was applied
 */
export function pruneWithPolicy(
  snapshot: Snapshot,
  policy: DataDrivenPruningPolicy,
  goal: string,
  relaxationLevel: number,
  category: PruningTaskCategory,
  fingerprints: LearnedTargetFingerprint[] = []
): { elements: SnapshotElement[]; maxNodes: number } {
  // Determine the node budget for this relaxation level
  let budget: number;
  if (relaxationLevel === 0) {
    budget = policy.maxElements;
  } else if (relaxationLevel === 1) {
    budget = policy.maxElementsRelaxed;
  } else {
    budget = policy.maxElementsLoose ?? Math.min(policy.maxElementsRelaxed * 2, 100);
  }
  // A negative budget would make slice() drop elements from the end instead
  // of limiting the result, so treat it as "keep nothing".
  const maxNodes = Math.max(0, budget);

  const allowedRoles = new Set(policy.allowedRoles.map(r => r.toLowerCase()));
  const excludePatterns = (policy.excludeTextPatterns ?? []).map(p => p.toLowerCase());
  const includePatterns = (policy.includeTextPatterns ?? []).map(p => p.toLowerCase());

  const kept = (snapshot.elements || []).filter(element => {
    if (allowedRoles.size > 0 && !allowedRoles.has(roleOf(element))) {
      return false;
    }

    const text = textOf(element);

    if (excludePatterns.some(p => text.includes(p))) {
      return false;
    }

    // Include patterns (if specified, at least one must match)
    return includePatterns.length === 0 || includePatterns.some(p => text.includes(p));
  });

  // Score every element once instead of on each comparison; Array.prototype.sort
  // is stable, so equal scores keep their snapshot order.
  const elements = kept
    .map(element => ({ element, score: scoreElement(element, goal, fingerprints) }))
    .sort((a, b) => b.score - a.score)
    .slice(0, maxNodes)
    .map(entry => entry.element);

  return { elements, maxNodes };
}
const MAX_TEXT_TOKENS = 4;
const MAX_TOKEN_LENGTH = 20;
const MAX_ATTR_VALUE_LENGTH = 30;

const SENSITIVE_PATTERNS = [
  /@\S+\.\S+/, // email
  /\d{3}[-.\s]?\d{3}[-.\s]?\d{4}/, // phone
  /\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}/, // credit card
  /password/i,
  /ssn/i,
  /social.security/i,
  /date.of.birth/i,
  /dob/i,
  /account.number/i,
  /routing.number/i,
];

/** Global-flag copies of SENSITIVE_PATTERNS, used to locate every match in a full text. */
const SENSITIVE_PATTERNS_GLOBAL = SENSITIVE_PATTERNS.map(
  p => new RegExp(p.source, p.flags.includes('g') ? p.flags : `${p.flags}g`)
);

/** True when a single token (or attribute value) matches any sensitive pattern. */
function isSensitiveToken(token: string): boolean {
  return SENSITIVE_PATTERNS.some(p => p.test(token));
}

/**
 * Locate every sensitive match in the whole text as [start, end) offsets.
 *
 * Several patterns (phone, credit card, "social security", ...) allow a
 * separator between their parts, so a value written with spaces is only
 * detectable on the full text, never on an individual whitespace token.
 */
function findSensitiveSpans(text: string): Array<[number, number]> {
  const spans: Array<[number, number]> = [];
  for (const pattern of SENSITIVE_PATTERNS_GLOBAL) {
    for (const match of text.matchAll(pattern)) {
      const start = match.index ?? 0;
      spans.push([start, start + match[0].length]);
    }
  }
  return spans;
}

/**
 * Split text into at most MAX_TEXT_TOKENS whitespace-separated tokens.
 *
 * A token is dropped when it is longer than MAX_TOKEN_LENGTH, when it matches
 * a sensitive pattern on its own, or when it overlaps a sensitive match found
 * in the full text (e.g. each group of "4111 1111 1111 1111").
 */
function normalizeTextTokens(text: string | undefined): string[] {
  if (!text) return [];

  const sensitiveSpans = findSensitiveSpans(text);
  const tokens: string[] = [];

  for (const match of text.matchAll(/\S+/g)) {
    const token = match[0];
    const start = match.index ?? 0;
    const end = start + token.length;

    if (token.length > MAX_TOKEN_LENGTH) continue;
    if (isSensitiveToken(token)) continue;
    if (sensitiveSpans.some(([from, to]) => from < end && start < to)) continue;

    tokens.push(token);
    if (tokens.length === MAX_TEXT_TOKENS) break;
  }

  return tokens;
}

/**
 * Reduce an href to its path.
 *
 * Absolute URLs yield their pathname. Root-relative hrefs ("/a/b?x=1#y") are
 * cut at the first "?" or "#" so that neither query nor fragment is stored.
 * Anything else yields undefined.
 */
function normalizeHrefPath(href: string | undefined): string | undefined {
  if (!href) return undefined;
  try {
    // Keep only pathname, strip query/hash for privacy
    return new URL(href).pathname || undefined;
  } catch {
    // Not a full URL — maybe a relative path
    if (!href.startsWith('/')) return undefined;
    const cut = href.search(/[?#]/);
    return cut >= 0 ? href.substring(0, cut) : href;
  }
}

/**
 * Copy the element's string attributes that are short enough
 * (≤ MAX_ATTR_VALUE_LENGTH chars) and do not match a sensitive pattern.
 * Returns undefined when the element has no `attributes` object or nothing
 * survives the filter.
 */
function normalizeAttributes(element: SnapshotElement): Record<string, string> | undefined {
  // `attributes` is read dynamically, exactly like a plain property access.
  const attrs: unknown = Reflect.get(element, 'attributes');
  if (!attrs || typeof attrs !== 'object') return undefined;

  const result: Record<string, string> = {};
  for (const [key, value] of Object.entries(attrs)) {
    if (
      typeof value === 'string' &&
      value.length <= MAX_ATTR_VALUE_LENGTH &&
      !isSensitiveToken(value)
    ) {
      result[key] = value;
    }
  }
  return Object.keys(result).length > 0 ? result : undefined;
}

/**
 * Compute a simple hash of the task goal for task-scoped fingerprint matching.
 *
 * The goal is lower-cased, trimmed and whitespace-collapsed, then folded with
 * a 32-bit `hash * 31 + charCode` loop. Non-cryptographic; returned as
 * `th_<base36>`.
 */
export function computeTaskHash(taskGoal: string): string {
  const normalized = taskGoal.toLowerCase().trim().replace(/\s+/g, ' ');
  let hash = 0;
  for (let i = 0; i < normalized.length; i++) {
    const chr = normalized.charCodeAt(i);
    hash = (hash << 5) - hash + chr;
    hash |= 0; // Convert to 32-bit int
  }
  return `th_${Math.abs(hash).toString(36)}`;
}

/**
 * Extract the hostname from a URL string; returns '' when the URL cannot be parsed.
 */
export function extractDomain(url: string): string {
  try {
    return new URL(url).hostname;
  } catch {
    return '';
  }
}

/**
 * Create a learned fingerprint from a successful element interaction.
 *
 * Text comes from `element.text` (falling back to `element.name`), ARIA tokens
 * from `element.ariaLabel`; both pass through the token normalizer above. The
 * fingerprint starts with one success, no failures and confidence 0.5.
 *
 * @param element - The element that was acted on
 * @param intent - Intent the action fulfilled
 * @param taskHash - Hash of the task goal
 * @param domain - Domain the interaction happened on
 */
export function createFingerprint(
  element: SnapshotElement,
  intent: string,
  taskHash: string,
  domain: string
): LearnedTargetFingerprint {
  return {
    domain,
    taskHash,
    intent,
    role: element.role || undefined,
    textTokens: normalizeTextTokens(element.text || element.name),
    ariaTokens: normalizeTextTokens(element.ariaLabel),
    hrefPathPattern: normalizeHrefPath(element.href),
    attributePatterns: normalizeAttributes(element),
    successCount: 1,
    failureCount: 0,
    confidence: 0.5,
    learnedAt: Date.now(),
  };
}
/**
 * Merge a new fingerprint into an existing list, updating counts and confidence.
 *
 * Two fingerprints are the same target when domain, taskHash, intent, role
 * (missing role compares as '') and hrefPathPattern are all equal. On a match
 * the stored entry gets one more success, `confidence = successes / attempts`
 * (capped at 1) and a fresh `lastUsedAt`; otherwise `newFp` is appended.
 *
 * Token lists from `newFp` replace the stored ones only when they are
 * non-empty, so an observation whose text was entirely filtered out does not
 * erase tokens learned earlier.
 *
 * @param existing - Current fingerprints (never mutated)
 * @param newFp - Fingerprint from the latest successful interaction
 * @returns A new array containing the merged result
 */
export function mergeFingerprint(
  existing: LearnedTargetFingerprint[],
  newFp: LearnedTargetFingerprint
): LearnedTargetFingerprint[] {
  const matchIdx = existing.findIndex(
    fp =>
      fp.domain === newFp.domain &&
      fp.taskHash === newFp.taskHash &&
      fp.intent === newFp.intent &&
      (fp.role || '') === (newFp.role || '') &&
      fp.hrefPathPattern === newFp.hrefPathPattern
  );

  if (matchIdx < 0) {
    return [...existing, newFp];
  }

  const current = existing[matchIdx];
  const successCount = current.successCount + 1;
  const confidence = Math.min(1, successCount / (successCount + current.failureCount));

  // Prefer the newer token list, but only if it actually carries tokens.
  const preferNonEmpty = <T>(next: T[] | undefined, previous: T[] | undefined): T[] | undefined =>
    next && next.length > 0 ? next : previous;

  const merged: LearnedTargetFingerprint = {
    ...current,
    textTokens: preferNonEmpty(newFp.textTokens, current.textTokens),
    ariaTokens: preferNonEmpty(newFp.ariaTokens, current.ariaTokens),
    attributePatterns: newFp.attributePatterns ?? current.attributePatterns,
    successCount,
    confidence,
    lastUsedAt: Date.now(),
  };

  const result = [...existing];
  result[matchIdx] = merged;
  return result;
}
/**
 * Register one failed use of a learned fingerprint.
 *
 * The first entry whose domain, taskHash and intent all equal the arguments
 * gets `failureCount + 1` and `confidence = successes / attempts` (floored at
 * 0). The input array is never mutated: a copy with the updated entry is
 * returned, or the very same array when no entry matches.
 *
 * @param existing - Current fingerprints
 * @param intent - Intent of the failed step
 * @param taskHash - Hash of the task goal
 * @param domain - Domain the failure happened on
 */
export function recordFingerprintFailure(
  existing: LearnedTargetFingerprint[],
  intent: string,
  taskHash: string,
  domain: string
): LearnedTargetFingerprint[] {
  const position = existing.findIndex(
    candidate =>
      candidate.domain === domain && candidate.taskHash === taskHash && candidate.intent === intent
  );

  if (position < 0) {
    return existing;
  }

  const target = existing[position];
  const failures = target.failureCount + 1;
  const attempts = target.successCount + failures;
  const penalized: LearnedTargetFingerprint = {
    ...target,
    failureCount: failures,
    confidence: Math.max(0, target.successCount / attempts),
  };

  return existing.map((entry, index) => (index === position ? penalized : entry));
}
/** URL fragments that mark a page as privacy-sensitive; nothing is learned there. */
const SENSITIVE_URL_PATTERNS = [
  /\/checkout/i,
  /\/payment/i,
  /\/account\/settings/i,
  /\/password/i,
  /\/billing/i,
  /\/security/i,
  /\/login/i,
  /\/signin/i,
  /\/auth/i,
];

/** Element roles that are never used as a learning source. */
const SENSITIVE_ROLES = new Set([
  'password',
  'credit-card-number',
  'credit-card-cvv',
  'credit-card-expiration',
]);

/**
 * Tell whether learning must be skipped for this URL.
 *
 * @returns true when any entry of SENSITIVE_URL_PATTERNS matches the URL
 */
export function isSensitiveUrl(url: string): boolean {
  for (const pattern of SENSITIVE_URL_PATTERNS) {
    if (pattern.test(url)) {
      return true;
    }
  }
  return false;
}

/**
 * Tell whether an element deals with sensitive data: either its role is in
 * SENSITIVE_ROLES, or its text / name / ARIA label mentions a sensitive term.
 */
function isSensitiveElement(element: SnapshotElement): boolean {
  if (SENSITIVE_ROLES.has((element.role || '').toLowerCase())) {
    return true;
  }

  const label = [element.text, element.name, element.ariaLabel]
    .map(part => part || '')
    .join(' ')
    .toLowerCase();
  const sensitiveTerms = ['password', 'credit card', 'cvv', 'social security', 'ssn'];
  return sensitiveTerms.some(term => label.includes(term));
}

/**
 * Pull the element id out of an action string such as "CLICK(42)" or
 * "TYPE(12, 'text')". Returns null when the string is missing or does not
 * start with an upper-case verb followed by "(" and digits.
 */
function extractElementIdFromAction(actionTaken: string | undefined): number | null {
  if (!actionTaken) {
    return null;
  }
  const parsed = /^[A-Z_]+\((\d+)/.exec(actionTaken);
  if (parsed === null) {
    return null;
  }
  return parseInt(parsed[1], 10);
}

/**
 * Derive the intent label used for fingerprinting.
 *
 * Preference order: the step goal (lower-cased, whitespace/hyphens collapsed
 * to "_", cut to 50 chars), then the lower-cased verb of the action string,
 * then "unknown".
 */
function extractIntentFromOutcome(outcome: StepOutcome): string {
  if (outcome.goal) {
    const slug = outcome.goal.toLowerCase().replace(/[\s-]+/g, '_');
    return slug.slice(0, 50);
  }

  if (!outcome.actionTaken) {
    return 'unknown';
  }

  const verb = outcome.actionTaken.match(/^[A-Z_]+/)?.[0] || 'unknown';
  return verb.toLowerCase();
}

/** Inputs that control a single learning extraction. */
export interface LearningExtractionOptions {
  /** Whether learning is enabled (opt-in, default false) */
  learningEnabled: boolean;
  /** The task goal string */
  taskGoal: string;
  /** Current page URL */
  currentUrl: string;
  /** Hash of the task goal for task-scoped matching */
  taskHash: string;
}
/** Outcome of one learning extraction attempt. */
export interface LearningExtractionResult {
  /** Whether extraction was performed */
  extracted: boolean;
  /** The fingerprint (if extracted) */
  fingerprint?: LearnedTargetFingerprint;
  /** Reason extraction was skipped (for audit logging) */
  skipReason?: string;
}

/**
 * Turn a successful step outcome into a learned fingerprint.
 *
 * Checks run in this order, and the first one that fails decides the
 * `skipReason`: learning enabled, step status is SUCCESS, URL not sensitive,
 * snapshot elements available, action names an element id, that element is
 * in the snapshot, element not sensitive, URL parses to a hostname.
 *
 * @param outcome - The step outcome to learn from
 * @param snapshotElements - Elements of the snapshot the action ran against
 * @param options - Learning switches and task/page context
 * @returns `{ extracted: true, fingerprint }` on success, otherwise
 *          `{ extracted: false, skipReason }`
 */
export function extractFingerprintFromOutcome(
  outcome: StepOutcome,
  snapshotElements: SnapshotElement[] | undefined,
  options: LearningExtractionOptions
): LearningExtractionResult {
  const skip = (skipReason: string): LearningExtractionResult => ({
    extracted: false,
    skipReason,
  });

  if (!options.learningEnabled) return skip('learning_disabled');

  // Only successful steps teach anything
  if (outcome.status !== StepStatus.SUCCESS) return skip('step_not_successful');

  if (isSensitiveUrl(options.currentUrl)) return skip('sensitive_url');

  // The acted-on element has to be looked up in the snapshot
  if (!snapshotElements || snapshotElements.length === 0) return skip('no_snapshot_elements');

  // DONE-style actions carry no element id, so there is nothing to learn from
  const targetId = extractElementIdFromAction(outcome.actionTaken);
  if (targetId === null) return skip('no_element_action');

  const target = snapshotElements.find(candidate => candidate.id === targetId);
  if (!target) return skip('element_not_found');

  if (isSensitiveElement(target)) return skip('sensitive_element');

  let hostname: string;
  try {
    hostname = new URL(options.currentUrl).hostname;
  } catch {
    return skip('invalid_url');
  }

  const intent = extractIntentFromOutcome(outcome);
  return {
    extracted: true,
    fingerprint: createFingerprint(target, intent, options.taskHash, hostname),
  };
}

/**
 * Penalize a fingerprint after a failed use.
 *
 * The success ratio (successes / attempts, counting this failure) is
 * multiplied by 0.8 raised to the new failure count and clamped to [0, 1].
 * The input is not modified; an updated copy is returned.
 */
export function applyFingerprintFailure(
  fingerprint: LearnedTargetFingerprint
): LearnedTargetFingerprint {
  const failures = fingerprint.failureCount + 1;
  const successRatio = fingerprint.successCount / (fingerprint.successCount + failures);
  // Each recorded failure shrinks the ratio by a further 20%
  const penalty = Math.pow(0.8, failures);
  const clamped = Math.max(0, Math.min(1, successRatio * penalty));

  return { ...fingerprint, failureCount: failures, confidence: clamped };
}
+ */ +export function applyFingerprintSuccess( + fingerprint: LearnedTargetFingerprint +): LearnedTargetFingerprint { + const successCount = fingerprint.successCount + 1; + const totalAttempts = successCount + fingerprint.failureCount; + + // Confidence is the plain ratio of successes to total attempts, capped at 1 (the 0.8^failureCount decay from applyFingerprintFailure is not re-applied) + const confidence = Math.min(1, successCount / totalAttempts); + + return { + ...fingerprint, + successCount, + confidence, + lastUsedAt: Date.now(), + }; +} + +/** + * Check if a fingerprint should be disabled due to too many failures. + * Fingerprints with 3 or more recorded failures (failureCount is cumulative; applyFingerprintSuccess does not reset it) and confidence < 0.3 are considered stale. + */ +export function isFingerprintStale(fingerprint: LearnedTargetFingerprint): boolean { + return fingerprint.failureCount >= 3 && fingerprint.confidence < 0.3; +} + +/** + * Check if a fingerprint has expired based on TTL: true when more than ttlMs has elapsed since learnedAt (lastUsedAt is not consulted). + */ +export function isFingerprintExpired( + fingerprint: LearnedTargetFingerprint, + ttlMs: number +): boolean { + const age = Date.now() - fingerprint.learnedAt; + return age > ttlMs; +} + +/** + * Convert a learned fingerprint into a hint object for injection into ComposableHeuristics (NOTE(review): the result carries `intent`, while HeuristicHintInputSchema declares `intentPattern` — confirm the mapping at the call site). + * Only fingerprints with confidence >= minConfidence that are not stale are converted; otherwise null is returned.
+ */ +export function fingerprintToHint( + fingerprint: LearnedTargetFingerprint, + minConfidence: number = 0.4 +): { intent: string; textPatterns: string[]; roleFilter: string[]; priority: number } | null { + if (fingerprint.confidence < minConfidence) { + return null; + } + + if (isFingerprintStale(fingerprint)) { + return null; + } + + /* Text patterns are the text tokens followed by the aria tokens */ const textPatterns: string[] = []; + if (fingerprint.textTokens) { + textPatterns.push(...fingerprint.textTokens); + } + if (fingerprint.ariaTokens) { + textPatterns.push(...fingerprint.ariaTokens); + } + + /* Role filter is empty when the fingerprint has no role */ const roleFilter: string[] = []; + if (fingerprint.role) { + roleFilter.push(fingerprint.role); + } + + return { + intent: fingerprint.intent, + textPatterns, + roleFilter, + // Priority is confidence * 10, rounded: 0-10 for confidence in [0, 1], and at least 4 with the default minConfidence of 0.4 + priority: Math.round(fingerprint.confidence * 10), + }; +} + +/** + * Compute a task hash from a task goal string (trimmed, lowercased, whitespace runs collapsed to one space). + * Uses a simple hash for environments where crypto.subtle may not be available. + * Returns first 16 hex chars of SHA-256 when available, or a zero-padded 32-bit djb2 hash otherwise, so the two paths give different values for the same goal.
+ */ +export async function computeTaskHash(taskGoal: string): Promise { + const normalized = taskGoal.trim().toLowerCase().replace(/\s+/g, ' '); + + // Try Web Crypto API first (available in both extension and browser contexts) + if (typeof crypto !== 'undefined' && crypto.subtle) { + try { + const encoder = new TextEncoder(); + const data = encoder.encode(normalized); + const hashBuffer = await crypto.subtle.digest('SHA-256', data); + const hashArray = Array.from(new Uint8Array(hashBuffer)); + const hashHex = hashArray.map(b => b.toString(16).padStart(2, '0')).join(''); + return hashHex.slice(0, 16); + } catch { + // Hashing via Web Crypto failed — fall through to simple hash + } + } + + // Simple fallback hash (djb2 algorithm: hash * 33 plus the char code, seeded with 5381) + let hash = 5381; + for (let i = 0; i < normalized.length; i++) { + hash = (hash << 5) + hash + normalized.charCodeAt(i); + hash = hash & hash; // Bitwise AND coerces to a signed 32-bit integer + } + return Math.abs(hash).toString(16).padStart(16, '0').slice(0, 16); +} diff --git a/src/agents/planner-executor/learning-store.ts b/src/agents/planner-executor/learning-store.ts new file mode 100644 index 0000000..62cb5ed --- /dev/null +++ b/src/agents/planner-executor/learning-store.ts @@ -0,0 +1,96 @@ +/** + * LearningStore — interface for persisting and retrieving learned data. + * + * The SDK defines the interface; the extension provides the implementation + * backed by chrome.storage.local. This keeps the SDK browser-agnostic.
+ * + * @see docs/plans/browser-agent/2026-05-02-extensible-categories-and-task-learning.md + */ + +import type { LearnedTargetFingerprint, DomainProfile } from './profile-types'; + +export interface LearningStore { + // ---- Fingerprints (L1) ---- + + /** Get all fingerprints for a domain (required), optionally narrowed to one task hash */ + getFingerprints(filter: { + domain: string; + taskHash?: string; + }): Promise; + + /** Store/update fingerprints (merge by key fields; InMemoryLearningStore keys on domain, taskHash, intent, role and hrefPathPattern and replaces the stored entry) */ + putFingerprints(fingerprints: LearnedTargetFingerprint[]): Promise; + + /** Remove fingerprints older than maxAgeMs (InMemoryLearningStore measures age from learnedAt and resolves with the number removed) */ + pruneFingerprints(maxAgeMs: number): Promise; + + // ---- Domain Profiles (L3) ---- + + /** Get the domain profile for a given domain (InMemoryLearningStore resolves with null when none is stored) */ + getDomainProfile(domain: string): Promise; + + /** Store/update a domain profile (InMemoryLearningStore keys it by profile.domain) */ + putDomainProfile(profile: DomainProfile): Promise; + + // ---- General ---- + + /** Clear all learned data (fingerprints and domain profiles) */ + clear(): Promise; +} + +/** + * In-memory LearningStore implementation for testing and non-extension contexts; data lives only in instance fields.
+ */ +export class InMemoryLearningStore implements LearningStore { + /** Flat list of stored fingerprints; lookups and upserts scan it linearly */ private fingerprints: LearnedTargetFingerprint[] = []; + /** Domain profiles keyed by their domain string */ private domainProfiles: Map = new Map(); + + /** Resolves with fingerprints whose domain equals filter.domain; taskHash is applied only when truthy, so an empty string behaves like no task filter */ async getFingerprints(filter: { + domain: string; + taskHash?: string; + }): Promise { + return this.fingerprints.filter(fp => { + if (fp.domain !== filter.domain) return false; + if (filter.taskHash && fp.taskHash !== filter.taskHash) return false; + return true; + }); + } + + /** Upsert: an entry with the same domain, taskHash, intent, role (a missing role is treated as '') and hrefPathPattern is replaced by the incoming object as-is; counts are not combined */ async putFingerprints(fingerprints: LearnedTargetFingerprint[]): Promise { + for (const fp of fingerprints) { + const idx = this.fingerprints.findIndex( + existing => + existing.domain === fp.domain && + existing.taskHash === fp.taskHash && + existing.intent === fp.intent && + (existing.role || '') === (fp.role || '') && + existing.hrefPathPattern === fp.hrefPathPattern + ); + if (idx >= 0) { + this.fingerprints[idx] = fp; + } else { + this.fingerprints.push(fp); + } + } + } + + /** Drops entries whose learnedAt is earlier than now minus maxAgeMs and resolves with how many were removed */ async pruneFingerprints(maxAgeMs: number): Promise { + const cutoff = Date.now() - maxAgeMs; + const before = this.fingerprints.length; + this.fingerprints = this.fingerprints.filter(fp => fp.learnedAt >= cutoff); + return before - this.fingerprints.length; + } + + /** Resolves with the stored profile, or null when the domain has none */ async getDomainProfile(domain: string): Promise { + return this.domainProfiles.get(domain) ??
null; + } + + async putDomainProfile(profile: DomainProfile): Promise { + this.domainProfiles.set(profile.domain, profile); + } + + async clear(): Promise { + this.fingerprints = []; + this.domainProfiles.clear(); + } +} diff --git a/src/agents/planner-executor/planner-executor-agent.ts b/src/agents/planner-executor/planner-executor-agent.ts index 7160a47..3a33d95 100644 --- a/src/agents/planner-executor/planner-executor-agent.ts +++ b/src/agents/planner-executor/planner-executor-agent.ts @@ -64,6 +64,8 @@ import { import { ComposableHeuristics } from './composable-heuristics'; import { normalizeTaskCategory, type TaskCategory } from './task-category'; import { getCommonHint } from './common-hints'; +import type { ResolvedAgentProfile } from './profile-types'; +import { HeuristicHint } from './heuristic-hint'; import { detectModalAppearance, detectModalDismissed, @@ -464,6 +466,10 @@ export interface PlannerExecutorAgentOptions { intentHeuristics?: IntentHeuristics; /** Enable verbose logging */ verbose?: boolean; + /** Resolved profile providing data-driven pruning, hints, and scoring overrides */ + resolvedProfile?: ResolvedAgentProfile; + /** Callback invoked after each step with structured outcome (for learning extraction) */ + onStepOutcome?: (outcome: StepOutcome, snapshotElements?: SnapshotElement[]) => void; } // --------------------------------------------------------------------------- @@ -508,6 +514,8 @@ export class PlannerExecutorAgent { private currentTaskCategory: TaskCategory | null = null; private tokenCollector = new TokenUsageCollector(); private recoveryState: RecoveryState | null = null; + private resolvedProfile?: ResolvedAgentProfile; + private onStepOutcome?: (outcome: StepOutcome, snapshotElements?: SnapshotElement[]) => void; // Run state private runId: string | null = null; @@ -528,6 +536,8 @@ export class PlannerExecutorAgent { this.composableHeuristics = new ComposableHeuristics({ staticHeuristics: this.baseIntentHeuristics, }); + 
this.resolvedProfile = options.resolvedProfile; + this.onStepOutcome = options.onStepOutcome; } // --------------------------------------------------------------------------- @@ -691,12 +701,20 @@ export class PlannerExecutorAgent { this.actionHistory = []; this.currentStepIndex = 0; this.tokenCollector.reset(); - this.currentTaskCategory = normalizeTaskCategory(options.category); + this.currentTaskCategory = + normalizeTaskCategory(options.category) ?? this.resolvedProfile?.categoryHint ?? null; this.composableHeuristics = new ComposableHeuristics({ staticHeuristics: this.baseIntentHeuristics, taskCategory: this.currentTaskCategory, }); - this.composableHeuristics.clearStepHints(); + // Seed heuristics with profile hints (if any) + if (this.resolvedProfile?.heuristicHints && this.resolvedProfile.heuristicHints.length > 0) { + this.composableHeuristics.setStepHints( + this.resolvedProfile.heuristicHints.map(h => new HeuristicHint(h)) + ); + } else { + this.composableHeuristics.clearStepHints(); + } this.recoveryState = this.config.recovery.enabled ? new RecoveryState(this.config.recovery) : null; @@ -986,6 +1004,13 @@ export class PlannerExecutorAgent { let finalOutcome = outcome; stepOutcomes.push(finalOutcome); + // Emit structured step outcome for learning extraction + try { + this.onStepOutcome?.(finalOutcome, ctx.snapshot?.elements); + } catch { + // Don't fail the run on callback errors + } + // Update current URL let urlAfter = await runtime.getCurrentUrl(); currentUrl = urlAfter; @@ -1873,6 +1898,8 @@ export class PlannerExecutorAgent { relaxationLevel: step?.relaxPruning ? 
1 : 0, minElementCount: cfg.pruningMinElements, maxRelaxation: cfg.pruningMaxRelaxation, + profilePolicy: this.resolvedProfile?.pruningPolicy, + learnedFingerprints: this.resolvedProfile?.learnedFingerprints, }); pruningCategory = pruned.category; prunedNodeCount = pruned.actionableElementCount; @@ -1997,6 +2024,8 @@ export class PlannerExecutorAgent { relaxationLevel: step?.relaxPruning ? 1 : 0, minElementCount: cfg.pruningMinElements, maxRelaxation: cfg.pruningMaxRelaxation, + profilePolicy: this.resolvedProfile?.pruningPolicy, + learnedFingerprints: this.resolvedProfile?.learnedFingerprints, }); pruningCategory = pruned.category; prunedNodeCount = pruned.actionableElementCount; diff --git a/src/agents/planner-executor/profile-registry.ts b/src/agents/planner-executor/profile-registry.ts new file mode 100644 index 0000000..22bb018 --- /dev/null +++ b/src/agents/planner-executor/profile-registry.ts @@ -0,0 +1,160 @@ +/** + * ProfileRegistry — registers, resolves, and manages BrowserAgentProfiles. + * + * Resolution order: + * 1. Sort profiles by priority descending (default 0) + * 2. For each profile: check if any taskKeywords appear in taskText (case-insensitive) + * AND any domainPatterns match the domain (glob-style) + * 3. First matching profile wins → extract hints, pruning, scoring + * 4. 
If no profile matches → return EMPTY_RESOLVED_PROFILE + * + * @see docs/plans/browser-agent/2026-05-02-extensible-categories-and-task-learning.md + */ + +import type { + BrowserAgentProfile, + ResolvedAgentProfile, + LearnedTargetFingerprint, +} from './profile-types'; +import { EMPTY_RESOLVED_PROFILE } from './profile-types'; +import { BrowserAgentProfileSchema, BrowserAgentProfileArraySchema } from './profile-schema'; +import type { TaskCategory } from './task-category'; + +export class ProfileRegistry { + private profiles: BrowserAgentProfile[] = []; + + /** Register a profile; an existing profile with the same id is replaced in place */ + register(profile: BrowserAgentProfile): void { + const existing = this.profiles.findIndex(p => p.id === profile.id); + if (existing >= 0) { + this.profiles[existing] = profile; + } else { + this.profiles.push(profile); + } + } + + /** Validate and load one profile object or an array of profiles from parsed JSON. Array input is validated as a whole first, so a single invalid entry rejects the entire payload with the generic error and nothing is registered. */ + loadFromJSON(json: unknown): { loaded: number; errors: string[] } { + const errors: string[] = []; + let loaded = 0; + + // Try as array first, then as single object + let items: unknown[]; + const arrayResult = BrowserAgentProfileArraySchema.safeParse(json); + if (arrayResult.success) { + items = arrayResult.data; + } else { + const singleResult = BrowserAgentProfileSchema.safeParse(json); + if (singleResult.success) { + items = [singleResult.data]; + } else { + return { + loaded: 0, + errors: ['Invalid profile JSON: must be a profile object or array of profiles'], + }; + } + } + + /* Every item already passed one of the parses above; this per-item parse is a second, defensive check */ for (let i = 0; i < items.length; i++) { + const result = BrowserAgentProfileSchema.safeParse(items[i]); + if (result.success) { + // Zod enum produces string literals; cast to BrowserAgentProfile + // whose taskCategoryHint uses the TaskCategory enum + this.register(result.data as unknown as BrowserAgentProfile); + loaded++; + } else { + errors.push(`Profile at index ${i}: ${result.error.message}`); + } + } + + return { loaded, errors }; + } + + /** Resolve the best matching profile for a task + domain: profiles are tried in descending priority and the first match wins; with no match only the supplied fingerprints are carried over */ + resolve( +
taskText: string, + domain: string, + fingerprints?: LearnedTargetFingerprint[] + ): ResolvedAgentProfile { + /* Sort a copy so the order of this.profiles is left untouched */ const sorted = [...this.profiles].sort((a, b) => (b.priority ?? 0) - (a.priority ?? 0)); + const normalizedTask = taskText.toLowerCase(); + + for (const profile of sorted) { + if (this.matchesProfile(profile, normalizedTask, domain)) { + return { + categoryHint: profile.taskCategoryHint, + pruningPolicy: profile.pruningPolicy, + scoringProfile: profile.scoringProfile, + heuristicHints: profile.heuristicHints ?? [], + learnedFingerprints: fingerprints ?? [], + }; + } + } + + return { + ...EMPTY_RESOLVED_PROFILE, + learnedFingerprints: fingerprints ?? [], + }; + } + + /** List all registered profiles (returns a shallow copy of the internal array) */ + list(): BrowserAgentProfile[] { + return [...this.profiles]; + } + + /** Remove a profile by id; returns true if one was removed, false if the id was not registered */ + unregister(id: string): boolean { + const index = this.profiles.findIndex(p => p.id === id); + if (index < 0) return false; + this.profiles.splice(index, 1); + return true; + } + + /** Clear all profiles */ + clear(): void { + this.profiles = []; + } + + /** True when both the keyword and the domain criteria pass; an absent or empty list counts as a pass, so a profile with neither list matches every task. Expects the task text to be lowercased already. */ private matchesProfile( + profile: BrowserAgentProfile, + normalizedTask: string, + domain: string + ): boolean { + const { taskKeywords, domainPatterns } = profile.match; + + const keywordsMatch = + !taskKeywords || + taskKeywords.length === 0 || + taskKeywords.some(kw => normalizedTask.includes(kw.toLowerCase())); + + const domainMatch = + !domainPatterns || + domainPatterns.length === 0 || + domainPatterns.some(pattern => globMatch(pattern, domain)); + + return keywordsMatch && domainMatch; + } +} + +/** + * Simple glob-style matching for domain patterns. + * Supports "*" as a wildcard (e.g., "*.booking.com" matches "www.booking.com").
+ */ +function globMatch(pattern: string, domain: string): boolean { + const normalizedPattern = pattern.toLowerCase(); + const normalizedDomain = domain.toLowerCase(); + + /* No wildcard: match the host itself or any subdomain of it */ if (!normalizedPattern.includes('*')) { + return ( + normalizedDomain === normalizedPattern || normalizedDomain.endsWith('.' + normalizedPattern) + ); + } + + // Convert glob to regex: escape regex metacharacters (but not "?" or "*"), then replace * with .* + const regexStr = normalizedPattern.replace(/[.+^${}()|[\]\\]/g, '\\$&').replace(/\*/g, '.*'); + try { + return new RegExp(`^${regexStr}$`).test(normalizedDomain); + } catch { + /* The pattern produced an invalid regex — treat it as no match */ return false; + } +} diff --git a/src/agents/planner-executor/profile-schema.ts b/src/agents/planner-executor/profile-schema.ts new file mode 100644 index 0000000..faf748a --- /dev/null +++ b/src/agents/planner-executor/profile-schema.ts @@ -0,0 +1,75 @@ +/** + * Zod schemas for profile JSON validation. + * + * @see docs/plans/browser-agent/2026-05-02-extensible-categories-and-task-learning.md + */ + +import { z } from 'zod'; +import { TaskCategory } from './task-category'; + +/** Validates DataDrivenPruningPolicy: at least one non-empty role; each element cap is an integer in [5, 200]; the ordering between the caps is not checked */ export const DataDrivenPruningPolicySchema = z.object({ + allowedRoles: z.array(z.string().min(1)).min(1), + includeTextPatterns: z.array(z.string()).optional(), + excludeTextPatterns: z.array(z.string()).optional(), + maxElements: z.number().int().min(5).max(200), + maxElementsRelaxed: z.number().int().min(5).max(200), + maxElementsLoose: z.number().int().min(5).max(200).optional(), +}); + +/** Validates a heuristic hint; every field is optional and accepted in both camelCase and snake_case spellings */ export const HeuristicHintInputSchema = z.object({ + intentPattern: z.string().optional(), + intent_pattern: z.string().optional(), + textPatterns: z.array(z.string()).optional(), + text_patterns: z.array(z.string()).optional(), + roleFilter: z.array(z.string()).optional(), + role_filter: z.array(z.string()).optional(), + priority: z.number().int().min(0).optional(), + attributePatterns: z.record(z.string(), z.string()).optional(), + attribute_patterns: z.record(z.string(), z.string()).optional(), +}); + +/** Validates one BrowserAgentProfile; id, label, version, match and source are required */ export const BrowserAgentProfileSchema =
z.object({ + id: z.string().min(1), + label: z.string().min(1), + version: z.number().int().min(1), + /* Both lists are optional; ProfileRegistry treats an absent or empty list as no restriction */ match: z.object({ + taskKeywords: z.array(z.string()).optional(), + domainPatterns: z.array(z.string()).optional(), + }), + taskCategoryHint: z.nativeEnum(TaskCategory).optional(), + pruningPolicy: DataDrivenPruningPolicySchema.optional(), + scoringProfile: z.record(z.string(), z.unknown()).optional(), + heuristicHints: z.array(HeuristicHintInputSchema).optional(), + source: z.enum(['built_in', 'user', 'learned', 'imported']), + priority: z.number().int().optional(), +}); + +/** Validates an array of profiles; parsing fails as a whole if any entry is invalid */ export const BrowserAgentProfileArraySchema = z.array(BrowserAgentProfileSchema); + +/** Validates a LearnedTargetFingerprint: counts are non-negative integers and confidence lies in [0, 1] */ export const LearnedTargetFingerprintSchema = z.object({ + domain: z.string().min(1), + taskHash: z.string().min(1), + intent: z.string().min(1), + role: z.string().optional(), + textTokens: z.array(z.string()).optional(), + ariaTokens: z.array(z.string()).optional(), + hrefPathPattern: z.string().optional(), + attributePatterns: z.record(z.string(), z.string()).optional(), + successCount: z.number().int().min(0), + failureCount: z.number().int().min(0), + confidence: z.number().min(0).max(1), + learnedAt: z.number(), + lastUsedAt: z.number().optional(), +}); + +/** Validates a DomainProfile: the snapshot limit is an integer in [5, 200] and successRate lies in [0, 1] */ export const DomainProfileSchema = z.object({ + domain: z.string().min(1), + preferredCategoryHint: z.nativeEnum(TaskCategory).optional(), + preferredPruningPolicy: DataDrivenPruningPolicySchema.optional(), + preferredSnapshotLimit: z.number().int().min(5).max(200), + avgRelaxationLevel: z.number().min(0), + commonIntents: z.array(z.string()), + runCount: z.number().int().min(0), + successRate: z.number().min(0).max(1), + updatedAt: z.number(), +}); diff --git a/src/agents/planner-executor/profile-types.ts b/src/agents/planner-executor/profile-types.ts new file mode 100644 index 0000000..e69755e --- /dev/null +++ b/src/agents/planner-executor/profile-types.ts @@ -0,0 +1,176 @@ +/** + * Profile types for extensible task categories and task learning.
+ * + * @see docs/plans/browser-agent/2026-05-02-extensible-categories-and-task-learning.md + */ + +import type { TaskCategory } from './task-category'; +import type { HeuristicHintInput } from './heuristic-hint'; + +// --------------------------------------------------------------------------- +// Data-Driven Pruning Policy +// --------------------------------------------------------------------------- + +export interface DataDrivenPruningPolicy { + /** Element roles to include (lowercased, e.g. "button", "link") */ + allowedRoles: string[]; + /** Text patterns to include (case-insensitive substring) */ + includeTextPatterns?: string[]; + /** Text patterns to exclude (case-insensitive substring) */ + excludeTextPatterns?: string[]; + /** Max elements at relaxation level 0 */ + maxElements: number; + /** Max elements at relaxation level 1 */ + maxElementsRelaxed: number; + /** Max elements at relaxation level 2+ (optional; per the pruner tests the fallback when unset is min(maxElementsRelaxed * 2, 100)) */ + maxElementsLoose?: number; +} + +// --------------------------------------------------------------------------- +// Browser Agent Profile +// --------------------------------------------------------------------------- + +export interface BrowserAgentProfile { + /** Unique profile identifier (ProfileRegistry.register replaces a profile with the same id) */ + id: string; + + /** Display label for UI */ + label: string; + + /** Schema version for forward compatibility */ + version: number; + + /** When and how this profile should be activated; an absent or empty list places no restriction (see ProfileRegistry.matchesProfile) */ + match: { + /** Keywords found in task/goal text (case-insensitive substring) */ + taskKeywords?: string[]; + /** URL hostname patterns (glob-style, e.g., "*.booking.com"); a pattern without a wildcard matches that host and its subdomains */ + domainPatterns?: string[]; + }; + + /** Optional hint to a built-in TaskCategory for SDK compatibility */ + taskCategoryHint?: TaskCategory; + + /** Custom pruning policy (used when taskCategoryHint is absent or insufficient) */ + pruningPolicy?: DataDrivenPruningPolicy; + + /** Content-script scoring profile overrides */ + scoringProfile?: Record; + + /** Domain-specific heuristic hints */ +
heuristicHints?: HeuristicHintInput[]; + + /** Where this profile came from */ + source: 'built_in' | 'user' | 'learned' | 'imported'; + + /** Priority when multiple profiles match (higher = checked first; treated as 0 when omitted) */ + priority?: number; +} + +// --------------------------------------------------------------------------- +// Resolved Agent Profile (runtime composition) +// --------------------------------------------------------------------------- + +export interface ResolvedAgentProfile { + /** Optional built-in category hint */ + categoryHint?: TaskCategory; + + /** Data-driven pruning policy (from profile or learned) */ + pruningPolicy?: DataDrivenPruningPolicy; + + /** Content-script scoring profile overrides */ + scoringProfile?: Record; + + /** Heuristic hints from profiles and learned fingerprints (ProfileRegistry.resolve fills this from the matched profile only) */ + heuristicHints: HeuristicHintInput[]; + + /** Learned target fingerprints (NOTE(review): ProfileRegistry.resolve passes the caller-supplied list through unchanged; validation against the current snapshot is presumably done elsewhere — confirm) */ + learnedFingerprints: LearnedTargetFingerprint[]; +} + +// --------------------------------------------------------------------------- +// Learned Target Fingerprints (L1 Learning) +// --------------------------------------------------------------------------- + +export interface LearnedTargetFingerprint { + /** Domain (e.g., "amazon.com"); extractFingerprintFromOutcome stores the full URL hostname here */ + domain: string; + + /** Hash of the task goal for task-scoped matching */ + taskHash: string; + + /** The intent that was being resolved */ + intent: string; + + /** Element role */ + role?: string; + + /** Normalized visible text tokens (bounded, redacted) */ + textTokens?: string[]; + + /** Normalized aria/name tokens */ + ariaTokens?: string[]; + + /** Sanitized href path pattern (e.g., "/dp/", "/s?k=") */ + hrefPathPattern?: string; + + /** Stable attribute patterns */ + attributePatterns?: Record; + + /** How many times this fingerprint led to a successful action */ + successCount: number; + + /** How many times this fingerprint was tried and failed (cumulative; never reset by applyFingerprintSuccess) */ + failureCount: number; + + /** Computed confidence (0-1),
derived from success/failure counts and recency */ + confidence: number; + + /** When this fingerprint was first learned (epoch ms; isFingerprintExpired measures age from this) */ + learnedAt: number; + + /** When this fingerprint was last successfully used (epoch ms, set by applyFingerprintSuccess) */ + lastUsedAt?: number; +} + +// --------------------------------------------------------------------------- +// Domain Profile (L3 Learning) +// --------------------------------------------------------------------------- + +export interface DomainProfile { + /** Domain (used as the storage key by InMemoryLearningStore) */ + domain: string; + + /** Preferred built-in TaskCategory hint */ + preferredCategoryHint?: TaskCategory; + + /** Preferred custom pruning policy (learned from successful runs) */ + preferredPruningPolicy?: DataDrivenPruningPolicy; + + /** Snapshot limit that worked (DomainProfileSchema requires an integer in [5, 200]) */ + preferredSnapshotLimit: number; + + /** Average relaxation level needed */ + avgRelaxationLevel: number; + + /** Common intents seen on this domain */ + commonIntents: string[]; + + /** Number of completed runs on this domain */ + runCount: number; + + /** Success rate (0-1) */ + successRate: number; + + /** Last updated (NOTE(review): presumably epoch ms like learnedAt — confirm) */ + updatedAt: number; +} + +// --------------------------------------------------------------------------- +// Empty / default helpers (ProfileRegistry.resolve spreads this when no profile matches; heuristicHints is then the same array instance, so do not mutate it) +// --------------------------------------------------------------------------- + +export const EMPTY_RESOLVED_PROFILE: ResolvedAgentProfile = { + heuristicHints: [], + learnedFingerprints: [], +}; diff --git a/src/agents/planner-executor/pruning-types.ts b/src/agents/planner-executor/pruning-types.ts index 4eb28a7..d981d6e 100644 --- a/src/agents/planner-executor/pruning-types.ts +++ b/src/agents/planner-executor/pruning-types.ts @@ -1,4 +1,5 @@ import type { Snapshot, SnapshotElement } from './plan-models'; +import type { DataDrivenPruningPolicy, LearnedTargetFingerprint } from './profile-types'; export enum PruningTaskCategory { SHOPPING = 'shopping', @@ -23,6 +24,10 @@ export interface PruneSnapshotOptions { goal: string; category: PruningTaskCategory; relaxationLevel?:
number; + /** Optional data-driven pruning policy from a resolved BrowserAgentProfile */ + profilePolicy?: DataDrivenPruningPolicy; + /** Optional learned fingerprints for fingerprint-boost scoring */ + learnedFingerprints?: LearnedTargetFingerprint[]; } export interface PruningRecoveryOptions extends PruneSnapshotOptions { diff --git a/src/browser-agent.ts b/src/browser-agent.ts index 7eea1f0..d2284d9 100644 --- a/src/browser-agent.ts +++ b/src/browser-agent.ts @@ -89,3 +89,49 @@ export { type AgentRuntime, type IntentHeuristics, } from './agents/planner-executor/planner-executor-agent'; + +// Profile and learning exports +export { + type BrowserAgentProfile, + type DataDrivenPruningPolicy, + type ResolvedAgentProfile, + type LearnedTargetFingerprint, + type DomainProfile, + EMPTY_RESOLVED_PROFILE, +} from './agents/planner-executor/profile-types'; +export { + BrowserAgentProfileSchema, + BrowserAgentProfileArraySchema, +} from './agents/planner-executor/profile-schema'; +export { ProfileRegistry } from './agents/planner-executor/profile-registry'; +export { pruneWithPolicy } from './agents/planner-executor/data-driven-pruner'; +export { + computeTaskHash, + extractDomain, + createFingerprint, + mergeFingerprint, + recordFingerprintFailure, +} from './agents/planner-executor/fingerprint-normalizer'; +export type { LearningStore } from './agents/planner-executor/learning-store'; +export { InMemoryLearningStore } from './agents/planner-executor/learning-store'; +export { + detectPruningCategory, + pruneSnapshotForTask, +} from './agents/planner-executor/category-pruner'; +export { + type PruningTaskCategory, + type PruneSnapshotOptions, +} from './agents/planner-executor/pruning-types'; +export { + isSensitiveUrl, + extractFingerprintFromOutcome, + applyFingerprintFailure, + applyFingerprintSuccess, + isFingerprintStale, + isFingerprintExpired, + fingerprintToHint, +} from './agents/planner-executor/learning-extractor'; +export type { + LearningExtractionOptions, + 
LearningExtractionResult, +} from './agents/planner-executor/learning-extractor'; diff --git a/tests/data-driven-pruner.test.ts b/tests/data-driven-pruner.test.ts new file mode 100644 index 0000000..d55af25 --- /dev/null +++ b/tests/data-driven-pruner.test.ts @@ -0,0 +1,136 @@ +import { pruneWithPolicy } from '../src/agents/planner-executor/data-driven-pruner'; +import type { + DataDrivenPruningPolicy, + LearnedTargetFingerprint, +} from '../src/agents/planner-executor/profile-types'; +import { PruningTaskCategory } from '../src/agents/planner-executor/pruning-types'; +import type { Snapshot, SnapshotElement } from '../src/agents/planner-executor/plan-models'; + +describe('pruneWithPolicy', () => { + const basePolicy: DataDrivenPruningPolicy = { + allowedRoles: ['button', 'link', 'textbox', 'searchbox'], + includeTextPatterns: ['search', 'find', 'go'], + excludeTextPatterns: ['privacy', 'terms', 'cookie'], + maxElements: 10, + maxElementsRelaxed: 20, + maxElementsLoose: 30, + }; + + function makeSnapshot(elements: Partial[]): Snapshot { + return { + url: 'https://example.com', + title: 'Test Page', + elements: elements.map((e, i) => ({ + id: String(i), + role: 'button', + text: '', + ...e, + })) as SnapshotElement[], + }; + } + + describe('maxNodes by relaxation level', () => { + it('should use maxElements at relaxation level 0', () => { + const snapshot = makeSnapshot([]); + const result = pruneWithPolicy(snapshot, basePolicy, 'search', 0, PruningTaskCategory.SEARCH); + expect(result.maxNodes).toBe(10); + }); + + it('should use maxElementsRelaxed at relaxation level 1', () => { + const snapshot = makeSnapshot([]); + const result = pruneWithPolicy(snapshot, basePolicy, 'search', 1, PruningTaskCategory.SEARCH); + expect(result.maxNodes).toBe(20); + }); + + it('should use maxElementsLoose at relaxation level 2+', () => { + const snapshot = makeSnapshot([]); + const result = pruneWithPolicy(snapshot, basePolicy, 'search', 2, PruningTaskCategory.SEARCH); + 
expect(result.maxNodes).toBe(30); + }); + + it('should fall back when maxElementsLoose not set', () => { + const noLoose = { ...basePolicy, maxElementsLoose: undefined }; + const snapshot = makeSnapshot([]); + const result = pruneWithPolicy(snapshot, noLoose, 'search', 3, PruningTaskCategory.SEARCH); + expect(result.maxNodes).toBe(40); // min(maxElementsRelaxed * 2, 100) + }); + }); + + describe('element filtering', () => { + it('should filter to allowed roles', () => { + const snapshot = makeSnapshot([ + { role: 'button', text: 'Search' }, + { role: 'img', text: 'Search icon' }, + { role: 'link', text: 'Find more' }, + ]); + const result = pruneWithPolicy(snapshot, basePolicy, 'search', 0, PruningTaskCategory.SEARCH); + const roles = result.elements.map(e => e.role); + expect(roles).not.toContain('img'); + expect(roles).toContain('button'); + expect(roles).toContain('link'); + }); + + it('should exclude elements matching exclude patterns', () => { + const snapshot = makeSnapshot([ + { role: 'button', text: 'Search' }, + { role: 'link', text: 'Privacy Policy' }, + { role: 'link', text: 'Cookie Settings' }, + ]); + const result = pruneWithPolicy(snapshot, basePolicy, 'search', 0, PruningTaskCategory.SEARCH); + const texts = result.elements.map(e => e.text); + expect(texts).not.toContain('Privacy Policy'); + expect(texts).not.toContain('Cookie Settings'); + }); + + it('should require include patterns match when specified', () => { + const snapshot = makeSnapshot([ + { role: 'button', text: 'Search' }, + { role: 'button', text: 'Random' }, + ]); + const result = pruneWithPolicy(snapshot, basePolicy, 'search', 0, PruningTaskCategory.SEARCH); + const texts = result.elements.map(e => e.text); + expect(texts).toContain('Search'); + expect(texts).not.toContain('Random'); + }); + }); + + describe('fingerprint boosting', () => { + it('should boost elements matching learned fingerprints', () => { + const policyNoInclude: DataDrivenPruningPolicy = { + allowedRoles: ['button'], 
+ maxElements: 2, + maxElementsRelaxed: 4, + }; + + const fingerprints: LearnedTargetFingerprint[] = [ + { + domain: 'example.com', + taskHash: 'abc', + intent: 'search', + role: 'button', + textTokens: ['go'], + successCount: 3, + failureCount: 0, + confidence: 0.9, + learnedAt: Date.now(), + }, + ]; + + const snapshot = makeSnapshot([ + { role: 'button', text: 'Go', clickable: true }, + { role: 'button', text: 'Other', clickable: true }, + ]); + + const result = pruneWithPolicy( + snapshot, + policyNoInclude, + 'search', + 0, + PruningTaskCategory.SEARCH, + fingerprints + ); + // "Go" button should be first due to fingerprint boost + expect(result.elements[0].text).toBe('Go'); + }); + }); +}); diff --git a/tests/fingerprint-normalizer.test.ts b/tests/fingerprint-normalizer.test.ts new file mode 100644 index 0000000..69fc9d6 --- /dev/null +++ b/tests/fingerprint-normalizer.test.ts @@ -0,0 +1,150 @@ +import { + computeTaskHash, + extractDomain, + createFingerprint, + mergeFingerprint, + recordFingerprintFailure, +} from '../src/agents/planner-executor/fingerprint-normalizer'; +import type { LearnedTargetFingerprint } from '../src/agents/planner-executor/profile-types'; + +describe('fingerprint-normalizer', () => { + describe('computeTaskHash', () => { + it('should produce a stable hash for the same input', () => { + const h1 = computeTaskHash('Search for headphones on Amazon'); + const h2 = computeTaskHash('Search for headphones on Amazon'); + expect(h1).toBe(h2); + }); + + it('should normalize whitespace and case', () => { + const h1 = computeTaskHash(' Search for laptops '); + const h2 = computeTaskHash('search for laptops'); + expect(h1).toBe(h2); + }); + + it('should produce different hashes for different tasks', () => { + const h1 = computeTaskHash('Search for laptops'); + const h2 = computeTaskHash('Book a flight to Paris'); + expect(h1).not.toBe(h2); + }); + + it('should return a hash prefixed with th_', () => { + const hash = computeTaskHash('test task'); + 
expect(hash).toMatch(/^th_[a-z0-9]+$/); + }); + }); + + describe('extractDomain', () => { + it('should extract hostname from a URL', () => { + expect(extractDomain('https://www.example.com/path')).toBe('www.example.com'); + }); + + it('should return empty string for invalid URL', () => { + expect(extractDomain('not-a-url')).toBe(''); + }); + }); + + describe('createFingerprint', () => { + it('should create a fingerprint with normalized text tokens', () => { + const element = { + id: 1, + role: 'button', + text: 'Click here to search', + name: '', + href: '/search?q=test', + } as any; + const fp = createFingerprint(element, 'search', 'th_abc', 'example.com'); + + expect(fp.domain).toBe('example.com'); + expect(fp.taskHash).toBe('th_abc'); + expect(fp.intent).toBe('search'); + expect(fp.role).toBe('button'); + expect(fp.textTokens).toEqual(['Click', 'here', 'to', 'search']); + expect(fp.successCount).toBe(1); + expect(fp.confidence).toBe(0.5); + }); + + it('should sanitize href to path only', () => { + const element = { + id: 1, + role: 'link', + text: 'Go', + name: '', + href: 'https://example.com/search?q=shoes&ref=nav', + } as any; + const fp = createFingerprint(element, 'navigate', 'th_abc', 'example.com'); + expect(fp.hrefPathPattern).toBe('/search'); + }); + + it('should handle elements with no text', () => { + const element = { id: 1, role: 'button', text: '', name: '', href: '' } as any; + const fp = createFingerprint(element, 'click', 'th_abc', 'example.com'); + expect(fp.textTokens).toEqual([]); + }); + }); + + describe('mergeFingerprint', () => { + it('should add new fingerprint to empty list', () => { + const fp = makeFingerprint('example.com', 'th_1', 'search', 'button'); + const result = mergeFingerprint([], fp); + expect(result).toHaveLength(1); + }); + + it('should update existing fingerprint on match', () => { + const existing = [makeFingerprint('example.com', 'th_1', 'search', 'button')]; + existing[0].successCount = 1; + existing[0].confidence = 
0.5; + + const newFp = makeFingerprint('example.com', 'th_1', 'search', 'button'); + const result = mergeFingerprint(existing, newFp); + + expect(result).toHaveLength(1); + expect(result[0].successCount).toBe(2); + expect(result[0].confidence).toBeGreaterThan(0.5); + }); + + it('should add as new entry when no match', () => { + const existing = [makeFingerprint('example.com', 'th_1', 'search', 'button')]; + const newFp = makeFingerprint('other.com', 'th_2', 'search', 'button'); + const result = mergeFingerprint(existing, newFp); + + expect(result).toHaveLength(2); + }); + }); + + describe('recordFingerprintFailure', () => { + it('should increment failure count for matching fingerprint', () => { + const existing = [makeFingerprint('example.com', 'th_1', 'search', 'button')]; + existing[0].successCount = 3; + existing[0].confidence = 1.0; + + const result = recordFingerprintFailure(existing, 'search', 'th_1', 'example.com'); + expect(result[0].failureCount).toBe(1); + expect(result[0].confidence).toBeLessThan(1.0); + }); + + it('should not modify list when no match', () => { + const existing = [makeFingerprint('example.com', 'th_1', 'search', 'button')]; + const result = recordFingerprintFailure(existing, 'other', 'th_2', 'other.com'); + expect(result[0].failureCount).toBe(0); + }); + }); +}); + +function makeFingerprint( + domain: string, + taskHash: string, + intent: string, + role: string +): LearnedTargetFingerprint { + return { + domain, + taskHash, + intent, + role, + textTokens: ['click'], + successCount: 1, + failureCount: 0, + confidence: 0.5, + learnedAt: Date.now(), + }; +} diff --git a/tests/learning-extractor.test.ts b/tests/learning-extractor.test.ts new file mode 100644 index 0000000..4b99122 --- /dev/null +++ b/tests/learning-extractor.test.ts @@ -0,0 +1,296 @@ +import { + isSensitiveUrl, + extractFingerprintFromOutcome, + applyFingerprintFailure, + applyFingerprintSuccess, + isFingerprintStale, + isFingerprintExpired, + fingerprintToHint, + 
computeTaskHash, +} from '../src/agents/planner-executor/learning-extractor'; +import { StepStatus } from '../src/agents/planner-executor/plan-models'; +import type { LearnedTargetFingerprint } from '../src/agents/planner-executor/profile-types'; + +/** Step-outcome fixture: a successful CLICK(5) step; entries in `overrides` replace the defaults. */ function makeOutcome(overrides: Record<string, unknown> = {}) { + return { + stepId: 1, + status: StepStatus.SUCCESS, + actionTaken: 'CLICK(5)', + goal: 'search for headphones', + ...overrides, + } as any; +} + +/** Snapshot-element fixture: a textbox (id 1), a button (id 5) and a link (id 10). */ function makeElements() { + return [ + { id: 1, role: 'textbox', text: 'Search', name: 'q', href: '' }, + { id: 5, role: 'button', text: 'Search', name: 'btnSearch', href: '/search' }, + { id: 10, role: 'link', text: 'Add to Cart', name: '', href: '/cart/add' }, + ] as any[]; +} + +/** Fingerprint fixture with example.com defaults; entries in `overrides` replace the defaults. */ function makeFingerprint( + overrides: Partial<LearnedTargetFingerprint> = {} +): LearnedTargetFingerprint { + return { + domain: 'example.com', + taskHash: 'th_abc123', + intent: 'search', + role: 'button', + textTokens: ['search'], + successCount: 1, + failureCount: 0, + confidence: 0.5, + learnedAt: Date.now(), + ...overrides, + }; +} + +describe('learning-extractor', () => { + describe('isSensitiveUrl', () => { + it('should detect checkout URLs as sensitive', () => { + expect(isSensitiveUrl('https://shop.com/checkout')).toBe(true); + }); + + it('should detect payment URLs as sensitive', () => { + expect(isSensitiveUrl('https://shop.com/payment')).toBe(true); + }); + + it('should detect login URLs as sensitive', () => { + expect(isSensitiveUrl('https://example.com/login')).toBe(true); + }); + + it('should detect auth URLs as sensitive', () => { + expect(isSensitiveUrl('https://example.com/auth/callback')).toBe(true); + }); + + it('should allow normal shopping URLs', () => { + expect(isSensitiveUrl('https://amazon.com/product/laptop')).toBe(false); + }); + + it('should allow search URLs', () => { + expect(isSensitiveUrl('https://google.com/search?q=test')).toBe(false); + }); + }); + + describe('extractFingerprintFromOutcome', () => { + const defaultOptions = { + 
learningEnabled: true, + taskGoal: 'Search for headphones', + currentUrl: 'https://example.com/search', + taskHash: 'th_abc123', + }; + + it('should skip when learning is disabled', () => { + const result = extractFingerprintFromOutcome(makeOutcome(), makeElements(), { + ...defaultOptions, + learningEnabled: false, + }); + expect(result.extracted).toBe(false); + expect(result.skipReason).toBe('learning_disabled'); + }); + + it('should skip when step is not successful', () => { + const result = extractFingerprintFromOutcome( + makeOutcome({ status: StepStatus.FAILED }), + makeElements(), + defaultOptions + ); + expect(result.extracted).toBe(false); + expect(result.skipReason).toBe('step_not_successful'); + }); + + it('should skip on sensitive URLs', () => { + const result = extractFingerprintFromOutcome(makeOutcome(), makeElements(), { + ...defaultOptions, + currentUrl: 'https://shop.com/checkout', + }); + expect(result.extracted).toBe(false); + expect(result.skipReason).toBe('sensitive_url'); + }); + + it('should skip when no snapshot elements', () => { + const result = extractFingerprintFromOutcome(makeOutcome(), undefined, defaultOptions); + expect(result.extracted).toBe(false); + expect(result.skipReason).toBe('no_snapshot_elements'); + }); + + it('should skip when action has no element ID (e.g., DONE)', () => { + const result = extractFingerprintFromOutcome( + makeOutcome({ actionTaken: 'DONE()' }), + makeElements(), + defaultOptions + ); + expect(result.extracted).toBe(false); + expect(result.skipReason).toBe('no_element_action'); + }); + + it('should skip when element is not found in snapshot', () => { + const result = extractFingerprintFromOutcome( + makeOutcome({ actionTaken: 'CLICK(999)' }), + makeElements(), + defaultOptions + ); + expect(result.extracted).toBe(false); + expect(result.skipReason).toBe('element_not_found'); + }); + + it('should skip when element is a password field', () => { + const elements = [{ id: 5, role: 'password', text: 'Enter 
password', name: 'pw', href: '' }]; + const result = extractFingerprintFromOutcome(makeOutcome(), elements, defaultOptions); + expect(result.extracted).toBe(false); + expect(result.skipReason).toBe('sensitive_element'); + }); + + it('should extract fingerprint from successful CLICK action', () => { + const result = extractFingerprintFromOutcome(makeOutcome(), makeElements(), defaultOptions); + expect(result.extracted).toBe(true); + expect(result.fingerprint).toBeDefined(); + expect(result.fingerprint!.domain).toBe('example.com'); + expect(result.fingerprint!.intent).toBe('search_for_headphones'); + expect(result.fingerprint!.role).toBe('button'); + }); + + it('should extract fingerprint from TYPE action', () => { + const result = extractFingerprintFromOutcome( + makeOutcome({ actionTaken: 'TYPE(1, "laptop")' }), + makeElements(), + defaultOptions + ); + expect(result.extracted).toBe(true); + expect(result.fingerprint!.role).toBe('textbox'); + }); + }); + + describe('applyFingerprintFailure', () => { + it('should increment failure count and reduce confidence', () => { + const fp = makeFingerprint({ successCount: 3, failureCount: 0, confidence: 1.0 }); + const result = applyFingerprintFailure(fp); + expect(result.failureCount).toBe(1); + expect(result.confidence).toBeLessThan(1.0); + }); + + it('should apply exponential decay with multiple failures', () => { + let fp = makeFingerprint({ successCount: 1, failureCount: 0, confidence: 1.0 }); + fp = applyFingerprintFailure(fp); + const c1 = fp.confidence; + fp = applyFingerprintFailure(fp); + const c2 = fp.confidence; + fp = applyFingerprintFailure(fp); + const c3 = fp.confidence; + // Confidence should decrease with each failure + expect(c2).toBeLessThan(c1); + expect(c3).toBeLessThan(c2); + }); + + it('should not go below 0 confidence', () => { + const fp = makeFingerprint({ successCount: 0, failureCount: 100, confidence: 0 }); + const result = applyFingerprintFailure(fp); + 
expect(result.confidence).toBeGreaterThanOrEqual(0); + }); + }); + + describe('applyFingerprintSuccess', () => { + it('should increment success count and update lastUsedAt', () => { + const fp = makeFingerprint({ successCount: 1, failureCount: 0 }); + const before = Date.now(); + const result = applyFingerprintSuccess(fp); + expect(result.successCount).toBe(2); + expect(result.lastUsedAt).toBeGreaterThanOrEqual(before); + }); + + it('should increase confidence with more successes', () => { + const fp = makeFingerprint({ successCount: 1, failureCount: 2, confidence: 0.3 }); + const result = applyFingerprintSuccess(fp); + expect(result.confidence).toBeGreaterThan(fp.confidence); + }); + + it('should cap confidence at 1', () => { + const fp = makeFingerprint({ successCount: 100, failureCount: 0, confidence: 1.0 }); + const result = applyFingerprintSuccess(fp); + expect(result.confidence).toBeLessThanOrEqual(1); + }); + }); + + describe('isFingerprintStale', () => { + it('should return false for healthy fingerprint', () => { + const fp = makeFingerprint({ failureCount: 1, confidence: 0.5 }); + expect(isFingerprintStale(fp)).toBe(false); + }); + + it('should return true for high failures and low confidence', () => { + const fp = makeFingerprint({ failureCount: 5, confidence: 0.1 }); + expect(isFingerprintStale(fp)).toBe(true); + }); + + it('should return false when confidence is still reasonable despite failures', () => { + const fp = makeFingerprint({ failureCount: 3, confidence: 0.5 }); + expect(isFingerprintStale(fp)).toBe(false); + }); + }); + + describe('isFingerprintExpired', () => { + it('should return false for recent fingerprint', () => { + const fp = makeFingerprint({ learnedAt: Date.now() - 1000 }); + expect(isFingerprintExpired(fp, 60000)).toBe(false); + }); + + it('should return true for old fingerprint', () => { + const fp = makeFingerprint({ learnedAt: Date.now() - 120000 }); + expect(isFingerprintExpired(fp, 60000)).toBe(true); + }); + }); + + 
describe('fingerprintToHint', () => { + it('should convert a high-confidence fingerprint to a hint', () => { + const fp = makeFingerprint({ confidence: 0.8, textTokens: ['search'], ariaTokens: ['find'] }); + const hint = fingerprintToHint(fp); + expect(hint).not.toBeNull(); + expect(hint!.intent).toBe('search'); + expect(hint!.textPatterns).toContain('search'); + expect(hint!.textPatterns).toContain('find'); + expect(hint!.priority).toBeGreaterThan(0); + }); + + it('should return null for low-confidence fingerprint', () => { + const fp = makeFingerprint({ confidence: 0.2 }); + const hint = fingerprintToHint(fp); + expect(hint).toBeNull(); + }); + + it('should return null for stale fingerprint', () => { + const fp = makeFingerprint({ confidence: 0.5, failureCount: 5 }); + // Make it stale: confidence < 0.3 and failureCount >= 3 + fp.confidence = 0.2; + fp.failureCount = 4; + const hint = fingerprintToHint(fp); + expect(hint).toBeNull(); + }); + + it('should respect custom minConfidence', () => { + const fp = makeFingerprint({ confidence: 0.45 }); + expect(fingerprintToHint(fp, 0.5)).toBeNull(); + expect(fingerprintToHint(fp, 0.4)).not.toBeNull(); + }); + }); + + describe('computeTaskHash', () => { + it('should produce stable hashes', async () => { + const h1 = await computeTaskHash('search for laptops'); + const h2 = await computeTaskHash('search for laptops'); + expect(h1).toBe(h2); + }); + + it('should normalize case and whitespace', async () => { + const h1 = await computeTaskHash(' Search For Laptops '); + const h2 = await computeTaskHash('search for laptops'); + expect(h1).toBe(h2); + }); + + it('should produce different hashes for different tasks', async () => { + const h1 = await computeTaskHash('search for laptops'); + const h2 = await computeTaskHash('book a flight'); + expect(h1).not.toBe(h2); + }); + }); +}); diff --git a/tests/profile-registry.test.ts b/tests/profile-registry.test.ts new file mode 100644 index 0000000..aafeecc --- /dev/null +++ 
b/tests/profile-registry.test.ts @@ -0,0 +1,170 @@ +import { ProfileRegistry } from '../src/agents/planner-executor/profile-registry'; +import type { BrowserAgentProfile } from '../src/agents/planner-executor/profile-types'; +import { TaskCategory } from '../src/agents/planner-executor/task-category'; + +describe('ProfileRegistry', () => { + let registry: ProfileRegistry; + + beforeEach(() => { + registry = new ProfileRegistry(); + }); + + describe('register()', () => { + it('should register a valid profile', () => { + const profile = makeProfile('test-1', 'Test Profile', { + taskKeywords: ['search'], + }); + registry.register(profile); + expect(registry.list()).toHaveLength(1); + }); + + it('should list registered profiles', () => { + registry.register(makeProfile('a', 'Profile A')); + registry.register(makeProfile('b', 'Profile B')); + expect(registry.list()).toHaveLength(2); + }); + }); + + describe('unregister()', () => { + it('should remove a profile by id', () => { + registry.register(makeProfile('test-1', 'Test')); + expect(registry.unregister('test-1')).toBe(true); + expect(registry.list()).toHaveLength(0); + }); + + it('should return false for unknown id', () => { + expect(registry.unregister('nonexistent')).toBe(false); + }); + }); + + describe('resolve()', () => { + it('should match profile by task keyword', () => { + const profile = makeProfile('shopping', 'Shopping', { + taskKeywords: ['cart', 'buy', 'shop'], + }); + registry.register(profile); + + const result = registry.resolve('Search for laptops and add to cart', 'example.com'); + expect(result.categoryHint).toBe('transaction'); + }); + + it('should match profile by domain pattern', () => { + const profile = makeProfile('travel', 'Travel', { + domainPatterns: ['*.booking.com'], + }); + profile.taskCategoryHint = TaskCategory.SEARCH; + registry.register(profile); + + const result = registry.resolve('Book a hotel', 'www.booking.com'); + expect(result.categoryHint).toBe('search'); + }); + + it('should 
prefer higher priority profiles', () => { + const lowPriority = makeProfile('low', 'Low Priority', { taskKeywords: ['search'] }); + lowPriority.priority = 1; + lowPriority.pruningPolicy = { + allowedRoles: ['button'], + maxElements: 10, + maxElementsRelaxed: 20, + }; + + const highPriority = makeProfile('high', 'High Priority', { taskKeywords: ['search'] }); + highPriority.priority = 100; + highPriority.pruningPolicy = { + allowedRoles: ['link'], + maxElements: 5, + maxElementsRelaxed: 15, + }; + + registry.register(lowPriority); + registry.register(highPriority); + + const result = registry.resolve('search for something', 'example.com'); + expect(result.pruningPolicy?.allowedRoles).toContain('link'); + }); + + it('should return empty profile when nothing matches', () => { + registry.register(makeProfile('travel', 'Travel', { taskKeywords: ['flight'] })); + + const result = registry.resolve('search for laptops', 'example.com'); + expect(result.categoryHint).toBeUndefined(); + expect(result.heuristicHints).toEqual([]); + }); + + it('should pass through learned fingerprints', () => { + registry.register(makeProfile('base', 'Base', { taskKeywords: ['search'] })); + + const fingerprints = [makeFingerprint('example.com', 'abc123', 'search', 0.9)]; + + const result = registry.resolve('search for something', 'example.com', fingerprints); + expect(result.learnedFingerprints).toHaveLength(1); + }); + }); + + describe('loadFromJSON()', () => { + it('should load valid profile from JSON array', () => { + const json = [ + { + id: 'imported-1', + label: 'Imported Profile', + version: 1, + match: { taskKeywords: ['book'] }, + source: 'imported', + }, + ]; + + const result = registry.loadFromJSON(json); + expect(result.loaded).toBe(1); + expect(result.errors).toHaveLength(0); + }); + + it('should report errors for invalid input', () => { + const result = registry.loadFromJSON([ + { + /* missing required fields */ + }, + ]); + expect(result.loaded).toBe(0); + 
expect(result.errors.length).toBeGreaterThan(0); + }); + + it('should handle non-array input', () => { + const result = registry.loadFromJSON('not an array'); + expect(result.loaded).toBe(0); + }); + }); +}); + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function makeProfile( + id: string, + label: string, + match?: { taskKeywords?: string[]; domainPatterns?: string[] } +): BrowserAgentProfile { + return { + id, + label, + version: 1, + match: match || {}, + taskCategoryHint: TaskCategory.TRANSACTION, + source: 'user', + priority: 10, + }; +} + +function makeFingerprint(domain: string, taskHash: string, intent: string, confidence: number) { + return { + domain, + taskHash, + intent, + role: 'button' as const, + textTokens: ['click', 'me'], + successCount: 1, + failureCount: 0, + confidence, + learnedAt: Date.now(), + }; +}