Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 43 additions & 15 deletions src/agents/planner-executor/extraction-keywords.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,14 @@ const AMBIGUOUS_VERBS: readonly string[] = [
'show',
'tell',
'display',
'provide',
'report',
'give',
'identify',
'collect',
'gather',
'return',
'output',
];

/**
Expand All @@ -67,6 +75,14 @@ const EXTRACTION_PHRASES: readonly string[] = [
'find the name',
'how many',
'how much',
'sale price',
'sale prices',
'first 5',
'first 10',
'first 3',
'top 5',
'top 10',
'top 3',
];

/**
Expand Down Expand Up @@ -121,6 +137,9 @@ const CONTENT_NOUNS: readonly string[] = [
'url',
'image',
'photo',
'product',
'results',
'listings',
];

/**
Expand Down Expand Up @@ -267,29 +286,38 @@ export function getExtractionDomainGuidance(): string {
return `

IMPORTANT: Extraction Task Planning Rules
=========================================
For extraction tasks where data is already visible on the page:
========================================

1. If the data you need is VISIBLE in the page context above:
- Use EXTRACT directly as the ONLY step - no clicking needed
- The EXTRACT action will read the visible text from the page
STEP 1 - CHECK CURRENT URL:
Before choosing an action, compare the Current URL to the goal.
- Does the current page contain the data requested?
- If the goal mentions a specific section/page (e.g., "show hn", "top stories", "/show"), check if the URL matches.
- If you are NOT on the right page, NAVIGATE to the correct URL first.

2. If you need to navigate to see the data:
- First CLICK or NAVIGATE to the right page
- Then use EXTRACT
STEP 2 - EXTRACT VISIBLE DATA:
If the data is VISIBLE in the page context:
- Use EXTRACT directly - no clicking needed
- The EXTRACT action reads visible text from the current page

CRITICAL: Do NOT click on links to external sites when extracting.
- Post/article titles often link to EXTERNAL sites
- To extract a title that is visible, use EXTRACT directly on the current page
- Only click if you need to navigate to a detail page (e.g., for comments)

Example for "Extract the title of the first post":
{
"action": "EXTRACT",
"target": "first post title",
"goal": "Extract the first post title from the page",
"verify": []
}
Example - wrong page, need to navigate first:
Goal: "extract the title of the first showhn post on hackernews show"
Current URL: news.ycombinator.com/news (wrong page, need /show)
{"action":"NAVIGATE","target":"https://news.ycombinator.com/show","verify":[{"predicate":"url_contains","args":["show"]}],"reasoning":"navigate to Show HN page"}

Example - on correct page, extract directly:
Goal: "extract the title of the first showhn post"
Current URL: news.ycombinator.com/show (correct page, data visible)
{"action":"EXTRACT","target":"first ShowHN post title","goal":"Extract the title of the first ShowHN post","verify":[],"reasoning":"data is visible on current page"}

Example - product price on listing page:
Goal: "find the price of the first laptop"
Current URL: store.com/laptops (correct page, prices visible)
{"action":"EXTRACT","target":"price of first laptop","goal":"Extract the price of the first laptop listing","verify":[],"reasoning":"prices are visible in listing elements"}
`;
}

Expand Down
13 changes: 8 additions & 5 deletions src/agents/planner-executor/plan-models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ export type ActionType = z.infer<typeof ActionType>;
* Type for a plan step.
*/
export interface PlanStep {
id: number;
goal: string;
id?: number;
goal?: string;
action: ActionType;
target?: string;
intent?: string;
Expand Down Expand Up @@ -87,12 +87,15 @@ const HeuristicHintSchema = z.object({

export const PlanStepSchema = z.lazy(() =>
z.object({
id: z.number().describe('Step ID (1-indexed, contiguous)'),
goal: z.string().describe('Human-readable goal for this step'),
id: z.number().optional().describe('Step ID (1-indexed, contiguous)'),
goal: z.string().optional().describe('Human-readable goal for this step'),
action: ActionType.describe(
'Action type: NAVIGATE, CLICK, TYPE, TYPE_AND_SUBMIT, SCROLL, PRESS, WAIT, EXTRACT, STUCK, DONE'
),
target: z.string().optional().describe('URL for NAVIGATE action'),
target: z
.union([z.string(), z.record(z.string(), z.unknown())])
.optional()
.describe('URL for NAVIGATE action'),
intent: z.string().optional().describe('Intent hint for CLICK action'),
input: z.string().optional().describe('Text for TYPE_AND_SUBMIT action'),
verify: z.array(PredicateSpecSchema).default([]).describe('Verification predicates'),
Expand Down
20 changes: 15 additions & 5 deletions src/agents/planner-executor/plan-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,13 @@ function normalizeStep(step: Record<string, unknown>): Record<string, unknown> {
if ('target' in normalizedStep && typeof normalizedStep.target === 'number') {
normalizedStep.target = String(normalizedStep.target);
}
if (
'target' in normalizedStep &&
typeof normalizedStep.target === 'object' &&
normalizedStep.target !== null
) {
normalizedStep.target = JSON.stringify(normalizedStep.target);
}
if ('target' in normalizedStep && normalizedStep.target === null) {
delete normalizedStep.target;
}
Expand Down Expand Up @@ -629,33 +636,36 @@ export function validatePlanSmoothness(plan: Plan): string[] {

// Check each step
let prevAction: string | null = null;
let prevId: number | undefined = undefined;
for (const step of plan.steps) {
const stepLabel = step.id ?? '?';
// Check for missing verification
if ((!step.verify || step.verify.length === 0) && step.required !== false) {
warnings.push(`Step ${step.id} has no verification predicates`);
warnings.push(`Step ${stepLabel} has no verification predicates`);
}

// Check for consecutive same actions (might indicate loop)
if (step.action === prevAction && step.action === 'CLICK') {
warnings.push(`Steps ${step.id - 1} and ${step.id} both use ${step.action}`);
warnings.push(`Steps ${prevId ?? '?'} and ${stepLabel} both use ${step.action}`);
}

// Check for NAVIGATE without target
if (step.action === 'NAVIGATE' && !step.target) {
warnings.push(`Step ${step.id} is NAVIGATE but has no target URL`);
warnings.push(`Step ${stepLabel} is NAVIGATE but has no target URL`);
}

// Check for CLICK without intent
if (step.action === 'CLICK' && !step.intent) {
warnings.push(`Step ${step.id} is CLICK but has no intent hint`);
warnings.push(`Step ${stepLabel} is CLICK but has no intent hint`);
}

// Check for TYPE_AND_SUBMIT without input
if (step.action === 'TYPE_AND_SUBMIT' && !step.input) {
warnings.push(`Step ${step.id} is TYPE_AND_SUBMIT but has no input`);
warnings.push(`Step ${stepLabel} is TYPE_AND_SUBMIT but has no input`);
}

prevAction = step.action;
prevId = step.id;
}

return warnings;
Expand Down
Loading
Loading