diff --git a/package-lock.json b/package-lock.json
index ab12afa..e54d253 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
{
"name": "@predicatesystems/runtime",
- "version": "1.4.1",
+ "version": "1.4.2",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "@predicatesystems/runtime",
- "version": "1.4.1",
+ "version": "1.4.2",
"license": "(MIT OR Apache-2.0)",
"dependencies": {
"canvas": "^3.2.1",
diff --git a/src/agents/planner-executor/category-pruner.ts b/src/agents/planner-executor/category-pruner.ts
index 0a30494..3232518 100644
--- a/src/agents/planner-executor/category-pruner.ts
+++ b/src/agents/planner-executor/category-pruner.ts
@@ -7,6 +7,7 @@ import {
} from './pruning-types';
import { TaskCategory } from './task-category';
import { pruneWithPolicy } from './data-driven-pruner';
+import { isTextExtractionTask } from './extraction-keywords';
function textOf(element: SnapshotElement): string {
return String(element.text || element.name || '').toLowerCase();
@@ -141,6 +142,39 @@ function allowSearchRelaxed(element: SnapshotElement): boolean {
return ['button', 'tab', 'menuitem'].includes(roleOf(element));
}
+/**
+ * Strict allow-predicate used when pruning a snapshot for extraction tasks.
+ *
+ * An element is kept when at least one of these holds:
+ * - its role is a text input, a button, or a content-bearing role;
+ * - it is a link that carries an href (nav links lead to the data page,
+ *   e.g. the "Show" link on HN);
+ * - its importance is 200 or more;
+ * - it is flagged as part of the dominant group.
+ *
+ * @param element - Snapshot element under consideration
+ * @returns true when the element should survive pruning
+ */
+function allowExtraction(element: SnapshotElement): boolean {
+  const kind = roleOf(element);
+
+  // Roles kept unconditionally: search inputs, buttons, and content nodes.
+  const keptRoles = [
+    'searchbox',
+    'textbox',
+    'combobox',
+    'button',
+    'cell',
+    'row',
+    'listitem',
+    'heading',
+  ];
+  if (keptRoles.includes(kind)) {
+    return true;
+  }
+
+  // A link only counts when it actually has a destination.
+  if (kind === 'link' && element.href) {
+    return true;
+  }
+
+  const scoredHigh = Boolean(element.importance && element.importance >= 200);
+  return scoredHigh || Boolean(element.inDominantGroup);
+}
+
+/**
+ * Relaxed allow-predicate for extraction tasks: keeps everything the strict
+ * predicate keeps, plus anything `isInteractive` accepts.
+ */
+function allowExtractionRelaxed(element: SnapshotElement): boolean {
+  if (allowExtraction(element)) {
+    return true;
+  }
+  return isInteractive(element);
+}
+
function allowGeneric(element: SnapshotElement): boolean {
return ['button', 'link', 'textbox', 'searchbox', 'combobox', 'checkbox', 'radio'].includes(
roleOf(element)
@@ -209,6 +243,14 @@ function getPolicy(
return { maxNodes: 60, allow: allowShoppingLoose, block: () => false };
}
+ if (category === PruningTaskCategory.EXTRACTION) {
+ // Extraction tasks need: nav links for navigation, content elements for data,
+ // search inputs, and any interactive elements for reaching the data.
+ return relaxationLevel === 0
+ ? { maxNodes: 35, allow: allowExtraction, block: blockCommon }
+ : { maxNodes: 50, allow: allowExtractionRelaxed, block: () => false };
+ }
+
if (category === PruningTaskCategory.FORM_FILLING) {
return relaxationLevel === 0
? { maxNodes: 20, allow: allowFormFilling, block: blockCommon }
@@ -232,12 +274,23 @@ export function detectPruningCategory(
): PruningTaskCategory | null {
const normalizedGoal = goal.toLowerCase();
+ if (taskCategory === TaskCategory.EXTRACTION) {
+ return PruningTaskCategory.EXTRACTION;
+ }
if (taskCategory === TaskCategory.SEARCH) {
return PruningTaskCategory.SEARCH;
}
if (taskCategory === TaskCategory.FORM_FILL) {
return PruningTaskCategory.FORM_FILLING;
}
+
+ // Extraction keyword detection takes priority over TRANSACTION/SHOPPING
+ // because "find the title of X" or "extract Y" on an e-commerce site is
+ // still an extraction task, not a shopping task.
+ if (isTextExtractionTask(normalizedGoal)) {
+ return PruningTaskCategory.EXTRACTION;
+ }
+
if (taskCategory === TaskCategory.TRANSACTION) {
if (normalizedGoal.includes('checkout')) {
return PruningTaskCategory.CHECKOUT;
diff --git a/src/agents/planner-executor/extraction-keywords.ts b/src/agents/planner-executor/extraction-keywords.ts
new file mode 100644
index 0000000..c47db5a
--- /dev/null
+++ b/src/agents/planner-executor/extraction-keywords.ts
@@ -0,0 +1,305 @@
+/**
+ * Text extraction keyword detection for EXTRACT action routing.
+ *
+ * Ported from sdk-python predicate/agents/planner_executor_agent.py
+ * (_is_text_extraction_task / TEXT_EXTRACTION_KEYWORDS).
+ *
+ * Determines whether a planner step should use text-based extraction
+ * (reading page content as markdown and extracting via LLM) rather
+ * than element-targeted actions like CLICK or TYPE.
+ */
+
+// ---------------------------------------------------------------------------
+// Keyword Categories
+// ---------------------------------------------------------------------------
+
+/**
+ * Strong extraction verbs (tier 2 of `isTextExtractionTask`).
+ *
+ * A whole-word hit on any entry, or on a simple inflection of it, classifies
+ * the task as extraction by itself; no content noun is required.
+ * NOTE(review): 'read', 'copy' and 'pull' also occur in non-extraction goals
+ * ("pull down the menu"); confirm they belong in the unconditional tier.
+ */
+const EXTRACTION_VERBS: readonly string[] = [
+ 'extract',
+ 'read',
+ 'parse',
+ 'scrape',
+ 'retrieve',
+ 'capture',
+ 'grab',
+ 'copy',
+ 'pull',
+];
+
+/**
+ * Ambiguous verbs (tier 3 of `isTextExtractionTask`).
+ *
+ * These only count as extraction when the task also contains an entry from
+ * CONTENT_NOUNS, e.g. "find" alone is ambiguous but "find" + "title" is not.
+ * They are matched as whole words with no inflection handling, and they are
+ * not part of TEXT_EXTRACTION_KEYWORDS.
+ */
+const AMBIGUOUS_VERBS: readonly string[] = [
+ 'find',
+ 'get',
+ 'fetch',
+ 'list',
+ 'show',
+ 'tell',
+ 'display',
+];
+
+/**
+ * Multi-word phrases (tier 1 of `isTextExtractionTask`).
+ *
+ * These are tested first, before either verb tier; one hit is enough to
+ * classify the task as extraction.
+ * Any text containing 'find the text' / 'find the title' / 'find the price' /
+ * 'find the name' also contains 'find the', so those four entries never
+ * change the outcome of the detector; they still appear in
+ * TEXT_EXTRACTION_KEYWORDS.
+ */
+const EXTRACTION_PHRASES: readonly string[] = [
+ 'what is',
+ 'what are',
+ "what's",
+ 'show me',
+ 'tell me',
+ 'find the',
+ 'get the',
+ 'read the',
+ 'list the',
+ 'title of',
+ 'price of',
+ 'name of',
+ 'content of',
+ 'find the text',
+ 'find the title',
+ 'find the price',
+ 'find the name',
+ 'how many',
+ 'how much',
+];
+
+/**
+ * Content/data nouns that signal the task wants a specific piece of
+ * information.
+ *
+ * `isTextExtractionTask` consults this list only after an AMBIGUOUS_VERBS
+ * entry has matched. Each noun is matched as a whole word with an optional
+ * "s" or "es" plural suffix, so irregular plurals such as "entries" or
+ * "summaries" are not recognised.
+ */
+const CONTENT_NOUNS: readonly string[] = [
+ 'title',
+ 'headline',
+ 'heading',
+ 'text',
+ 'content',
+ 'body',
+ 'paragraph',
+ 'article',
+ 'post',
+ 'message',
+ 'description',
+ 'summary',
+ 'excerpt',
+ 'price',
+ 'cost',
+ 'amount',
+ 'name',
+ 'label',
+ 'value',
+ 'number',
+ 'date',
+ 'time',
+ 'address',
+ 'email',
+ 'phone',
+ 'rating',
+ 'review',
+ 'comment',
+ 'author',
+ 'username',
+ 'table',
+ 'row',
+ 'column',
+ 'item',
+ 'entry',
+ 'record',
+ 'population',
+ 'score',
+ 'count',
+ 'total',
+ 'average',
+ 'statistic',
+ 'stat',
+ 'link',
+ 'url',
+ 'image',
+ 'photo',
+];
+
+/**
+ * Legacy flat keyword list kept for backward compatibility with tests.
+ *
+ * It is the concatenation of EXTRACTION_VERBS, EXTRACTION_PHRASES and
+ * CONTENT_NOUNS, in that order. AMBIGUOUS_VERBS is not included, and
+ * `isTextExtractionTask` does not read this list.
+ *
+ * @deprecated Use the categorised constants above for new code.
+ */
+export const TEXT_EXTRACTION_KEYWORDS: readonly string[] = [
+ ...EXTRACTION_VERBS,
+ ...EXTRACTION_PHRASES,
+ ...CONTENT_NOUNS,
+];
+
+// ---------------------------------------------------------------------------
+// Detection
+// ---------------------------------------------------------------------------
+
+/**
+ * Regex source matching a strong verb and its regular inflections.
+ *
+ * Every verb keeps the plain `s` / `ed` / `ing` suffixes. Verbs ending in "e"
+ * additionally match the e-dropping forms (parse -> parsed, parsing) and
+ * verbs ending in "y" the y -> ie forms (copy -> copies, copied).
+ */
+function verbFormsPattern(verb: string): string {
+  const base = escapeRegExp(verb);
+  const stem = escapeRegExp(verb.slice(0, -1));
+  const forms = [`${base}(?:s|ed|ing)?`];
+  if (verb.endsWith('e')) {
+    forms.push(`${base}d`, `${stem}ing`);
+  } else if (verb.endsWith('y')) {
+    forms.push(`${stem}ie[sd]`);
+  }
+  return forms.join('|');
+}
+
+// The keyword lists are static, so each tier is compiled into one regex at
+// module load instead of building dozens of RegExp objects on every call.
+
+// Verb-led phrases ("get the", "read the", ...) must start on a word boundary
+// so they do not fire inside "forget the" or "spread the". Noun phrases ending
+// in " of" stay plain substrings so "username of" still hits "name of".
+const EXTRACTION_PHRASE_PATTERN = new RegExp(
+  EXTRACTION_PHRASES.map(phrase =>
+    phrase.endsWith(' of') ? escapeRegExp(phrase) : `\\b${escapeRegExp(phrase)}`
+  ).join('|')
+);
+const STRONG_VERB_PATTERN = new RegExp(
+  `\\b(?:${EXTRACTION_VERBS.map(verbFormsPattern).join('|')})\\b`
+);
+const AMBIGUOUS_VERB_PATTERN = new RegExp(
+  `\\b(?:${AMBIGUOUS_VERBS.map(verb => escapeRegExp(verb)).join('|')})\\b`
+);
+const CONTENT_NOUN_PATTERN = new RegExp(
+  `\\b(?:${CONTENT_NOUNS.map(noun => escapeRegExp(noun)).join('|')})(?:s|es)?\\b`
+);
+
+/**
+ * Determine if a task/step description is a text extraction task.
+ *
+ * Tiered detection, evaluated in order on the lower-cased text:
+ *
+ * 1. **Extraction phrases** ("what is", "find the", "title of"): one hit is
+ *    an immediate match. Verb-led phrases must begin at a word boundary.
+ *    Note that phrases such as "find the" and "get the" are broad: any task
+ *    containing them matches regardless of what follows.
+ *
+ * 2. **Strong extraction verbs** ("extract", "scrape", "read"): a whole-word
+ *    hit on the verb or one of its regular inflections matches on its own.
+ *
+ * 3. **Ambiguous verb + content noun** ("list" + "reviews"): verbs like
+ *    "find" or "get" only match when a content noun (optionally pluralised
+ *    with s/es) is also present, so "find a product" is not a match.
+ *
+ * Ported from sdk-python `_is_text_extraction_task()` with improved precision.
+ *
+ * @param task - The task or step description to analyse
+ * @returns true if this looks like a text extraction task; false for empty input
+ */
+export function isTextExtractionTask(task: string): boolean {
+  if (!task) {
+    return false;
+  }
+
+  const taskLower = task.toLowerCase();
+
+  // Tiers 1 and 2: either is sufficient by itself.
+  if (EXTRACTION_PHRASE_PATTERN.test(taskLower) || STRONG_VERB_PATTERN.test(taskLower)) {
+    return true;
+  }
+
+  // Tier 3: an ambiguous verb needs a content noun to confirm intent.
+  return AMBIGUOUS_VERB_PATTERN.test(taskLower) && CONTENT_NOUN_PATTERN.test(taskLower);
+}
+
+// ---------------------------------------------------------------------------
+// Extraction prompt builder
+// ---------------------------------------------------------------------------
+
+/**
+ * Build the executor prompt pair for extracting text from page content.
+ *
+ * The user prompt embeds `pageContent` and `extractQuery` verbatim and ends
+ * with "EXTRACTED TEXT:" so the model continues with the answer only. It asks
+ * the model to return "NOT_FOUND" when the information is missing.
+ * NOTE(review): the sentinel is shown to the model inside double quotes, so a
+ * reply may include the quotes; callers comparing against the sentinel
+ * should allow for that.
+ *
+ * @param pageContent - Page content (markdown or compact representation)
+ * @param extractQuery - What to extract
+ * @returns Tuple of [systemPrompt, userPrompt]
+ */
+export function buildExtractionPrompt(pageContent: string, extractQuery: string): [string, string] {
+ // NOTE: /no_think MUST be at the START of user message for Qwen3 models.
+ // Without it, Qwen3 puts the answer inside its thinking tags (presumably
+ // `<think>`; the tag name was lost from the original comment) and content is empty.
+ const system = `You extract specific text from page content. Return only the extracted text. Do NOT output any thinking, reasoning, or explanation.`;
+
+ const user = `/no_think
+You are a text extraction assistant. Given the page content below, extract the specific information requested.
+
+PAGE CONTENT:
+${pageContent}
+
+EXTRACTION REQUEST:
+${extractQuery}
+
+INSTRUCTIONS:
+1. Read the content carefully
+2. Find and extract ONLY the specific information requested
+3. Return ONLY the extracted text, nothing else
+4. If the information is not found, return "NOT_FOUND"
+
+EXTRACTED TEXT:`;
+
+ return [system, user];
+}
+
+// Cues that trigger extraction-specific planner guidance.
+// - 'extract' / 'scrape' and the "<noun> of" cues keep substring semantics
+//   (so "extraction", "username of", "subtitle of" still match).
+// - Verb-led cues must start on a word boundary; otherwise 'get the' fires
+//   inside "forget the" / "target the" and 'read the' inside "spread the".
+const PLANNER_EXTRACTION_CUE_PATTERN =
+  /extract|scrape|(?:title|price|name|content|headline|rating|review) of|\b(?:get the|what is|read the|find the text)/;
+
+/**
+ * Check if a task description contains extraction-related keywords
+ * that should trigger extraction-specific planner guidance.
+ *
+ * This is the simpler, narrower detector; `isTextExtractionTask` covers more
+ * patterns. Matching is case-insensitive.
+ *
+ * @param task - Task or goal description
+ * @returns true when an extraction cue is present; false for empty input
+ */
+export function isExtractionTask(task: string): boolean {
+  if (!task) return false;
+  return PLANNER_EXTRACTION_CUE_PATTERN.test(task.toLowerCase());
+}
+
+/**
+ * Get extraction-specific domain guidance for the planner prompt.
+ *
+ * Returns a fixed block of text (starting with a blank line) that tells the
+ * planner to use EXTRACT instead of CLICK for data that is already visible,
+ * and to avoid following links to external sites.
+ * `buildStepwisePlannerPrompt` adds it to the planner's user message when
+ * `isExtractionTask(goal)` is true.
+ * NOTE(review): rule 1 refers to "the page context above", but the prompt
+ * builder places this block before the page elements; confirm the wording.
+ *
+ * @returns The guidance text
+ */
+export function getExtractionDomainGuidance(): string {
+ return `
+
+IMPORTANT: Extraction Task Planning Rules
+=========================================
+For extraction tasks where data is already visible on the page:
+
+1. If the data you need is VISIBLE in the page context above:
+ - Use EXTRACT directly as the ONLY step - no clicking needed
+ - The EXTRACT action will read the visible text from the page
+
+2. If you need to navigate to see the data:
+ - First CLICK or NAVIGATE to the right page
+ - Then use EXTRACT
+
+CRITICAL: Do NOT click on links to external sites when extracting.
+- Post/article titles often link to EXTERNAL sites
+- To extract a title that is visible, use EXTRACT directly on the current page
+- Only click if you need to navigate to a detail page (e.g., for comments)
+
+Example for "Extract the title of the first post":
+{
+ "action": "EXTRACT",
+ "target": "first post title",
+ "goal": "Extract the first post title from the page",
+ "verify": []
+}
+`;
+}
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+/**
+ * Backslash-escape every regex metacharacter in `str` so the result can be
+ * embedded in a RegExp source and match the input literally.
+ *
+ * @param str - Text to escape
+ * @returns The escaped text
+ */
+function escapeRegExp(str: string): string {
+  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
+  // "$&" re-inserts the matched character after the added backslash.
+  return str.replace(metaCharacters, '\\$&');
+}
diff --git a/src/agents/planner-executor/index.ts b/src/agents/planner-executor/index.ts
index e391cdf..5deacad 100644
--- a/src/agents/planner-executor/index.ts
+++ b/src/agents/planner-executor/index.ts
@@ -196,6 +196,13 @@ export {
computeTaskHash as computeLearningTaskHash,
} from './learning-extractor';
export type { LearningExtractionOptions, LearningExtractionResult } from './learning-extractor';
+export {
+ TEXT_EXTRACTION_KEYWORDS,
+ isTextExtractionTask,
+ isExtractionTask,
+ getExtractionDomainGuidance,
+ buildExtractionPrompt,
+} from './extraction-keywords';
// Runtime (Playwright/Chromium)
export {
diff --git a/src/agents/planner-executor/plan-models.ts b/src/agents/planner-executor/plan-models.ts
index ef6621a..4ace0b9 100644
--- a/src/agents/planner-executor/plan-models.ts
+++ b/src/agents/planner-executor/plan-models.ts
@@ -168,7 +168,7 @@ export interface RepairHistoryEntry {
export interface ActionRecord {
/** Step number (1-indexed) */
stepNum: number;
- /** Action type (CLICK, TYPE_AND_SUBMIT, SCROLL, etc.) */
+ /** Action type (CLICK, TYPE_AND_SUBMIT, SCROLL, EXTRACT, etc.) */
action: string;
/** Element description or URL */
target: string | null;
@@ -176,6 +176,8 @@ export interface ActionRecord {
result: string;
/** URL after action completed */
urlAfter: string | null;
+ /** Extracted data for EXTRACT actions (the text that was extracted) */
+ extractedData?: string | null;
}
// ---------------------------------------------------------------------------
diff --git a/src/agents/planner-executor/plan-utils.ts b/src/agents/planner-executor/plan-utils.ts
index f43a4a9..96904a1 100644
--- a/src/agents/planner-executor/plan-utils.ts
+++ b/src/agents/planner-executor/plan-utils.ts
@@ -168,6 +168,56 @@ function repairJson(text: string): string {
return repaired;
}
+/**
+ * Return the first brace-balanced `{...}` span found in `text`.
+ *
+ * Useful when an LLM emits several concatenated objects, e.g.
+ *   {"action":"EXTRACT",...},{"action":"CLICK",...}
+ * where only the first one should be parsed.
+ *
+ * Scanning starts at the first `{`. Braces inside double-quoted strings are
+ * ignored, and a backslash inside a string makes the next character literal.
+ * The span is not validated as JSON.
+ *
+ * @returns The first balanced object substring, or null when there is no `{`
+ *          or the braces never balance
+ */
+function extractFirstJsonObject(text: string): string | null {
+  const openAt = text.indexOf('{');
+  if (openAt < 0) {
+    return null;
+  }
+
+  let nesting = 0;
+  let insideString = false;
+  let skipNext = false;
+  let cursor = openAt;
+
+  while (cursor < text.length) {
+    const current = text.charAt(cursor);
+    cursor += 1; // cursor now points one past `current`
+
+    if (skipNext) {
+      // Character following a backslash inside a string: take it literally.
+      skipNext = false;
+    } else if (insideString) {
+      if (current === '\\') {
+        skipNext = true;
+      } else if (current === '"') {
+        insideString = false;
+      }
+    } else if (current === '"') {
+      insideString = true;
+    } else if (current === '{') {
+      nesting += 1;
+    } else if (current === '}') {
+      nesting -= 1;
+      if (nesting === 0) {
+        return text.slice(openAt, cursor);
+      }
+    }
+  }
+
+  return null;
+}
+
/**
* Extract JSON from LLM response that may contain markdown or prose.
*
@@ -203,6 +253,24 @@ export function extractJson(content: string): Record {
}
// Try to find JSON object in text
+ // First try to extract just the FIRST balanced JSON object to handle
+ // cases where LLM outputs multiple objects like: {"action":"EXTRACT",...},{"action":"CLICK",...}
+ const firstObj = extractFirstJsonObject(cleaned);
+ if (firstObj) {
+ try {
+ return JSON.parse(firstObj);
+ } catch {
+ // Try to repair common JSON issues: unquoted keys (valid JS but not valid JSON)
+ try {
+ const repaired = repairJson(firstObj);
+ return JSON.parse(repaired);
+ } catch {
+ // Continue to last resort
+ }
+ }
+ }
+
+ // Fallback: greedy match (original behavior)
const jsonMatch = cleaned.match(/\{[\s\S]*\}/);
if (jsonMatch) {
try {
diff --git a/src/agents/planner-executor/planner-executor-agent.ts b/src/agents/planner-executor/planner-executor-agent.ts
index e19827d..2c1f4bc 100644
--- a/src/agents/planner-executor/planner-executor-agent.ts
+++ b/src/agents/planner-executor/planner-executor-agent.ts
@@ -75,6 +75,11 @@ import {
import { detectPruningCategory } from './category-pruner';
import { pruneWithRecovery, fullSnapshotContainsIntent } from './pruning-recovery';
import type { Tracer } from '../../tracing/tracer';
+import {
+ isTextExtractionTask,
+ isExtractionTask,
+ buildExtractionPrompt,
+} from './extraction-keywords';
// ---------------------------------------------------------------------------
// Token Usage Collector
@@ -442,6 +447,9 @@ export interface AgentRuntime {
/** Scroll by delta (returns true if scroll was effective) */
scrollBy(dy: number): Promise;
+
+ /** Read page content as markdown (for EXTRACT actions) */
+ readMarkdown?(options?: { maxChars?: number }): Promise;
}
// ---------------------------------------------------------------------------
@@ -1152,12 +1160,24 @@ export class PlannerExecutorAgent {
// Record action history after any auth-boundary or optional-substep recovery.
if (!actionHistoryRecorded) {
+ // For EXTRACT actions, include the extracted data so the planner
+ // knows what was already extracted and can avoid repeating
+ const extractedText =
+ plannerAction.action === 'EXTRACT' && finalOutcome.extractedData
+ ? typeof finalOutcome.extractedData === 'object' &&
+ finalOutcome.extractedData !== null &&
+ 'text' in (finalOutcome.extractedData as Record)
+ ? ((finalOutcome.extractedData as Record).text as string)
+ : JSON.stringify(finalOutcome.extractedData)
+ : undefined;
+
this.actionHistory.push({
stepNum,
action: plannerAction.action,
target: this.summarizePlannerActionTarget(plannerAction),
result: finalOutcome.status === StepStatus.SUCCESS ? 'success' : 'failed',
urlAfter,
+ extractedData: extractedText || undefined,
});
}
@@ -1174,6 +1194,26 @@ export class PlannerExecutorAgent {
success = true;
}
+ // Auto-complete extraction tasks: if the action was a successful EXTRACT
+ // and the overall task is an extraction task, the goal is achieved.
+ // This prevents infinite EXTRACT loops on extraction-focused tasks.
+ // Uses isTextExtractionTask (comprehensive) rather than isExtractionTask (simpler)
+ // to cover more extraction patterns across any website.
+ if (
+ !success &&
+ plannerAction.action === 'EXTRACT' &&
+ finalOutcome.status === StepStatus.SUCCESS &&
+ finalOutcome.extractedData &&
+ isTextExtractionTask(task)
+ ) {
+ if (this.config.verbose) {
+ console.log(
+ `[EXTRACT] Extraction task completed successfully, transitioning to DONE`
+ );
+ }
+ success = true;
+ }
+
if (this.recoveryState && this.config.recovery.trackSuccessfulUrls && urlAfter) {
this.recoveryState.recordCheckpoint({
url: urlAfter,
@@ -1493,6 +1533,149 @@ export class PlannerExecutorAgent {
};
}
+    // Handle EXTRACT action — read page content and extract data via LLM.
+    //
+    // Content source:
+    //   - markdown from runtime.readMarkdown() when the query looks like a text
+    //     extraction task and the runtime implements readMarkdown;
+    //   - otherwise the compact snapshot representation from ctx.
+    // The executor LLM is then asked for the requested text. An empty reply or
+    // the NOT_FOUND sentinel yields a FAILED outcome.
+    if (plannerAction.action === 'EXTRACT') {
+      const extractQuery =
+        plannerAction.goal ||
+        plannerAction.intent ||
+        plannerAction.target ||
+        task ||
+        'Extract relevant data from the current page';
+
+      if (this.config.verbose) {
+        console.log(`[ACTION] EXTRACT - query: "${extractQuery}"`);
+      }
+
+      // Every failed EXTRACT outcome shares this shape; only the message differs.
+      const extractFailed = (error: string) => ({
+        stepId: stepNum,
+        goal: extractQuery,
+        status: StepStatus.FAILED,
+        actionTaken: 'EXTRACT',
+        verificationPassed: false,
+        usedVision: false,
+        durationMs: Date.now() - stepStart,
+        error,
+      });
+
+      try {
+        // Determine extraction path and obtain the page content.
+        let pageContent: string | null | undefined;
+        let viaMarkdown = false;
+
+        if (isTextExtractionTask(extractQuery) && runtime.readMarkdown) {
+          // Text-based extraction: read page as markdown
+          viaMarkdown = true;
+          pageContent = await runtime.readMarkdown({ maxChars: 8000 });
+          if (!pageContent) {
+            return extractFailed('Failed to read page content as markdown');
+          }
+          if (this.config.verbose) {
+            const preview = pageContent.slice(0, 160).replace(/\n/g, ' ');
+            console.log(` [ACTION] EXTRACT - got markdown: ${preview}...`);
+          }
+        } else {
+          // Fallback: use compact snapshot context for extraction
+          pageContent = ctx.compactRepresentation;
+          if (!pageContent || pageContent.trim().length === 0) {
+            return extractFailed('No page content available for extraction');
+          }
+        }
+
+        // Build extraction prompt and call executor LLM
+        const [extSystem, extUser] = buildExtractionPrompt(pageContent, extractQuery);
+        const extractResp = await this.executor.generate(extSystem, extUser, {
+          temperature: 0.0,
+          max_tokens: 500,
+        });
+        this.recordTokenUsage('extract', extractResp);
+
+        const extractedText = (extractResp.content || '').trim();
+
+        // The prompt shows the sentinel as "NOT_FOUND" (with quotes), so models
+        // often echo the quotes or add a trailing period. Strip that decoration
+        // before comparing; otherwise a miss is recorded as a successful
+        // extraction of the literal sentinel.
+        const sentinelProbe = extractedText
+          .replace(/^[\s"'`]+|[\s"'`.]+$/g, '')
+          .toUpperCase();
+        const found = sentinelProbe.length > 0 && sentinelProbe !== 'NOT_FOUND';
+
+        if (!found) {
+          return extractFailed(
+            viaMarkdown
+              ? `Could not find requested data: ${extractQuery}`
+              : `Could not extract requested data: ${extractQuery}`
+          );
+        }
+
+        if (this.config.verbose) {
+          const label = viaMarkdown ? 'EXTRACT ok' : 'EXTRACT ok (snapshot)';
+          console.log(` [ACTION] ${label}: ${extractedText.slice(0, 160)}`);
+        }
+        return {
+          stepId: stepNum,
+          goal: extractQuery,
+          status: StepStatus.SUCCESS,
+          actionTaken: 'EXTRACT',
+          verificationPassed: true,
+          usedVision: false,
+          durationMs: Date.now() - stepStart,
+          urlBefore: currentUrl,
+          urlAfter: currentUrl,
+          extractedData: { text: extractedText, query: extractQuery },
+        };
+      } catch (e) {
+        // Runtime or LLM errors become a FAILED outcome rather than propagating.
+        return extractFailed(e instanceof Error ? e.message : String(e));
+      }
+    }
+
// For CLICK and TYPE_AND_SUBMIT, we need to find the element
const isTypeAction = plannerAction.action === 'TYPE_AND_SUBMIT';
@@ -2400,6 +2583,10 @@ export class PlannerExecutorAgent {
return plannerAction;
}
+ if (isExtractionTask(task)) {
+ return plannerAction;
+ }
+
const candidate = this.findVisibleResultLink(task, ctx.snapshot);
if (!candidate) {
return plannerAction;
diff --git a/src/agents/planner-executor/playwright-runtime.ts b/src/agents/planner-executor/playwright-runtime.ts
index b392b56..2b5627c 100644
--- a/src/agents/planner-executor/playwright-runtime.ts
+++ b/src/agents/planner-executor/playwright-runtime.ts
@@ -437,6 +437,41 @@ export class PlaywrightRuntime implements AgentRuntime {
return page.url();
}
+  /**
+   * Read the current page content as cleaned markdown-like text.
+   *
+   * Uses the body's innerText (5 s timeout); if that throws, falls back to
+   * the body's textContent read via page.evaluate. Runs of three or more
+   * newlines are collapsed to two and the result is trimmed.
+   *
+   * @param options.maxChars - Maximum characters of page text to keep
+   *   (default 50000). When the text is longer it is cut at this length and
+   *   a truncation marker is appended, so the returned string can be slightly
+   *   longer than maxChars.
+   * @returns The cleaned text, or null when the page has no text
+   * @throws Error when no page is available
+   */
+  async readMarkdown(options?: { maxChars?: number }): Promise<string | null> {
+    this.ensureStarted();
+
+    const page = this.browser.getPage();
+    if (!page) {
+      throw new Error('Page not available');
+    }
+
+    let rawText: string;
+    try {
+      rawText = await page.locator('body').innerText({ timeout: 5000 });
+    } catch {
+      // Fallback: use evaluate to get document body textContent
+      rawText = await page.evaluate(() => {
+        const el = document.querySelector('body');
+        return el?.textContent ?? '';
+      });
+    }
+
+    // Normalisation and truncation are identical for both sources.
+    const maxChars = options?.maxChars ?? 50000;
+    let result = rawText.replace(/\n{3,}/g, '\n\n').trim();
+    if (result.length > maxChars) {
+      result = result.slice(0, maxChars) + '\n\n[... content truncated ...]';
+    }
+    return result || null;
+  }
+
/**
* Get the viewport height.
*/
diff --git a/src/agents/planner-executor/prompts.ts b/src/agents/planner-executor/prompts.ts
index 0f2e44b..a987eba 100644
--- a/src/agents/planner-executor/prompts.ts
+++ b/src/agents/planner-executor/prompts.ts
@@ -6,6 +6,7 @@
*/
import type { ActionRecord } from './plan-models';
+import { isExtractionTask, getExtractionDomainGuidance } from './extraction-keywords';
// ---------------------------------------------------------------------------
// Stepwise Planner Prompt (ReAct-style)
@@ -39,6 +40,9 @@ export function buildStepwisePlannerPrompt(
if (rec.urlAfter) {
historyText += ` [URL: ${rec.urlAfter.slice(0, 60)}...]`;
}
+ if (rec.extractedData) {
+ historyText += ` [EXTRACTED: ${rec.extractedData.slice(0, 120)}]`;
+ }
historyText += '\n';
}
historyText += '\n';
@@ -98,12 +102,15 @@ RULES:
10. Do NOT return DONE until ALL parts of the goal are complete
11. Never copy example URLs from these instructions. Only NAVIGATE to a URL from the user's task, the current page, or a visible element.`;
+ // Inject extraction-specific guidance when the goal is an extraction task
+ const extractionGuidance = isExtractionTask(goal) ? getExtractionDomainGuidance() : '';
+
// NOTE: /no_think MUST be at the START of user message for Qwen3 models
const user = `/no_think
Goal: ${goal}
Current URL: ${currentUrl}
-
+${extractionGuidance}
${historyText}Current page elements (ID|role|text|importance|clickable|...):
${pageContext}
diff --git a/src/agents/planner-executor/pruning-types.ts b/src/agents/planner-executor/pruning-types.ts
index d981d6e..c372a72 100644
--- a/src/agents/planner-executor/pruning-types.ts
+++ b/src/agents/planner-executor/pruning-types.ts
@@ -6,6 +6,7 @@ export enum PruningTaskCategory {
FORM_FILLING = 'form_filling',
SEARCH = 'search',
CHECKOUT = 'checkout',
+ EXTRACTION = 'extraction',
GENERIC = 'generic',
}
diff --git a/src/llm-provider.ts b/src/llm-provider.ts
index 1f6c88f..59acc33 100644
--- a/src/llm-provider.ts
+++ b/src/llm-provider.ts
@@ -216,6 +216,24 @@ export class LocalLLMProvider extends LLMProvider {
// Don't extract NONE - if model is still reasoning, let it continue
}
}
+
+ // Final fallback: if no structured pattern matched, use the last
+ // non-empty line of reasoning as the content. This handles plain text
+ // answers from extraction tasks where the model thinks but doesn't
+ // produce content outside the reasoning field.
+ if (!content) {
+ const lines = message.reasoning.split('\n');
+ for (let i = lines.length - 1; i >= 0; i--) {
+ const line = lines[i].trim();
+ if (line && !line.startsWith('```') && !line.startsWith('//')) {
+ content = line;
+ console.log(
+ `[LocalLLMProvider DEBUG] Extracted last reasoning line as content: ${content.slice(0, 100)}`
+ );
+ break;
+ }
+ }
+ }
}
const usage = data?.usage;
@@ -377,14 +395,11 @@ export class OllamaProvider extends LocalLLMProvider {
options: Record = {}
): Promise {
// For Qwen3 models, add think: false to disable reasoning output
- // Ollama OpenAI-compatible API passes model options via 'options' field
+ // Ollama OpenAI-compatible API expects 'think' as a TOP-LEVEL field,
+ // NOT nested under 'options'. See: https://github.com/ollama/ollama/blob/main/docs/openai.md
const ollamaOptions = { ...options };
if (this._disableThinking) {
- // Merge with existing options if any
- ollamaOptions.options = {
- ...(ollamaOptions.options || {}),
- think: false,
- };
+ ollamaOptions.think = false;
}
return super.generate(systemPrompt, userPrompt, ollamaOptions);
}