From 71fc290dc72d24f5b0e89338abce7c7257d7608b Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Sun, 3 May 2026 12:27:35 -0700 Subject: [PATCH 1/2] extraction support ported --- package-lock.json | 4 +- .../planner-executor/category-pruner.ts | 53 ++++ .../planner-executor/extraction-keywords.ts | 299 ++++++++++++++++++ src/agents/planner-executor/index.ts | 7 + src/agents/planner-executor/plan-utils.ts | 68 ++++ .../planner-executor-agent.ts | 156 +++++++++ .../planner-executor/playwright-runtime.ts | 35 ++ src/agents/planner-executor/prompts.ts | 6 +- src/agents/planner-executor/pruning-types.ts | 1 + src/llm-provider.ts | 18 ++ 10 files changed, 644 insertions(+), 3 deletions(-) create mode 100644 src/agents/planner-executor/extraction-keywords.ts diff --git a/package-lock.json b/package-lock.json index ab12afa..e54d253 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@predicatesystems/runtime", - "version": "1.4.1", + "version": "1.4.2", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@predicatesystems/runtime", - "version": "1.4.1", + "version": "1.4.2", "license": "(MIT OR Apache-2.0)", "dependencies": { "canvas": "^3.2.1", diff --git a/src/agents/planner-executor/category-pruner.ts b/src/agents/planner-executor/category-pruner.ts index 0a30494..3232518 100644 --- a/src/agents/planner-executor/category-pruner.ts +++ b/src/agents/planner-executor/category-pruner.ts @@ -7,6 +7,7 @@ import { } from './pruning-types'; import { TaskCategory } from './task-category'; import { pruneWithPolicy } from './data-driven-pruner'; +import { isTextExtractionTask } from './extraction-keywords'; function textOf(element: SnapshotElement): string { return String(element.text || element.name || '').toLowerCase(); @@ -141,6 +142,39 @@ function allowSearchRelaxed(element: SnapshotElement): boolean { return ['button', 'tab', 'menuitem'].includes(roleOf(element)); } +function allowExtraction(element: SnapshotElement): boolean { + const role = roleOf(element); + // Nav links are critical for navigating to the data page (e.g., "Show" link on HN) + if (role === 'link' && element.href) { + return true; + } + // Search inputs for finding data + if (['searchbox', 'textbox', 'combobox'].includes(role)) { + return true; + } + // Buttons for navigation/actions + if (role === 'button') { + return true; + } + // Content-bearing elements (table cells, list items, etc.) + if (['cell', 'row', 'listitem', 'heading'].includes(role)) { + return true; + } + // High-importance elements likely contain relevant data + if (element.importance && element.importance >= 200) { + return true; + } + // Dominant group elements (main content area) + if (element.inDominantGroup) { + return true; + } + return false; +} + +function allowExtractionRelaxed(element: SnapshotElement): boolean { + return allowExtraction(element) || isInteractive(element); +} + function allowGeneric(element: SnapshotElement): boolean { return ['button', 'link', 'textbox', 'searchbox', 'combobox', 'checkbox', 'radio'].includes( roleOf(element) @@ -209,6 +243,14 @@ function getPolicy( return { maxNodes: 60, allow: allowShoppingLoose, block: () => false }; } + if (category === PruningTaskCategory.EXTRACTION) { + // Extraction tasks need: nav links for navigation, content elements for data, + // search inputs, and any interactive elements for reaching the data. + return relaxationLevel === 0 + ? { maxNodes: 35, allow: allowExtraction, block: blockCommon } + : { maxNodes: 50, allow: allowExtractionRelaxed, block: () => false }; + } + if (category === PruningTaskCategory.FORM_FILLING) { return relaxationLevel === 0 ? { maxNodes: 20, allow: allowFormFilling, block: blockCommon } @@ -232,12 +274,23 @@ export function detectPruningCategory( ): PruningTaskCategory | null { const normalizedGoal = goal.toLowerCase(); + if (taskCategory === TaskCategory.EXTRACTION) { + return PruningTaskCategory.EXTRACTION; + } if (taskCategory === TaskCategory.SEARCH) { return PruningTaskCategory.SEARCH; } if (taskCategory === TaskCategory.FORM_FILL) { return PruningTaskCategory.FORM_FILLING; } + + // Extraction keyword detection takes priority over TRANSACTION/SHOPPING + // because "find the title of X" or "extract Y" on an e-commerce site is + // still an extraction task, not a shopping task. + if (isTextExtractionTask(normalizedGoal)) { + return PruningTaskCategory.EXTRACTION; + } + if (taskCategory === TaskCategory.TRANSACTION) { if (normalizedGoal.includes('checkout')) { return PruningTaskCategory.CHECKOUT; diff --git a/src/agents/planner-executor/extraction-keywords.ts b/src/agents/planner-executor/extraction-keywords.ts new file mode 100644 index 0000000..9aa87a2 --- /dev/null +++ b/src/agents/planner-executor/extraction-keywords.ts @@ -0,0 +1,299 @@ +/** + * Text extraction keyword detection for EXTRACT action routing. + * + * Ported from sdk-python predicate/agents/planner_executor_agent.py + * (_is_text_extraction_task / TEXT_EXTRACTION_KEYWORDS). + * + * Determines whether a planner step should use text-based extraction + * (reading page content as markdown and extracting via LLM) rather + * than element-targeted actions like CLICK or TYPE. + */ + +// --------------------------------------------------------------------------- +// Keyword Categories +// --------------------------------------------------------------------------- + +/** + * Strong extraction verbs that alone indicate a data extraction task. + * These are unambiguous — if the task says "extract" or "scrape", it's extraction. + */ +const EXTRACTION_VERBS: readonly string[] = [ + 'extract', + 'read', + 'parse', + 'scrape', + 'retrieve', + 'capture', + 'grab', + 'copy', + 'pull', +]; + +/** + * Ambiguous verbs that need a content noun to confirm extraction intent. + * E.g., "find" alone is ambiguous, but "find the title" = extraction. + */ +const AMBIGUOUS_VERBS: readonly string[] = [ + 'find', + 'get', + 'fetch', + 'list', + 'show', + 'tell', + 'display', +]; + +/** + * Multi-word phrases that strongly indicate extraction. + * These are checked first via substring matching. + */ +const EXTRACTION_PHRASES: readonly string[] = [ + 'what is', + 'what are', + "what's", + 'show me', + 'tell me', + 'find the', + 'get the', + 'read the', + 'list the', + 'title of', + 'price of', + 'name of', + 'content of', + 'find the text', + 'find the title', + 'find the price', + 'find the name', + 'how many', + 'how much', +]; + +/** + * Content/data nouns that indicate the task wants specific information. + * Used alongside ambiguous verbs to confirm extraction intent. + */ +const CONTENT_NOUNS: readonly string[] = [ + 'title', + 'headline', + 'heading', + 'text', + 'content', + 'body', + 'paragraph', + 'article', + 'post', + 'message', + 'description', + 'summary', + 'excerpt', + 'price', + 'cost', + 'amount', + 'name', + 'label', + 'value', + 'number', + 'date', + 'time', + 'address', + 'email', + 'phone', + 'rating', + 'review', + 'comment', + 'author', + 'username', + 'table', + 'row', + 'column', + 'item', + 'entry', + 'record', + 'population', + 'score', + 'count', + 'total', + 'average', + 'statistic', + 'stat', + 'link', + 'url', + 'image', + 'photo', +]; + +/** + * Legacy keyword list kept for backward compatibility with tests. + * @deprecated Use the categorised constants above for new code. + */ +export const TEXT_EXTRACTION_KEYWORDS: readonly string[] = [ + ...EXTRACTION_VERBS, + ...EXTRACTION_PHRASES, + ...CONTENT_NOUNS, +]; + +// --------------------------------------------------------------------------- +// Detection +// --------------------------------------------------------------------------- + +/** + * Determine if a task/step description is a text extraction task. + * + * Uses a tiered detection strategy to balance precision and recall: + * + * 1. **Strong extraction phrases** ("what is", "find the", "title of"): + * These multi-word patterns are highly specific → immediate match. + * + * 2. **Strong extraction verbs** ("extract", "scrape", "read"): + * These verbs unambiguously indicate extraction → match on their own. + * + * 3. **Ambiguous verb + content noun combos** ("find" + "title"): + * Verbs like "find" or "get" are ambiguous alone, but when paired with + * a content noun ("title", "price", "name") they indicate extraction. + * This prevents false positives like "find a product" (no content noun). + * + * Ported from sdk-python `_is_text_extraction_task()` with improved precision. + * + * @param task - The task or step description to analyse + * @returns true if this looks like a text extraction task + */ +export function isTextExtractionTask(task: string): boolean { + if (!task) { + return false; + } + + const taskLower = task.toLowerCase(); + + // Tier 1: Strong extraction phrases (multi-word substring match) + for (const phrase of EXTRACTION_PHRASES) { + if (taskLower.includes(phrase)) { + return true; + } + } + + // Tier 2: Strong extraction verbs (word boundary match) + for (const verb of EXTRACTION_VERBS) { + if (new RegExp(`\\b${escapeRegExp(verb)}(s|ed|ing)?\\b`).test(taskLower)) { + return true; + } + } + + // Tier 3: Ambiguous verb + content noun combo + // E.g., "find the title" = yes, "find a product" = no, "list all reviews" = yes + const hasAmbiguousVerb = AMBIGUOUS_VERBS.some(verb => + new RegExp(`\\b${escapeRegExp(verb)}\\b`).test(taskLower) + ); + if (hasAmbiguousVerb) { + const hasContentNoun = CONTENT_NOUNS.some(noun => + new RegExp(`\\b${escapeRegExp(noun)}(s|es)?\\b`).test(taskLower) + ); + if (hasContentNoun) { + return true; + } + } + + return false; +} + +// --------------------------------------------------------------------------- +// Extraction prompt builder +// --------------------------------------------------------------------------- + +/** + * Build the executor prompt for extracting text from page content. + * + * @param pageContent - Page content (markdown or compact representation) + * @param extractQuery - What to extract + * @returns Tuple of [systemPrompt, userPrompt] + */ +export function buildExtractionPrompt(pageContent: string, extractQuery: string): [string, string] { + const system = `You extract specific text from page content. Return only the extracted text.`; + + const user = `You are a text extraction assistant. Given the page content below, extract the specific information requested. + +PAGE CONTENT: +${pageContent} + +EXTRACTION REQUEST: +${extractQuery} + +INSTRUCTIONS: +1. Read the content carefully +2. Find and extract ONLY the specific information requested +3. Return ONLY the extracted text, nothing else +4. If the information is not found, return "NOT_FOUND" + +EXTRACTED TEXT:`; + + return [system, user]; +} + +/** + * Check if a task description contains extraction-related keywords + * that should trigger extraction-specific planner guidance. + */ +export function isExtractionTask(task: string): boolean { + if (!task) return false; + const taskLower = task.toLowerCase(); + return ( + taskLower.includes('extract') || + taskLower.includes('get the') || + taskLower.includes('what is') || + taskLower.includes('read the') || + taskLower.includes('find the text') || + taskLower.includes('scrape') || + taskLower.includes('title of') || + taskLower.includes('price of') || + taskLower.includes('name of') || + taskLower.includes('content of') + ); +} + +/** + * Get extraction-specific domain guidance for the planner prompt. + * + * This is injected into the planner system prompt when the task + * is detected as an extraction task, instructing the planner to + * use EXTRACT instead of CLICK for data that is already visible. + */ +export function getExtractionDomainGuidance(): string { + return ` + +IMPORTANT: Extraction Task Planning Rules +========================================= +For extraction tasks where data is already visible on the page: + +1. If the data you need is VISIBLE in the page context above: + - Use EXTRACT directly as the ONLY step - no clicking needed + - The EXTRACT action will read the visible text from the page + +2. If you need to navigate to see the data: + - First CLICK or NAVIGATE to the right page + - Then use EXTRACT + +CRITICAL: Do NOT click on links to external sites when extracting. +- Post/article titles often link to EXTERNAL sites +- To extract a title that is visible, use EXTRACT directly on the current page +- Only click if you need to navigate to a detail page (e.g., for comments) + +Example for "Extract the title of the first post": +{ + "action": "EXTRACT", + "target": "first post title", + "goal": "Extract the first post title from the page", + "verify": [] +} +`; +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +/** + * Escape special regex characters in a string. + */ +function escapeRegExp(str: string): string { + return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +} diff --git a/src/agents/planner-executor/index.ts b/src/agents/planner-executor/index.ts index e391cdf..5deacad 100644 --- a/src/agents/planner-executor/index.ts +++ b/src/agents/planner-executor/index.ts @@ -196,6 +196,13 @@ export { computeTaskHash as computeLearningTaskHash, } from './learning-extractor'; export type { LearningExtractionOptions, LearningExtractionResult } from './learning-extractor'; +export { + TEXT_EXTRACTION_KEYWORDS, + isTextExtractionTask, + isExtractionTask, + getExtractionDomainGuidance, + buildExtractionPrompt, +} from './extraction-keywords'; // Runtime (Playwright/Chromium) export { diff --git a/src/agents/planner-executor/plan-utils.ts b/src/agents/planner-executor/plan-utils.ts index f43a4a9..96904a1 100644 --- a/src/agents/planner-executor/plan-utils.ts +++ b/src/agents/planner-executor/plan-utils.ts @@ -168,6 +168,56 @@ function repairJson(text: string): string { return repaired; } +/** + * Extract the first balanced JSON object from a string. + * + * Handles cases where the LLM outputs multiple concatenated JSON objects: + * {"action":"EXTRACT",...},{"action":"CLICK",...} + * + * Uses brace-depth counting to find the first complete `{...}` block. + * + * @returns The first balanced JSON object string, or null if not found + */ +function extractFirstJsonObject(text: string): string | null { + const start = text.indexOf('{'); + if (start === -1) return null; + + let depth = 0; + let inString = false; + let escape = false; + + for (let i = start; i < text.length; i++) { + const ch = text[i]; + + if (escape) { + escape = false; + continue; + } + + if (ch === '\\' && inString) { + escape = true; + continue; + } + + if (ch === '"') { + inString = !inString; + continue; + } + + if (inString) continue; + + if (ch === '{') depth++; + else if (ch === '}') { + depth--; + if (depth === 0) { + return text.slice(start, i + 1); + } + } + } + + return null; +} + /** * Extract JSON from LLM response that may contain markdown or prose. * @@ -203,6 +253,24 @@ export function extractJson(content: string): Record { } // Try to find JSON object in text + // First try to extract just the FIRST balanced JSON object to handle + // cases where LLM outputs multiple objects like: {"action":"EXTRACT",...},{"action":"CLICK",...} + const firstObj = extractFirstJsonObject(cleaned); + if (firstObj) { + try { + return JSON.parse(firstObj); + } catch { + // Try to repair common JSON issues: unquoted keys (valid JS but not valid JSON) + try { + const repaired = repairJson(firstObj); + return JSON.parse(repaired); + } catch { + // Continue to last resort + } + } + } + + // Fallback: greedy match (original behavior) const jsonMatch = cleaned.match(/\{[\s\S]*\}/); if (jsonMatch) { try { diff --git a/src/agents/planner-executor/planner-executor-agent.ts b/src/agents/planner-executor/planner-executor-agent.ts index e19827d..9b2213d 100644 --- a/src/agents/planner-executor/planner-executor-agent.ts +++ b/src/agents/planner-executor/planner-executor-agent.ts @@ -75,6 +75,12 @@ import { import { detectPruningCategory } from './category-pruner'; import { pruneWithRecovery, fullSnapshotContainsIntent } from './pruning-recovery'; import type { Tracer } from '../../tracing/tracer'; +import { + isTextExtractionTask, + isExtractionTask, + getExtractionDomainGuidance, + buildExtractionPrompt, +} from './extraction-keywords'; // --------------------------------------------------------------------------- // Token Usage Collector @@ -442,6 +448,9 @@ export interface AgentRuntime { /** Scroll by delta (returns true if scroll was effective) */ scrollBy(dy: number): Promise; + + /** Read page content as markdown (for EXTRACT actions) */ + readMarkdown?(options?: { maxChars?: number }): Promise; } // --------------------------------------------------------------------------- @@ -1493,6 +1502,149 @@ export class PlannerExecutorAgent { }; } + // Handle EXTRACT action — read page content and extract data via LLM + if (plannerAction.action === 'EXTRACT') { + const extractQuery = + plannerAction.goal || + plannerAction.intent || + plannerAction.target || + task || + 'Extract relevant data from the current page'; + + if (this.config.verbose) { + console.log(`[ACTION] EXTRACT - query: "${extractQuery}"`); + } + + try { + // Determine extraction path + const useMarkdown = isTextExtractionTask(extractQuery); + + if (useMarkdown && runtime.readMarkdown) { + // Text-based extraction: read page as markdown, then use executor LLM + const pageContent = await runtime.readMarkdown({ maxChars: 8000 }); + + if (!pageContent) { + return { + stepId: stepNum, + goal: extractQuery, + status: StepStatus.FAILED, + actionTaken: 'EXTRACT', + verificationPassed: false, + usedVision: false, + durationMs: Date.now() - stepStart, + error: 'Failed to read page content as markdown', + }; + } + + if (this.config.verbose) { + const preview = pageContent.slice(0, 160).replace(/\n/g, ' '); + console.log(` [ACTION] EXTRACT - got markdown: ${preview}...`); + } + + // Build extraction prompt and call executor LLM + const [extSystem, extUser] = buildExtractionPrompt(pageContent, extractQuery); + const extractResp = await this.executor.generate(extSystem, extUser, { + temperature: 0.0, + max_tokens: 500, + }); + this.recordTokenUsage('extract', extractResp); + + const extractedText = (extractResp.content || '').trim(); + if (extractedText && extractedText !== 'NOT_FOUND') { + if (this.config.verbose) { + console.log(` [ACTION] EXTRACT ok: ${extractedText.slice(0, 160)}`); + } + return { + stepId: stepNum, + goal: extractQuery, + status: StepStatus.SUCCESS, + actionTaken: 'EXTRACT', + verificationPassed: true, + usedVision: false, + durationMs: Date.now() - stepStart, + urlBefore: currentUrl, + urlAfter: currentUrl, + extractedData: { text: extractedText, query: extractQuery }, + }; + } else { + return { + stepId: stepNum, + goal: extractQuery, + status: StepStatus.FAILED, + actionTaken: 'EXTRACT', + verificationPassed: false, + usedVision: false, + durationMs: Date.now() - stepStart, + error: `Could not find requested data: ${extractQuery}`, + }; + } + } else { + // Fallback: use compact snapshot context for extraction + const pageContent = ctx.compactRepresentation; + if (!pageContent || pageContent.trim().length === 0) { + return { + stepId: stepNum, + goal: extractQuery, + status: StepStatus.FAILED, + actionTaken: 'EXTRACT', + verificationPassed: false, + usedVision: false, + durationMs: Date.now() - stepStart, + error: 'No page content available for extraction', + }; + } + + const [extSystem, extUser] = buildExtractionPrompt(pageContent, extractQuery); + const extractResp = await this.executor.generate(extSystem, extUser, { + temperature: 0.0, + max_tokens: 500, + }); + this.recordTokenUsage('extract', extractResp); + + const extractedText = (extractResp.content || '').trim(); + if (extractedText && extractedText !== 'NOT_FOUND') { + if (this.config.verbose) { + console.log(` [ACTION] EXTRACT ok (snapshot): ${extractedText.slice(0, 160)}`); + } + return { + stepId: stepNum, + goal: extractQuery, + status: StepStatus.SUCCESS, + actionTaken: 'EXTRACT', + verificationPassed: true, + usedVision: false, + durationMs: Date.now() - stepStart, + urlBefore: currentUrl, + urlAfter: currentUrl, + extractedData: { text: extractedText, query: extractQuery }, + }; + } else { + return { + stepId: stepNum, + goal: extractQuery, + status: StepStatus.FAILED, + actionTaken: 'EXTRACT', + verificationPassed: false, + usedVision: false, + durationMs: Date.now() - stepStart, + error: `Could not extract requested data: ${extractQuery}`, + }; + } + } + } catch (e) { + return { + stepId: stepNum, + goal: extractQuery, + status: StepStatus.FAILED, + actionTaken: 'EXTRACT', + verificationPassed: false, + usedVision: false, + durationMs: Date.now() - stepStart, + error: e instanceof Error ? e.message : String(e), + }; + } + } + // For CLICK and TYPE_AND_SUBMIT, we need to find the element const isTypeAction = plannerAction.action === 'TYPE_AND_SUBMIT'; @@ -2400,6 +2552,10 @@ export class PlannerExecutorAgent { return plannerAction; } + if (isExtractionTask(task)) { + return plannerAction; + } + const candidate = this.findVisibleResultLink(task, ctx.snapshot); if (!candidate) { return plannerAction; diff --git a/src/agents/planner-executor/playwright-runtime.ts b/src/agents/planner-executor/playwright-runtime.ts index b392b56..2b5627c 100644 --- a/src/agents/planner-executor/playwright-runtime.ts +++ b/src/agents/planner-executor/playwright-runtime.ts @@ -437,6 +437,41 @@ export class PlaywrightRuntime implements AgentRuntime { return page.url(); } + /** + * Read the current page content as cleaned markdown-like text. + * Uses innerText extraction and strips excessive whitespace. + */ + async readMarkdown(options?: { maxChars?: number }): Promise { + this.ensureStarted(); + + const page = this.browser.getPage(); + if (!page) { + throw new Error('Page not available'); + } + + try { + const text = await page.locator('body').innerText({ timeout: 5000 }); + const maxChars = options?.maxChars ?? 50000; + let result = text.replace(/\n{3,}/g, '\n\n').trim(); + if (result.length > maxChars) { + result = result.slice(0, maxChars) + '\n\n[... content truncated ...]'; + } + return result || null; + } catch { + // Fallback: use evaluate to get document body textContent + const text = await page.evaluate(() => { + const el = document.querySelector('body'); + return el?.textContent ?? ''; + }); + const maxChars = options?.maxChars ?? 50000; + let result = text.replace(/\n{3,}/g, '\n\n').trim(); + if (result.length > maxChars) { + result = result.slice(0, maxChars) + '\n\n[... content truncated ...]'; + } + return result || null; + } + } + /** * Get the viewport height. */ diff --git a/src/agents/planner-executor/prompts.ts b/src/agents/planner-executor/prompts.ts index 0f2e44b..8124494 100644 --- a/src/agents/planner-executor/prompts.ts +++ b/src/agents/planner-executor/prompts.ts @@ -6,6 +6,7 @@ */ import type { ActionRecord } from './plan-models'; +import { isExtractionTask, getExtractionDomainGuidance } from './extraction-keywords'; // --------------------------------------------------------------------------- // Stepwise Planner Prompt (ReAct-style) @@ -98,12 +99,15 @@ RULES: 10. Do NOT return DONE until ALL parts of the goal are complete 11. Never copy example URLs from these instructions. Only NAVIGATE to a URL from the user's task, the current page, or a visible element.`; + // Inject extraction-specific guidance when the goal is an extraction task + const extractionGuidance = isExtractionTask(goal) ? getExtractionDomainGuidance() : ''; + // NOTE: /no_think MUST be at the START of user message for Qwen3 models const user = `/no_think Goal: ${goal} Current URL: ${currentUrl} - +${extractionGuidance} ${historyText}Current page elements (ID|role|text|importance|clickable|...): ${pageContext} diff --git a/src/agents/planner-executor/pruning-types.ts b/src/agents/planner-executor/pruning-types.ts index d981d6e..c372a72 100644 --- a/src/agents/planner-executor/pruning-types.ts +++ b/src/agents/planner-executor/pruning-types.ts @@ -6,6 +6,7 @@ export enum PruningTaskCategory { FORM_FILLING = 'form_filling', SEARCH = 'search', CHECKOUT = 'checkout', + EXTRACTION = 'extraction', GENERIC = 'generic', } diff --git a/src/llm-provider.ts b/src/llm-provider.ts index 1f6c88f..f321888 100644 --- a/src/llm-provider.ts +++ b/src/llm-provider.ts @@ -216,6 +216,24 @@ export class LocalLLMProvider extends LLMProvider { // Don't extract NONE - if model is still reasoning, let it continue } } + + // Final fallback: if no structured pattern matched, use the last + // non-empty line of reasoning as the content. This handles plain text + // answers from extraction tasks where the model thinks but doesn't + // produce content outside the reasoning field. + if (!content) { + const lines = message.reasoning.split('\n'); + for (let i = lines.length - 1; i >= 0; i--) { + const line = lines[i].trim(); + if (line && !line.startsWith('```') && !line.startsWith('//')) { + content = line; + console.log( + `[LocalLLMProvider DEBUG] Extracted last reasoning line as content: ${content.slice(0, 100)}` + ); + break; + } + } + } } const usage = data?.usage; From 8365f4634576fd939316645b7e72a305d0b70b1f Mon Sep 17 00:00:00 2001 From: SentienceDEV Date: Sun, 3 May 2026 13:46:28 -0700 Subject: [PATCH 2/2] text extraction task for agent --- .../planner-executor/extraction-keywords.ts | 12 +++++-- src/agents/planner-executor/plan-models.ts | 4 ++- .../planner-executor-agent.ts | 33 ++++++++++++++++++- src/agents/planner-executor/prompts.ts | 3 ++ src/llm-provider.ts | 9 ++--- 5 files changed, 50 insertions(+), 11 deletions(-) diff --git a/src/agents/planner-executor/extraction-keywords.ts b/src/agents/planner-executor/extraction-keywords.ts index 9aa87a2..c47db5a 100644 --- a/src/agents/planner-executor/extraction-keywords.ts +++ b/src/agents/planner-executor/extraction-keywords.ts @@ -208,9 +208,12 @@ export function isTextExtractionTask(task: string): boolean { * @returns Tuple of [systemPrompt, userPrompt] */ export function buildExtractionPrompt(pageContent: string, extractQuery: string): [string, string] { - const system = `You extract specific text from page content. Return only the extracted text.`; + // NOTE: /no_think MUST be at the START of user message for Qwen3 models. + // Without it, Qwen3 puts the answer in tags and content is empty. + const system = `You extract specific text from page content. Return only the extracted text. Do NOT output any thinking, reasoning, or explanation.`; - const user = `You are a text extraction assistant. Given the page content below, extract the specific information requested. + const user = `/no_think +You are a text extraction assistant. Given the page content below, extract the specific information requested. PAGE CONTENT: ${pageContent} @@ -246,7 +249,10 @@ export function isExtractionTask(task: string): boolean { taskLower.includes('title of') || taskLower.includes('price of') || taskLower.includes('name of') || - taskLower.includes('content of') + taskLower.includes('content of') || + taskLower.includes('headline of') || + taskLower.includes('rating of') || + taskLower.includes('review of') ); } diff --git a/src/agents/planner-executor/plan-models.ts b/src/agents/planner-executor/plan-models.ts index ef6621a..4ace0b9 100644 --- a/src/agents/planner-executor/plan-models.ts +++ b/src/agents/planner-executor/plan-models.ts @@ -168,7 +168,7 @@ export interface RepairHistoryEntry { export interface ActionRecord { /** Step number (1-indexed) */ stepNum: number; - /** Action type (CLICK, TYPE_AND_SUBMIT, SCROLL, etc.) */ + /** Action type (CLICK, TYPE_AND_SUBMIT, SCROLL, EXTRACT, etc.) */ action: string; /** Element description or URL */ target: string | null; @@ -176,6 +176,8 @@ export interface ActionRecord { result: string; /** URL after action completed */ urlAfter: string | null; + /** Extracted data for EXTRACT actions (the text that was extracted) */ + extractedData?: string | null; } // --------------------------------------------------------------------------- diff --git a/src/agents/planner-executor/planner-executor-agent.ts b/src/agents/planner-executor/planner-executor-agent.ts index 9b2213d..2c1f4bc 100644 --- a/src/agents/planner-executor/planner-executor-agent.ts +++ b/src/agents/planner-executor/planner-executor-agent.ts @@ -78,7 +78,6 @@ import type { Tracer } from '../../tracing/tracer'; import { isTextExtractionTask, isExtractionTask, - getExtractionDomainGuidance, buildExtractionPrompt, } from './extraction-keywords'; @@ -1161,12 +1160,24 @@ export class PlannerExecutorAgent { // Record action history after any auth-boundary or optional-substep recovery. if (!actionHistoryRecorded) { + // For EXTRACT actions, include the extracted data so the planner + // knows what was already extracted and can avoid repeating + const extractedText = + plannerAction.action === 'EXTRACT' && finalOutcome.extractedData + ? typeof finalOutcome.extractedData === 'object' && + finalOutcome.extractedData !== null && + 'text' in (finalOutcome.extractedData as Record) + ? ((finalOutcome.extractedData as Record).text as string) + : JSON.stringify(finalOutcome.extractedData) + : undefined; + this.actionHistory.push({ stepNum, action: plannerAction.action, target: this.summarizePlannerActionTarget(plannerAction), result: finalOutcome.status === StepStatus.SUCCESS ? 'success' : 'failed', urlAfter, + extractedData: extractedText || undefined, }); } @@ -1183,6 +1194,26 @@ export class PlannerExecutorAgent { success = true; } + // Auto-complete extraction tasks: if the action was a successful EXTRACT + // and the overall task is an extraction task, the goal is achieved. + // This prevents infinite EXTRACT loops on extraction-focused tasks. + // Uses isTextExtractionTask (comprehensive) rather than isExtractionTask (simpler) + // to cover more extraction patterns across any website. + if ( + !success && + plannerAction.action === 'EXTRACT' && + finalOutcome.status === StepStatus.SUCCESS && + finalOutcome.extractedData && + isTextExtractionTask(task) + ) { + if (this.config.verbose) { + console.log( + `[EXTRACT] Extraction task completed successfully, transitioning to DONE` + ); + } + success = true; + } + if (this.recoveryState && this.config.recovery.trackSuccessfulUrls && urlAfter) { this.recoveryState.recordCheckpoint({ url: urlAfter, diff --git a/src/agents/planner-executor/prompts.ts b/src/agents/planner-executor/prompts.ts index 8124494..a987eba 100644 --- a/src/agents/planner-executor/prompts.ts +++ b/src/agents/planner-executor/prompts.ts @@ -40,6 +40,9 @@ export function buildStepwisePlannerPrompt( if (rec.urlAfter) { historyText += ` [URL: ${rec.urlAfter.slice(0, 60)}...]`; } + if (rec.extractedData) { + historyText += ` [EXTRACTED: ${rec.extractedData.slice(0, 120)}]`; + } historyText += '\n'; } historyText += '\n'; diff --git a/src/llm-provider.ts b/src/llm-provider.ts index f321888..59acc33 100644 --- a/src/llm-provider.ts +++ b/src/llm-provider.ts @@ -395,14 +395,11 @@ export class OllamaProvider extends LocalLLMProvider { options: Record = {} ): Promise { // For Qwen3 models, add think: false to disable reasoning output - // Ollama OpenAI-compatible API passes model options via 'options' field + // Ollama OpenAI-compatible API expects 'think' as a TOP-LEVEL field, + // NOT nested under 'options'. See: https://github.com/ollama/ollama/blob/main/docs/openai.md const ollamaOptions = { ...options }; if (this._disableThinking) { - // Merge with existing options if any - ollamaOptions.options = { - ...(ollamaOptions.options || {}), - think: false, - }; + ollamaOptions.think = false; } return super.generate(systemPrompt, userPrompt, ollamaOptions); }