Skip to content

Latest commit

 

History

History
1468 lines (1261 loc) · 55.9 KB

File metadata and controls

1468 lines (1261 loc) · 55.9 KB

verify_result = Value::Null;

    if let Some(js) = verify_js {
        let vr = evaluate_js(page, js).await?;
        let vr_str = match &vr {
            Value::Bool(b) => b.to_string(),
            Value::String(s) => s.clone(),
            other => other.to_string(),
        };
        verified = vr_str == "true" || vr_str.contains("true");
        verify_result = vr;
    }
    
    let step = serde_json::json!({
        "step": step_num + 1,
        "url": url,
        "title": title_str,
        "observation": observation,
        "goal": goal,
        "verified": verified,
        "verify_result": verify_result
    });
    steps.push(step);
    
    if verified {
        return Ok(serde_json::json!({
            "status": "goal_achieved",
            "total_steps": step_num + 1,
            "goal": goal,
            "steps": steps
        }));
    }
}

Ok(serde_json::json!({
    "status": "max_steps_reached",
    "total_steps": max_steps,
    "goal": goal,
    "steps": steps
}))

}


**KEY CHARACTERISTICS**:
- ✗ No **PLANNING** phase
- ✗ No **ACTION** execution
- ✓ Only **OBSERVE** → **VERIFY** cycle
- Used for tracing/monitoring, NOT autonomous automation
- Returns observation snapshots at each step

---

## 4. AGENT THINK — Analysis & Recommendations (NO Execution)

**File**: `/Users/giulioleone/Sviluppo/onecrawl-dev/packages/onecrawl-rust/crates/onecrawl-cdp/src/agent.rs` (lines **370-440**)

```rust
/// Analyzes the current page and returns recommendations without acting on them.
///
/// A single JavaScript snippet is evaluated in the page. It gathers:
///
/// * `state` – URL, title, `readyState`, scroll offsets and viewport size;
/// * `interactive` – counts of buttons, links, inputs and forms;
/// * `prominent_ctas` – up to 10 buttons larger than 50x20 px whose top edge
///   is above the bottom of the viewport;
/// * `empty_required` – up to 10 required inputs with no value and a
///   non-zero rendered width;
/// * `flags` – heuristic booleans (`hasLogin`, `hasSearch`, `hasModal`,
///   `hasCaptcha`, `isLoading`);
/// * `page_type` – derived from the flags, first match wins
///   (login, search, modal, otherwise content);
/// * `recommendations` – suggested actions with `high`/`medium`/`low` priority.
///
/// # Returns
/// The analysis as a JSON value. When the script's result cannot be read as a
/// string, or that string is not valid JSON, an empty object (`{}`) is
/// returned instead of an error (best-effort analysis).
///
/// # Errors
/// Propagates the error from `page.evaluate` when the evaluation itself fails.
pub async fn think(page: &Page) -> Result<Value> {
    // The script serializes its result with JSON.stringify, so the Rust side
    // only has to read back a single string and parse it.
    let js = r#"
        (() => {
            const state = {
                url: window.location.href,
                title: document.title,
                ready: document.readyState,
                scroll: { x: window.scrollX, y: window.scrollY, maxY: document.documentElement.scrollHeight - window.innerHeight },
                viewport: { w: window.innerWidth, h: window.innerHeight }
            };

            const buttons = document.querySelectorAll('button, [role="button"]');
            const links = document.querySelectorAll('a[href]');
            const inputs = document.querySelectorAll('input, textarea, select');
            const forms = document.querySelectorAll('form');

            // Find prominent CTAs
            const ctas = Array.from(buttons).filter(b => {
                const rect = b.getBoundingClientRect();
                return rect.width > 50 && rect.height > 20 && rect.top < window.innerHeight;
            }).map(b => ({
                text: (b.innerText || '').trim().substring(0, 50),
                tag: b.tagName.toLowerCase(),
                type: b.type || '',
                disabled: b.disabled
            })).slice(0, 10);

            // Find empty required inputs
            const emptyInputs = Array.from(inputs).filter(i => {
                return i.required && !i.value && i.getBoundingClientRect().width > 0;
            }).map(i => ({
                name: i.name || i.id || i.placeholder || i.type,
                type: i.type
            })).slice(0, 10);

            // Detect page characteristics
            const hasLogin = !!(document.querySelector('[type="password"]') || document.querySelector('form[action*="login"]'));
            const hasSearch = !!(document.querySelector('[type="search"]') || document.querySelector('[name="q"]'));
            const hasModal = !!(document.querySelector('[role="dialog"]') || document.querySelector('.modal.show'));
            const hasCaptcha = !!(document.querySelector('[class*="captcha"]') || document.querySelector('iframe[src*="captcha"]'));
            const isLoading = !!(document.querySelector('.loading, .spinner, [aria-busy="true"]'));

            const analysis = {
                page_type: hasLogin ? 'login_page' : hasSearch ? 'search_page' : hasModal ? 'modal_open' : 'content_page',
                state,
                interactive: {
                    buttons: buttons.length,
                    links: links.length,
                    inputs: inputs.length,
                    forms: forms.length
                },
                prominent_ctas: ctas,
                empty_required: emptyInputs,
                flags: { hasLogin, hasSearch, hasModal, hasCaptcha, isLoading },
                recommendations: []
            };

            // Build recommendations based on page state
            if (hasCaptcha) analysis.recommendations.push({ action: 'solve_captcha', priority: 'high', reason: 'CAPTCHA detected' });
            if (hasModal) analysis.recommendations.push({ action: 'dismiss_modal', priority: 'high', reason: 'Modal blocking interaction' });
            if (isLoading) analysis.recommendations.push({ action: 'wait', priority: 'high', reason: 'Page still loading' });
            if (emptyInputs.length > 0) analysis.recommendations.push({ action: 'fill_form', priority: 'medium', reason: `${emptyInputs.length} required inputs empty` });
            if (hasLogin) analysis.recommendations.push({ action: 'authenticate', priority: 'medium', reason: 'Login form detected' });
            if (ctas.length > 0) analysis.recommendations.push({ action: 'click_cta', priority: 'low', reason: `${ctas.length} CTAs available` });
            if (state.scroll.maxY > 0 && state.scroll.y === 0) analysis.recommendations.push({ action: 'scroll_explore', priority: 'low', reason: 'Page has scrollable content' });

            return JSON.stringify(analysis);
        })()
    "#.to_string();

    let result = page.evaluate(js).await?;
    // Deliberately lenient: an unreadable or malformed result degrades to `{}`.
    let raw: String = result.into_value().unwrap_or_else(|_| "{}".to_string());
    Ok(serde_json::from_str(&raw).unwrap_or_else(|_| serde_json::json!({})))
}

KEY CHARACTERISTICS:

  • ✓ Identifies page_type: "login_page", "search_page", "modal_open", "content_page"
  • ✓ Detects flags: hasLogin, hasSearch, hasModal, hasCaptcha, isLoading
  • ✓ Provides recommendations with priority levels (high/medium/low)
  • ✗ Does NOT execute any actions
  • Used for analysis and decision-making, not automation

5. AGENT CHAIN — Executes Pre-Written JS Actions

File: /Users/giulioleone/Sviluppo/onecrawl-dev/packages/onecrawl-rust/crates/onecrawl-cdp/src/agent.rs (lines 287-367)

/// Evaluates a sequence of pre-written JavaScript snippets against the page, in order.
///
/// Each entry of `actions` is passed to `page.evaluate`. What happens when an
/// evaluation fails is selected by `on_error`:
///
/// * `"retry"` – evaluate again, up to `max_retries` additional times, sleeping
///   500 ms between attempts. If every attempt fails the step is recorded as
///   `"failed"` and the chain continues.
/// * `"skip"` – record the step as `"skipped"` and continue.
/// * `"abort"` – record the step as `"aborted"` and return immediately.
/// * any other value – record the step as `"failed"` and continue.
///
/// # Returns
/// A JSON summary with:
/// * `status` – `"all_success"`, `"partial"` or `"aborted"`;
/// * `completed_steps` – the number of steps that succeeded;
/// * `total_steps` – `actions.len()`;
/// * `results` – one entry per processed step (`step`, `status`, `attempts`,
///   plus `result` on success or `error` on failure).
///
/// # Errors
/// Evaluation failures are captured in `results`; nothing in this function
/// returns `Err`.
pub async fn auto_chain(
    page: &Page,
    actions: &[String],
    on_error: &str,
    max_retries: usize,
) -> Result<Value> {
    let mut results = Vec::with_capacity(actions.len());
    // Tracked separately from `results.len()`: every processed step pushes a
    // result entry, so the length alone cannot tell successes from failures.
    let mut succeeded = 0usize;

    for (i, action_js) in actions.iter().enumerate() {
        let mut success = false;
        let mut last_err = String::new();
        let mut attempts = 0;

        // Retry loop for each action
        for attempt in 0..=max_retries {
            attempts = attempt + 1;
            match page.evaluate(action_js.to_string()).await {
                Ok(val) => {
                    // A result that cannot be read as a string is recorded as "null".
                    let r: String = val.into_value().unwrap_or_else(|_| "null".to_string());
                    results.push(serde_json::json!({
                        "step": i + 1,
                        "status": "success",
                        "result": r,
                        "attempts": attempts
                    }));
                    success = true;
                    break;
                }
                Err(e) => {
                    last_err = e.to_string();
                    // Only the "retry" strategy gets more than one attempt.
                    if on_error != "retry" || attempt == max_retries {
                        break;
                    }
                    tokio::time::sleep(std::time::Duration::from_millis(500)).await;
                }
            }
        }

        if success {
            succeeded += 1;
            continue;
        }

        // Handle failure based on on_error strategy
        let failure_status = match on_error {
            "skip" => "skipped",
            "abort" => "aborted",
            _ => "failed",
        };
        results.push(serde_json::json!({
            "step": i + 1,
            "status": failure_status,
            "error": last_err,
            "attempts": attempts
        }));

        if on_error == "abort" {
            return Ok(serde_json::json!({
                "status": "aborted",
                "completed_steps": succeeded,
                "total_steps": actions.len(),
                "results": results
            }));
        }
    }

    let all_ok = succeeded == actions.len();
    Ok(serde_json::json!({
        "status": if all_ok { "all_success" } else { "partial" },
        "completed_steps": succeeded,
        "total_steps": actions.len(),
        "results": results
    }))
}

ERROR HANDLING STRATEGIES:

  • "skip" (default): Continue to next action if one fails
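  • "retry": Retry failed action up to max_retries times (500 ms pause between attempts); if it still fails, the step is recorded as "failed" and the chain continues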
  • "abort": Stop execution immediately on first failure

6. AGENT OBSERVE — Annotated Page State

File: /Users/giulioleone/Sviluppo/onecrawl-dev/packages/onecrawl-rust/crates/onecrawl-cdp/src/agent.rs (lines 160-232)

/// Captures a snapshot of the page's interactive elements with reference ids and geometry.
///
/// The embedded script selects links, buttons, form controls, elements with
/// common interactive ARIA roles, and anything with a `tabindex` other than
/// `-1`. Elements whose bounding rectangle has both zero width and zero
/// height are skipped. Every remaining element receives a sequential
/// reference id (`@e1`, `@e2`, ...) and is described by its tag, role, text
/// (first 100 characters), common attributes, current value/disabled/checked
/// state, and rounded bounds including the center point.
///
/// The reference ids exist only in the returned data; the script does not
/// write them back to the DOM.
///
/// # Returns
/// Whatever `evaluate_js` yields for the script — presumably a JSON object with
/// `url`, `title`, `viewport`, `scroll`, `elements`, `element_count` and
/// `timestamp` (verify against `evaluate_js`, which is defined elsewhere).
pub async fn annotated_observe(page: &Page) -> Result<Value> {
    let js = r#"
        (() => {
            const elements = [];
            const interactive = document.querySelectorAll(
                'a, button, input, select, textarea, [role="button"], [role="link"], [role="checkbox"], [role="radio"],  [role="combobox"], [role="menuitem"], [tabindex]:not([tabindex="-1"])'
            );
            
            let ref_counter = 0;
            interactive.forEach(el => {
                const rect = el.getBoundingClientRect();
                if (rect.width === 0 && rect.height === 0) return;
                
                ref_counter++;
                const ref_id = `@e${ref_counter}`;
                
                const tag = el.tagName.toLowerCase();
                const role = el.getAttribute('role') || '';
                const text = (el.innerText || el.textContent || '').trim().substring(0, 100);
                const ariaLabel = el.getAttribute('aria-label') || '';
                const placeholder = el.getAttribute('placeholder') || '';
                const type = el.getAttribute('type') || '';
                const href = el.getAttribute('href') || '';
                const name = el.getAttribute('name') || '';
                const id = el.id || '';
                const value = el.value || '';
                const disabled = el.disabled || false;
                const checked = el.checked || false;
                
                elements.push({
                    ref: ref_id,
                    tag,
                    role,
                    text,
                    aria_label: ariaLabel,
                    placeholder,
                    type,
                    href,
                    name,
                    id,
                    value,
                    disabled,
                    checked,
                    bounds: {
                        x: Math.round(rect.x),
                        y: Math.round(rect.y),
                        width: Math.round(rect.width),
                        height: Math.round(rect.height),
                        center_x: Math.round(rect.x + rect.width / 2),
                        center_y: Math.round(rect.y + rect.height / 2)
                    }
                });
            });
            
            const url = window.location.href;
            const title = document.title;
            const viewport = { width: window.innerWidth, height: window.innerHeight };
            const scroll = { x: window.scrollX, y: window.scrollY, max_y: document.documentElement.scrollHeight - window .innerHeight };
            
            return {
                url,
                title,
                viewport,
                scroll,
                elements,
                element_count: elements.length,
                timestamp: Date.now()
            };
        })()
    "#;
    
    // The script returns a plain object (not a JSON string); conversion to
    // `Value` is left to `evaluate_js`.
    evaluate_js(page, js).await
}

RETURNED STRUCTURE:

  • elements: Array with @e1, @e2, ... references
  • Each element includes: tag, role, text, bounds (x, y, width, height, center_x, center_y)
  • viewport & scroll state
  • timestamp

7. SMART ACTIONS — Intelligent Element Finding

File: /Users/giulioleone/Sviluppo/onecrawl-rust/crates/onecrawl-cdp/src/smart_actions.rs (193 lines)

smart_find() — Multi-Strategy Element Discovery

Uses 5 strategies in order of confidence:

  1. Exact Text Match (confidence: 1.0)
    • Matches button/link text exactly
  2. Fuzzy Text Match (confidence: 0.5-0.8)
    • Partial text matches, similarity-scored
  3. ARIA Role Match (confidence: 0.6)
    • Matches aria-label and role attributes
  4. Attribute Match (confidence: 0.4-0.7)
    • Matches placeholder, name, id, title, alt, aria-label
  5. CSS Selector (confidence: 0.95)
    • If query looks like a CSS selector (starts with . or # or contains [)

Returns deduplicated results sorted by confidence (up to 10 matches).


SHARED AGENT UTILITIES

Agent Memory: AgentMemory

File: /Users/giulioleone/Sviluppo/onecrawl-rust/crates/onecrawl-cdp/src/agent_memory.rs (443 lines)

/// One record held in the agent's memory.
pub struct MemoryEntry {
    /// Key under which the entry is stored.
    pub key: String,
    /// Arbitrary JSON payload.
    pub value: serde_json::Value,
    /// Kind of knowledge this entry represents; see `MemoryCategory`
    /// (PageVisit, ElementPattern, DomainStrategy, RetryKnowledge, etc.).
    pub category: MemoryCategory,
    /// Domain the entry relates to, if any.
    pub domain: Option<String>,
    /// Creation time — presumably a Unix timestamp; TODO confirm the unit.
    pub created_at: u64,
    /// Last access time — presumably same unit as `created_at`; TODO confirm.
    pub accessed_at: u64,
    /// Access counter — NOTE(review): when it is incremented is not visible here.
    pub access_count: u64,
    /// Optional time-to-live in seconds; `None` presumably means no expiry — confirm.
    pub ttl_seconds: Option<u64>,
}

/// Classification of a `MemoryEntry`.
pub enum MemoryCategory {
    /// Track visited pages.
    PageVisit,
    /// Learned selectors for elements.
    ElementPattern,
    /// Domain-specific strategies (login flows, popups, etc.).
    DomainStrategy,
    /// What worked after a failure.
    RetryKnowledge,
    /// User-provided settings.
    UserPreference,
    /// Element → selector mappings.
    SelectorMapping,
    /// Common errors and solutions.
    ErrorPattern,
    /// User-defined.
    Custom,
}

Session Context

File: /Users/giulioleone/Sviluppo/onecrawl-rust/crates/onecrawl-cdp/src/agent.rs (lines 235-284)

Stores/retrieves session data in window.__onecrawl_ctx:

  • "set" — Store a key-value pair
  • "get" — Retrieve a value
  • "get_all" — Retrieve all context
  • "clear" — Clear all context

FULL ACTION VOCABULARY (Available to agent auto)

✓ navigate          — Go to URL
✓ click             — Click element (CSS selector-based)
✓ type              — Type text into element
✓ smart_click       — Click element (multi-strategy discovery)
✓ smart_fill        — Fill element with text (multi-strategy discovery)
✓ wait              — Wait for element to appear
✓ snapshot          — Take page snapshot (URL, title, interactive count)
✓ extract           — Extract element content (text, HTML, tag)
✓ assert            — Verify condition (url_contains, title_contains, element_exists, text_contains)
✓ scroll            — Scroll page (up/down/left/right by amount)
✓ screenshot        — Capture screenshot
✓ memory_store      — Store data in persistent memory
✓ memory_recall     — Retrieve data from memory
✓ conditional       — If/else branching

COMPARISON MATRIX

Feature agent auto agent loop agent think agent chain agent observe
Planning ✓ (task_planner)
Observes
Executes ✓ (pre-written)
Recommends (via plan)
Retries ✓ (3x per step) ✓ (configurable)
Memory
Cost tracking
Screenshot ✓ (optional)
Resume

PROBLEMS & RECOMMENDATIONS

Problem 1: agent loop is observation-only

Why: It's designed for tracing/monitoring, not automation. Fix: Use agent auto for autonomous execution.

Problem 2: agent auto planner may be too generic

Why: task_planner::plan_from_goal() uses keyword matching → may misclassify goals. Fix:

  • Add domain-specific context extraction (e.g., recognize "login to Gmail" vs generic "login")
  • Use memory to store successful strategies for repeated domains
  • Add confidence scoring to step generation

Problem 3: Missing dynamic observation during planning

Why: Task planner doesn't see current page state when generating steps. Fix:

  • Pass annotated_observe() result to planner
  • Use current page state to refine action targets
  • Update plan mid-execution based on think() output

Problem 4: Limited error recovery

Why: Retries just repeat same action (no adaptation). Fix:

  • Use fallback actions from PlannedStep::fallback
  • Consult memory for alternative selectors when primary fails
  • Call think() to get recommendations on failure

FILE LOCATIONS SUMMARY

  • Agent Auto (planning + execution): crates/onecrawl-cdp/src/agent_auto.rs
  • Task Planner (goal→steps): crates/onecrawl-cdp/src/task_planner.rs
  • Agent Core (loop, think, chain, observe): crates/onecrawl-cdp/src/agent.rs
  • Memory System: crates/onecrawl-cdp/src/agent_memory.rs
  • Smart Actions: crates/onecrawl-cdp/src/smart_actions.rs
  • CLI Wrapper: crates/onecrawl-cli-rs/src/commands/browser/agent_computer.rs
  • CLI Enum: crates/onecrawl-cli-rs/src/cli/agent.rs

EOF cat /tmp/agent_analysis.md

            EC=$?
            echo "___BEGIN___COMMAND_DONE_MARKER___$EC"
        }

BEGIN___COMMAND_OUTPUT_MARKER

OneCrawl Agent System - Complete Implementation Analysis

Executive Summary

The OneCrawl agent system has 5 core agent commands that work together:

  1. agent auto - Full autonomous goal-based automation with planning & execution
  2. agent loop - Simple observe→verify cycles (observation-only: no planning phase, NO action execution)
  3. agent think - Analyzes page and recommends actions (analysis-only, NO execution)
  4. agent chain - Executes pre-written JS action chains with error recovery
  5. agent observe - Gets annotated page state with element coordinates

The KEY PROBLEM: only agent auto actually plans and executes; agent loop and agent think stop short of acting:

  • agent auto uses task_planner::plan_from_goal() → generates steps → executes them
  • agent loop only observes, never executes (it's a tracing tool)
  • agent think generates recommendations but doesn't act

1. AGENT AUTO — The Full Autonomous Planner + Executor

File: /Users/giulioleone/Sviluppo/onecrawl-dev/packages/onecrawl-rust/crates/onecrawl-cdp/src/agent_auto.rs (872 lines)

Entry Point

/// Convenience entry point: builds an `AgentAuto` from `config`, plans, then executes.
///
/// The page's current URL is read first and handed to the planner; if it
/// cannot be read (error or `None`) an empty string is passed instead.
///
/// # Errors
/// Returns the planning error from `plan_with_url`, or whatever `execute`
/// returns.
pub async fn agent_auto_run(page: &Page, config: AgentAutoConfig) -> Result<AgentAutoResult> {
    let mut runner = AgentAuto::new(config);

    // A failed or missing URL lookup collapses to "" rather than aborting the run.
    let start_url = page.url().await.ok().flatten().unwrap_or_default();

    // Plan phase: the generated steps are kept on the runner; the returned copy is not needed.
    runner.plan_with_url(Some(start_url))?;

    // Execution phase.
    runner.execute(page).await
}

Planning Phase: plan_with_url()

Located at lines 183-229:

/// Generates the step list for the configured goal and stores it on `self`.
///
/// Context is first extracted from the goal text. If that context has no
/// `"url"` entry and `current_url` is a non-empty URL other than
/// `about:blank`, the current URL is added. The planner's output is then
/// converted into pending `AutoStep`s.
///
/// # Returns
/// A clone of the freshly stored steps.
pub fn plan_with_url(&mut self, current_url: Option<String>) -> Result<Vec<AutoStep>> {
    let mut context = task_planner::extract_context(&self.config.goal);

    // Seed the planner with the page we are already on, unless the goal named
    // a URL itself or the page is blank.
    let usable_url = current_url.filter(|u| !u.is_empty() && u.as_str() != "about:blank");
    if let Some(url) = usable_url {
        if !context.contains_key("url") {
            context.insert("url".into(), url);
        }
    }

    let plan = task_planner::plan_from_goal(&self.config.goal, &context);

    // Every planned step starts out pending, with no result or error and zeroed counters.
    let mut steps = Vec::new();
    for (index, planned) in plan.steps.iter().enumerate() {
        let (action_type, target, value) = action_to_parts(&planned.action);
        steps.push(AutoStep {
            index,
            description: planned.description.clone(),
            action_type,
            target,
            value,
            status: StepStatus::Pending,
            result: None,
            screenshot_path: None,
            error: None,
            retries: 0,
            duration_ms: 0,
        });
    }
    self.steps = steps;

    Ok(self.steps.clone())
}

Execution Phase: execute()

Located at lines 232-278:

/// Runs the planned steps in order and assembles the final result.
///
/// For each step: stop if `check_should_stop` says so, skip steps already
/// `Completed` or `Skipped`, mark the step `Running`, apply the safety check,
/// execute it via `execute_step_with_retries`, record its duration, and hand
/// off to `post_step_bookkeeping`. Failures are collected as messages rather
/// than aborting the run. Memory is saved at the end when
/// `config.use_memory` is set; a save error is ignored.
pub async fn execute(&mut self, page: &Page) -> Result<AgentAutoResult> {
    // NOTE(review): `policy` is not defined anywhere in this excerpt.
    let mut safety = SafetyState::new(policy);
    let mut memory = self.load_memory();
    let mut errors: Vec<String> = Vec::new();

    for idx in 0..self.steps.len() {
        // Stop-condition check comes before anything else.
        if self.check_should_stop(page, idx).await {
            break;
        }

        // Steps that already finished or were skipped are left untouched.
        if matches!(
            self.steps[idx].status,
            StepStatus::Completed | StepStatus::Skipped
        ) {
            continue;
        }

        self.steps[idx].status = StepStatus::Running;

        // A step rejected by the safety check is not executed.
        if !self.check_safety(idx, &mut safety, &mut errors) {
            continue;
        }
        safety.record_action();

        let started = Instant::now();
        let succeeded = self.execute_step_with_retries(idx, page, &mut memory).await;

        if !succeeded {
            let reason = self.steps[idx].error.as_deref().unwrap_or("failed");
            errors.push(format!("step {idx}: {reason}"));
        }
        self.steps[idx].duration_ms = started.elapsed().as_millis() as u64;

        self.post_step_bookkeeping(idx, succeeded, page, &mut memory).await;
    }

    if self.config.use_memory {
        // Best-effort: a failed save does not fail the run.
        let _ = memory.save();
    }

    self.build_result(errors)
}

Action Execution: execute_step()

Located at lines 444-582:

/// Executes the single step at `idx` against `page`.
///
/// The step's `action_type` selects the behavior; `target` and `value` are
/// its optional arguments:
///
/// | action                  | target (default)        | value (default)      |
/// |-------------------------|-------------------------|----------------------|
/// | `navigate`              | URL (required)          | –                    |
/// | `click` / `smart_click` | query (`"button"`)      | –                    |
/// | `type` / `smart_fill`   | query (`"input"`)       | text (`""`)          |
/// | `extract`               | selector (`"body"`)     | –                    |
/// | `wait`                  | selector (`"body"`)     | timeout ms (`5000`)  |
/// | `snapshot`              | –                       | –                    |
/// | `screenshot`            | –                       | –                    |
/// | `assert`                | condition name (`""`)   | expected (`""`)      |
/// | `scroll`                | direction (`"down"`)    | amount px (`500`)    |
/// | `memory_store`          | –                       | –                    |
/// | `memory_recall`         | –                       | –                    |
///
/// Strings interpolated into JavaScript are JSON-encoded first so they arrive
/// in the page as properly quoted and escaped string literals.
///
/// # Errors
/// * `navigate` without a target, or a failing `goto`.
/// * Failures from `smart_click` / `smart_fill` / `evaluate_js` where the
///   result is propagated with `?`.
/// * `assert` whose condition evaluates to anything other than `true`.
/// * An unknown `action_type`.
///
/// `wait`, `scroll` and `screenshot` are best-effort: their failures are ignored.
async fn execute_step(&mut self, idx: usize, page: &Page) -> Result<()> {
    // Clone the fields up front so `self` can be mutated inside the arms.
    let step = &self.steps[idx];
    let action_type = step.action_type.clone();
    let target = step.target.clone();
    let value = step.value.clone();

    match action_type.as_str() {
        "navigate" => {
            let url = target.ok_or_else(|| Error::Cdp("navigate: missing URL".into()))?;
            page.goto(&url).await?;
            // Short fixed pause after navigation before the next step runs.
            tokio::time::sleep(std::time::Duration::from_millis(500)).await;
        }
        "smart_click" | "click" => {
            let query = target.unwrap_or_else(|| "button".into());
            smart_actions::smart_click(page, &query).await?;
        }
        "smart_fill" | "type" => {
            let query = target.unwrap_or_else(|| "input".into());
            let text = value.unwrap_or_default();
            smart_actions::smart_fill(page, &query, &text).await?;
        }
        "extract" => {
            let selector = target.unwrap_or_else(|| "body".into());
            let js = format!(r#"
                (() => {{
                    const el = document.querySelector({sel});
                    if (!el) return null;
                    return {{
                        text: el.innerText || '',
                        html: el.innerHTML.substring(0, 5000),
                        tag: el.tagName
                    }};
                }})()
            "#, sel = serde_json::to_string(&selector).unwrap_or_default());
            let data = evaluate_js(page, &js).await?;
            // A missing element yields null; nothing is recorded in that case.
            if !data.is_null() {
                self.extracted_data.push(data.clone());
                self.steps[idx].result = Some(data);
            }
        }
        "wait" => {
            let wait_target = target.unwrap_or_else(|| "body".into());
            let timeout_ms: u64 = value.as_deref().and_then(|v| v.parse().ok()).unwrap_or(5000);
            let js = format!(r#"
                new Promise((resolve, reject) => {{
                    const sel = {sel};
                    const start = Date.now();
                    const check = () => {{
                        if (document.querySelector(sel)) return resolve(true);
                        if (Date.now() - start > {timeout}) return reject('timeout');
                        requestAnimationFrame(check);
                    }};
                    check();
                }})
            "#, sel = serde_json::to_string(&wait_target).unwrap_or_default(), timeout = timeout_ms);
            // Best-effort: a timeout (rejected promise) does not fail the step.
            let _ = evaluate_js(page, &js).await;
        }
        "snapshot" => {
            let js = r#"
                (() => {
                    const url = location.href;
                    const title = document.title;
                    const interactive = document.querySelectorAll(
                        'a, button, input, select, textarea, [role="button"], [role="link"]'
                    );
                    return {
                        url,
                        title,
                        interactive_count: interactive.length,
                        body_length: document.body?.innerText?.length || 0
                    };
                })()
            "#;
            let data = evaluate_js(page, js).await?;
            self.steps[idx].result = Some(data);
        }
        "screenshot" => {
            // Best-effort: a failed capture leaves `screenshot_path` unset.
            if let Ok(path) = self.capture_step_screenshot(page, idx).await {
                self.steps[idx].screenshot_path = Some(path);
            }
        }
        "assert" => {
            let condition = target.unwrap_or_default();
            let expected_value = value.unwrap_or_default();
            // JSON-encode both values so the script sees them as escaped JS
            // string literals, the same way the extract/wait arms embed selectors.
            let safe_cond = serde_json::to_string(&condition).unwrap_or_default();
            let safe_val = serde_json::to_string(&expected_value).unwrap_or_default();
            let js = format!(r#"
                (() => {{
                    try {{
                        const cond = {safe_cond};
                        const val = {safe_val};
                        if (cond === "url_contains") return window.location.href.includes(val);
                        if (cond === "title_contains") return document.title.includes(val);
                        if (cond === "element_exists") return !!document.querySelector(val);
                        if (cond === "text_contains") return document.body.innerText.includes(val);
                        return false;
                    }} catch(e) {{
                        return false;
                    }}
                }})()
            "#);
            let result = evaluate_js(page, &js).await?;
            // Anything other than a literal `true` counts as a failed assertion.
            let passed = result.as_bool().unwrap_or(false);
            if !passed {
                return Err(Error::Cdp(format!("assertion failed: {condition}").into()));
            }
            self.steps[idx].result = Some(serde_json::json!({ "passed": true }));
        }
        "scroll" => {
            let direction = target.unwrap_or_else(|| "down".into());
            let amount: u32 = value.as_deref().and_then(|v| v.parse().ok()).unwrap_or(500);
            // Clamp instead of casting with `as`, which would wrap amounts
            // above i32::MAX into negative offsets and reverse the direction.
            let amount = i32::try_from(amount).unwrap_or(i32::MAX);
            let (dx, dy) = match direction.as_str() {
                "up" => (0, -amount),
                "down" => (0, amount),
                "left" => (-amount, 0),
                "right" => (amount, 0),
                // Unrecognized directions scroll down.
                _ => (0, amount),
            };
            let js = format!("window.scrollBy({dx}, {dy})");
            // Best-effort: scroll failures are ignored.
            let _ = evaluate_js(page, &js).await;
        }
        "memory_store" => {
            // NOTE(review): this arm only records a marker result; no memory
            // write is visible in this function — confirm it happens elsewhere.
            self.steps[idx].result = Some(serde_json::json!({ "stored": true }));
        }
        "memory_recall" => {
            // NOTE(review): likewise only a marker; no memory read happens here.
            self.steps[idx].result = Some(serde_json::json!({ "recalled": true }));
        }
        other => {
            return Err(Error::Cdp(format!("unknown action type: {other}").into()));
        }
    }

    Ok(())
}

Action Type Conversion: action_to_parts()

Located at lines 783-828:

/// Flattens a planned action into the `(action_type, target, value)` triple.
///
/// * `action_type` is the lowercase action name (for example `"navigate"`).
/// * `target` carries the primary operand: URL, selector, query, direction,
///   screenshot path, assertion condition or memory key.
/// * `value` carries the secondary operand when there is one: typed text,
///   wait timeout, fill value, scroll amount or memory value.
///
/// `Conditional` has no flat representation and is reported as a plain
/// `"snapshot"` with no operands.
fn action_to_parts(action: &task_planner::PlannedAction) -> (String, Option<String>, Option<String>) {
    let (kind, subject, payload): (&str, Option<String>, Option<String>) = match action {
        // Actions without any operand.
        PlannedAction::Snapshot {} | PlannedAction::Conditional { .. } => ("snapshot", None, None),

        // Actions with a single operand.
        PlannedAction::Navigate { url } => ("navigate", Some(url.clone()), None),
        PlannedAction::Click { target, .. } => ("click", Some(target.clone()), None),
        PlannedAction::Extract { target } => ("extract", Some(target.clone()), None),
        PlannedAction::Assert { condition } => ("assert", Some(condition.clone()), None),
        PlannedAction::SmartClick { query } => ("smart_click", Some(query.clone()), None),
        PlannedAction::MemoryRecall { key } => ("memory_recall", Some(key.clone()), None),
        PlannedAction::Screenshot { path } => ("screenshot", path.clone(), None),

        // Actions with both a target and a value.
        PlannedAction::Type { target, text, .. } => {
            ("type", Some(target.clone()), Some(text.clone()))
        }
        PlannedAction::Wait { target, timeout_ms } => {
            ("wait", Some(target.clone()), Some(timeout_ms.to_string()))
        }
        PlannedAction::SmartFill { query, value } => {
            ("smart_fill", Some(query.clone()), Some(value.clone()))
        }
        PlannedAction::Scroll { direction, amount } => {
            ("scroll", Some(direction.clone()), amount.map(|a| a.to_string()))
        }
        PlannedAction::MemoryStore { key, value } => {
            ("memory_store", Some(key.clone()), Some(value.clone()))
        }
    };

    (kind.to_string(), subject, payload)
}

2. TASK PLANNER — Goal→Steps Generation

File: /Users/giulioleone/Sviluppo/onecrawl-dev/packages/onecrawl-rust/crates/onecrawl-cdp/src/task_planner.rs (498 lines)

Goal Matching: match_goal()

Located at lines 209-234:

/// Picks the built-in goal pattern that best matches `goal`.
///
/// The goal is lowercased and every pattern is scored by how many of its
/// keywords occur as substrings. The pattern with the highest non-zero score
/// wins; on a tie the pattern listed first is kept. When no keyword matches,
/// the result is `GoalCategory::Generic` with a snapshot → smart_click →
/// snapshot template.
pub fn match_goal(goal: &str) -> (GoalCategory, Vec<StepTemplate>) {
    let needle = goal.to_lowercase();
    let patterns = builtin_patterns();

    // Carry the leading (pattern, score) pair through the scan; only a
    // strictly better, positive score replaces the current leader.
    let leader = patterns
        .iter()
        .fold(None::<(&GoalPattern, usize)>, |leader, pattern| {
            let hits = pattern
                .keywords
                .iter()
                .filter(|kw| needle.contains(kw.as_str()))
                .count();
            match leader {
                None if hits == 0 => None,
                Some((_, top)) if hits <= top => leader,
                _ => Some((pattern, hits)),
            }
        });

    if let Some((pattern, _)) = leader {
        return (pattern.category.clone(), pattern.template_steps.clone());
    }

    // Nothing matched: fall back to the generic observe/act/observe template.
    let template = |description: &str, action_type: &str, requires_context: bool| StepTemplate {
        description: description.into(),
        action_type: action_type.into(),
        requires_context,
    };
    let generic_steps = vec![
        template("Take snapshot to understand page", "snapshot", false),
        template("Execute goal action", "smart_click", true),
        template("Verify result", "snapshot", false),
    ];
    (GoalCategory::Generic, generic_steps)
}

Plan Generation: plan_from_goal()

Located at lines 237-316:

pub fn plan_from_goal(goal: &str, context: &HashMap<String, String>) -> TaskPlan {
    let (category, templates) = match_goal(goal);
    
    let mut steps = Vec::new();
    let mut confidence: f64 = 0.7;
    
    for (i, template) in templates.iter().enumerate() {
        let action = match template.action_type.as_str() {
            "navigate" => {
                let url = context.get("url").cloned().unwrap_or_else(|| "about:blank".into());
                PlannedAction::Navigate { url }
            }
            "snapshot" => PlannedAction::Snapshot {},
            "smart_fill" => {
                let query = context.get("field").cloned().unwrap_or_else(|| "input".into());
                let value = context.get("value").cloned().unwrap_or_default();
                PlannedAction::SmartFill { query, value }
            }
            "smart_click" => {
                let query = context.get("target").cloned().unwrap_or_else(|| "submit".into());
                PlannedAction::SmartClick { query }
            }
            "extract" => {
                let target = context.get("selector").cloned().unwrap_or_else(|| "body".into());
                PlannedAction::Extract { target }
            }
            "wait" => {
                let target = context.get("wait_for").cloned().unwrap_or_else(|| "body".into());
                PlannedAction::Wait { target, timeout_ms: 10000 }
            }
            "memory_store" => {
                let key = context.get("memory_key").cloned().unwrap_or_else(|| "result".into());
                let value = context.get("memory_value").cloned().unwrap_or_default();
                PlannedAction::MemoryStore { key, value }
            }
            _ => PlannedAction::Snapshot {},
        };
        
        let fallback = if template.requires_context {
            Some(Box::new(PlannedStep {
                id: i * 100 + 1,
                description: format!("Fallback: take snapshot and retry {}", template.description),
                action: PlannedAction::Snapshot {},
                fallback: None,
                confidence: 0.5,
            }))
        } else {
            None
        };
        
        steps.push(PlannedStep {
            id: i,
            description: template.description.clone(),
            action,
            fallback,
            confidence: if template.requires_context { 0.6 } else { 0.9 },
        });
    }
    
    if context.contains_key("url") { confidence += 0.1; }
    if context.contains_key("domain_strategy") { confidence += 0.15; }
    confidence = confidence.min(1.0);
    
    let strategy = if context.contains_key("domain_strategy") {
        PlanStrategy::MemoryAssisted
    } else if category == GoalCategory::Generic {
        PlanStrategy::Exploratory
    } else {
        PlanStrategy::Direct
    };
    
    TaskPlan {
        goal: goal.to_string(),
        steps,
        strategy,
        estimated_duration_ms: templates.len() as u64 * 2000,
        confidence,
        context_used: context.keys().cloned().collect(),
    }
}

Built-in Goal Patterns: builtin_patterns()

Located at lines 130-206:

The system recognizes these goal categories:

  1. Authentication: "login", "log in", "sign in", "authenticate"
  2. Search: "search", "find", "look for", "query"
  3. DataExtraction: "extract", "scrape", "get data", "collect"
  4. FormFilling: "fill", "form", "submit", "complete"
  5. Navigation: "navigate", "go to", "open", "visit"
  6. Interaction: "click", "press", "tap", "select"
  7. Monitoring: "monitor", "watch", "check", "track"
  8. Generic: fallback for unrecognized goals

Each pattern has a set of StepTemplates that are instantiated based on the goal.

Context Extraction: extract_context()

Located at lines 319-362:

/// Extracts planner context hints from a free-form goal string.
///
/// Keys that may be present in the returned map:
///
/// * `url`   – a whitespace-delimited word starting with `http://` or
///   `https://` (the last such word wins).
/// * `value` – the first quoted span, later overwritten by an email address if
///   one is found.
/// * `field` – the second quoted span.
/// * `email` – a word containing both `@` and `.`, with surrounding quotes,
///   brackets and punctuation stripped (the last such word wins).
///
/// Quotations may use `"` or `'`, but a quotation is only closed by the same
/// character that opened it, so `"it's here"` is one span. A `'` that sits
/// between two alphanumeric characters is treated as an apostrophe and never
/// opens or closes a quotation, so `don't click "Submit"` yields `Submit`.
/// An unterminated quotation is ignored.
pub fn extract_context(goal: &str) -> HashMap<String, String> {
    /// Returns the contents of every properly closed quotation in `text`.
    fn quoted_spans(text: &str) -> Vec<&str> {
        let mut spans = Vec::new();
        // (opening quote character, byte offset just past it)
        let mut open: Option<(char, usize)> = None;
        let mut prev: Option<char> = None;
        let mut chars = text.char_indices().peekable();

        while let Some((idx, ch)) = chars.next() {
            if ch == '"' || ch == '\'' {
                let next = chars.peek().map(|&(_, c)| c);
                let is_apostrophe = ch == '\''
                    && prev.map_or(false, char::is_alphanumeric)
                    && next.map_or(false, char::is_alphanumeric);

                if !is_apostrophe {
                    match open {
                        Some((opener, start)) if opener == ch => {
                            spans.push(&text[start..idx]);
                            open = None;
                        }
                        // The other quote kind inside an open quotation is literal text.
                        Some(_) => {}
                        None => open = Some((ch, idx + ch.len_utf8())),
                    }
                }
            }
            prev = Some(ch);
        }
        spans
    }

    /// Characters that may wrap an email token in prose but are never a
    /// valid first or last character of the address itself.
    fn wraps_email(c: char) -> bool {
        matches!(
            c,
            '"' | '\'' | ',' | ';' | ':' | '!' | '?' | '.' | '(' | ')' | '<' | '>' | '[' | ']'
        )
    }

    let mut context = HashMap::new();

    // URLs: any word with an explicit http(s) scheme.
    for word in goal.split_whitespace() {
        if word.starts_with("http://") || word.starts_with("https://") {
            context.insert("url".to_string(), word.to_string());
        }
    }

    // Quoted values: first span is the value, second the field name.
    let quotes = quoted_spans(goal);
    if let Some(first) = quotes.first() {
        context.insert("value".to_string(), first.to_string());
    }
    if let Some(second) = quotes.get(1) {
        context.insert("field".to_string(), second.to_string());
    }

    // Emails: checked after trimming so `'a@b.com',` is stored as `a@b.com`.
    // An email deliberately overrides a quoted `value`.
    for word in goal.split_whitespace() {
        let candidate = word.trim_matches(wraps_email);
        if candidate.contains('@') && candidate.contains('.') {
            context.insert("email".to_string(), candidate.to_string());
            context.insert("value".to_string(), candidate.to_string());
        }
    }

    context
}

3. AGENT LOOP — Observation-Only Loop (NO Action Execution)

File: /Users/giulioleone/Sviluppo/onecrawl-dev/packages/onecrawl-rust/crates/onecrawl-cdp/src/agent.rs (lines 11-86)

/// Runs an observe → verify loop against `page` for at most `max_steps`
/// iterations and returns a JSON trace of every step.
///
/// Each iteration records the page URL, title and a count of interactive
/// elements, then (when `verify_js` is given) evaluates it in the page. The
/// goal counts as achieved when the verification result is `true`, or when its
/// string form contains the substring `"true"`. No action is executed on the
/// page; `goal` is only echoed into the trace.
///
/// Returns `status: "goal_achieved"` as soon as verification succeeds,
/// otherwise `status: "max_steps_reached"`. Errors from `evaluate_js` are
/// propagated.
pub async fn agent_loop(
    page: &Page,
    goal: &str,
    max_steps: usize,
    verify_js: Option<&str>,
) -> Result<Value> {
    let mut history = Vec::new();

    for ordinal in 1..=max_steps {
        // OBSERVE: where are we and what is the page called?
        let url = page.url().await.ok().flatten().unwrap_or_default();
        let title_str = evaluate_js(page, "document.title")
            .await?
            .as_str()
            .unwrap_or("")
            .to_string();

        // OBSERVE: count interactive elements, forms and body text length.
        let elems_js = r#"
            (() => {
                const interactive = document.querySelectorAll('a, button, input, select, textarea, [role="button"], [role="link"], [tabindex]');
                const forms = document.querySelectorAll('form');
                const visible = Array.from(interactive).filter(el => {
                    const rect = el.getBoundingClientRect();
                    return rect.width > 0 && rect.height > 0;
                });
                return {
                    total_interactive: interactive.length,
                    visible_interactive: visible.length,
                    forms: forms.length,
                    body_text_length: document.body?.innerText?.length || 0
                };
            })()
        "#;
        let observation = evaluate_js(page, elems_js).await?;

        // VERIFY: a boolean is taken as-is; anything else passes when its
        // textual form contains "true".
        let (verified, verify_result) = match verify_js {
            None => (false, Value::Null),
            Some(script) => {
                let outcome = evaluate_js(page, script).await?;
                let hit = match &outcome {
                    Value::Bool(flag) => *flag,
                    Value::String(text) => text.contains("true"),
                    other => other.to_string().contains("true"),
                };
                (hit, outcome)
            }
        };

        history.push(serde_json::json!({
            "step": ordinal,
            "url": url,
            "title": title_str,
            "observation": observation,
            "goal": goal,
            "verified": verified,
            "verify_result": verify_result
        }));

        if verified {
            return Ok(serde_json::json!({
                "status": "goal_achieved",
                "total_steps": ordinal,
                "goal": goal,
                "steps": history
            }));
        }
    }

    Ok(serde_json::json!({
        "status": "max_steps_reached",
        "total_steps": max_steps,
        "goal": goal,
        "steps": history
    }))
}

KEY CHARACTERISTICS:

  • ✗ No PLANNING phase
  • ✗ No ACTION execution
  • ✓ Only OBSERVE → VERIFY cycle
  • Used for tracing/monitoring, NOT autonomous automation
  • Returns observation snapshots at each step

4. AGENT THINK — Analysis & Recommendations (NO Execution)

File: /Users/giulioleone/Sviluppo/onecrawl-dev/packages/onecrawl-rust/crates/onecrawl-cdp/src/agent.rs (lines 370-440)

/// Analyses the current page and returns a JSON report with recommendations.
///
/// A single JavaScript snippet is evaluated in the page. It collects:
///
/// * `state` – URL, title, `readyState`, scroll position and viewport size;
/// * `interactive` – counts of buttons, links, inputs and forms;
/// * `prominent_ctas` – up to 10 buttons larger than 50×20 px whose top edge
///   is above the bottom of the viewport;
/// * `empty_required` – up to 10 required inputs with no value and non-zero width;
/// * `flags` – `hasLogin`, `hasSearch`, `hasModal`, `hasCaptcha`, `isLoading`;
/// * `page_type` – `login_page`, `search_page`, `modal_open` or `content_page`;
/// * `recommendations` – suggested actions with a priority and a reason.
///
/// The snippet returns the report via `JSON.stringify`, which is parsed back
/// into a `Value` here. No action is performed on the page.
///
/// # Errors
///
/// Propagates the error from `page.evaluate`. A result that cannot be read as
/// a string, or that is not valid JSON, is not an error: an empty JSON object
/// is returned instead.
pub async fn think(page: &Page) -> Result<Value> {
    let js = r#"
        (() => {
            const state = {
                url: window.location.href,
                title: document.title,
                ready: document.readyState,
                scroll: { x: window.scrollX, y: window.scrollY, maxY: document.documentElement.scrollHeight - window.innerHeight },
                viewport: { w: window.innerWidth, h: window.innerHeight }
            };
            
            const buttons = document.querySelectorAll('button, [role="button"]');
            const links = document.querySelectorAll('a[href]');
            const inputs = document.querySelectorAll('input, textarea, select');
            const forms = document.querySelectorAll('form');
            
            // Find prominent CTAs
            const ctas = Array.from(buttons).filter(b => {
                const rect = b.getBoundingClientRect();
                return rect.width > 50 && rect.height > 20 && rect.top < window.innerHeight;
            }).map(b => ({
                text: (b.innerText || '').trim().substring(0, 50),
                tag: b.tagName.toLowerCase(),
                type: b.type || '',
                disabled: b.disabled
            })).slice(0, 10);
            
            // Find empty required inputs
            const emptyInputs = Array.from(inputs).filter(i => {
                return i.required && !i.value && i.getBoundingClientRect().width > 0;
            }).map(i => ({
                name: i.name || i.id || i.placeholder || i.type,
                type: i.type
            })).slice(0, 10);
            
            // Detect page characteristics
            const hasLogin = !!(document.querySelector('[type="password"]') || document.querySelector('form[action*="login"]'));
            const hasSearch = !!(document.querySelector('[type="search"]') || document.querySelector('[name="q"]'));
            const hasModal = !!(document.querySelector('[role="dialog"]') || document.querySelector('.modal.show'));
            const hasCaptcha = !!(document.querySelector('[class*="captcha"]') || document.querySelector('iframe[src*="captcha"]'));
            const isLoading = !!(document.querySelector('.loading, .spinner, [aria-busy="true"]'));
            
            const analysis = {
                page_type: hasLogin ? 'login_page' : hasSearch ? 'search_page' : hasModal ? 'modal_open' : 'content_page',
                state,
                interactive: {
                    buttons: buttons.length,
                    links: links.length,
                    inputs: inputs.length,
                    forms: forms.length
                },
                prominent_ctas: ctas,
                empty_required: emptyInputs,
                flags: { hasLogin, hasSearch, hasModal, hasCaptcha, isLoading },
                recommendations: []
            };
            
            // Build recommendations based on page state
            if (hasCaptcha) analysis.recommendations.push({ action: 'solve_captcha', priority: 'high', reason: 'CAPTCHA detected' });
            if (hasModal) analysis.recommendations.push({ action: 'dismiss_modal', priority: 'high', reason: 'Modal blocking interaction' });
            if (isLoading) analysis.recommendations.push({ action: 'wait', priority: 'high', reason: 'Page still loading' });
            if (emptyInputs.length > 0) analysis.recommendations.push({ action: 'fill_form', priority: 'medium', reason: `${emptyInputs.length} required inputs empty` });
            if (hasLogin) analysis.recommendations.push({ action: 'authenticate', priority: 'medium', reason: 'Login form detected' });
            if (ctas.length > 0) analysis.recommendations.push({ action: 'click_cta', priority: 'low', reason: `${ctas.length} CTAs available` });
            if (state.scroll.maxY > 0 && state.scroll.y === 0) analysis.recommendations.push({ action: 'scroll_explore', priority: 'low', reason: 'Page has scrollable content' });
            
            return JSON.stringify(analysis);
        })()
    "#.to_string();
    
    // Evaluation failures are propagated to the caller.
    let result = page.evaluate(js).await?;
    // The script returns a JSON string; fall back to "{}" if it cannot be read as one.
    let raw: String = result.into_value().unwrap_or_else(|_| "{}".to_string());
    // Malformed JSON degrades to an empty object rather than an error.
    Ok(serde_json::from_str(&raw).unwrap_or(serde_json::json!({})))
}

KEY CHARACTERISTICS:

  • ✓ Identifies page_type: "login_page", "search_page", "modal_open", "content_page"
  • ✓ Detects flags: hasLogin, hasSearch, hasModal, hasCaptcha, isLoading
  • ✓ Provides recommendations with priority levels (high/medium/low)
  • ✗ Does NOT execute any actions
  • Used for analysis and decision-making, not automation

5. AGENT CHAIN — Executes Pre-Written JS Actions

File: /Users/giulioleone/Sviluppo/onecrawl-dev/packages/onecrawl-rust/crates/onecrawl-cdp/src/agent.rs (lines 287-367)

/// Evaluates a list of pre-written JavaScript snippets in order and reports
/// the outcome of each one.
///
/// `on_error` selects what happens when a snippet fails to evaluate:
///
/// * `"retry"` – retry up to `max_retries` more times, 500 ms apart; if every
///   attempt fails the step is recorded as `"failed"` and the chain continues.
/// * `"skip"`  – record the step as `"skipped"` and continue.
/// * `"abort"` – record the step as `"aborted"` and return immediately with
///   `status: "aborted"`.
/// * anything else – record the step as `"failed"` and continue.
///
/// A successful result that cannot be read as a string is recorded as
/// `"null"`. The overall status is `"all_success"` when every recorded step
/// succeeded and `"partial"` otherwise.
pub async fn auto_chain(
    page: &Page,
    actions: &[String],
    on_error: &str,
    max_retries: usize,
) -> Result<Value> {
    let retry_enabled = on_error == "retry";
    let mut results = Vec::new();

    for (index, action_js) in actions.iter().enumerate() {
        let step = index + 1;
        let mut attempts: usize = 0;

        // Evaluate until success, or until retrying is not allowed or exhausted.
        let outcome: std::result::Result<String, String> = loop {
            attempts += 1;
            match page.evaluate(action_js.to_string()).await {
                Ok(val) => break Ok(val.into_value().unwrap_or_else(|_| "null".to_string())),
                Err(e) => {
                    let message = e.to_string();
                    if !retry_enabled || attempts > max_retries {
                        break Err(message);
                    }
                    tokio::time::sleep(std::time::Duration::from_millis(500)).await;
                }
            }
        };

        match outcome {
            Ok(output) => results.push(serde_json::json!({
                "step": step,
                "status": "success",
                "result": output,
                "attempts": attempts
            })),
            Err(last_err) => {
                // The per-step status label depends on the error strategy.
                let label = match on_error {
                    "skip" => "skipped",
                    "abort" => "aborted",
                    _ => "failed",
                };
                results.push(serde_json::json!({
                    "step": step,
                    "status": label,
                    "error": last_err,
                    "attempts": attempts
                }));

                if on_error == "abort" {
                    return Ok(serde_json::json!({
                        "status": "aborted",
                        "completed_steps": index,
                        "total_steps": actions.len(),
                        "results": results
                    }));
                }
            }
        }
    }

    let status = if results.iter().all(|r| r["status"] == "success") {
        "all_success"
    } else {
        "partial"
    };
    Ok(serde_json::json!({
        "status": status,
        "completed_steps": results.len(),
        "total_steps": actions.len(),
        "results": results
    }))
}

ERROR HANDLING STRATEGIES:

  • "skip" (default): Continue to next action if one fails
  • "retry": Retry failed action up to max_retries times
  • "abort": Stop execution immediately on first failure

6. AGENT OBSERVE — Annotated Page State

File: /Users/giulioleone/Sviluppo/onecrawl-dev/packages/onecrawl-rust/crates/onecrawl-cdp/src/agent.rs (lines 160-232)

/// Returns a snapshot of the page with every interactive element annotated.
///
/// The evaluated script selects links, buttons, form controls, elements with
/// common interactive ARIA roles and elements with a `tabindex` other than
/// `-1`. Elements whose bounding box has zero width and zero height are
/// skipped. Each remaining element gets a sequential reference `@e1`, `@e2`,
/// … together with its tag, role, text (first 100 characters), `aria-label`,
/// placeholder, type, href, name, id, value, disabled/checked state and
/// rounded bounds including the centre point.
///
/// The result object also carries `url`, `title`, `viewport`, `scroll`,
/// `element_count` and a `timestamp` taken from `Date.now()`.
///
/// The result, and any error, is whatever `evaluate_js` returns for the script.
pub async fn annotated_observe(page: &Page) -> Result<Value> {
    let js = r#"
        (() => {
            const elements = [];
            const interactive = document.querySelectorAll(
                'a, button, input, select, textarea, [role="button"], [role="link"], [role="checkbox"], [role="radio"], [role="combobox"], [role="menuitem"], [tabindex]:not([tabindex="-1"])'
            );
            
            let ref_counter = 0;
            interactive.forEach(el => {
                const rect = el.getBoundingClientRect();
                if (rect.width === 0 && rect.height === 0) return;
                
                ref_counter++;
                const ref_id = `@e${ref_counter}`;
                
                const tag = el.tagName.toLowerCase();
                const role = el.getAttribute('role') || '';
                const text = (el.innerText || el.textContent || '').trim().substring(0, 100);
                const ariaLabel = el.getAttribute('aria-label') || '';
                const placeholder = el.getAttribute('placeholder') || '';
                const type = el.getAttribute('type') || '';
                const href = el.getAttribute('href') || '';
                const name = el.getAttribute('name') || '';
                const id = el.id || '';
                const value = el.value || '';
                const disabled = el.disabled || false;
                const checked = el.checked || false;
                
                elements.push({
                    ref: ref_id,
                    tag,
                    role,
                    text,
                    aria_label: ariaLabel,
                    placeholder,
                    type,
                    href,
                    name,
                    id,
                    value,
                    disabled,
                    checked,
                    bounds: {
                        x: Math.round(rect.x),
                        y: Math.round(rect.y),
                        width: Math.round(rect.width),
                        height: Math.round(rect.height),
                        center_x: Math.round(rect.x + rect.width / 2),
                        center_y: Math.round(rect.y + rect.height / 2)
                    }
                });
            });
            
            const url = window.location.href;
            const title = document.title;
            const viewport = { width: window.innerWidth, height: window.innerHeight };
            const scroll = { x: window.scrollX, y: window.scrollY, max_y: document.documentElement.scrollHeight - window.innerHeight };
            
            return {
                url,
                title,
                viewport,
                scroll,
                elements,
                element_count: elements.length,
                timestamp: Date.now()
            };
        })()
    "#;
    
    // The script returns a plain object, so the value is handed back unchanged.
    evaluate_js(page, js).await
}

RETURNED STRUCTURE:

  • elements: Array with @e1, @e2, ... references
  • Each element includes: tag, role, text, bounds (x, y, width, height, center_x, center_y)
  • viewport & scroll state
  • timestamp

7. SMART ACTIONS — Intelligent Element Finding

File: /Users/giulioleone/Sviluppo/onecrawl-dev/packages/onecrawl-rust/crates/onecrawl-cdp/src/smart_actions.rs (193 lines)

smart_find() — Multi-Strategy Element Discovery

Uses 5 strategies in order of confidence:

  1. Exact Text Match (confidence: 1.0)
    • Matches button/link text exactly
  2. Fuzzy Text Match (confidence: 0.5-0.8)
    • Partial text matches, similarity-scored
  3. ARIA Role Match (confidence: 0.6)
    • Matches aria-label and role attributes
  4. Attribute Match (confidence: 0.4-0.7)
    • Matches placeholder, name, id, title, alt, aria-label
  5. CSS Selector (confidence: 0.95)
    • If query looks like a CSS selector (starts with . or # or contains [)

Returns deduplicated results sorted by confidence (up to 10 matches).


SHARED AGENT UTILITIES

Agent Memory: AgentMemory

File: /Users/giulioleone/Sviluppo/onecrawl-dev/packages/onecrawl-rust/crates/onecrawl-cdp/src/agent_memory.rs (443 lines)

/// A single record held in the agent's memory.
///
/// NOTE(review): the units of the timestamp fields and the exact expiry
/// semantics are not visible in this excerpt — confirm against
/// `agent_memory.rs` before relying on them.
pub struct MemoryEntry {
    /// Name identifying the entry.
    pub key: String,
    /// Arbitrary JSON payload stored under `key`.
    pub value: serde_json::Value,
    /// Kind of knowledge this entry represents.
    pub category: MemoryCategory,  // PageVisit, ElementPattern, DomainStrategy, RetryKnowledge, etc.
    /// Domain the entry is associated with, if any.
    pub domain: Option<String>,
    /// Creation timestamp (presumably Unix epoch; unit to be confirmed).
    pub created_at: u64,
    /// Timestamp of the most recent access (same unit as `created_at`, presumably).
    pub accessed_at: u64,
    /// Number of times the entry has been accessed.
    pub access_count: u64,
    /// Optional time-to-live in seconds; `None` presumably means no expiry.
    pub ttl_seconds: Option<u64>,
}

/// Classifies what kind of knowledge a `MemoryEntry` holds.
pub enum MemoryCategory {
    /// Track visited pages.
    PageVisit,         // Track visited pages
    /// Learned selectors for elements.
    ElementPattern,    // Learned selectors for elements
    /// Domain-specific strategies (login flows, popups, etc.).
    DomainStrategy,    // Domain-specific strategies (login flows, popups, etc.)
    /// What worked after a failure.
    RetryKnowledge,    // What worked after a failure
    /// User-provided settings.
    UserPreference,    // User-provided settings
    /// Element → selector mappings.
    SelectorMapping,   // Element → selector mappings
    /// Common errors and solutions.
    ErrorPattern,      // Common errors and solutions
    /// User-defined category.
    Custom,            // User-defined
}

Session Context

File: /Users/giulioleone/Sviluppo/onecrawl-dev/packages/onecrawl-rust/crates/onecrawl-cdp/src/agent.rs (lines 235-284)

Stores/retrieves session data in window.__onecrawl_ctx:

  • "set" — Store a key-value pair
  • "get" — Retrieve a value
  • "get_all" — Retrieve all context
  • "clear" — Clear all context

FULL ACTION VOCABULARY (Available to agent auto)

✓ navigate          — Go to URL
✓ click             — Click element (CSS selector-based)
✓ type              — Type text into element
✓ smart_click       — Click element (multi-strategy discovery)
✓ smart_fill        — Fill element with text (multi-strategy discovery)
✓ wait              — Wait for element to appear
✓ snapshot          — Take page snapshot (URL, title, interactive count)
✓ extract           — Extract element content (text, HTML, tag)
✓ assert            — Verify condition (url_contains, title_contains, element_exists, text_contains)
✓ scroll            — Scroll page (up/down/left/right by amount)
✓ screenshot        — Capture screenshot
✓ memory_store      — Store data in persistent memory
✓ memory_recall     — Retrieve data from memory
✓ conditional       — If/else branching

COMPARISON MATRIX

Feature agent auto agent loop agent think agent chain agent observe
Planning ✓ (task_planner)
Observes
Executes ✓ (pre-written)
Recommends (via plan)
Retries ✓ (3x per step) ✓ (configurable)
Memory
Cost tracking
Screenshot ✓ (optional)
Resume

PROBLEMS & RECOMMENDATIONS

Problem 1: agent loop is observation-only

Why: It's designed for tracing/monitoring, not automation. Fix: Use agent auto for autonomous execution.

Problem 2: agent auto planner may be too generic

Why: task_planner::plan_from_goal() uses keyword matching → may misclassify goals. Fix:

  • Add domain-specific context extraction (e.g., recognize "login to Gmail" vs generic "login")
  • Use memory to store successful strategies for repeated domains
  • Add confidence scoring to step generation

Problem 3: Missing dynamic observation during planning

Why: Task planner doesn't see current page state when generating steps. Fix:

  • Pass annotated_observe() result to planner
  • Use current page state to refine action targets
  • Update plan mid-execution based on think() output

Problem 4: Limited error recovery

Why: Retries just repeat same action (no adaptation). Fix:

  • Use fallback actions from PlannedStep::fallback
  • Consult memory for alternative selectors when primary fails
  • Call think() to get recommendations on failure

FILE LOCATIONS SUMMARY

  • Agent Auto (planning + execution): crates/onecrawl-cdp/src/agent_auto.rs
  • Task Planner (goal→steps): crates/onecrawl-cdp/src/task_planner.rs
  • Agent Core (loop, think, chain, observe): crates/onecrawl-cdp/src/agent.rs
  • Memory System: crates/onecrawl-cdp/src/agent_memory.rs
  • Smart Actions: crates/onecrawl-cdp/src/smart_actions.rs
  • CLI Wrapper: crates/onecrawl-cli-rs/src/commands/browser/agent_computer.rs
  • CLI Enum: crates/onecrawl-cli-rs/src/cli/agent.rs

___BEGIN___COMMAND_DONE_MARKER___0