Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions pkg/agent/loop.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,16 @@ func createToolRegistry(workspace string, restrict bool, cfg *config.Config, msg
}
registry.Register(tools.NewWebFetchTool(50000))

// Browser automation tool (agent-browser CLI)
if cfg.Tools.Browser.Enabled {
registry.Register(tools.NewBrowserTool(tools.BrowserToolOptions{
Session: cfg.Tools.Browser.Session,
Headless: cfg.Tools.Browser.Headless,
Timeout: cfg.Tools.Browser.Timeout,
CDPPort: cfg.Tools.Browser.CDPPort,
}))
}

// Hardware tools (I2C, SPI) - Linux only, returns error on other platforms
registry.Register(tools.NewI2CTool())
registry.Register(tools.NewSPITool())
Expand Down
17 changes: 16 additions & 1 deletion pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,17 @@ type WebToolsConfig struct {
DuckDuckGo DuckDuckGoConfig `json:"duckduckgo"`
}

// BrowserConfig holds the settings for the browser automation tool.
// Every field carries a JSON key and a PICOCLAW_TOOLS_BROWSER_* environment
// variable name. DefaultConfig sets Enabled=false, Headless=true, Timeout=30
// and CDPPort=9222; Session is left empty.
type BrowserConfig struct {
// Enabled gates registration of the browser tool in the tool registry.
Enabled bool `json:"enabled" env:"PICOCLAW_TOOLS_BROWSER_ENABLED"`
// Session is forwarded to the tool as its session name.
Session string `json:"session" env:"PICOCLAW_TOOLS_BROWSER_SESSION"`
// Headless is forwarded to the tool; when false the tool passes --headed.
Headless bool `json:"headless" env:"PICOCLAW_TOOLS_BROWSER_HEADLESS"`
// Timeout is the per-command timeout in seconds.
Timeout int `json:"timeout" env:"PICOCLAW_TOOLS_BROWSER_TIMEOUT"`
// CDPPort is the Chrome DevTools Protocol port handed to the tool.
CDPPort int `json:"cdp_port" env:"PICOCLAW_TOOLS_BROWSER_CDP_PORT"`
}

// ToolsConfig groups the per-tool configuration sections: web tools under the
// "web" JSON key and the browser automation tool under "browser".
type ToolsConfig struct {
	Web     WebToolsConfig `json:"web"`
	Browser BrowserConfig  `json:"browser"`
}

func DefaultConfig() *Config {
Expand Down Expand Up @@ -322,6 +331,12 @@ func DefaultConfig() *Config {
MaxResults: 5,
},
},
Browser: BrowserConfig{
Enabled: false,
Headless: true,
Timeout: 30,
CDPPort: 9222,
},
},
Heartbeat: HeartbeatConfig{
Enabled: true,
Expand Down
229 changes: 229 additions & 0 deletions pkg/tools/browser.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
package tools

import (
"bytes"
"context"
"fmt"
"os/exec"
"strings"
"time"
)

// BrowserToolOptions configures the BrowserTool.
//
// NewBrowserTool replaces non-positive Timeout and CDPPort values with 30 and
// 9222 respectively. Headless has no such fallback: it is copied as-is, so the
// zero value (false) results in --headed being passed to agent-browser.
// Callers that want headless operation must set Headless to true explicitly.
type BrowserToolOptions struct {
Session string // Session name for isolation; the --session flag is omitted when empty
Headless bool // Run in headless mode; false (the zero value) runs headed
Timeout int // Command timeout in seconds (values <= 0 fall back to 30)
CDPPort int // Chrome DevTools Protocol port (values <= 0 fall back to 9222)
}
Comment on lines +13 to +18
Copy link

Copilot AI Feb 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BrowserToolOptions says Headless has a default of true, but NewBrowserTool currently uses the bool zero-value (false) when opts.Headless isn’t explicitly set, which makes the tool run in headed mode by default (because buildArgs adds --headed when !t.headless). Either implement an explicit default-to-headless behavior (e.g., tri-state/pointer bool) or update the option comment/tests/docs so the default behavior is unambiguous and consistent.

Copilot uses AI. Check for mistakes.

// BrowserTool wraps the agent-browser CLI for browser automation.
// It delegates all browser complexity to the external `agent-browser` binary,
// which is invoked once per Execute call.
type BrowserTool struct {
session string // passed as --session when non-empty
headless bool // when false, --headed is added to every invocation
timeout time.Duration // deadline applied to each agent-browser invocation
cdpPort int // passed as --cdp on every invocation
}

// NewBrowserTool builds a BrowserTool from opts.
//
// Session and Headless are taken over unchanged. Timeout (seconds) and CDPPort
// are used only when positive; otherwise they fall back to 30 seconds and port
// 9222.
func NewBrowserTool(opts BrowserToolOptions) *BrowserTool {
	const (
		fallbackTimeoutSeconds = 30
		fallbackCDPPort        = 9222
	)

	tool := &BrowserTool{
		session:  opts.Session,
		headless: opts.Headless,
		timeout:  fallbackTimeoutSeconds * time.Second,
		cdpPort:  fallbackCDPPort,
	}
	if opts.Timeout > 0 {
		tool.timeout = time.Duration(opts.Timeout) * time.Second
	}
	if opts.CDPPort > 0 {
		tool.cdpPort = opts.CDPPort
	}
	return tool
}

// Name returns the tool's name, the fixed string "browser".
func (t *BrowserTool) Name() string {
	const toolName = "browser"
	return toolName
}

func (t *BrowserTool) Description() string {
return `Automate a headless browser via agent-browser CLI. Pass the subcommand as 'command'.
Copy link

Copilot AI Feb 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Description() starts with "Automate a headless browser…", but the tool can run headed mode when configured (via --headed when Headless=false). Consider adjusting the wording so it doesn’t promise headless operation unconditionally.

Suggested change
return `Automate a headless browser via agent-browser CLI. Pass the subcommand as 'command'.
return `Automate a browser (headless by default) via the agent-browser CLI. Pass the subcommand as 'command'.

Copilot uses AI. Check for mistakes.
The browser daemon persists between calls — open a page first, then interact with it.

Core workflow:
browser open <url> → Navigate to URL
browser snapshot -i → Get interactive elements with refs (@e1, @e2, ...)
browser click @e2 → Click element by ref
browser fill @e3 "text" → Fill input by ref
browser type @e3 "text" → Type into element
browser press Enter → Press a key
browser screenshot [path] → Take screenshot
browser get text @e1 → Get text content of element
browser get title → Get page title
browser get url → Get current URL
browser eval "js code" → Run JavaScript
browser scroll down [px] → Scroll page
browser wait <selector|ms> → Wait for element or time
browser close → Close browser

CSS selectors also work: browser click "#submit"

Examples:
command: "open https://example.com"
command: "snapshot -i"
command: "click @e2"
command: "fill @e3 \"user@example.com\""
command: "get title"
command: "screenshot /tmp/page.png"
command: "close"`
}

// Parameters returns the JSON-schema-style description of the tool's input:
// an object with a single required string property, "command".
func (t *BrowserTool) Parameters() map[string]interface{} {
	commandSchema := map[string]interface{}{
		"type":        "string",
		"description": "The agent-browser subcommand to execute (e.g. 'open https://example.com', 'snapshot -i', 'click @e2')",
	}

	schema := make(map[string]interface{}, 3)
	schema["type"] = "object"
	schema["properties"] = map[string]interface{}{"command": commandSchema}
	schema["required"] = []string{"command"}
	return schema
}

func (t *BrowserTool) Execute(ctx context.Context, args map[string]interface{}) *ToolResult {
command, ok := args["command"].(string)
if !ok || strings.TrimSpace(command) == "" {
return ErrorResult("command is required (e.g. 'open https://example.com')")
}

// Build the full agent-browser command line
cmdArgs := t.buildArgs(command)
Copy link

Copilot AI Feb 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Execute() only validates that the raw command string is non-empty, but buildArgs/splitCommand can still return an empty subcommand (e.g., command set to "" or just quotes). In that case this will invoke agent-browser with only global flags, which is likely to fail with a confusing error. Consider validating that the parsed cmdArgs has at least 1 token and returning a clear ErrorResult if not.

Suggested change
cmdArgs := t.buildArgs(command)
cmdArgs := t.buildArgs(command)
if len(cmdArgs) == 0 {
return ErrorResult("parsed command is empty; provide an agent-browser subcommand (e.g. 'open https://example.com')")
}

Copilot uses AI. Check for mistakes.

cmdCtx, cancel := context.WithTimeout(ctx, t.timeout)
defer cancel()

cmd := exec.CommandContext(cmdCtx, "agent-browser", cmdArgs...)

var stdout, stderr bytes.Buffer
cmd.Stdout = &stdout
cmd.Stderr = &stderr

err := cmd.Run()
output := stdout.String()
if stderr.Len() > 0 {
errOut := stderr.String()
// Filter out noise from stderr (daemon startup messages, etc.)
if !strings.Contains(errOut, "Daemon started") {
if output != "" {
output += "\n"
}
output += errOut
}
}

if err != nil {
if cmdCtx.Err() == context.DeadlineExceeded {
msg := fmt.Sprintf("Browser command timed out after %v: %s", t.timeout, command)
return &ToolResult{
ForLLM: msg,
ForUser: msg,
IsError: true,
}
}
// Include output even on error — agent-browser often puts useful info in stdout
if output == "" {
output = fmt.Sprintf("command failed: %v", err)
} else {
output += fmt.Sprintf("\nExit code: %v", err)
}
}

if output == "" {
output = "(no output)"
}

// Truncate long output
maxLen := 10000
if len(output) > maxLen {
output = output[:maxLen] + fmt.Sprintf("\n... (truncated, %d more chars)", len(output)-maxLen)
}

if err != nil {
return &ToolResult{
ForLLM: output,
ForUser: output,
IsError: true,
}
}

return &ToolResult{
ForLLM: output,
ForUser: output,
IsError: false,
}
}

// buildArgs assembles the full argument vector for one agent-browser call:
// the tool's global flags first, then the tokens of the user-supplied command.
//
// Global flags, in order: --cdp <port> (always), --session <name> (only when a
// session is configured), --headed (only when the tool is not headless) and
// --json (always, for machine-readable output).
func (t *BrowserTool) buildArgs(command string) []string {
	argv := []string{"--cdp", fmt.Sprint(t.cdpPort)}

	if t.session != "" {
		argv = append(argv, "--session", t.session)
	}
	if !t.headless {
		argv = append(argv, "--headed")
	}
	argv = append(argv, "--json")

	// The command string is tokenised with quote awareness before being appended.
	return append(argv, splitCommand(command)...)
}

// splitCommand splits a command string into arguments, respecting quoted strings.
//
// Arguments are separated by spaces or tabs. Text inside single or double
// quotes is taken literally (including whitespace and the other quote
// character) and the quotes themselves are removed. Quoted and unquoted
// segments that touch each other form one argument. A completed empty quoted
// string ("" or '') yields an empty argument rather than being dropped, so
// commands such as `fill @e3 ""` keep their final parameter. An unterminated
// quote consumes the rest of the input. Backslash escapes are not interpreted.
//
// It returns nil when the input contains no arguments.
func splitCommand(command string) []string {
	var (
		args    []string
		current strings.Builder
		started bool // the pending argument exists, even if it is still empty
		quote   byte // active quote character, or 0 outside quotes
	)

	flush := func() {
		if !started {
			return
		}
		args = append(args, current.String())
		current.Reset()
		started = false
	}

	for i := 0; i < len(command); i++ {
		ch := command[i]
		switch {
		case quote != 0:
			if ch == quote {
				quote = 0
			} else {
				current.WriteByte(ch)
			}
			// Either content was added or a quoted segment was closed; both
			// mean an argument is pending, possibly an empty one.
			started = true
		case ch == '"' || ch == '\'':
			quote = ch
		case ch == ' ' || ch == '\t':
			flush()
		default:
			current.WriteByte(ch)
			started = true
		}
	}
	flush()

	return args
}
Loading