diff --git a/pkg/tools/shell.go b/pkg/tools/shell.go index 67e2ad257a..198be6d3f1 100644 --- a/pkg/tools/shell.go +++ b/pkg/tools/shell.go @@ -5,6 +5,7 @@ import ( "context" "errors" "fmt" + "net/url" "os" "os/exec" "path/filepath" @@ -12,6 +13,8 @@ import ( "runtime" "strings" "time" + "unicode" + "unicode/utf8" "github.com/sipeed/picoclaw/pkg/config" "github.com/sipeed/picoclaw/pkg/constants" @@ -78,9 +81,13 @@ var ( regexp.MustCompile(`\bsource\s+.*\.sh\b`), } - // absolutePathPattern matches absolute file paths in commands (Unix and Windows). + // absolutePathPattern matches path-like substrings in commands (Unix and Windows). + // A separate boundary check is applied before treating a match as a filesystem path. absolutePathPattern = regexp.MustCompile(`[A-Za-z]:\\[^\\\"']+|/[^\s\"']+`) + // fileURIPathPattern matches file:// URIs that may point at local or UNC paths. + fileURIPathPattern = regexp.MustCompile(`file://[^\s\"']+`) + // safePaths are kernel pseudo-devices that are always safe to reference in // commands, regardless of workspace restriction. They contain no user data // and cannot cause destructive writes. @@ -373,9 +380,54 @@ func (t *ExecTool) guardCommand(command, cwd string) string { return "" } - matches := absolutePathPattern.FindAllString(cmd, -1) + fileURIMatches := fileURIPathPattern.FindAllString(cmd, -1) + for _, rawURI := range fileURIMatches { + p, ok := fileURIToPath(rawURI, cwdPath) + if !ok || safePaths[p] { + continue + } + + rel, err := filepath.Rel(cwdPath, p) + if err != nil { + continue + } + + if strings.HasPrefix(rel, "..") { + return "Command blocked by safety guard (path outside working dir)" + } + } + + // Web URL schemes whose path components (starting with //) should be exempt + // from workspace sandbox checks. file: is intentionally excluded so that + // file:// URIs are still validated against the workspace boundary. + webSchemes := []string{"http:", "https:", "ftp:", "ftps:", "sftp:", "ssh:", "git:"} + + matchIndices := absolutePathPattern.FindAllStringIndex(cmd, -1) + + for _, loc := range matchIndices { + raw := cmd[loc[0]:loc[1]] + + // Skip URL path components that look like they're from web URLs. + // When a URL like "https://github.com" is parsed, the regex captures + // "//github.com" as a match (the path portion after "https:"). + // Use the exact match position (loc[0]) so that duplicate //path substrings + // in the same command are each evaluated at their own position. + if strings.HasPrefix(raw, "//") && loc[0] > 0 { + before := cmd[:loc[0]] + isWebURL := false + + for _, scheme := range webSchemes { + if strings.HasSuffix(before, scheme) { + isWebURL = true + break + } + } + + if isWebURL { + continue + } + } - for _, raw := range matches { p, err := filepath.Abs(raw) if err != nil { continue @@ -385,6 +437,18 @@ func (t *ExecTool) guardCommand(command, cwd string) string { continue } + if !isPathBoundary(cmd, loc[0]) { + continue + } + + if runtime.GOOS == "windows" { + pathVolume := filepath.VolumeName(p) + cwdVolume := filepath.VolumeName(cwdPath) + if pathVolume != "" && cwdVolume != "" && !strings.EqualFold(pathVolume, cwdVolume) { + return "Command blocked by safety guard (path outside working dir)" + } + } + rel, err := filepath.Rel(cwdPath, p) if err != nil { continue @@ -399,6 +463,58 @@ func (t *ExecTool) guardCommand(command, cwd string) string { return "" } +func isPathBoundary(command string, start int) bool { + if start <= 0 { + return true + } + + r, _ := utf8.DecodeLastRuneInString(command[:start]) + if unicode.IsSpace(r) { + return true + } + + return strings.ContainsRune(`"'=<>|&;()[]{},`, r) +} + +func fileURIToPath(rawURI, cwdPath string) (string, bool) { + u, err := url.Parse(rawURI) + if err != nil || u.Scheme != "file" { + return "", false + } + + path := u.Path + if path == "" { + path = u.Opaque + } + + if path == "" { + return "", false + } + + if runtime.GOOS == "windows" && len(path) >= 3 && path[0] == '/' && path[2] == ':' { + path = path[1:] + } + + if u.Host != "" && u.Host != "localhost" { + path = "//" + u.Host + path + } + + path = filepath.FromSlash(path) + + if runtime.GOOS == "windows" && + filepath.VolumeName(path) == "" && + (strings.HasPrefix(path, `\`) || strings.HasPrefix(path, `/`)) { + path = filepath.VolumeName(cwdPath) + path + } + + absPath, err := filepath.Abs(path) + if err != nil { + return "", false + } + + return absPath, true +} + func (t *ExecTool) SetTimeout(timeout time.Duration) { t.timeout = timeout } diff --git a/pkg/tools/shell_test.go b/pkg/tools/shell_test.go index 90265e5bdc..b3fc78b0bb 100644 --- a/pkg/tools/shell_test.go +++ b/pkg/tools/shell_test.go @@ -489,6 +489,53 @@ func TestShellTool_SafePathsInWorkspaceRestriction(t *testing.T) { } } +func TestShellTool_GuardCommand_IgnoresURLPathSegments(t *testing.T) { + tmpDir := t.TempDir() + tool, err := NewExecTool(tmpDir, true) + if err != nil { + t.Fatalf("unable to configure exec tool: %s", err) + } + + commands := []string{ + `curl -s "wttr.in/Beijing?T"`, + `curl -s https://example.com/api/v1/weather`, + } + + for _, cmd := range commands { + if got := tool.guardCommand(cmd, tmpDir); got != "" { + t.Fatalf("guardCommand(%q) = %q, want empty", cmd, got) + } + } +} + +func TestShellTool_GuardCommand_BlocksAbsolutePathOutsideWorkspace(t *testing.T) { + root := t.TempDir() + workspace := filepath.Join(root, "workspace") + outsideDir := filepath.Join(root, "outside") + outsideFile := filepath.Join(outsideDir, "secret.txt") + if err := os.MkdirAll(workspace, 0o755); err != nil { + t.Fatalf("failed to create workspace: %v", err) + } + if err := os.MkdirAll(outsideDir, 0o755); err != nil { + t.Fatalf("failed to create outside dir: %v", err) + } + if err := os.WriteFile(outsideFile, []byte("secret"), 0o644); err != nil { + t.Fatalf("failed to create outside file: %v", err) + } + + tool, err := NewExecTool(workspace, true) + if err != nil { + t.Fatalf("unable to configure exec tool: %s", err) + } + + if got := tool.guardCommand( + `cat "`+outsideFile+`"`, + workspace, + ); !strings.Contains(got, "path outside working dir") { + t.Fatalf("guardCommand should block outside path, got %q", got) + } +} + // TestShellTool_CustomAllowPatterns verifies that custom allow patterns exempt // commands from deny pattern checks. func TestShellTool_CustomAllowPatterns(t *testing.T) {