From 7e55680c6b21fe1c149e95e03681a06abda32681 Mon Sep 17 00:00:00 2001 From: ItsT0ng Date: Sat, 28 Feb 2026 22:46:06 +1100 Subject: [PATCH 1/5] fix(pkg/providers):do regex precompile insteadd on the fly --- pkg/providers/error_classifier.go | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/pkg/providers/error_classifier.go b/pkg/providers/error_classifier.go index a0f003006c..89842f3394 100644 --- a/pkg/providers/error_classifier.go +++ b/pkg/providers/error_classifier.go @@ -6,6 +6,12 @@ import ( "strings" ) +// Common patterns in Go HTTP error messages +var httpStatusPatterns = []*regexp.Regexp{ + regexp.MustCompile(`status[:\s]+(\d{3})`), + regexp.MustCompile(`HTTP[/\s]+\d*\.?\d*\s+(\d{3})`), +} + // errorPattern defines a single pattern (string or regex) for error classification. type errorPattern struct { substring string @@ -200,18 +206,11 @@ func classifyByMessage(msg string) FailoverReason { // extractHTTPStatus extracts an HTTP status code from an error message. // Looks for patterns like "status: 429", "status 429", "HTTP 429", or standalone "429". func extractHTTPStatus(msg string) int { - // Common patterns in Go HTTP error messages - patterns := []*regexp.Regexp{ - regexp.MustCompile(`status[:\s]+(\d{3})`), - regexp.MustCompile(`HTTP[/\s]+\d*\.?\d*\s+(\d{3})`), - } - - for _, p := range patterns { + for _, p := range httpStatusPatterns { if m := p.FindStringSubmatch(msg); len(m) > 1 { return parseDigits(m[1]) } } - return 0 } From a45408ff670ec63240c4d020541b3f87b183f7a4 Mon Sep 17 00:00:00 2001 From: ItsT0ng Date: Sat, 28 Feb 2026 23:06:23 +1100 Subject: [PATCH 2/5] fix(providers): replace HTTP-specific regex with standalone status code matcher The precompiled HTTP regex used uppercase "HTTP" which never matched because ClassifyError lowercases the input. Replace it with a case-insensitive word-boundary pattern that matches any standalone 3-digit status code (300-599), which also subsumes the HTTP/x.x case. Add test case for standalone status code extraction. --- pkg/providers/error_classifier.go | 4 ++-- pkg/providers/error_classifier_test.go | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pkg/providers/error_classifier.go b/pkg/providers/error_classifier.go index 89842f3394..3b13a7f04a 100644 --- a/pkg/providers/error_classifier.go +++ b/pkg/providers/error_classifier.go @@ -9,7 +9,7 @@ import ( // Common patterns in Go HTTP error messages var httpStatusPatterns = []*regexp.Regexp{ regexp.MustCompile(`status[:\s]+(\d{3})`), - regexp.MustCompile(`HTTP[/\s]+\d*\.?\d*\s+(\d{3})`), + regexp.MustCompile(`\b([3-5]\d{2})\b`), } // errorPattern defines a single pattern (string or regex) for error classification. @@ -204,7 +204,7 @@ func classifyByMessage(msg string) FailoverReason { } // extractHTTPStatus extracts an HTTP status code from an error message. -// Looks for patterns like "status: 429", "status 429", "HTTP 429", or standalone "429". +// Looks for patterns like "status: 429", "status 429", "HTTP/1.1 429", "HTTP 429", or standalone "429". func extractHTTPStatus(msg string) int { for _, p := range httpStatusPatterns { if m := p.FindStringSubmatch(msg); len(m) > 1 { diff --git a/pkg/providers/error_classifier_test.go b/pkg/providers/error_classifier_test.go index 865aea57ad..82db7d31df 100644 --- a/pkg/providers/error_classifier_test.go +++ b/pkg/providers/error_classifier_test.go @@ -306,6 +306,7 @@ func TestExtractHTTPStatus(t *testing.T) { {"status: 429 rate limited", 429}, {"status 401 unauthorized", 401}, {"HTTP/1.1 502 Bad Gateway", 502}, + {"error 429", 429}, {"no status code here", 0}, {"random number 12345", 0}, } From c3edb3d1d33462e654d3c2b87540ee26146b49f1 Mon Sep 17 00:00:00 2001 From: ItsT0ng Date: Sat, 28 Feb 2026 23:15:12 +1100 Subject: [PATCH 3/5] fix(providers): restore http regex and add standalone status code matcher Restore the http-prefixed regex (without unnecessary (?i) flag since input is already lowercased by ClassifyError) as a mid-priority pattern to reduce false positives. Add a standalone word-boundary matcher as a fallback for bare status codes like "429". Fix test to use lowercased input matching the actual calling convention. --- pkg/providers/error_classifier.go | 3 ++- pkg/providers/error_classifier_test.go | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pkg/providers/error_classifier.go b/pkg/providers/error_classifier.go index 3b13a7f04a..fd9bf1e81f 100644 --- a/pkg/providers/error_classifier.go +++ b/pkg/providers/error_classifier.go @@ -9,6 +9,7 @@ import ( // Common patterns in Go HTTP error messages var httpStatusPatterns = []*regexp.Regexp{ regexp.MustCompile(`status[:\s]+(\d{3})`), + regexp.MustCompile(`http[/\s]+\d*\.?\d*\s+(\d{3})`), regexp.MustCompile(`\b([3-5]\d{2})\b`), } @@ -204,7 +205,7 @@ func classifyByMessage(msg string) FailoverReason { } // extractHTTPStatus extracts an HTTP status code from an error message. -// Looks for patterns like "status: 429", "status 429", "HTTP/1.1 429", "HTTP 429", or standalone "429". +// Looks for patterns like "status: 429", "status 429", "http/1.1 429", "http 429", or standalone "429". func extractHTTPStatus(msg string) int { for _, p := range httpStatusPatterns { if m := p.FindStringSubmatch(msg); len(m) > 1 { diff --git a/pkg/providers/error_classifier_test.go b/pkg/providers/error_classifier_test.go index 82db7d31df..67d9af62b2 100644 --- a/pkg/providers/error_classifier_test.go +++ b/pkg/providers/error_classifier_test.go @@ -305,7 +305,7 @@ func TestExtractHTTPStatus(t *testing.T) { }{ {"status: 429 rate limited", 429}, {"status 401 unauthorized", 401}, - {"HTTP/1.1 502 Bad Gateway", 502}, + {"http/1.1 502 bad gateway", 502}, {"error 429", 429}, {"no status code here", 0}, {"random number 12345", 0}, From bdc8ef1f9d3a3372dc4ccc638d8406b807edf41a Mon Sep 17 00:00:00 2001 From: ItsT0ng Date: Sun, 1 Mar 2026 00:48:10 +1100 Subject: [PATCH 4/5] perf(tools): move path regex compilation from per-call to package init The path regex in guardCommand was compiled on every call. Hoist it to a package-level var (absolutePathPattern) alongside defaultDenyPatterns in a single var block, so it is compiled once at init time. --- pkg/tools/shell.go | 96 ++++++++++++++++++++++++---------------------- 1 file changed, 50 insertions(+), 46 deletions(-) diff --git a/pkg/tools/shell.go b/pkg/tools/shell.go index b52433b6f8..1b6fbf4ac2 100644 --- a/pkg/tools/shell.go +++ b/pkg/tools/shell.go @@ -24,50 +24,55 @@ type ExecTool struct { restrictToWorkspace bool } -var defaultDenyPatterns = []*regexp.Regexp{ - regexp.MustCompile(`\brm\s+-[rf]{1,2}\b`), - regexp.MustCompile(`\bdel\s+/[fq]\b`), - regexp.MustCompile(`\brmdir\s+/s\b`), - regexp.MustCompile(`\b(format|mkfs|diskpart)\b\s`), // Match disk wiping commands (must be followed by space/args) - regexp.MustCompile(`\bdd\s+if=`), - regexp.MustCompile(`>\s*/dev/sd[a-z]\b`), // Block writes to disk devices (but allow /dev/null) - regexp.MustCompile(`\b(shutdown|reboot|poweroff)\b`), - regexp.MustCompile(`:\(\)\s*\{.*\};\s*:`), - regexp.MustCompile(`\$\([^)]+\)`), - regexp.MustCompile(`\$\{[^}]+\}`), - regexp.MustCompile("`[^`]+`"), - regexp.MustCompile(`\|\s*sh\b`), - regexp.MustCompile(`\|\s*bash\b`), - regexp.MustCompile(`;\s*rm\s+-[rf]`), - regexp.MustCompile(`&&\s*rm\s+-[rf]`), - regexp.MustCompile(`\|\|\s*rm\s+-[rf]`), - regexp.MustCompile(`>\s*/dev/null\s*>&?\s*\d?`), - regexp.MustCompile(`<<\s*EOF`), - regexp.MustCompile(`\$\(\s*cat\s+`), - regexp.MustCompile(`\$\(\s*curl\s+`), - regexp.MustCompile(`\$\(\s*wget\s+`), - regexp.MustCompile(`\$\(\s*which\s+`), - regexp.MustCompile(`\bsudo\b`), - regexp.MustCompile(`\bchmod\s+[0-7]{3,4}\b`), - regexp.MustCompile(`\bchown\b`), - regexp.MustCompile(`\bpkill\b`), - regexp.MustCompile(`\bkillall\b`), - regexp.MustCompile(`\bkill\s+-[9]\b`), - regexp.MustCompile(`\bcurl\b.*\|\s*(sh|bash)`), - regexp.MustCompile(`\bwget\b.*\|\s*(sh|bash)`), - regexp.MustCompile(`\bnpm\s+install\s+-g\b`), - regexp.MustCompile(`\bpip\s+install\s+--user\b`), - regexp.MustCompile(`\bapt\s+(install|remove|purge)\b`), - regexp.MustCompile(`\byum\s+(install|remove)\b`), - regexp.MustCompile(`\bdnf\s+(install|remove)\b`), - regexp.MustCompile(`\bdocker\s+run\b`), - regexp.MustCompile(`\bdocker\s+exec\b`), - regexp.MustCompile(`\bgit\s+push\b`), - regexp.MustCompile(`\bgit\s+force\b`), - regexp.MustCompile(`\bssh\b.*@`), - regexp.MustCompile(`\beval\b`), - regexp.MustCompile(`\bsource\s+.*\.sh\b`), -} +var ( + defaultDenyPatterns = []*regexp.Regexp{ + regexp.MustCompile(`\brm\s+-[rf]{1,2}\b`), + regexp.MustCompile(`\bdel\s+/[fq]\b`), + regexp.MustCompile(`\brmdir\s+/s\b`), + regexp.MustCompile(`\b(format|mkfs|diskpart)\b\s`), // Match disk wiping commands (must be followed by space/args) + regexp.MustCompile(`\bdd\s+if=`), + regexp.MustCompile(`>\s*/dev/sd[a-z]\b`), // Block writes to disk devices (but allow /dev/null) + regexp.MustCompile(`\b(shutdown|reboot|poweroff)\b`), + regexp.MustCompile(`:\(\)\s*\{.*\};\s*:`), + regexp.MustCompile(`\$\([^)]+\)`), + regexp.MustCompile(`\$\{[^}]+\}`), + regexp.MustCompile("`[^`]+`"), + regexp.MustCompile(`\|\s*sh\b`), + regexp.MustCompile(`\|\s*bash\b`), + regexp.MustCompile(`;\s*rm\s+-[rf]`), + regexp.MustCompile(`&&\s*rm\s+-[rf]`), + regexp.MustCompile(`\|\|\s*rm\s+-[rf]`), + regexp.MustCompile(`>\s*/dev/null\s*>&?\s*\d?`), + regexp.MustCompile(`<<\s*EOF`), + regexp.MustCompile(`\$\(\s*cat\s+`), + regexp.MustCompile(`\$\(\s*curl\s+`), + regexp.MustCompile(`\$\(\s*wget\s+`), + regexp.MustCompile(`\$\(\s*which\s+`), + regexp.MustCompile(`\bsudo\b`), + regexp.MustCompile(`\bchmod\s+[0-7]{3,4}\b`), + regexp.MustCompile(`\bchown\b`), + regexp.MustCompile(`\bpkill\b`), + regexp.MustCompile(`\bkillall\b`), + regexp.MustCompile(`\bkill\s+-[9]\b`), + regexp.MustCompile(`\bcurl\b.*\|\s*(sh|bash)`), + regexp.MustCompile(`\bwget\b.*\|\s*(sh|bash)`), + regexp.MustCompile(`\bnpm\s+install\s+-g\b`), + regexp.MustCompile(`\bpip\s+install\s+--user\b`), + regexp.MustCompile(`\bapt\s+(install|remove|purge)\b`), + regexp.MustCompile(`\byum\s+(install|remove)\b`), + regexp.MustCompile(`\bdnf\s+(install|remove)\b`), + regexp.MustCompile(`\bdocker\s+run\b`), + regexp.MustCompile(`\bdocker\s+exec\b`), + regexp.MustCompile(`\bgit\s+push\b`), + regexp.MustCompile(`\bgit\s+force\b`), + regexp.MustCompile(`\bssh\b.*@`), + regexp.MustCompile(`\beval\b`), + regexp.MustCompile(`\bsource\s+.*\.sh\b`), + } + + // absolutePathPattern matches absolute file paths in commands (Unix and Windows). + absolutePathPattern = regexp.MustCompile(`[A-Za-z]:\\[^\\\"']+|/[^\s\"']+`) +) func NewExecTool(workingDir string, restrict bool) (*ExecTool, error) { return NewExecToolWithConfig(workingDir, restrict, nil) @@ -287,8 +292,7 @@ func (t *ExecTool) guardCommand(command, cwd string) string { return "" } - pathPattern := regexp.MustCompile(`[A-Za-z]:\\[^\\\"']+|/[^\s\"']+`) - matches := pathPattern.FindAllString(cmd, -1) + matches := absolutePathPattern.FindAllString(cmd, -1) for _, raw := range matches { p, err := filepath.Abs(raw) From 10f142c5a6a5c0fcf4be2c164641ec3c0a0f88cc Mon Sep 17 00:00:00 2001 From: ItsT0ng Date: Sun, 1 Mar 2026 23:04:49 +1100 Subject: [PATCH 5/5] style(tools): move inline comment to fix golines formatting error --- pkg/tools/shell.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pkg/tools/shell.go b/pkg/tools/shell.go index 1b6fbf4ac2..2fd22353fb 100644 --- a/pkg/tools/shell.go +++ b/pkg/tools/shell.go @@ -29,7 +29,10 @@ var ( regexp.MustCompile(`\brm\s+-[rf]{1,2}\b`), regexp.MustCompile(`\bdel\s+/[fq]\b`), regexp.MustCompile(`\brmdir\s+/s\b`), - regexp.MustCompile(`\b(format|mkfs|diskpart)\b\s`), // Match disk wiping commands (must be followed by space/args) + // Match disk wiping commands (must be followed by space/args) + regexp.MustCompile( + `\b(format|mkfs|diskpart)\b\s`, + ), regexp.MustCompile(`\bdd\s+if=`), regexp.MustCompile(`>\s*/dev/sd[a-z]\b`), // Block writes to disk devices (but allow /dev/null) regexp.MustCompile(`\b(shutdown|reboot|poweroff)\b`),