Skip to content

Commit 83dbff7

Browse files
authored
Merge pull request #883 from afjcjsbx/fix/max-payload-size-in-web-fetch
fix: max payload size in web fetch
2 parents fc9f1ec + e066730 commit 83dbff7

File tree

5 files changed

+123
-27
lines changed

5 files changed

+123
-27
lines changed

pkg/agent/loop.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ func registerSharedTools(
119119
} else if searchTool != nil {
120120
agent.Tools.Register(searchTool)
121121
}
122-
fetchTool, err := tools.NewWebFetchToolWithProxy(50000, cfg.Tools.Web.Proxy)
122+
fetchTool, err := tools.NewWebFetchToolWithProxy(50000, cfg.Tools.Web.Proxy, cfg.Tools.Web.FetchLimitBytes)
123123
if err != nil {
124124
logger.ErrorCF("agent", "Failed to create web fetch tool", map[string]any{"error": err.Error()})
125125
} else {

pkg/config/config.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -523,7 +523,8 @@ type WebToolsConfig struct {
523523
Perplexity PerplexityConfig `json:"perplexity"`
524524
// Proxy is an optional proxy URL for web tools (http/https/socks5/socks5h).
525525
// For authenticated proxies, prefer HTTP_PROXY/HTTPS_PROXY env vars instead of embedding credentials in config.
526-
Proxy string `json:"proxy,omitempty" env:"PICOCLAW_TOOLS_WEB_PROXY"`
526+
Proxy string `json:"proxy,omitempty" env:"PICOCLAW_TOOLS_WEB_PROXY"`
527+
FetchLimitBytes int64 `json:"fetch_limit_bytes,omitempty" env:"PICOCLAW_TOOLS_WEB_FETCH_LIMIT_BYTES"`
527528
}
528529

529530
type CronToolsConfig struct {

pkg/config/defaults.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -315,7 +315,8 @@ func DefaultConfig() *Config {
315315
Interval: 5,
316316
},
317317
Web: WebToolsConfig{
318-
Proxy: "",
318+
Proxy: "",
319+
FetchLimitBytes: 10 * 1024 * 1024, // 10MB by default
319320
Brave: BraveConfig{
320321
Enabled: false,
321322
APIKey: "",

pkg/tools/web.go

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"bytes"
55
"context"
66
"encoding/json"
7+
"errors"
78
"fmt"
89
"io"
910
"net/http"
@@ -519,18 +520,18 @@ func (t *WebSearchTool) Execute(ctx context.Context, args map[string]any) *ToolR
519520
}
520521

521522
type WebFetchTool struct {
522-
maxChars int
523-
proxy string
524-
client *http.Client
523+
maxChars int
524+
proxy string
525+
client *http.Client
526+
fetchLimitBytes int64
525527
}
526528

527-
func NewWebFetchTool(maxChars int) *WebFetchTool {
529+
func NewWebFetchTool(maxChars int, fetchLimitBytes int64) (*WebFetchTool, error) {
528530
// createHTTPClient cannot fail with an empty proxy string.
529-
tool, _ := NewWebFetchToolWithProxy(maxChars, "")
530-
return tool
531+
return NewWebFetchToolWithProxy(maxChars, "", fetchLimitBytes)
531532
}
532533

533-
func NewWebFetchToolWithProxy(maxChars int, proxy string) (*WebFetchTool, error) {
534+
func NewWebFetchToolWithProxy(maxChars int, proxy string, fetchLimitBytes int64) (*WebFetchTool, error) {
534535
if maxChars <= 0 {
535536
maxChars = defaultMaxChars
536537
}
@@ -544,10 +545,14 @@ func NewWebFetchToolWithProxy(maxChars int, proxy string) (*WebFetchTool, error)
544545
}
545546
return nil
546547
}
548+
if fetchLimitBytes <= 0 {
549+
fetchLimitBytes = 10 * 1024 * 1024 // Security Fallback
550+
}
547551
return &WebFetchTool{
548-
maxChars: maxChars,
549-
proxy: proxy,
550-
client: client,
552+
maxChars: maxChars,
553+
proxy: proxy,
554+
client: client,
555+
fetchLimitBytes: fetchLimitBytes,
551556
}, nil
552557
}
553558

@@ -614,10 +619,17 @@ func (t *WebFetchTool) Execute(ctx context.Context, args map[string]any) *ToolRe
614619
if err != nil {
615620
return ErrorResult(fmt.Sprintf("request failed: %v", err))
616621
}
622+
623+
resp.Body = http.MaxBytesReader(nil, resp.Body, t.fetchLimitBytes)
624+
617625
defer resp.Body.Close()
618626

619627
body, err := io.ReadAll(resp.Body)
620628
if err != nil {
629+
var maxBytesErr *http.MaxBytesError
630+
if errors.As(err, &maxBytesErr) {
631+
return ErrorResult(fmt.Sprintf("failed to read response: size exceeded %d bytes limit", t.fetchLimitBytes))
632+
}
621633
return ErrorResult(fmt.Sprintf("failed to read response: %v", err))
622634
}
623635

pkg/tools/web_test.go

Lines changed: 96 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,21 @@
11
package tools
22

33
import (
4+
"bytes"
45
"context"
56
"encoding/json"
7+
"fmt"
68
"net/http"
79
"net/http/httptest"
810
"strings"
911
"testing"
1012
"time"
13+
14+
"github.com/sipeed/picoclaw/pkg/logger"
1115
)
1216

17+
const testFetchLimit = int64(10 * 1024 * 1024)
18+
1319
// TestWebTool_WebFetch_Success verifies successful URL fetching
1420
func TestWebTool_WebFetch_Success(t *testing.T) {
1521
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
@@ -19,7 +25,11 @@ func TestWebTool_WebFetch_Success(t *testing.T) {
1925
}))
2026
defer server.Close()
2127

22-
tool := NewWebFetchTool(50000)
28+
tool, err := NewWebFetchTool(50000, testFetchLimit)
29+
if err != nil {
30+
t.Fatalf("Failed to create web fetch tool: %v", err)
31+
}
32+
2333
ctx := context.Background()
2434
args := map[string]any{
2535
"url": server.URL,
@@ -55,7 +65,11 @@ func TestWebTool_WebFetch_JSON(t *testing.T) {
5565
}))
5666
defer server.Close()
5767

58-
tool := NewWebFetchTool(50000)
68+
tool, err := NewWebFetchTool(50000, testFetchLimit)
69+
if err != nil {
70+
logger.ErrorCF("agent", "Failed to create web fetch tool", map[string]any{"error": err.Error()})
71+
}
72+
5973
ctx := context.Background()
6074
args := map[string]any{
6175
"url": server.URL,
@@ -76,7 +90,11 @@ func TestWebTool_WebFetch_JSON(t *testing.T) {
7690

7791
// TestWebTool_WebFetch_InvalidURL verifies error handling for invalid URL
7892
func TestWebTool_WebFetch_InvalidURL(t *testing.T) {
79-
tool := NewWebFetchTool(50000)
93+
tool, err := NewWebFetchTool(50000, testFetchLimit)
94+
if err != nil {
95+
logger.ErrorCF("agent", "Failed to create web fetch tool", map[string]any{"error": err.Error()})
96+
}
97+
8098
ctx := context.Background()
8199
args := map[string]any{
82100
"url": "not-a-valid-url",
@@ -97,7 +115,11 @@ func TestWebTool_WebFetch_InvalidURL(t *testing.T) {
97115

98116
// TestWebTool_WebFetch_UnsupportedScheme verifies error handling for non-http URLs
99117
func TestWebTool_WebFetch_UnsupportedScheme(t *testing.T) {
100-
tool := NewWebFetchTool(50000)
118+
tool, err := NewWebFetchTool(50000, testFetchLimit)
119+
if err != nil {
120+
logger.ErrorCF("agent", "Failed to create web fetch tool", map[string]any{"error": err.Error()})
121+
}
122+
101123
ctx := context.Background()
102124
args := map[string]any{
103125
"url": "ftp://example.com/file.txt",
@@ -118,7 +140,11 @@ func TestWebTool_WebFetch_UnsupportedScheme(t *testing.T) {
118140

119141
// TestWebTool_WebFetch_MissingURL verifies error handling for missing URL
120142
func TestWebTool_WebFetch_MissingURL(t *testing.T) {
121-
tool := NewWebFetchTool(50000)
143+
tool, err := NewWebFetchTool(50000, testFetchLimit)
144+
if err != nil {
145+
logger.ErrorCF("agent", "Failed to create web fetch tool", map[string]any{"error": err.Error()})
146+
}
147+
122148
ctx := context.Background()
123149
args := map[string]any{}
124150

@@ -146,7 +172,11 @@ func TestWebTool_WebFetch_Truncation(t *testing.T) {
146172
}))
147173
defer server.Close()
148174

149-
tool := NewWebFetchTool(1000) // Limit to 1000 chars
175+
tool, err := NewWebFetchTool(1000, testFetchLimit) // Limit to 1000 chars
176+
if err != nil {
177+
logger.ErrorCF("agent", "Failed to create web fetch tool", map[string]any{"error": err.Error()})
178+
}
179+
150180
ctx := context.Background()
151181
args := map[string]any{
152182
"url": server.URL,
@@ -174,6 +204,49 @@ func TestWebTool_WebFetch_Truncation(t *testing.T) {
174204
}
175205
}
176206

207+
func TestWebFetchTool_PayloadTooLarge(t *testing.T) {
208+
// Create a mock HTTP server
209+
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
210+
w.Header().Set("Content-Type", "text/html")
211+
w.WriteHeader(http.StatusOK)
212+
213+
// Generate a payload intentionally larger than our limit.
214+
// Limit: 10 * 1024 * 1024 (10MB). We generate 10MB + 100 bytes of the letter 'A'.
215+
largeData := bytes.Repeat([]byte("A"), int(testFetchLimit)+100)
216+
217+
w.Write(largeData)
218+
}))
219+
// Ensure the server is shut down at the end of the test
220+
defer ts.Close()
221+
222+
// Initialize the tool
223+
tool, err := NewWebFetchTool(50000, testFetchLimit)
224+
if err != nil {
225+
logger.ErrorCF("agent", "Failed to create web fetch tool", map[string]any{"error": err.Error()})
226+
}
227+
228+
// Prepare the arguments pointing to the URL of our local mock server
229+
args := map[string]any{
230+
"url": ts.URL,
231+
}
232+
233+
// Execute the tool
234+
ctx := context.Background()
235+
result := tool.Execute(ctx, args)
236+
237+
// Assuming ErrorResult sets the ForLLM field with the error text.
238+
if result == nil {
239+
t.Fatal("expected a ToolResult, got nil")
240+
}
241+
242+
// Search for the exact error string we set earlier in the Execute method
243+
expectedErrorMsg := fmt.Sprintf("size exceeded %d bytes limit", testFetchLimit)
244+
245+
if !strings.Contains(result.ForLLM, expectedErrorMsg) && !strings.Contains(result.ForUser, expectedErrorMsg) {
246+
t.Errorf("test failed: expected error %q, but got: %+v", expectedErrorMsg, result)
247+
}
248+
}
249+
177250
// TestWebTool_WebSearch_NoApiKey verifies that no tool is created when API key is missing
178251
func TestWebTool_WebSearch_NoApiKey(t *testing.T) {
179252
tool, err := NewWebSearchTool(WebSearchToolOptions{BraveEnabled: true, BraveAPIKey: ""})
@@ -224,7 +297,11 @@ func TestWebTool_WebFetch_HTMLExtraction(t *testing.T) {
224297
}))
225298
defer server.Close()
226299

227-
tool := NewWebFetchTool(50000)
300+
tool, err := NewWebFetchTool(50000, testFetchLimit)
301+
if err != nil {
302+
logger.ErrorCF("agent", "Failed to create web fetch tool", map[string]any{"error": err.Error()})
303+
}
304+
228305
ctx := context.Background()
229306
args := map[string]any{
230307
"url": server.URL,
@@ -325,7 +402,11 @@ func TestWebFetchTool_extractText(t *testing.T) {
325402

326403
// TestWebTool_WebFetch_MissingDomain verifies error handling for URL without domain
327404
func TestWebTool_WebFetch_MissingDomain(t *testing.T) {
328-
tool := NewWebFetchTool(50000)
405+
tool, err := NewWebFetchTool(50000, testFetchLimit)
406+
if err != nil {
407+
logger.ErrorCF("agent", "Failed to create web fetch tool", map[string]any{"error": err.Error()})
408+
}
409+
329410
ctx := context.Background()
330411
args := map[string]any{
331412
"url": "https://",
@@ -447,21 +528,22 @@ func TestCreateHTTPClient_ProxyFromEnvironmentWhenConfigEmpty(t *testing.T) {
447528
}
448529

449530
func TestNewWebFetchToolWithProxy(t *testing.T) {
450-
tool, err := NewWebFetchToolWithProxy(1024, "http://127.0.0.1:7890")
531+
tool, err := NewWebFetchToolWithProxy(1024, "http://127.0.0.1:7890", testFetchLimit)
451532
if err != nil {
452-
t.Fatalf("NewWebFetchToolWithProxy() error: %v", err)
453-
}
454-
if tool.maxChars != 1024 {
533+
logger.ErrorCF("agent", "Failed to create web fetch tool", map[string]any{"error": err.Error()})
534+
} else if tool.maxChars != 1024 {
455535
t.Fatalf("maxChars = %d, want %d", tool.maxChars, 1024)
456536
}
537+
457538
if tool.proxy != "http://127.0.0.1:7890" {
458539
t.Fatalf("proxy = %q, want %q", tool.proxy, "http://127.0.0.1:7890")
459540
}
460541

461-
tool, err = NewWebFetchToolWithProxy(0, "http://127.0.0.1:7890")
542+
tool, err = NewWebFetchToolWithProxy(0, "http://127.0.0.1:7890", testFetchLimit)
462543
if err != nil {
463-
t.Fatalf("NewWebFetchToolWithProxy() error: %v", err)
544+
logger.ErrorCF("agent", "Failed to create web fetch tool", map[string]any{"error": err.Error()})
464545
}
546+
465547
if tool.maxChars != 50000 {
466548
t.Fatalf("default maxChars = %d, want %d", tool.maxChars, 50000)
467549
}

0 commit comments

Comments
 (0)