Skip to content

Commit e7449f2

Browse files
authored
fix: add payload to SendRequestFunction to support POST request (#1202)
### Description

- Add a `payload` parameter to `SendRequestFunction` to support `POST` requests.

### Issues

- Related: #1201
1 parent 71d18e0 commit e7449f2

File tree

3 files changed

+18
-6
lines changed

3 files changed

+18
-6
lines changed

src/crawlee/_types.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -548,6 +548,7 @@ def __call__(
548548
url: str,
549549
*,
550550
method: HttpMethod = 'GET',
551+
payload: HttpPayload | None = None,
551552
headers: HttpHeaders | dict[str, str] | None = None,
552553
) -> Coroutine[None, None, HttpResponse]:
553554
"""Call send request function.
@@ -556,6 +557,7 @@ def __call__(
556557
url: The URL to send the request to.
557558
method: The HTTP method to use.
558559
headers: The headers to include in the request.
560+
payload: The payload to include in the request.
559561
560562
Returns:
561563
The HTTP response received from the server.

src/crawlee/crawlers/_basic/_basic_crawler.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
BasicCrawlingContext,
3232
GetKeyValueStoreFromRequestHandlerFunction,
3333
HttpHeaders,
34+
HttpPayload,
3435
RequestHandlerRunResult,
3536
SendRequestFunction,
3637
SkippedReason,
@@ -1081,11 +1082,13 @@ async def send_request(
10811082
url: str,
10821083
*,
10831084
method: HttpMethod = 'GET',
1085+
payload: HttpPayload | None = None,
10841086
headers: HttpHeaders | dict[str, str] | None = None,
10851087
) -> HttpResponse:
10861088
return await self._http_client.send_request(
10871089
url=url,
10881090
method=method,
1091+
payload=payload,
10891092
headers=headers,
10901093
session=session,
10911094
proxy_info=proxy_info,

tests/unit/crawlers/_basic/test_basic_crawler.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
from crawlee import ConcurrencySettings, Glob, service_locator
2020
from crawlee._request import Request
21-
from crawlee._types import BasicCrawlingContext, EnqueueLinksKwargs, HttpHeaders
21+
from crawlee._types import BasicCrawlingContext, EnqueueLinksKwargs, HttpHeaders, HttpMethod
2222
from crawlee._utils.robots import RobotsTxtFile
2323
from crawlee.configuration import Configuration
2424
from crawlee.crawlers import BasicCrawler
@@ -300,29 +300,36 @@ async def failed_request_handler(context: BasicCrawlingContext, error: Exception
300300
await crawler.run(['http://a.com/', 'http://b.com/', 'http://c.com/'])
301301

302302

303-
async def test_send_request_works(server_url: URL) -> None:
303+
@pytest.mark.parametrize(
304+
('method', 'path', 'payload'),
305+
[
306+
pytest.param('GET', 'get', None, id='get send_request'),
307+
pytest.param('POST', 'post', b'Hello, world!', id='post send_request'),
308+
],
309+
)
310+
async def test_send_request_works(server_url: URL, method: HttpMethod, path: str, payload: None | bytes) -> None:
304311
response_data: dict[str, Any] = {}
305312

306313
crawler = BasicCrawler(max_request_retries=3)
307314

308315
@crawler.router.default_handler
309316
async def handler(context: BasicCrawlingContext) -> None:
310-
response = await context.send_request(str(server_url))
317+
response = await context.send_request(str(server_url / path), method=method, payload=payload)
311318

312-
response_data['body'] = response.read()
319+
response_data['body'] = json.loads(response.read())
313320
response_data['headers'] = response.headers
314321

315322
await crawler.run(['http://a.com/', 'http://b.com/', 'http://c.com/'])
316323

317324
response_body = response_data.get('body')
318325
assert response_body is not None
319-
assert b'Hello, world!' in response_body
326+
assert response_body.get('data') == (payload.decode() if payload else None)
320327

321328
response_headers = response_data.get('headers')
322329
assert response_headers is not None
323330
content_type = response_headers.get('content-type')
324331
assert content_type is not None
325-
assert content_type == 'text/html; charset=utf-8'
332+
assert content_type == 'application/json'
326333

327334

328335
@dataclass

0 commit comments

Comments
 (0)