Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions docs/examples/code/fill_and_submit_web_form_crawler.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import asyncio
import json
from urllib.parse import urlencode

from crawlee import Request
from crawlee.http_crawler import HttpCrawler, HttpCrawlingContext
Expand All @@ -19,7 +19,8 @@ async def request_handler(context: HttpCrawlingContext) -> None:
request = Request.from_url(
url='https://httpbin.org/post',
method='POST',
payload=json.dumps(
headers={'content-type': 'application/x-www-form-urlencoded'},
payload=urlencode(
{
'custname': 'John Doe',
'custtel': '1234567890',
Expand Down
5 changes: 3 additions & 2 deletions docs/examples/code/fill_and_submit_web_form_request.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import json
from urllib.parse import urlencode

from crawlee import Request

# Prepare a POST request to the form endpoint.
request = Request.from_url(
url='https://httpbin.org/post',
method='POST',
payload=json.dumps(
headers={'content-type': 'application/x-www-form-urlencoded'},
payload=urlencode(
{
'custname': 'John Doe',
'custtel': '1234567890',
Expand Down
16 changes: 6 additions & 10 deletions src/crawlee/_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,7 @@
from enum import IntEnum
from typing import Annotated, Any, cast

from pydantic import (
BaseModel,
BeforeValidator,
ConfigDict,
Field,
PlainSerializer,
PlainValidator,
TypeAdapter,
)
from pydantic import BaseModel, BeforeValidator, ConfigDict, Field, PlainSerializer, PlainValidator, TypeAdapter
from typing_extensions import Self

from crawlee._types import EnqueueStrategy, HttpHeaders, HttpMethod, HttpPayload, JsonSerializable
Expand Down Expand Up @@ -143,7 +135,11 @@ class BaseRequestData(BaseModel):
headers: Annotated[HttpHeaders, Field(default_factory=HttpHeaders)] = HttpHeaders()
"""HTTP request headers."""

payload: HttpPayload | None = None
payload: Annotated[
HttpPayload | None,
BeforeValidator(lambda v: v.encode() if isinstance(v, str) else v),
PlainSerializer(lambda v: v.decode() if isinstance(v, bytes) else None),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

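This is scary - the serializer returns None for anything that is not bytes, so a non-bytes payload would be silently dropped on serialization. Could you return v in the else branch instead?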

] = None
"""HTTP request payload."""

user_data: Annotated[
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,19 +93,20 @@ async def test_list_head(request_queue_client: RequestQueueClient) -> None:


async def test_request_state_serialization(request_queue_client: RequestQueueClient) -> None:
request = Request.from_url('https://crawlee.dev')
request = Request.from_url('https://crawlee.dev', payload=b'test')
request.state = RequestState.UNPROCESSED

await request_queue_client.add_request(request)

result = await request_queue_client.list_head()

assert len(result.items) == 1
assert result.items[0] == request

got_request = await request_queue_client.get_request(request.id)

assert request == got_request
assert request.payload == got_request.payload
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comparison is part of Request.__eq__, so this assertion accomplishes nothing.

assert got_request.payload != b"b'test'"


async def test_add_record(request_queue_client: RequestQueueClient) -> None:
Expand Down
Loading