From 30835132c10964c9917775b0b65b4cc59e84286a Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Thu, 31 Oct 2024 15:50:34 +0100 Subject: [PATCH 1/2] chore!: rm Request.query_params field closes: #615 --- docs/examples/fill_and_submit_web_form.mdx | 2 +- src/crawlee/_request.py | 13 +------------ src/crawlee/_types.py | 2 -- src/crawlee/http_clients/_base.py | 4 +--- src/crawlee/http_clients/_httpx.py | 5 +---- src/crawlee/http_clients/curl_impersonate.py | 5 +---- 6 files changed, 5 insertions(+), 26 deletions(-) diff --git a/docs/examples/fill_and_submit_web_form.mdx b/docs/examples/fill_and_submit_web_form.mdx index 8498bb3014..b5d77ff5d3 100644 --- a/docs/examples/fill_and_submit_web_form.mdx +++ b/docs/examples/fill_and_submit_web_form.mdx @@ -46,7 +46,7 @@ Now, let's create a POST request with the form fields and their values using the {RequestExample} -Alternatively, you can send form data as URL parameters using the `query_params` argument. It depends on the form and how it is implemented. However, sending the data as a POST request body using the `payload` is generally a better approach. +Alternatively, you can send form data as URL parameters using the `url` argument. It depends on the form and how it is implemented. However, sending the data as a POST request body using the `payload` is generally a better approach. ## Implementing the crawler diff --git a/src/crawlee/_request.py b/src/crawlee/_request.py index 0d6bad09f9..4b4af29cdb 100644 --- a/src/crawlee/_request.py +++ b/src/crawlee/_request.py @@ -19,7 +19,7 @@ ) from typing_extensions import Self -from crawlee._types import EnqueueStrategy, HttpHeaders, HttpMethod, HttpPayload, HttpQueryParams, JsonSerializable +from crawlee._types import EnqueueStrategy, HttpHeaders, HttpMethod, HttpPayload, JsonSerializable from crawlee._utils.crypto import crypto_random_object_id from crawlee._utils.requests import compute_unique_key, unique_key_to_request_id from crawlee._utils.urls import extract_query_params, validate_http_url @@ -139,9 +139,6 @@ class BaseRequestData(BaseModel): headers: Annotated[HttpHeaders, Field(default_factory=HttpHeaders)] = HttpHeaders() """HTTP request headers.""" - query_params: Annotated[HttpQueryParams, Field(alias='queryParams', default_factory=dict)] = {} - """URL query parameters.""" - payload: HttpPayload | None = None """HTTP request payload.""" @@ -182,7 +179,6 @@ def from_url( *, method: HttpMethod = 'GET', headers: HttpHeaders | None = None, - query_params: HttpQueryParams | None = None, payload: HttpPayload | None = None, label: str | None = None, unique_key: str | None = None, @@ -193,7 +189,6 @@ def from_url( ) -> Self: """Create a new `BaseRequestData` instance from a URL. See `Request.from_url` for more details.""" headers = headers or HttpHeaders() - query_params = query_params or {} unique_key = unique_key or compute_unique_key( url, @@ -212,7 +207,6 @@ def from_url( id=id, method=method, headers=headers, - query_params=query_params, payload=payload, **kwargs, ) @@ -276,7 +270,6 @@ def from_url( *, method: HttpMethod = 'GET', headers: HttpHeaders | None = None, - query_params: HttpQueryParams | None = None, payload: HttpPayload | None = None, label: str | None = None, unique_key: str | None = None, @@ -297,7 +290,6 @@ def from_url( url: The URL of the request. method: The HTTP method of the request. headers: The HTTP headers of the request. - query_params: The query parameters of the URL. payload: The data to be sent as the request body. Typically used with 'POST' or 'PUT' requests. 
label: A custom label to differentiate between request types. This is stored in `user_data`, and it is used for request routing (different requests go to different handlers). @@ -317,7 +309,6 @@ def from_url( raise ValueError('`always_enqueue` cannot be used with a custom `unique_key`') headers = headers or HttpHeaders() - query_params = query_params or {} unique_key = unique_key or compute_unique_key( url, @@ -339,7 +330,6 @@ def from_url( id=id, method=method, headers=headers, - query_params=query_params, payload=payload, **kwargs, ) @@ -440,7 +430,6 @@ def __eq__(self, other: object) -> bool: and self.unique_key == other.unique_key and self.method == other.method and self.headers == other.headers - and self.query_params == other.query_params and self.payload == other.payload and self.user_data == other.user_data and self.retry_count == other.retry_count diff --git a/src/crawlee/_types.py b/src/crawlee/_types.py index d63da27683..d69b297634 100644 --- a/src/crawlee/_types.py +++ b/src/crawlee/_types.py @@ -50,8 +50,6 @@ HttpMethod: TypeAlias = Literal['GET', 'HEAD', 'POST', 'PUT', 'DELETE', 'CONNECT', 'OPTIONS', 'TRACE', 'PATCH'] -HttpQueryParams: TypeAlias = dict[str, str] - HttpPayload: TypeAlias = bytes diff --git a/src/crawlee/http_clients/_base.py b/src/crawlee/http_clients/_base.py index 4c07902c87..bfbc8de4ea 100644 --- a/src/crawlee/http_clients/_base.py +++ b/src/crawlee/http_clients/_base.py @@ -10,7 +10,7 @@ if TYPE_CHECKING: from collections.abc import Iterable - from crawlee._types import HttpHeaders, HttpMethod, HttpPayload, HttpQueryParams + from crawlee._types import HttpHeaders, HttpMethod, HttpPayload from crawlee.base_storage_client._models import Request from crawlee.proxy_configuration import ProxyInfo from crawlee.sessions import Session @@ -112,7 +112,6 @@ async def send_request( *, method: HttpMethod = 'GET', headers: HttpHeaders | None = None, - query_params: HttpQueryParams | None = None, payload: HttpPayload | None = None, session: Session | None = None, proxy_info: ProxyInfo | None = None, @@ -125,7 +124,6 @@ async def send_request( url: The URL to send the request to. method: The HTTP method to use. headers: The headers to include in the request. - query_params: The query parameters to include in the request. payload: The data to be sent as the request body. session: The session associated with the request. proxy_info: The information about the proxy to be used. 
diff --git a/src/crawlee/http_clients/_httpx.py b/src/crawlee/http_clients/_httpx.py index 321fd33a47..e401b1125a 100644 --- a/src/crawlee/http_clients/_httpx.py +++ b/src/crawlee/http_clients/_httpx.py @@ -16,7 +16,7 @@ if TYPE_CHECKING: from collections.abc import Iterable - from crawlee._types import HttpMethod, HttpPayload, HttpQueryParams + from crawlee._types import HttpMethod, HttpPayload from crawlee.base_storage_client._models import Request from crawlee.proxy_configuration import ProxyInfo from crawlee.statistics import Statistics @@ -141,7 +141,6 @@ async def crawl( url=request.url, method=request.method, headers=headers, - params=request.query_params, content=request.payload, cookies=session.cookies if session else None, extensions={'crawlee_session': session if self._persist_cookies_per_session else None}, @@ -176,7 +175,6 @@ async def send_request( *, method: HttpMethod = 'GET', headers: HttpHeaders | None = None, - query_params: HttpQueryParams | None = None, payload: HttpPayload | None = None, session: Session | None = None, proxy_info: ProxyInfo | None = None, @@ -188,7 +186,6 @@ async def send_request( url=url, method=method, headers=dict(headers) if headers else None, - params=query_params, content=payload, extensions={'crawlee_session': session if self._persist_cookies_per_session else None}, ) diff --git a/src/crawlee/http_clients/curl_impersonate.py b/src/crawlee/http_clients/curl_impersonate.py index 51e434ff9c..5fe2ca179f 100644 --- a/src/crawlee/http_clients/curl_impersonate.py +++ b/src/crawlee/http_clients/curl_impersonate.py @@ -26,7 +26,7 @@ from curl_cffi.requests import Response - from crawlee._types import HttpMethod, HttpQueryParams + from crawlee._types import HttpMethod from crawlee.base_storage_client._models import Request from crawlee.proxy_configuration import ProxyInfo from crawlee.sessions import Session @@ -130,7 +130,6 @@ async def crawl( url=request.url, method=request.method.upper(), # type: ignore # curl-cffi requires uppercase method headers=request.headers, - params=request.query_params, data=request.payload, cookies=session.cookies if session else None, allow_redirects=True, @@ -162,7 +161,6 @@ async def send_request( *, method: HttpMethod = 'GET', headers: HttpHeaders | None = None, - query_params: HttpQueryParams | None = None, payload: HttpPayload | None = None, session: Session | None = None, proxy_info: ProxyInfo | None = None, @@ -175,7 +173,6 @@ async def send_request( url=url, method=method.upper(), # type: ignore # curl-cffi requires uppercase method headers=dict(headers) if headers else None, - params=query_params, data=payload, cookies=session.cookies if session else None, allow_redirects=True, From 76be589f3da5469c52f974351da7de95850278db Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Thu, 31 Oct 2024 16:48:49 +0100 Subject: [PATCH 2/2] add test --- tests/unit/http_crawler/test_http_crawler.py | 37 ++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/tests/unit/http_crawler/test_http_crawler.py b/tests/unit/http_crawler/test_http_crawler.py index afc9e79c7b..e29bff323c 100644 --- a/tests/unit/http_crawler/test_http_crawler.py +++ b/tests/unit/http_crawler/test_http_crawler.py @@ -246,7 +246,7 @@ async def request_handler(context: HttpCrawlingContext) -> None: await crawler.run([request]) # The request handler should be called once. - assert len(responses) == 1 + assert len(responses) == 1, 'The request handler should be called once.' 
# The reconstructed payload data should match the original payload. We have to flatten the values, because # parse_qs returns a list of values for each key. @@ -254,4 +254,37 @@ async def request_handler(context: HttpCrawlingContext) -> None: k: v[0] if len(v) == 1 else v for k, v in parse_qs(responses[0]['data'].strip("b'").strip("'")).items() } - assert response_data == payload + assert response_data == payload, 'The reconstructed payload data should match the original payload.' + + +@pytest.mark.parametrize( + 'http_client_class', + [CurlImpersonateHttpClient, HttpxHttpClient], + ids=['curl', 'httpx'], +) +async def test_sending_url_query_params(http_client_class: type[BaseHttpClient]) -> None: + http_client = http_client_class() + crawler = HttpCrawler(http_client=http_client) + + responses = [] + + @crawler.router.default_handler + async def request_handler(context: HttpCrawlingContext) -> None: + response = json.loads(context.http_response.read()) + # The httpbin.org/get endpoint returns the provided query parameters in the response. + responses.append(response) + + base_url = 'https://httpbin.org/get' + query_params = {'param1': 'value1', 'param2': 'value2'} + request = Request.from_url(url=f'{base_url}?{urlencode(query_params)}') + + await crawler.run([request]) + + # The request handler should be called once. + assert len(responses) == 1, 'The request handler should be called once.' + + # Validate the response query parameters. + response_args = responses[0]['args'] + assert ( + response_args == query_params + ), 'The reconstructed query parameters should match the original query parameters.'
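
For users affected by this breaking change, a minimal migration sketch (assuming `Request` is importable from the top-level `crawlee` package, as the new test suggests): parameters that were previously passed via the removed `query_params` field are instead encoded directly into the request URL, for example with `urllib.parse.urlencode`.

```python
from urllib.parse import urlencode

from crawlee import Request  # assumed import path; adjust to your crawlee version

query_params = {'param1': 'value1', 'param2': 'value2'}

# Before this change (no longer supported):
# request = Request.from_url('https://httpbin.org/get', query_params=query_params)

# After this change, encode the parameters into the URL itself:
request = Request.from_url(f'https://httpbin.org/get?{urlencode(query_params)}')
```

The same approach applies to form data sent as URL parameters, as noted in the updated `fill_and_submit_web_form.mdx` docs; sending the data as a POST body via `payload` remains the preferred option.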