Merged
Changes from 4 commits
7 changes: 7 additions & 0 deletions docs/upgrading/upgrading_to_v1.md
@@ -5,6 +5,7 @@ title: Upgrading to v1

This page summarizes the breaking changes between Crawlee for Python v0.6 and v1.0.


## Terminology change: "browser" in different contexts

The word "browser" is now used distinctly in two contexts:
@@ -102,6 +103,7 @@ Some older methods have been removed or replaced:

- `from_storage_object` - removed; use the `open` method with either a `name` or `id` instead.
- `get_info` and `storage_object` - replaced by the new `get_metadata` method.
- `get_request` now takes a `unique_key` argument instead of `request_id`, since the `id` field was removed from `Request` (see the sketch after this list).
- `set_metadata` method has been removed.
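
For illustration, a minimal sketch of the replacement calls on a `RequestQueue` (the queue name and URL are made up; `get_metadata` and `get_request` follow the bullets above):

```python
import asyncio

from crawlee import Request
from crawlee.storages import RequestQueue


async def main() -> None:
    # `open()` with a `name` (or `id`) replaces the removed `from_storage_object`.
    rq = await RequestQueue.open(name='my-queue')

    # `get_metadata()` replaces the older `get_info` / `storage_object`.
    metadata = await rq.get_metadata()
    print(metadata)

    # Requests are looked up by `unique_key` now that `id` is gone.
    request = Request.from_url('https://crawlee.dev')
    await rq.add_request(request)
    fetched = await rq.get_request(request.unique_key)
    print(fetched)


asyncio.run(main())
```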

Some changes in the related model classes:
@@ -200,6 +202,11 @@ We drop support for Python 3.9. The minimum supported version is now Python 3.10

The fields `persist_storage` and `persist_metadata` have been removed from the `Configuration`. Persistence is now determined only by which storage client class you use.
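
As an illustration, persistence is now selected by picking a storage client rather than by `Configuration` flags; a sketch assuming the `MemoryStorageClient` class and the `service_locator` registration (neither is shown in this diff):

```python
from crawlee import service_locator
from crawlee.storage_clients import MemoryStorageClient

# Previously: Configuration(persist_storage=False, persist_metadata=False).
# Now nothing is persisted simply because the in-memory client is selected;
# choosing a persistent (e.g. file-system based) client is what re-enables persistence.
service_locator.set_storage_client(MemoryStorageClient())
```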

### Changes in Request

The `id` field has been removed from `Request`; all of its usages have been replaced by the `unique_key` field, and `Request.from_url` no longer accepts an `id` argument.
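
A minimal sketch of the change from the caller's perspective (URL and label are illustrative):

```python
from crawlee import Request

request = Request.from_url('https://crawlee.dev', label='detail')

# v0.6: request.id was derived from the unique key and used for lookups.
# v1.0: the `unique_key` itself is the identifier everywhere.
print(request.unique_key)
```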

### Changes in HttpResponse

The method `HttpResponse.read` is now asynchronous. This affects all HTTP-based crawlers.
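
For example, a handler sketch assuming the `HttpCrawler` API (URL is illustrative):

```python
import asyncio

from crawlee.crawlers import HttpCrawler, HttpCrawlingContext

crawler = HttpCrawler()


@crawler.router.default_handler
async def handler(context: HttpCrawlingContext) -> None:
    # v0.6: body = context.http_response.read()
    # v1.0: `read()` is a coroutine and must be awaited.
    body = await context.http_response.read()
    context.log.info(f'Fetched {len(body)} bytes from {context.request.url}')


asyncio.run(crawler.run(['https://crawlee.dev']))
```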
12 changes: 1 addition & 11 deletions src/crawlee/_request.py
@@ -11,7 +11,7 @@
from crawlee._types import EnqueueStrategy, HttpHeaders, HttpMethod, HttpPayload, JsonSerializable
from crawlee._utils.crypto import crypto_random_object_id
from crawlee._utils.docs import docs_group
-from crawlee._utils.requests import compute_unique_key, unique_key_to_request_id
+from crawlee._utils.requests import compute_unique_key
from crawlee._utils.urls import validate_http_url

if TYPE_CHECKING:
@@ -165,10 +165,6 @@ class Request(BaseModel):

model_config = ConfigDict(populate_by_name=True)

-id: str
-"""A unique identifier for the request. Note that this is not used for deduplication, and should not be confused
-with `unique_key`."""

unique_key: Annotated[str, Field(alias='uniqueKey')]
"""A unique key identifying the request. Two requests with the same `unique_key` are considered as pointing
to the same URL.
@@ -239,7 +235,6 @@ def from_url(
label: str | None = None,
session_id: str | None = None,
unique_key: str | None = None,
-id: str | None = None,
keep_url_fragment: bool = False,
use_extended_unique_key: bool = False,
always_enqueue: bool = False,
@@ -264,8 +259,6 @@
raised.
unique_key: A unique key identifying the request. If not provided, it is automatically computed based on
the URL and other parameters. Requests with the same `unique_key` are treated as identical.
-id: A unique identifier for the request. If not provided, it is automatically generated from the
-`unique_key`.
keep_url_fragment: Determines whether the URL fragment (e.g., `#section`) should be included in
the `unique_key` computation. This is only relevant when `unique_key` is not provided.
use_extended_unique_key: Determines whether to include the HTTP method, session ID and payload in the
@@ -296,12 +289,9 @@
if always_enqueue:
unique_key = f'{unique_key}_{crypto_random_object_id()}'

-id = id or unique_key_to_request_id(unique_key)

request = cls(
url=url,
unique_key=unique_key,
-id=id,
method=method,
headers=headers,
payload=payload,
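
For reference, a short sketch of what the `from_url` change above means in practice: with `id` gone, deduplication hinges entirely on `unique_key`, and `always_enqueue=True` sidesteps it by appending a random suffix, as in the code above:

```python
from crawlee import Request

first = Request.from_url('https://crawlee.dev', always_enqueue=True)
second = Request.from_url('https://crawlee.dev', always_enqueue=True)

# The random suffix appended for `always_enqueue=True` makes the unique keys
# differ, so the two requests are not deduplicated; no `id` is generated anymore.
assert first.unique_key != second.unique_key
```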
26 changes: 0 additions & 26 deletions src/crawlee/_utils/requests.py
@@ -1,8 +1,5 @@
from __future__ import annotations

-import re
-from base64 import b64encode
-from hashlib import sha256
from logging import getLogger
from typing import TYPE_CHECKING

@@ -16,29 +13,6 @@
logger = getLogger(__name__)


-def unique_key_to_request_id(unique_key: str, *, request_id_length: int = 15) -> str:
-    """Generate a deterministic request ID based on a unique key.
-
-    Args:
-        unique_key: The unique key to convert into a request ID.
-        request_id_length: The length of the request ID.
-
-    Returns:
-        A URL-safe, truncated request ID based on the unique key.
-    """
-    # Encode the unique key and compute its SHA-256 hash
-    hashed_key = sha256(unique_key.encode('utf-8')).digest()
-
-    # Encode the hash in base64 and decode it to get a string
-    base64_encoded = b64encode(hashed_key).decode('utf-8')
-
-    # Remove characters that are not URL-safe ('+', '/', or '=')
-    url_safe_key = re.sub(r'(\+|\/|=)', '', base64_encoded)
-
-    # Truncate the key to the desired length
-    return url_safe_key[:request_id_length]


def normalize_url(url: str, *, keep_url_fragment: bool = False) -> str:
"""Normalize a URL.

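If downstream code imported `unique_key_to_request_id`, a local copy mirroring the removed implementation above can serve as a drop-in replacement (a sketch, not a Crawlee API):

```python
import re
from base64 import b64encode
from hashlib import sha256


def unique_key_to_request_id(unique_key: str, *, request_id_length: int = 15) -> str:
    """Generate a deterministic, URL-safe ID from a unique key (mirrors the removed helper)."""
    hashed_key = sha256(unique_key.encode('utf-8')).digest()
    base64_encoded = b64encode(hashed_key).decode('utf-8')
    url_safe_key = re.sub(r'(\+|\/|=)', '', base64_encoded)
    return url_safe_key[:request_id_length]
```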
11 changes: 5 additions & 6 deletions src/crawlee/crawlers/_basic/_basic_crawler.py
@@ -1057,7 +1057,7 @@ async def _handle_request_retries(
max_retries=3,
)
await self._handle_failed_request(context, error)
-self._statistics.record_request_processing_failure(request.id or request.unique_key)
+self._statistics.record_request_processing_failure(request.unique_key)

async def _handle_request_error(self, context: TCrawlingContext | BasicCrawlingContext, error: Exception) -> None:
try:
@@ -1274,7 +1274,7 @@ async def __run_task_function(self) -> None:

if not (await self._is_allowed_based_on_robots_txt_file(request.url)):
self._logger.warning(
-f'Skipping request {request.url} ({request.id}) because it is disallowed based on robots.txt'
+f'Skipping request {request.url} ({request.unique_key}) because it is disallowed based on robots.txt'
)

await self._handle_skipped_request(request, 'robots_txt', need_mark=True)
@@ -1300,8 +1300,7 @@ async def __run_task_function(self) -> None:
)
self._context_result_map[context] = result

-statistics_id = request.id or request.unique_key
-self._statistics.record_request_processing_start(statistics_id)
+self._statistics.record_request_processing_start(request.unique_key)

try:
request.state = RequestState.REQUEST_HANDLER
@@ -1328,7 +1327,7 @@ async def __run_task_function(self) -> None:
if context.session and context.session.is_usable:
context.session.mark_good()

-self._statistics.record_request_processing_finish(statistics_id)
+self._statistics.record_request_processing_finish(request.unique_key)

except RequestCollisionError as request_error:
context.request.no_retry = True
@@ -1374,7 +1373,7 @@ async def __run_task_function(self) -> None:
)

await self._handle_failed_request(context, session_error)
-self._statistics.record_request_processing_failure(statistics_id)
+self._statistics.record_request_processing_failure(request.unique_key)

except ContextPipelineInterruptedError as interrupted_error:
self._logger.debug('The context pipeline was interrupted', exc_info=interrupted_error)
4 changes: 2 additions & 2 deletions src/crawlee/request_loaders/_request_list.py
@@ -166,7 +166,7 @@ async def fetch_next_request(self) -> Request | None:
return None

state = await self._get_state()
-state.in_progress.add(self._next[0].id)
+state.in_progress.add(self._next[0].unique_key)
self._assumed_total_count += 1

next_request = self._next[0]
@@ -183,7 +183,7 @@
async def mark_request_as_handled(self, request: Request) -> None:
self._handled_count += 1
state = await self._get_state()
-state.in_progress.remove(request.id)
+state.in_progress.remove(request.unique_key)

async def _ensure_next_request(self) -> None:
await self._get_state()
6 changes: 3 additions & 3 deletions src/crawlee/request_loaders/_sitemap_request_loader.py
@@ -153,15 +153,15 @@ async def fetch_next_request(self) -> Request | None:
url = await self._url_queue.get()

request = Request.from_url(url)
-self._in_progress.add(request.id)
+self._in_progress.add(request.unique_key)
return request

return None

async def mark_request_as_handled(self, request: Request) -> ProcessedRequest | None:
"""Mark a request as successfully handled."""
-if request.id in self._in_progress:
-self._in_progress.remove(request.id)
+if request.unique_key in self._in_progress:
+self._in_progress.remove(request.unique_key)
self._handled_count += 1
return None

4 changes: 2 additions & 2 deletions src/crawlee/storage_clients/_base/_request_queue_client.py
@@ -63,11 +63,11 @@ async def add_batch_of_requests(
"""

@abstractmethod
-async def get_request(self, request_id: str) -> Request | None:
+async def get_request(self, unique_key: str) -> Request | None:
"""Retrieve a request from the queue.

Args:
-request_id: ID of the request to retrieve.
+unique_key: Unique key of the request to retrieve.

Returns:
The retrieved request, or None, if it did not exist.
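
For completeness, a sketch of calling the updated client method; the import path of the abstract `RequestQueueClient` is an assumption, and any concrete implementation would be used in practice:

```python
from __future__ import annotations

from crawlee import Request
from crawlee.storage_clients._base import RequestQueueClient  # assumed import path


async def reload_request(client: RequestQueueClient, request: Request) -> Request | None:
    # v0.6: await client.get_request(request.id)
    # v1.0: the lookup key is the request's unique key.
    return await client.get_request(request.unique_key)
```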