88from crawlee import service_locator
99from crawlee .configuration import Configuration
1010from crawlee .crawlers import HttpCrawler , HttpCrawlingContext
11+ from crawlee .statistics import Statistics
1112from crawlee .storage_clients import MemoryStorageClient
1213from crawlee .storage_clients ._file_system ._storage_client import FileSystemStorageClient
1314
@@ -35,16 +36,41 @@ def test_global_configuration_works_reversed() -> None:
3536 )
3637
3738
38- async def test_storage_not_persisted_when_disabled (tmp_path : Path , server_url : URL ) -> None :
async def test_storage_not_persisted_when_non_persistable_storage_used(tmp_path: Path, server_url: URL) -> None:
    """Run a crawler backed by MemoryStorageClient and check that nothing lands on disk.

    The global configuration points the storage directory at `tmp_path`, but the crawler
    itself is given a MemoryStorageClient, which can't persist state.
    """
    configuration = Configuration(
        crawlee_storage_dir=str(tmp_path),  # type: ignore[call-arg]
    )
    service_locator.set_configuration(configuration)

    crawler = HttpCrawler(storage_client=MemoryStorageClient())

    @crawler.router.default_handler
    async def default_handler(context: HttpCrawlingContext) -> None:
        # Push one record per request so the run actually touches storage.
        await context.push_data({'url': context.request.url})

    start_urls = [str(server_url)]
    await crawler.run(start_urls)

    # The storage directory must remain untouched after the run.
    storage_dir_entries = list(tmp_path.iterdir())
    assert storage_dir_entries == [], 'Expected the storage directory to be empty, but it is not.'
57+
58+
59+ async def test_storage_persisted_with_explicit_statistics_with_persistable_storage (
60+ tmp_path : Path , server_url : URL
61+ ) -> None :
62+ """Make the Crawler use MemoryStorageClient which can't persist state,
63+ but pass explicit statistics to it which will use global FileSystemStorageClient() that can persist state."""
64+
3965 configuration = Configuration (
4066 crawlee_storage_dir = str (tmp_path ), # type: ignore[call-arg]
4167 )
42- storage_client = MemoryStorageClient ()
43-
4468 service_locator .set_configuration (configuration )
45- service_locator .set_storage_client (storage_client )
69+ service_locator .set_storage_client (FileSystemStorageClient () )
4670
47- crawler = HttpCrawler ()
71+ crawler = HttpCrawler (
72+ storage_client = MemoryStorageClient (), statistics = Statistics .with_default_state (persistence_enabled = True )
73+ )
4874
4975 @crawler .router .default_handler
5076 async def default_handler (context : HttpCrawlingContext ) -> None :
@@ -54,7 +80,7 @@ async def default_handler(context: HttpCrawlingContext) -> None:
5480
5581 # Verify that files were created in the storage directory.
5682 content = list (tmp_path .iterdir ())
57- assert content == [], 'Expected the storage directory to be empty , but it is not.'
83+ assert content != [], 'Expected the storage directory to contain files , but it does not.'
5884
5985
6086async def test_storage_persisted_when_enabled (tmp_path : Path , server_url : URL ) -> None :
0 commit comments