File tree Expand file tree Collapse file tree 2 files changed: +12 -1 lines changed
src/crawlee/memory_storage_client
tests/unit/_memory_storage_client
Expand file tree Collapse file tree 2 files changed: +12 -1 lines changed
Original file line number | Diff line number | Diff line change
@@ -181,6 +181,7 @@ def create_dataset_from_directory(
181181 from crawlee .memory_storage_client ._dataset_client import DatasetClient
182182
183183 item_count = 0
184+ has_seen_metadata_file = False
184185 created_at = datetime .now (timezone .utc )
185186 accessed_at = datetime .now (timezone .utc )
186187 modified_at = datetime .now (timezone .utc )
@@ -189,6 +190,7 @@ def create_dataset_from_directory(
189190 metadata_filepath = os .path .join (storage_directory , METADATA_FILENAME )
190191
191192 if os .path .exists (metadata_filepath ):
193+ has_seen_metadata_file = True
192194 with open (metadata_filepath , encoding = 'utf-8' ) as f :
193195 json_content = json .load (f )
194196 resource_info = DatasetMetadata (** json_content )
@@ -202,7 +204,6 @@ def create_dataset_from_directory(
202204
203205 # Load dataset entries
204206 entries : dict [str , dict ] = {}
205- has_seen_metadata_file = False
206207
207208 for entry in os .scandir (storage_directory ):
208209 if entry .is_file ():
Original file line number | Diff line number | Diff line change
@@ -138,3 +138,13 @@ async def test_iterate_items(dataset_client: DatasetClient) -> None:
138138 assert len (actual_items ) == item_count
139139 assert actual_items [0 ]['id' ] == 0
140140 assert actual_items [99 ]['id' ] == 99
141+
142+
143+ async def test_reuse_dataset (dataset_client : DatasetClient , memory_storage_client : MemoryStorageClient ) -> None :
144+ item_count = 10
145+ await dataset_client .push_items ([{'id' : i } for i in range (item_count )])
146+
147+ memory_storage_client .datasets_handled = [] # purge datasets loaded to test create_dataset_from_directory
148+ datasets_client = memory_storage_client .datasets ()
149+ dataset_info = await datasets_client .get_or_create (name = 'test' )
150+ assert dataset_info .item_count == item_count
You can’t perform that action at this time.
0 commit comments