2 files changed (+24 -6):
  src/crawlee/crawlers/_adaptive_playwright
  tests/unit/crawlers/_adaptive_playwright

src/crawlee/crawlers/_adaptive_playwright:

@@ -149,10 +149,6 @@ def __init__(
                 non-default configuration.
             kwargs: Additional keyword arguments to pass to the underlying `BasicCrawler`.
         """
-        # Some sub crawler kwargs are internally modified. Prepare copies.
-        basic_crawler_kwargs_for_static_crawler = deepcopy(kwargs)
-        basic_crawler_kwargs_for_pw_crawler = deepcopy(kwargs)
-
         # Adaptive crawling related.
         self.rendering_type_predictor = rendering_type_predictor or DefaultRenderingTypePredictor()
         self.result_checker = result_checker or (lambda _: True)

@@ -170,11 +166,11 @@ def __init__(
         # Each sub crawler will use custom logger.
         static_logger = getLogger('Subcrawler_static')
         static_logger.setLevel(logging.ERROR)
-        basic_crawler_kwargs_for_static_crawler['_logger'] = static_logger
+        basic_crawler_kwargs_for_static_crawler: _BasicCrawlerOptions = {'_logger': static_logger, **kwargs}

         pw_logger = getLogger('Subcrawler_playwright')
         pw_logger.setLevel(logging.ERROR)
-        basic_crawler_kwargs_for_pw_crawler['_logger'] = pw_logger
+        basic_crawler_kwargs_for_pw_crawler: _BasicCrawlerOptions = {'_logger': pw_logger, **kwargs}

         # Initialize sub crawlers to create their pipelines.
         static_crawler_class = AbstractHttpCrawler.create_parsed_http_crawler_class(static_parser=static_parser)
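
The rewrite leans on dict unpacking: a literal such as {'_logger': static_logger, **kwargs} builds a fresh, shallow copy per sub crawler, so the caller's kwargs mapping is never mutated and the upfront deepcopy calls become unnecessary. A minimal sketch of the pattern, using illustrative option names rather than Crawlee's actual internals:

from copy import deepcopy
from logging import getLogger

shared_kwargs = {'max_request_retries': 3}  # stand-in for the caller's kwargs

# Old approach: deep-copy the shared mapping, then mutate each copy in place.
old_static = deepcopy(shared_kwargs)
old_static['_logger'] = getLogger('Subcrawler_static')

# New approach: one shallow merge per consumer; nested values are not copied,
# and shared_kwargs itself is never touched.
new_static = {'_logger': getLogger('Subcrawler_static'), **shared_kwargs}

assert shared_kwargs == {'max_request_retries': 3}  # original left intact

One subtlety of the new code: because **kwargs is spread after '_logger', a caller-supplied '_logger' key would take precedence over the sub crawler's default logger; spreading kwargs first would invert that priority.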
tests/unit/crawlers/_adaptive_playwright:

@@ -30,10 +30,12 @@
     AdaptiveContextError,
 )
 from crawlee.statistics import Statistics
+from crawlee.storage_clients import SqlStorageClient
 from crawlee.storages import KeyValueStore

 if TYPE_CHECKING:
     from collections.abc import AsyncGenerator, Iterator
+    from pathlib import Path

     from yarl import URL
@@ -726,3 +728,23 @@ async def request_handler(context: AdaptivePlaywrightCrawlingContext) -> None:
     await crawler.run(test_urls[:1])

     mocked_h3_handler.assert_called_once_with(None)
+
+
+async def test_adaptive_playwright_crawler_with_sql_storage(test_urls: list[str], tmp_path: Path) -> None:
+    """Tests that AdaptivePlaywrightCrawler can be initialized with SqlStorageClient."""
+    storage_dir = tmp_path / 'test_table.db'
+
+    async with SqlStorageClient(connection_string=f'sqlite+aiosqlite:///{storage_dir}') as storage_client:
+        crawler = AdaptivePlaywrightCrawler.with_beautifulsoup_static_parser(
+            storage_client=storage_client,
+        )
+
+        mocked_handler = Mock()
+
+        @crawler.router.default_handler
+        async def request_handler(_context: AdaptivePlaywrightCrawlingContext) -> None:
+            mocked_handler()
+
+        await crawler.run(test_urls[:1])
+
+        mocked_handler.assert_called()
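
The connection string follows SQLAlchemy's URL format; the sqlite+aiosqlite scheme selects the async SQLite driver. Outside the test suite, the same wiring might look like the sketch below; it assumes only the SqlStorageClient API exercised by the test (a connection_string keyword and async-context-manager usage), and the start URL is a placeholder:

import asyncio

from crawlee.crawlers import AdaptivePlaywrightCrawler, AdaptivePlaywrightCrawlingContext
from crawlee.storage_clients import SqlStorageClient


async def main() -> None:
    # The SQLAlchemy URL below points at a local SQLite file; aiosqlite
    # supplies the async driver the client needs.
    async with SqlStorageClient(connection_string='sqlite+aiosqlite:///crawlee.db') as storage_client:
        crawler = AdaptivePlaywrightCrawler.with_beautifulsoup_static_parser(
            storage_client=storage_client,
        )

        @crawler.router.default_handler
        async def handler(context: AdaptivePlaywrightCrawlingContext) -> None:
            context.log.info(f'Processed {context.request.url}')

        await crawler.run(['https://example.com'])  # placeholder start URL


if __name__ == '__main__':
    asyncio.run(main())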