Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion src/crawlee/browsers/_browser_pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,16 +97,22 @@ def with_default_plugin(
*,
headless: bool | None = None,
browser_type: BrowserType | None = None,
browser_options: Mapping[str, Any] | None = None,
page_options: Mapping[str, Any] | None = None,
**kwargs: Any,
) -> BrowserPool:
"""Create a new instance with a single `BaseBrowserPlugin` configured with the provided options.
"""Create a new instance with a single `PlaywrightBrowserPlugin` configured with the provided options.

Args:
headless: Whether to run the browser in headless mode.
browser_type: The type of browser to launch ('chromium', 'firefox', or 'webkit').
browser_options: Keyword arguments to pass to the browser launch method.
page_options: Keyword arguments to pass to the new page method.
kwargs: Additional arguments for default constructor.
"""
plugin_options: dict = defaultdict(dict)
plugin_options['browser_options'] = browser_options or {}
plugin_options['page_options'] = page_options or {}

if headless is not None:
plugin_options['browser_options']['headless'] = headless
Expand Down
4 changes: 2 additions & 2 deletions src/crawlee/browsers/_playwright_browser_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ def __init__(

Args:
browser_type: The type of the browser to launch.
browser_options: Options to configure the browser instance.
page_options: Options to configure a new page instance.
browser_options: Keyword arguments to pass to the browser launch method.
page_options: Keyword arguments to pass to the new page method.
max_open_pages_per_browser: The maximum number of pages that can be opened in a single browser instance.
Once reached, a new browser instance will be launched to handle the excess.
"""
Expand Down
25 changes: 21 additions & 4 deletions src/crawlee/playwright_crawler/_playwright_crawler.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

import logging
from typing import TYPE_CHECKING, Awaitable, Callable
from typing import TYPE_CHECKING, Any, Awaitable, Callable, Mapping

from pydantic import ValidationError

Expand Down Expand Up @@ -71,6 +71,8 @@ def __init__(
self,
browser_pool: BrowserPool | None = None,
browser_type: BrowserType | None = None,
browser_options: Mapping[str, Any] | None = None,
page_options: Mapping[str, Any] | None = None,
headless: bool | None = None,
**kwargs: Unpack[BasicCrawlerOptions[PlaywrightCrawlingContext]],
) -> None:
Expand All @@ -80,20 +82,35 @@ def __init__(
browser_pool: A `BrowserPool` instance to be used for launching the browsers and getting pages.
browser_type: The type of browser to launch ('chromium', 'firefox', or 'webkit').
This option should not be used if `browser_pool` is provided.
browser_options: Keyword arguments to pass to the browser launch method.
This option should not be used if `browser_pool` is provided.
page_options: Keyword arguments to pass to the new page method.
This option should not be used if `browser_pool` is provided.
headless: Whether to run the browser in headless mode.
This option should not be used if `browser_pool` is provided.
kwargs: Additional keyword arguments to pass to the underlying `BasicCrawler`.
"""
if browser_pool:
# Raise an exception if browser_pool is provided together with any other browser-related argument.
if headless is not None or browser_type is not None:
if (
headless is not None
or browser_type is not None
or browser_options is not None
or page_options is not None
):
raise ValueError(
'You cannot provide `headless` or `browser_type` arguments when `browser_pool` is provided.'
'You cannot provide `headless`, `browser_type`, `browser_options` or `page_options` '
'arguments when `browser_pool` is provided.'
)

# If browser_pool is not provided, create a new instance of BrowserPool with specified arguments.
else:
browser_pool = BrowserPool.with_default_plugin(headless=headless, browser_type=browser_type)
browser_pool = BrowserPool.with_default_plugin(
headless=headless,
browser_type=browser_type,
browser_options=browser_options,
page_options=page_options,
)

self._browser_pool = browser_pool

Expand Down
Loading