Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/crawlee/beautifulsoup_crawler/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
 try:
-    from ._beautifulsoup_crawler import BeautifulSoupCrawler
+    from ._beautifulsoup_crawler import BeautifulSoupCrawler, BeautifulSoupParser
     from ._beautifulsoup_crawling_context import BeautifulSoupCrawlingContext
 except ImportError as exc:
     raise ImportError(
Expand Down
4 changes: 3 additions & 1 deletion src/crawlee/beautifulsoup_crawler/_beautifulsoup_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
 if TYPE_CHECKING:
     from crawlee._types import BasicCrawlingContext, EnqueueLinksKwargs

+BeautifulSoupParser = Literal['html.parser', 'lxml', 'xml', 'html5lib']
+

 class BeautifulSoupCrawler(BasicCrawler[BeautifulSoupCrawlingContext]):
     """A web crawler for performing HTTP requests and parsing HTML/XML content.
Expand Down Expand Up @@ -61,7 +63,7 @@ async def request_handler(context: BeautifulSoupCrawlingContext) -> None:
     def __init__(
         self,
         *,
-        parser: Literal['html.parser', 'lxml', 'xml', 'html5lib'] = 'lxml',
+        parser: BeautifulSoupParser = 'lxml',
         additional_http_error_status_codes: Iterable[int] = (),
         ignore_http_error_status_codes: Iterable[int] = (),
         **kwargs: Unpack[BasicCrawlerOptions[BeautifulSoupCrawlingContext]],
Expand Down
Loading