Skip to content

Commit a84125f

Browse files
Mantisus and vdusek authored
fix: respect custom HTTP headers in PlaywrightCrawler (#685)
### Description

Add `Request.headers` support for `PlaywrightCrawler`.

### Testing

The test sends a request to `https://httpbin.org/get` to verify that the custom headers were actually sent with the request.

### Checklist

- [x] CI passed

---------

Co-authored-by: Vlada Dusek <[email protected]>
1 parent 6463098 commit a84125f

File tree

2 files changed

+23
-1
lines changed

2 files changed

+23
-1
lines changed

src/crawlee/playwright_crawler/_playwright_crawler.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,8 @@ async def _navigate(
148148
infinite_scroll).
149149
"""
150150
async with context.page:
151+
if context.request.headers:
152+
await context.page.set_extra_http_headers(context.request.headers.model_dump())
151153
# Navigate to the URL and get response.
152154
response = await context.page.goto(context.request.url)
153155

tests/unit/playwright_crawler/test_playwright_crawler.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from typing import TYPE_CHECKING
99
from unittest import mock
1010

11-
from crawlee import Glob
11+
from crawlee import Glob, Request
1212
from crawlee.fingerprint_suite._consts import (
1313
PW_CHROMIUM_HEADLESS_DEFAULT_SEC_CH_UA,
1414
PW_CHROMIUM_HEADLESS_DEFAULT_SEC_CH_UA_MOBILE,
@@ -133,6 +133,26 @@ async def request_handler(context: PlaywrightCrawlingContext) -> None:
133133
assert headers['User-Agent'] == PW_FIREFOX_HEADLESS_DEFAULT_USER_AGENT
134134

135135

136+
async def test_custom_headers() -> None:
137+
crawler = PlaywrightCrawler()
138+
response_headers = dict[str, str]()
139+
request_headers = {'Power-Header': 'ring', 'Library': 'storm', 'My-Test-Header': 'fuzz'}
140+
141+
@crawler.router.default_handler
142+
async def request_handler(context: PlaywrightCrawlingContext) -> None:
143+
response = await context.response.text()
144+
context_response_headers = dict(json.loads(response)).get('headers', {})
145+
146+
for key, val in context_response_headers.items():
147+
response_headers[key] = val
148+
149+
await crawler.run([Request.from_url('https://httpbin.org/get', headers=request_headers)])
150+
151+
assert response_headers.get('Power-Header') == request_headers['Power-Header']
152+
assert response_headers.get('Library') == request_headers['Library']
153+
assert response_headers.get('My-Test-Header') == request_headers['My-Test-Header']
154+
155+
136156
async def test_pre_navigation_hook() -> None:
137157
crawler = PlaywrightCrawler()
138158
mock_hook = mock.AsyncMock(return_value=None)

0 commit comments

Comments
 (0)