Skip to content

Commit 7ea8fe6

Browse files
authored
fix: Correctly handle log level configuration (#508)
- The `CRAWLEE_VERBOSE` and `CRAWLEE_LOG_LEVEL` variables are now respected by crawlee
- `CrawleeLogFormatter` doesn't alter the record being formatted anymore
- SDK counterpart coming soon
1 parent 279e133 commit 7ea8fe6

File tree

3 files changed

+68
-27
lines changed

3 files changed

+68
-27
lines changed

src/crawlee/_log_config.py

Lines changed: 58 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,15 @@
22

33
import json
44
import logging
5+
import sys
56
import textwrap
6-
import traceback
7-
from typing import Any
7+
from typing import TYPE_CHECKING, Any
88

99
from colorama import Fore, Style, just_fix_windows_console
10+
from typing_extensions import assert_never
11+
12+
if TYPE_CHECKING:
13+
from crawlee.configuration import Configuration
1014

1115
just_fix_windows_console()
1216

@@ -31,6 +35,46 @@
3135
_LOG_MESSAGE_INDENT = ' ' * 6
3236

3337

38+
def get_configured_log_level(configuration: Configuration) -> int:
    """Translate the logging-related fields of a configuration into a `logging` level constant.

    When `log_level` is listed in `configuration.model_fields_set`, its value decides the result on its own.
    Otherwise `logging.DEBUG` is returned if Python runs in development mode or if `verbose_log` is listed in
    `configuration.model_fields_set` and truthy; `logging.INFO` is the fallback.

    Args:
        configuration: The configuration whose `log_level` and `verbose_log` fields are inspected.

    Returns:
        One of `logging.DEBUG`, `logging.INFO`, `logging.WARNING`, `logging.ERROR` or `logging.CRITICAL`.
    """
    explicitly_set = configuration.model_fields_set
    wants_verbose = 'verbose_log' in explicitly_set and configuration.verbose_log

    if 'log_level' in explicitly_set:
        level_name = configuration.log_level

        if level_name == 'DEBUG':
            return logging.DEBUG
        if level_name == 'INFO':
            return logging.INFO
        if level_name == 'WARNING':
            return logging.WARNING
        if level_name == 'ERROR':
            return logging.ERROR
        if level_name == 'CRITICAL':
            return logging.CRITICAL

        # None of the known level names matched, so report the unexpected value
        assert_never(level_name)

    return logging.DEBUG if sys.flags.dev_mode or wants_verbose else logging.INFO
59+
60+
61+
def configure_logger(
    logger: logging.Logger,
    configuration: Configuration,
    *,
    remove_old_handlers: bool = False,
) -> None:
    """Attach a `CrawleeLogFormatter`-backed stream handler to a logger and set the logger's level.

    Args:
        logger: The logger that receives the new handler and level.
        configuration: The configuration passed to `get_configured_log_level` to determine the level.
        remove_old_handlers: If set, every handler currently attached to `logger` is removed before the new one
            is added.
    """
    crawlee_handler = logging.StreamHandler()
    crawlee_handler.setFormatter(CrawleeLogFormatter())

    if remove_old_handlers:
        # Iterate over a snapshot, since removing a handler mutates `logger.handlers`
        for stale_handler in list(logger.handlers):
            logger.removeHandler(stale_handler)

    logger.addHandler(crawlee_handler)

    resolved_level = get_configured_log_level(configuration)
    logger.setLevel(resolved_level)
76+
77+
3478
class CrawleeLogFormatter(logging.Formatter):
3579
"""Log formatter that prints out the log message nicely formatted, with colored level and stringified extra fields.
3680
@@ -87,15 +131,6 @@ def format(self, record: logging.LogRecord) -> str:
87131
level_short_alias = _LOG_LEVEL_SHORT_ALIAS.get(record.levelno, record.levelname)
88132
level_string = f'{level_color_code}{level_short_alias}{Style.RESET_ALL} '
89133

90-
# Format the exception, if there is some
91-
# Basically just print the traceback and indent it a bit
92-
exception_string = ''
93-
if record.exc_info:
94-
exc_info = record.exc_info
95-
record.exc_info = None
96-
exception_string = ''.join(traceback.format_exception(*exc_info)).rstrip()
97-
exception_string = '\n' + textwrap.indent(exception_string, _LOG_MESSAGE_INDENT)
98-
99134
# Format the extra log record fields, if there were some
100135
# Just stringify them to JSON and color them gray
101136
extra_string = ''
@@ -105,8 +140,19 @@ def format(self, record: logging.LogRecord) -> str:
105140
f' {Fore.LIGHTBLACK_EX}({json.dumps(extra, ensure_ascii=False, default=str)}){Style.RESET_ALL}'
106141
)
107142

143+
# Call the parent method so that it populates missing fields in the record
144+
super().format(record)
145+
108146
# Format the actual log message
109-
log_string = super().format(record)
147+
log_string = self.formatMessage(record)
148+
149+
# Format the exception, if there is some
150+
# Basically just print the traceback and indent it a bit
151+
exception_string = ''
152+
if record.exc_text:
153+
exception_string = '\n' + textwrap.indent(record.exc_text.rstrip(), _LOG_MESSAGE_INDENT)
154+
else:
155+
exception_string = ''
110156

111157
if self.include_logger_name:
112158
# Include logger name at the beginning of the log line

src/crawlee/basic_crawler/_basic_crawler.py

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
from crawlee._autoscaling import AutoscaledPool
2323
from crawlee._autoscaling.snapshotter import Snapshotter
2424
from crawlee._autoscaling.system_status import SystemStatus
25-
from crawlee._log_config import CrawleeLogFormatter
25+
from crawlee._log_config import configure_logger, get_configured_log_level
2626
from crawlee._request import BaseRequestData, Request, RequestState
2727
from crawlee._types import BasicCrawlingContext, HttpHeaders, RequestHandlerRunResult, SendRequestFunction
2828
from crawlee._utils.byte_size import ByteSize
@@ -203,20 +203,14 @@ def __init__(
203203
self._retry_on_blocked = retry_on_blocked
204204

205205
if configure_logging:
206-
handler = logging.StreamHandler()
207-
handler.setFormatter(CrawleeLogFormatter())
208-
209206
root_logger = logging.getLogger()
210-
211-
for old_handler in root_logger.handlers[:]:
212-
root_logger.removeHandler(old_handler)
213-
214-
root_logger.addHandler(handler)
215-
root_logger.setLevel(logging.INFO if not sys.flags.dev_mode else logging.DEBUG)
207+
configure_logger(root_logger, self._configuration, remove_old_handlers=True)
216208

217209
# Silence HTTPX logger
218210
httpx_logger = logging.getLogger('httpx')
219-
httpx_logger.setLevel(logging.WARNING if not sys.flags.dev_mode else logging.INFO)
211+
httpx_logger.setLevel(
212+
logging.DEBUG if get_configured_log_level(self._configuration) <= logging.DEBUG else logging.WARNING
213+
)
220214

221215
if not _logger:
222216
_logger = logging.getLogger(__name__)

src/crawlee/configuration.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33
from __future__ import annotations
44

55
from datetime import timedelta
6-
from typing import Annotated
6+
from typing import Annotated, Literal
77

8-
from pydantic import AliasChoices, Field
8+
from pydantic import AliasChoices, BeforeValidator, Field
99
from pydantic_settings import BaseSettings, SettingsConfigDict
1010
from typing_extensions import Self
1111

@@ -51,14 +51,15 @@ class Configuration(BaseSettings):
5151
] = False
5252

5353
log_level: Annotated[
54-
int,
54+
Literal['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
5555
Field(
5656
validation_alias=AliasChoices(
5757
'apify_log_level',
5858
'crawlee_log_level',
5959
)
6060
),
61-
] = 4 # INFO
61+
BeforeValidator(lambda value: str(value).upper()),
62+
] = 'INFO'
6263

6364
default_dataset_id: Annotated[
6465
str,

0 commit comments

Comments
 (0)