
Commit 8d27e39

fix: Remove redundant log, fix format (#408)
1 parent 4800cbd commit 8d27e39

File tree

2 files changed: +50 -2 lines

src/crawlee/basic_crawler/basic_crawler.py

Lines changed: 1 addition & 2 deletions
@@ -399,7 +399,7 @@ def sigint_handler() -> None:
         self._has_finished_before = True

         final_statistics = self._statistics.calculate()
-        self._logger.info(f'Final request statistics: {final_statistics.to_table()}')
+        self._logger.info(f'Final request statistics:\n{final_statistics.to_table()}')

         return final_statistics

@@ -746,7 +746,6 @@ async def __is_finished_function(self) -> bool:
                 f'All ongoing requests have now completed. Total requests processed: '
                 f'{self._statistics.state.requests_finished}. The crawler will now shut down.'
             )
-            self._logger.info(f'is_finished: {is_finished}')
             return True

         return is_finished
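
Note: the "fix format" half of this commit is the \n added before {final_statistics.to_table()}. FinalStatistics.to_table() renders a multi-line box-drawing table, so without the newline the table's top border ran on from the end of the log message. A minimal sketch of the difference, using a shortened stand-in table rather than the real to_table() output:

# Shortened stand-in for FinalStatistics.to_table(); the real table has
# one row per statistic (see the test below).
table = '\n'.join([
    '┌───────────────────┬───┐',
    '│ requests_finished │ 4 │',
    '└───────────────────┴───┘',
])

# Old format string: the table's top border trails the message on one line.
print(f'Final request statistics: {table}')

# Fixed format string: the message gets its own line and the table starts below it.
print(f'Final request statistics:\n{table}')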

tests/unit/basic_crawler/test_basic_crawler.py

Lines changed: 49 additions & 0 deletions
@@ -21,6 +21,7 @@
 from crawlee.enqueue_strategy import EnqueueStrategy
 from crawlee.errors import SessionError, UserDefinedErrorHandlerError
 from crawlee.models import BaseRequestData, Request
+from crawlee.statistics.models import FinalStatistics
 from crawlee.storages import Dataset, KeyValueStore, RequestList, RequestQueue
 from crawlee.types import AddRequestsKwargs, BasicCrawlingContext, HttpHeaders

@@ -638,3 +639,51 @@ async def handler(context: BasicCrawlingContext) -> None:

     datasets_path = Path(configuration.storage_dir) / 'datasets' / 'default'
     assert not datasets_path.exists() or list(datasets_path.iterdir()) == []
+
+
+async def test_logs_final_statistics(monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture) -> None:
+    crawler = BasicCrawler(configure_logging=False)
+
+    @crawler.router.default_handler
+    async def handler(context: BasicCrawlingContext) -> None:
+        await context.push_data({'something': 'something'})
+
+    fake_statistics = FinalStatistics(
+        requests_finished=4,
+        requests_failed=33,
+        retry_histogram=[1, 4, 8],
+        request_avg_failed_duration=timedelta(seconds=99),
+        request_avg_finished_duration=timedelta(milliseconds=483),
+        requests_finished_per_minute=0.33,
+        requests_failed_per_minute=0.1,
+        request_total_duration=timedelta(minutes=12),
+        requests_total=37,
+        crawler_runtime=timedelta(minutes=5),
+    )
+
+    monkeypatch.setattr(crawler._statistics, 'calculate', lambda: fake_statistics)
+
+    result = await crawler.run()
+    assert result is fake_statistics
+
+    final_statistics = next(
+        (record for record in caplog.records if record.msg.startswith('Final')),
+        None,
+    )
+
+    assert final_statistics is not None
+    assert final_statistics.msg.splitlines() == [
+        'Final request statistics:',
+        '┌───────────────────────────────┬───────────┐',
+        '│ requests_finished             │ 4         │',
+        '│ requests_failed               │ 33        │',
+        '│ retry_histogram               │ [1, 4, 8] │',
+        '│ request_avg_failed_duration   │ 99.0      │',
+        '│ request_avg_finished_duration │ 0.483     │',
+        '│ requests_finished_per_minute  │ 0.33      │',
+        '│ requests_failed_per_minute    │ 0.1       │',
+        '│ request_total_duration        │ 720.0     │',
+        '│ requests_total                │ 37        │',
+        '│ crawler_runtime               │ 300.0     │',
+        '└───────────────────────────────┴───────────┘',
+    ]
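
Note: the test pins the output by monkeypatching crawler._statistics.calculate to return a fixed FinalStatistics, then pulls the matching record out of caplog and compares the rendered table line by line (durations are rendered as total seconds, e.g. timedelta(minutes=12) becomes 720.0). Assuming a standard pytest setup, the test can be run on its own via its node ID:

python -m pytest tests/unit/basic_crawler/test_basic_crawler.py::test_logs_final_statistics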
