1818from tldextract import TLDExtract
1919from typing_extensions import NotRequired , TypedDict , TypeVar , Unpack , assert_never
2020
21- from crawlee import Glob
21+ from crawlee import Glob , service_container
2222from crawlee ._utils .urls import convert_to_absolute_url , is_url_absolute
2323from crawlee ._utils .wait import wait_for
2424from crawlee .autoscaling import AutoscaledPool , ConcurrencySettings
2525from crawlee .autoscaling .snapshotter import Snapshotter
2626from crawlee .autoscaling .system_status import SystemStatus
2727from crawlee .basic_crawler .context_pipeline import ContextPipeline
2828from crawlee .basic_crawler .router import Router
29- from crawlee .configuration import Configuration
3029from crawlee .enqueue_strategy import EnqueueStrategy
3130from crawlee .errors import (
3231 ContextPipelineInitializationError ,
3534 SessionError ,
3635 UserDefinedErrorHandlerError ,
3736)
38- from crawlee .events import LocalEventManager
3937from crawlee .http_clients import HttpxHttpClient
4038from crawlee .log_config import CrawleeLogFormatter
4139from crawlee .models import BaseRequestData , DatasetItemsListPage , Request , RequestState
4745if TYPE_CHECKING :
4846 import re
4947
48+ from crawlee .configuration import Configuration
49+ from crawlee .events .event_manager import EventManager
5050 from crawlee .http_clients import BaseHttpClient , HttpResponse
5151 from crawlee .proxy_configuration import ProxyConfiguration , ProxyInfo
5252 from crawlee .sessions import Session
@@ -77,6 +77,7 @@ class BasicCrawlerOptions(TypedDict, Generic[TCrawlingContext]):
7777 retry_on_blocked : NotRequired [bool ]
7878 proxy_configuration : NotRequired [ProxyConfiguration ]
7979 statistics : NotRequired [Statistics [StatisticsState ]]
80+ event_manager : NotRequired [EventManager ]
8081 configure_logging : NotRequired [bool ]
8182 _context_pipeline : NotRequired [ContextPipeline [TCrawlingContext ]]
8283 _additional_context_managers : NotRequired [Sequence [AsyncContextManager ]]
@@ -111,6 +112,7 @@ def __init__(
111112 retry_on_blocked : bool = True ,
112113 proxy_configuration : ProxyConfiguration | None = None ,
113114 statistics : Statistics | None = None ,
115+ event_manager : EventManager | None = None ,
114116 configure_logging : bool = True ,
115117 _context_pipeline : ContextPipeline [TCrawlingContext ] | None = None ,
116118 _additional_context_managers : Sequence [AsyncContextManager ] | None = None ,
@@ -138,6 +140,7 @@ def __init__(
138140 retry_on_blocked: If set to True, the crawler will try to automatically bypass any detected bot protection
139141 proxy_configuration: An HTTP proxy configuration to be used for making requests
140142 statistics: A preconfigured `Statistics` instance if you wish to use non-default configuration
143+ event_manager: A custom `EventManager` instance if you wish to use a non-default one
141144 configure_logging: If set to True, the crawler will configure the logging infrastructure
142145 _context_pipeline: Allows extending the request lifecycle and modifying the crawling context.
143146 This parameter is meant to be used by child classes, not when BasicCrawler is instantiated directly.
@@ -164,7 +167,7 @@ def __init__(
164167 self ._max_session_rotations = max_session_rotations
165168
166169 self ._request_provider = request_provider
167- self ._configuration = configuration or Configuration . get_global_configuration ()
170+ self ._configuration = configuration or service_container . get_configuration ()
168171
169172 self ._request_handler_timeout = request_handler_timeout
170173 self ._internal_timeout = (
@@ -175,8 +178,7 @@ def __init__(
175178
176179 self ._tld_extractor = TLDExtract (cache_dir = tempfile .TemporaryDirectory ().name )
177180
178- self ._event_manager = LocalEventManager () # TODO: switch based on configuration
179- # https://github.com/apify/crawlee-py/issues/83
181+ self ._event_manager = event_manager or service_container .get_event_manager ()
180182 self ._snapshotter = Snapshotter (self ._event_manager )
181183 self ._pool = AutoscaledPool (
182184 system_status = SystemStatus (self ._snapshotter ),
0 commit comments