1414from pathlib import Path
1515from typing import TYPE_CHECKING , Any , Callable , Generic , Union , cast
1616from urllib .parse import ParseResult , urlparse
17+ from weakref import WeakKeyDictionary
1718
1819from tldextract import TLDExtract
1920from typing_extensions import NotRequired , TypedDict , TypeVar , Unpack , assert_never
@@ -290,6 +291,9 @@ def __init__(
290291 self ._failed_request_handler : FailedRequestHandler [TCrawlingContext | BasicCrawlingContext ] | None = None
291292 self ._abort_on_error = abort_on_error
292293
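294+ # Maps each request's crawling context to the RequestHandlerRunResult collected for it (weakly keyed, so an entry does not keep its context alive).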
295+ self ._context_result_map = WeakKeyDictionary [BasicCrawlingContext , RequestHandlerRunResult ]()
296+
293297 # Context pipeline
294298 self ._context_pipeline = (_context_pipeline or ContextPipeline ()).compose (self ._check_url_after_redirects )
295299
@@ -908,9 +912,9 @@ async def send_request(
908912
909913 return send_request
910914
911- async def _commit_request_handler_result (
912- self , context : BasicCrawlingContext , result : RequestHandlerRunResult
913- ) -> None :
915+ async def _commit_request_handler_result (self , context : BasicCrawlingContext ) -> None :
916+ result = self . _context_result_map [ context ]
917+
914918 request_manager = await self .get_request_manager ()
915919 origin = context .request .loaded_url or context .request .url
916920
@@ -1018,19 +1022,20 @@ async def __run_task_function(self) -> None:
10181022
10191023 session = await self ._get_session ()
10201024 proxy_info = await self ._get_proxy_info (request , session )
1021- empty_result = RequestHandlerRunResult (key_value_store_getter = self .get_key_value_store )
1025+ result = RequestHandlerRunResult (key_value_store_getter = self .get_key_value_store )
10221026
10231027 context = BasicCrawlingContext (
10241028 request = request ,
10251029 session = session ,
10261030 proxy_info = proxy_info ,
10271031 send_request = self ._prepare_send_request_function (session , proxy_info ),
1028- add_requests = empty_result .add_requests ,
1029- push_data = empty_result .push_data ,
1030- get_key_value_store = empty_result .get_key_value_store ,
1032+ add_requests = result .add_requests ,
1033+ push_data = result .push_data ,
1034+ get_key_value_store = result .get_key_value_store ,
10311035 use_state = self ._use_state ,
10321036 log = self ._logger ,
10331037 )
1038+ self ._context_result_map [context ] = result
10341039
10351040 statistics_id = request .id or request .unique_key
10361041 self ._statistics .record_request_processing_start (statistics_id )
@@ -1039,12 +1044,11 @@ async def __run_task_function(self) -> None:
10391044 request .state = RequestState .REQUEST_HANDLER
10401045
10411046 try :
1042- result = await self ._run_request_handler (context = context , result = empty_result )
1047+ await self ._run_request_handler (context = context )
10431048 except asyncio .TimeoutError as e :
10441049 raise RequestHandlerError (e , context ) from e
10451050
1046- await self ._commit_request_handler_result (context , result )
1047-
1051+ await self ._commit_request_handler_result (context )
10481052 await wait_for (
10491053 lambda : request_manager .mark_request_as_handled (context .request ),
10501054 timeout = self ._internal_timeout ,
@@ -1132,17 +1136,14 @@ async def __run_task_function(self) -> None:
11321136 )
11331137 raise
11341138
1135- async def _run_request_handler (
1136- self , context : BasicCrawlingContext , result : RequestHandlerRunResult
1137- ) -> RequestHandlerRunResult :
1139+ async def _run_request_handler (self , context : BasicCrawlingContext ) -> None :
# Run the context pipeline for `context`, dispatching to `self.router`, bounded by
# `self._request_handler_timeout`.
#
# After this change the method neither takes nor returns a `RequestHandlerRunResult`;
# it only awaits the pipeline and returns None.
#
# NOTE(review): the visible caller catches `asyncio.TimeoutError` around this call, so
# `wait_for` presumably raises it on timeout - confirm against the helper's definition.
11381140 await wait_for (
# The lambda defers creating the pipeline call until `wait_for` invokes it.
11391141 lambda : self ._context_pipeline (context , self .router ),
11401142 timeout = self ._request_handler_timeout ,
# Two adjacent string literals are concatenated into a single timeout message.
11411143 timeout_message = 'Request handler timed out after '
11421144 f'{ self ._request_handler_timeout .total_seconds ()} seconds' ,
11431145 logger = self ._logger ,
11441146 )
1145- return result
11461147
11471148 def _is_session_blocked_status_code (self , session : Session | None , status_code : int ) -> bool :
11481149 """Check if the HTTP status code indicates that the session was blocked by the target website.
0 commit comments