22import re
33import traceback
44
5+ import crawlee .errors
6+
57
68def _get_only_innermost_exception (error : BaseException ) -> BaseException :
7- """Get innermost exception by following __cause__ and __context__ attributes of exception."""
9+ """Get innermost exception by following __cause__ and __context__ attributes of exception.
10+
11+ If the innermost exception is UserHandlerTimeoutError, return whatever caused that if possible.
12+ """
13+ if type (error ) is crawlee .errors .UserHandlerTimeoutError :
14+ if error .__cause__ :
15+ return error .__cause__
16+ if error .__context__ :
17+ return error .__context__
18+ return error
19+
820 if error .__cause__ :
921 return _get_only_innermost_exception (error .__cause__ )
1022 if error .__context__ :
@@ -34,7 +46,7 @@ def _strip_pep657_highlighting(traceback_part: str) -> str:
3446
3547
def reduce_asyncio_timeout_error_to_relevant_traceback_parts(
    timeout_error: asyncio.exceptions.TimeoutError | crawlee.errors.UserHandlerTimeoutError,
) -> list[str]:
    """Return the filtered traceback parts for the innermost exception behind a timeout error.

    The traceback parts are produced by `_get_traceback_parts_for_innermost_exception` and then
    passed through `_get_filtered_traceback_parts_for_asyncio_timeout_error`, whose result is
    returned unchanged.
    """
    return _get_filtered_traceback_parts_for_asyncio_timeout_error(
        _get_traceback_parts_for_innermost_exception(timeout_error)
    )
@@ -43,13 +55,20 @@ def reduce_asyncio_timeout_error_to_relevant_traceback_parts(
def _get_traceback_parts_for_innermost_exception(error: Exception) -> list[str]:
    """Format the innermost exception reachable from `error` as a list of traceback strings.

    The innermost exception is resolved by `_get_only_innermost_exception`. Chaining is disabled
    (`chain=False`), so only that exception's own traceback is formatted.
    """
    root_cause = _get_only_innermost_exception(error)
    root_cause_traceback = root_cause.__traceback__
    return traceback.format_exception(type(root_cause), value=root_cause, tb=root_cause_traceback, chain=False)
4860
4961
5062def get_one_line_error_summary_if_possible (error : Exception ) -> str :
5163 if isinstance (error , asyncio .exceptions .TimeoutError ):
52- most_relevant_part = ',' + reduce_asyncio_timeout_error_to_relevant_traceback_parts (error )[- 1 ]
64+ relevant_part = reduce_asyncio_timeout_error_to_relevant_traceback_parts (error )
65+ most_relevant_part = (',' + relevant_part [- 1 ]) if len (relevant_part ) else ''
66+ elif isinstance (error , crawlee .errors .UserHandlerTimeoutError ):
67+ # Error is user defined handler. First two lines should be location of the `UserHandlerTimeoutError` in crawlee
68+ # code and third line the topmost user error
69+ traceback_parts = _get_traceback_parts_for_innermost_exception (error )
70+ relevant_index_from_start = 3
71+ most_relevant_part = traceback_parts [2 ] if len (traceback_parts ) >= relevant_index_from_start else ''
5372 elif 'playwright._impl._errors.Error' in str (error .__class__ ):
5473 # Playwright autogenerated errors are often very long, so we do not try to summarize them at all as they anyway
5574 # point to deep internals.
0 commit comments