Skip to content

Commit 5593637

Browse files
committed
caller responsible for handling the exception
1 parent eafc68a commit 5593637

1 file changed

Lines changed: 32 additions & 34 deletions

File tree

src/apify/_utils.py

Lines changed: 32 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -445,43 +445,36 @@ def normalize_url(url: str, *, keep_url_fragment: bool = False) -> str:
445445
keep_url_fragment: Flag to determine whether the fragment part of the URL should be retained.
446446
447447
Returns:
448-
A string containing the normalized URL. If normalization fails, the original URL is returned. It logs
449-
a warning message in this case.
448+
A string containing the normalized URL.
450449
"""
451-
try:
452-
# Parse the URL
453-
parsed_url = urlparse(url.strip())
454-
search_params = dict(parse_qsl(parsed_url.query)) # Convert query to a dict
455-
456-
# Remove any 'utm_' parameters
457-
search_params = {k: v for k, v in search_params.items() if not k.startswith('utm_')}
458-
459-
# Construct the new query string
460-
sorted_keys = sorted(search_params.keys())
461-
sorted_query = urlencode([(k, search_params[k]) for k in sorted_keys])
462-
463-
# Construct the final URL
464-
new_url = (
465-
parsed_url._replace(
466-
query=sorted_query,
467-
scheme=parsed_url.scheme,
468-
netloc=parsed_url.netloc,
469-
path=parsed_url.path.rstrip('/'),
470-
)
471-
.geturl()
472-
.lower()
450+
# Parse the URL
451+
parsed_url = urlparse(url.strip())
452+
search_params = dict(parse_qsl(parsed_url.query)) # Convert query to a dict
453+
454+
# Remove any 'utm_' parameters
455+
search_params = {k: v for k, v in search_params.items() if not k.startswith('utm_')}
456+
457+
# Construct the new query string
458+
sorted_keys = sorted(search_params.keys())
459+
sorted_query = urlencode([(k, search_params[k]) for k in sorted_keys])
460+
461+
# Construct the final URL
462+
new_url = (
463+
parsed_url._replace(
464+
query=sorted_query,
465+
scheme=parsed_url.scheme,
466+
netloc=parsed_url.netloc,
467+
path=parsed_url.path.rstrip('/'),
473468
)
469+
.geturl()
470+
.lower()
471+
)
474472

475-
# Retain the URL fragment if required
476-
if not keep_url_fragment:
477-
new_url = new_url.split('#')[0]
473+
# Strip the URL fragment unless the caller asked to keep it
474+
if not keep_url_fragment:
475+
new_url = new_url.split('#')[0]
478476

479-
except Exception as exc:
480-
logger.warning(f'Failed to normalize URL: {exc}')
481-
return url
482-
483-
else:
484-
return new_url
477+
return new_url
485478

486479

487480
def compute_unique_key(
@@ -509,7 +502,12 @@ def compute_unique_key(
509502
A string representing the unique key for the request.
510503
"""
511504
# Normalize the URL and method.
512-
normalized_url = normalize_url(url, keep_url_fragment=keep_url_fragment)
505+
try:
506+
normalized_url = normalize_url(url, keep_url_fragment=keep_url_fragment)
507+
except Exception as exc:
508+
logger.warning(f'Failed to normalize URL: {exc}')
509+
normalized_url = url
510+
513511
normalized_method = method.upper()
514512

515513
# Compute and return the extended unique key if required.

0 commit comments

Comments
 (0)