Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions src/crawlee/http_clients/_httpx.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

if TYPE_CHECKING:
from collections.abc import Iterable
from ssl import SSLContext

from crawlee._types import HttpMethod, HttpPayload
from crawlee.base_storage_client._models import Request
Expand Down Expand Up @@ -101,6 +102,7 @@ def __init__(
ignore_http_error_status_codes: Iterable[int] = (),
http1: bool = True,
http2: bool = True,
verify: str | bool | SSLContext = True,
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know that this is the name of the attribute in httpx, but maybe we could rename this to something like verify_certificates or verify_ssl for better clarity?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These names are indeed clearer.

That said, here is a weak counter-argument: the `requests` API has become such a standard for HTTP clients in Python that any Python developer will expect exactly `verify`.

header_generator: HeaderGenerator | None = _DEFAULT_HEADER_GENERATOR,
**async_client_kwargs: Any,
) -> None:
Expand All @@ -112,6 +114,7 @@ def __init__(
ignore_http_error_status_codes: HTTP status codes to ignore as errors.
http1: Whether to enable HTTP/1.1 support.
http2: Whether to enable HTTP/2 support.
verify: SSL certificates used to verify the identity of requested hosts.
header_generator: Header generator instance to use for generating common headers.
async_client_kwargs: Additional keyword arguments for `httpx.AsyncClient`.
"""
Expand All @@ -122,6 +125,7 @@ def __init__(
)
self._http1 = http1
self._http2 = http2
self._verify = verify
self._async_client_kwargs = async_client_kwargs
self._header_generator = header_generator

Expand Down Expand Up @@ -219,13 +223,12 @@ def _get_client(self, proxy_url: str | None) -> httpx.AsyncClient:
# Prepare a default kwargs for the new client.
kwargs: dict[str, Any] = {
'transport': _HttpxTransport(
proxy=proxy_url,
http1=self._http1,
http2=self._http2,
proxy=proxy_url, http1=self._http1, http2=self._http2, verify=self._verify
),
'proxy': proxy_url,
'http1': self._http1,
'http2': self._http2,
'verify': self._verify,
}

# Update the default kwargs with any additional user-provided kwargs.
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/basic_crawler/test_basic_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
import asyncio
import json
import logging
import os
from collections import Counter
from dataclasses import dataclass
from datetime import timedelta
import os
from pathlib import Path
from typing import TYPE_CHECKING, Any
from unittest.mock import AsyncMock, Mock
Expand Down