Skip to content

Commit 964a54e

Browse files
committed
Only 2 level of inheritance
1 parent 985a021 commit 964a54e

19 files changed

Lines changed: 332 additions & 165 deletions

src/crawlee/base_storage_client/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,4 @@
44
from .base_key_value_store_collection_client import BaseKeyValueStoreCollectionClient
55
from .base_request_queue_client import BaseRequestQueueClient
66
from .base_request_queue_collection_client import BaseRequestQueueCollectionClient
7-
from .base_resource_client import BaseResourceClient
8-
from .base_resource_collection_client import BaseResourceCollectionClient
97
from .base_storage_client import BaseStorageClient

src/crawlee/base_storage_client/base_dataset_client.py

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,50 @@
11
from __future__ import annotations
22

3-
from abc import abstractmethod
3+
from abc import ABC, abstractmethod
44
from typing import TYPE_CHECKING, AsyncIterator
55

6-
from .base_resource_client import BaseResourceClient
7-
86
if TYPE_CHECKING:
9-
from crawlee.storages.models import DatasetItemsListPage
7+
from crawlee.storages.models import DatasetItemsListPage, DatasetMetadata
108
from crawlee.types import JSONSerializable
119

1210

13-
class BaseDatasetClient(BaseResourceClient):
14-
"""Base class for dataset clients."""
11+
class BaseDatasetClient(ABC):
12+
"""Abstract base class for dataset resource clients.
13+
14+
These clients are specific to the type of resource they manage and operate under a designated storage
15+
client, like a memory storage client.
16+
"""
1517

1618
_LIST_ITEMS_LIMIT = 999_999_999_999
1719
"""This is what API returns in the x-apify-pagination-limit header when no limit query parameter is used."""
1820

21+
@abstractmethod
22+
async def get(self) -> DatasetMetadata | None:
23+
"""Get metadata about the dataset being managed by this client.
24+
25+
Returns:
26+
An object containing the dataset's details, or None if the dataset does not exist.
27+
"""
28+
29+
@abstractmethod
30+
async def update(
31+
self,
32+
*,
33+
name: str | None = None,
34+
) -> DatasetMetadata:
35+
"""Update the dataset metadata.
36+
37+
Args:
38+
name: The new name for the dataset.
39+
40+
Returns:
41+
An object reflecting the updated dataset metadata.
42+
"""
43+
44+
@abstractmethod
45+
async def delete(self) -> None:
46+
"""Permanently delete the dataset managed by this client."""
47+
1948
@abstractmethod
2049
async def list_items(
2150
self,
@@ -156,7 +185,7 @@ async def stream_items(
156185
skip_hidden: bool | None = None,
157186
xml_root: str | None = None,
158187
xml_row: str | None = None,
159-
) -> AsyncIterator:
188+
) -> AsyncIterator[dict]:
160189
"""Retrieves dataset items as a streaming response.
161190
162191
Args:
@@ -176,7 +205,7 @@ async def stream_items(
176205
xml_root: Custom root element name for XML output; default is 'items'.
177206
xml_row: Custom element name for each item in XML; default is 'item'.
178207
179-
Returns:
208+
Yields:
180209
The dataset items in a streaming response.
181210
"""
182211

Lines changed: 54 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,58 @@
11
from __future__ import annotations
22

3-
from .base_resource_collection_client import BaseResourceCollectionClient
3+
from abc import ABC, abstractmethod
4+
from typing import TYPE_CHECKING
45

6+
if TYPE_CHECKING:
7+
from crawlee.storages.models import DatasetListPage, DatasetMetadata
58

6-
class BaseDatasetCollectionClient(BaseResourceCollectionClient):
7-
"""Base class for dataset collection clients."""
9+
10+
class BaseDatasetCollectionClient(ABC):
11+
"""Abstract base class for dataset collection clients.
12+
13+
This collection client handles operations that involve multiple instances of a given resource type.
14+
"""
15+
16+
@abstractmethod
17+
async def get_or_create(
18+
self,
19+
*,
20+
id: str | None = None,
21+
name: str | None = None,
22+
schema: dict | None = None,
23+
) -> DatasetMetadata:
24+
"""Retrieve an existing dataset by its name or ID, or create a new one if it does not exist.
25+
26+
Args:
27+
id: Optional ID of the dataset to retrieve or create. If provided, the method will attempt
28+
to find a dataset with this ID.
29+
30+
name: Optional name of the dataset resource to retrieve or create. If provided, the method will
31+
attempt to find a dataset with this name.
32+
33+
schema: Optional schema for the dataset resource to be created.
34+
35+
Returns:
36+
Metadata object containing the information of the retrieved or created dataset.
37+
"""
38+
39+
@abstractmethod
40+
async def list(
41+
self,
42+
*,
43+
unnamed: bool | None = None,
44+
limit: int | None = None,
45+
offset: int | None = None,
46+
desc: bool | None = None,
47+
) -> DatasetListPage:
48+
"""List the available datasets.
49+
50+
Args:
51+
unnamed: Whether to list only the unnamed datasets.
52+
limit: Maximum number of datasets to return.
53+
offset: Number of datasets to skip from the beginning of the list.
54+
desc: Whether to sort the datasets in descending order.
55+
56+
Returns:
57+
The list of available datasets matching the specified filters.
58+
"""

src/crawlee/base_storage_client/base_key_value_store_client.py

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,45 @@
11
from __future__ import annotations
22

3-
from abc import abstractmethod
3+
from abc import ABC, abstractmethod
44
from typing import TYPE_CHECKING, Any, AsyncIterator
55

6-
from .base_resource_client import BaseResourceClient
7-
86
if TYPE_CHECKING:
9-
from crawlee.storages.models import KeyValueStoreListKeysPage, KeyValueStoreRecord
7+
from crawlee.storages.models import KeyValueStoreListKeysPage, KeyValueStoreMetadata, KeyValueStoreRecord
8+
9+
10+
class BaseKeyValueStoreClient(ABC):
11+
"""Abstract base class for key-value store resource clients.
12+
13+
These clients are specific to the type of resource they manage and operate under a designated storage
14+
client, like a memory storage client.
15+
"""
16+
17+
@abstractmethod
18+
async def get(self) -> KeyValueStoreMetadata | None:
19+
"""Get metadata about the key-value store being managed by this client.
20+
21+
Returns:
22+
An object containing the key-value store's details, or None if the key-value store does not exist.
23+
"""
1024

25+
@abstractmethod
26+
async def update(
27+
self,
28+
*,
29+
name: str | None = None,
30+
) -> KeyValueStoreMetadata:
31+
"""Update the key-value store metadata.
32+
33+
Args:
34+
name: The new name for the key-value store.
1135
12-
class BaseKeyValueStoreClient(BaseResourceClient):
13-
"""Base class for key-value store clients."""
36+
Returns:
37+
An object reflecting the updated key-value store metadata.
38+
"""
39+
40+
@abstractmethod
41+
async def delete(self) -> None:
42+
"""Permanently delete the key-value store managed by this client."""
1443

1544
@abstractmethod
1645
async def list_keys(
Lines changed: 54 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,58 @@
11
from __future__ import annotations
22

3-
from .base_resource_collection_client import BaseResourceCollectionClient
3+
from abc import ABC, abstractmethod
4+
from typing import TYPE_CHECKING
45

6+
if TYPE_CHECKING:
7+
from crawlee.storages.models import KeyValueStoreListPage, KeyValueStoreMetadata
58

6-
class BaseKeyValueStoreCollectionClient(BaseResourceCollectionClient):
7-
"""Base class for key-value store collection clients."""
9+
10+
class BaseKeyValueStoreCollectionClient(ABC):
11+
"""Abstract base class for key-value store collection clients.
12+
13+
This collection client handles operations that involve multiple instances of a given resource type.
14+
"""
15+
16+
@abstractmethod
17+
async def get_or_create(
18+
self,
19+
*,
20+
id: str | None = None,
21+
name: str | None = None,
22+
schema: dict | None = None,
23+
) -> KeyValueStoreMetadata:
24+
"""Retrieve an existing key-value store by its name or ID, or create a new one if it does not exist.
25+
26+
Args:
27+
id: Optional ID of the key-value store to retrieve or create. If provided, the method will attempt
28+
to find a key-value store with this ID.
29+
30+
name: Optional name of the key-value store resource to retrieve or create. If provided, the method will
31+
attempt to find a key-value store with this name.
32+
33+
schema: Optional schema for the key-value store resource to be created.
34+
35+
Returns:
36+
Metadata object containing the information of the retrieved or created key-value store.
37+
"""
38+
39+
@abstractmethod
40+
async def list(
41+
self,
42+
*,
43+
unnamed: bool | None = None,
44+
limit: int | None = None,
45+
offset: int | None = None,
46+
desc: bool | None = None,
47+
) -> KeyValueStoreListPage:
48+
"""List the available key-value stores.
49+
50+
Args:
51+
unnamed: Whether to list only the unnamed key-value stores.
52+
limit: Maximum number of key-value stores to return.
53+
offset: Number of key-value stores to skip from the beginning of the list.
54+
desc: Whether to sort the key-value stores in descending order.
55+
56+
Returns:
57+
The list of available key-value stores matching the specified filters.
58+
"""

src/crawlee/base_storage_client/base_request_queue_client.py

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,46 @@
11
from __future__ import annotations
22

3-
from abc import abstractmethod
3+
from abc import ABC, abstractmethod
44
from typing import TYPE_CHECKING
55

6-
from .base_resource_client import BaseResourceClient
7-
86
if TYPE_CHECKING:
97
from crawlee.request import Request
10-
from crawlee.storages.models import RequestQueueHead, RequestQueueOperationInfo
8+
from crawlee.storages.models import RequestQueueHead, RequestQueueMetadata, RequestQueueOperationInfo
9+
10+
11+
class BaseRequestQueueClient(ABC):
12+
"""Abstract base class for request queue resource clients.
13+
14+
These clients are specific to the type of resource they manage and operate under a designated storage
15+
client, like a memory storage client.
16+
"""
17+
18+
@abstractmethod
19+
async def get(self) -> RequestQueueMetadata | None:
20+
"""Get metadata about the request queue being managed by this client.
21+
22+
Returns:
23+
An object containing the request queue's details, or None if the request queue does not exist.
24+
"""
1125

26+
@abstractmethod
27+
async def update(
28+
self,
29+
*,
30+
name: str | None = None,
31+
) -> RequestQueueMetadata:
32+
"""Update the request queue metadata.
33+
34+
Args:
35+
name: The new name for the request queue.
1236
13-
class BaseRequestQueueClient(BaseResourceClient):
14-
"""Base class for request queue clients."""
37+
Returns:
38+
An object reflecting the updated request queue metadata.
39+
"""
40+
41+
@abstractmethod
42+
async def delete(self) -> None:
43+
"""Permanently delete the request queue managed by this client."""
1544

1645
@abstractmethod
1746
async def list_head(self, *, limit: int | None = None) -> RequestQueueHead:
Lines changed: 54 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,58 @@
11
from __future__ import annotations
22

3-
from .base_resource_collection_client import BaseResourceCollectionClient
3+
from abc import ABC, abstractmethod
4+
from typing import TYPE_CHECKING
45

6+
if TYPE_CHECKING:
7+
from crawlee.storages.models import RequestQueueListPage, RequestQueueMetadata
58

6-
class BaseRequestQueueCollectionClient(BaseResourceCollectionClient):
7-
"""Base class for request queue collection clients."""
9+
10+
class BaseRequestQueueCollectionClient(ABC):
11+
"""Abstract base class for request queue collection clients.
12+
13+
This collection client handles operations that involve multiple instances of a given resource type.
14+
"""
15+
16+
@abstractmethod
17+
async def get_or_create(
18+
self,
19+
*,
20+
id: str | None = None,
21+
name: str | None = None,
22+
schema: dict | None = None,
23+
) -> RequestQueueMetadata:
24+
"""Retrieve an existing request queue by its name or ID, or create a new one if it does not exist.
25+
26+
Args:
27+
id: Optional ID of the request queue to retrieve or create. If provided, the method will attempt
28+
to find a request queue with this ID.
29+
30+
name: Optional name of the request queue resource to retrieve or create. If provided, the method will
31+
attempt to find a request queue with this name.
32+
33+
schema: Optional schema for the request queue resource to be created.
34+
35+
Returns:
36+
Metadata object containing the information of the retrieved or created request queue.
37+
"""
38+
39+
@abstractmethod
40+
async def list(
41+
self,
42+
*,
43+
unnamed: bool | None = None,
44+
limit: int | None = None,
45+
offset: int | None = None,
46+
desc: bool | None = None,
47+
) -> RequestQueueListPage:
48+
"""List the available request queues.
49+
50+
Args:
51+
unnamed: Whether to list only the unnamed request queues.
52+
limit: Maximum number of request queues to return.
53+
offset: Number of request queues to skip from the beginning of the list.
54+
desc: Whether to sort the request queues in descending order.
55+
56+
Returns:
57+
The list of available request queues matching the specified filters.
58+
"""

0 commit comments

Comments
 (0)