Skip to content

Commit 59566e5

Browse files
committed
Add Snapshotter
1 parent 12e1b4b commit 59566e5

17 files changed

Lines changed: 1136 additions & 4 deletions

pyproject.toml

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,14 @@ requires-python = ">=3.8"
2525
# We intentionally use inclusive ordered comparison clauses for non-Apify packages in order to enhance the Apify SDK's
2626
# compatibility with a wide range of external packages. This decision was discussed in detail in the following PR:
2727
# https://github.com/apify/apify-sdk-python/pull/154
28-
dependencies = ["httpx >= 0.24.1", "typing-extensions >= 4.1.0"]
28+
dependencies = [
29+
"apify >= 1.5.0",
30+
"apify-shared >= 1.1.0",
31+
"colorama >= 0.4.6",
32+
"httpx >= 0.25.0",
33+
"pyee >= 11.1.0",
34+
"typing-extensions >= 4.1.0",
35+
]
2936

3037
[project.optional-dependencies]
3138
dev = [
@@ -43,6 +50,7 @@ dev = [
4350
"respx ~= 0.20.1",
4451
"ruff ~= 0.1.13",
4552
"twine ~= 4.0.2",
53+
"types-colorama ~= 0.4.15.20240106",
4654
]
4755

4856
[project.urls]
@@ -93,6 +101,9 @@ ignore = [
93101
"S311", # Standard pseudo-random generators are not suitable for cryptographic purposes
94102
"TD002", # Missing author in TODO; try: `# TODO(<author_name>): ...` or `# TODO @<author_name>: ...
95103
"TRY003", # Avoid specifying long messages outside the exception class
104+
"D", # TODO: temporarily ignore everything from the pydocstyle
105+
"T", # TODO: temporarily ignore occurence of print statements
106+
"ERA", # TODO: tmp
96107
]
97108

98109
[tool.ruff.format]
@@ -125,6 +136,9 @@ inline-quotes = "single"
125136
[tool.ruff.lint.pydocstyle]
126137
convention = "google"
127138

139+
[tool.ruff.lint.isort]
140+
known-first-party = ["crawlee"]
141+
128142
[tool.pytest]
129143
asyncio_mode = "auto"
130144
timeout = 1200

src/crawlee/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
from .main import BasicCrawler
1+
from .configuration import Configuration
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
from .snapshotter import Snapshotter
2+
from .system_status import SystemStatus
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
"""
2+
Inspired by: https://github.com/apify/crawlee/blob/master/packages/utils/src/internals/memory-info.ts
3+
"""
4+
5+
from dataclasses import dataclass
6+
7+
8+
@dataclass
9+
class MemoryInfo:
10+
"""Describes memory usage of the process."""
11+
12+
total_bytes: int # Total memory available in the system or container
13+
free_bytes: int # Amount of free memory in the system or container
14+
used_bytes: int # Amount of memory used (= total_bytes - free_bytes)
15+
main_process_bytes: int # Amount of memory used by the current Python process
16+
child_processes_bytes: int # Amount of memory used by child processes of the current Python process
17+
18+
19+
async def get_memory_info() -> MemoryInfo:
20+
# TODO: implement actual memory measurement; the return value below is a hard-coded placeholder
21+
...
22+
return MemoryInfo(1, 2, 3, 4, 5)

src/crawlee/autoscaling/py.typed

Whitespace-only changes.

0 commit comments

Comments
 (0)