Skip to content

Commit 9e7ebfa

Browse files
authored
Write docs for async methods explicitly, get rid of _make_async_docs decorator (apify#100)
Since the docs for async methods generated with the `_make_async_docs` decorator were not showing up in the hints in VSCode and were not processed by `pydoc-markdown`, it's better that we write the docstrings for async methods explicitly. So that we don't have to copy-paste all the docstrings manually, I wrote a script using the [Red Baron](https://github.com/PyCQA/redbaron) package that synchronizes the docstrings from the sync methods to their analogous async methods, making some adjustments so that the docstrings work for the async methods. I also wrote another script, which runs in CI and just checks that the docstrings are synchronized, failing the check if they're not. This was way easier than I had feared; we should have done this right from the start.
1 parent ee72c07 commit 9e7ebfa

38 files changed

Lines changed: 1366 additions & 194 deletions

.flake8

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ ignore =
2020
D409
2121
D413
2222
U101
23-
ignore-decorators = _make_async_docs
2423
per-file-ignores =
2524
docs/*: D
2625
scripts/*: D

.github/workflows/check_docs.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,8 @@ jobs:
1919
- name: Install dependencies
2020
run: make install-dev
2121

22+
- name: Check async docstrings
23+
run: make check-async-docstrings
24+
2225
- name: Check docs building
2326
run: make check-docs

.pre-commit-config.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,12 @@ repos:
77
language: system
88
pass_filenames: false
99

10+
- id: check-docs
11+
name: "Check whether async docstrings are aligned with sync ones"
12+
entry: "make check-async-docstrings"
13+
language: system
14+
pass_filenames: false
15+
1016
- id: type-check
1117
name: "Type-check codebase"
1218
entry: "make type-check"

Makefile

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ test:
1818
type-check:
1919
python3 -m mypy
2020

21-
check-code: lint type-check test
21+
check-code: lint check-async-docstrings type-check test
2222

2323
format:
2424
python3 -m isort src tests
@@ -30,5 +30,11 @@ docs:
3030
check-docs:
3131
./docs/res/check.sh
3232

33+
check-async-docstrings:
34+
python3 scripts/check_async_docstrings.py
35+
36+
fix-async-docstrings:
37+
python3 scripts/fix_async_docstrings.py
38+
3339
check-changelog-entry:
3440
python3 scripts/check_version_in_changelog.py

scripts/check_async_docstrings.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import re
2+
import sys
3+
from pathlib import Path
4+
5+
from redbaron import RedBaron # type: ignore
6+
from utils import sync_to_async_docstring
7+
8+
# Set to True as soon as any async method docstring is missing or differs from the expected one.
found_issues = False

# Walk every module of the client package and compare each `...ClientAsync` class with its sync twin.
clients_path = Path(__file__).parent.resolve() / '../src/apify_client'
for client_source_path in clients_path.glob('**/*.py'):
    # Read with an explicit encoding so the result does not depend on the platform's default locale.
    with open(client_source_path, 'r', encoding='utf-8') as source_file:
        red = RedBaron(source_code=source_file.read())

    async_class = red.find('ClassNode', name=re.compile('.*ClientAsync$'))
    if not async_class:
        continue

    sync_class = red.find('ClassNode', name=async_class.name.replace('ClientAsync', 'Client'))
    if sync_class is None:
        # The module has no sync twin for this async class, so there is nothing to compare against.
        continue

    for async_method in async_class.find_all('DefNode'):
        sync_method = sync_class.find('DefNode', name=async_method.name)
        if sync_method is None:
            # Async-only method without a sync analogue: no docstring to synchronize from.
            continue

        # Only sync methods whose first statement is a string (i.e. a docstring) are checked.
        if isinstance(sync_method.value[0].value, str):
            sync_docstring = sync_method.value[0].value
            async_docstring = async_method.value[0].value
            expected_docstring = sync_to_async_docstring(sync_docstring)

            if not isinstance(async_docstring, str):
                print(f'Missing docstring for "{async_class.name}.{async_method.name}"!')
                found_issues = True
                continue

            if expected_docstring != async_docstring:
                print(f'Docstring for "{async_class.name}.{async_method.name}" is out of sync with "{sync_class.name}.{sync_method.name}"!')
                found_issues = True

# A non-zero exit code makes the CI job / pre-commit hook fail when anything is out of sync.
if found_issues:
    print()
    print('Issues with async docstrings found. Please fix them manually or by running `make fix-async-docstrings`.')
    sys.exit(1)
else:
    print('Success: async method docstrings are in sync with sync method docstrings.')

scripts/fix_async_docstrings.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import re
2+
from pathlib import Path
3+
4+
from redbaron import RedBaron # type: ignore
5+
from utils import sync_to_async_docstring
6+
7+
clients_path = Path(__file__).parent.resolve() / '../src/apify_client'
8+
for client_source_path in clients_path.glob('**/*.py'):
9+
with open(client_source_path, 'r+') as source_file:
10+
red = RedBaron(source_code=source_file.read())
11+
async_class = red.find('ClassNode', name=re.compile('.*ClientAsync$'))
12+
if not async_class:
13+
continue
14+
sync_class = red.find('ClassNode', name=async_class.name.replace('ClientAsync', 'Client'))
15+
for async_method in async_class.find_all('DefNode'):
16+
sync_method = sync_class.find('DefNode', name=async_method.name)
17+
if isinstance(sync_method.value[0].value, str):
18+
sync_docstring = sync_method.value[0].value
19+
async_docstring = async_method.value[0].value
20+
correct_async_docstring = sync_to_async_docstring(sync_docstring)
21+
if async_docstring == correct_async_docstring:
22+
continue
23+
24+
# work around a bug in Red Baron, which indents docstrings too much when you insert them, so we have to un-indent them one level first
25+
correct_async_docstring = re.sub('^ ', '', correct_async_docstring, flags=re.M)
26+
27+
if not isinstance(async_docstring, str):
28+
print(f'Fixing missing docstring for "{async_class.name}.{async_method.name}"...')
29+
async_method.value.insert(0, correct_async_docstring)
30+
else:
31+
async_method.value[0] = correct_async_docstring
32+
33+
updated_source_code = red.dumps()
34+
35+
# work around a bug in Red Baron, which adds indents to docstrings when you insert them (including empty lines),
36+
# so we have to remove the extra whitespace
37+
updated_source_code = re.sub('^ $', '', updated_source_code, flags=re.M)
38+
39+
# work around a bug in Red Baron, which indents `except` and `finally` statements incorrectly,
40+
# so we have to add some extra whitespace
41+
updated_source_code = re.sub('^except', ' except', updated_source_code, flags=re.M)
42+
updated_source_code = re.sub('^ except', ' except', updated_source_code, flags=re.M)
43+
updated_source_code = re.sub('^finally', ' finally', updated_source_code, flags=re.M)
44+
updated_source_code = re.sub('^ finally', ' finally', updated_source_code, flags=re.M)
45+
46+
# work around a bug in Red Baron, which sometimes adds an extra new line to the end of a file
47+
updated_source_code = updated_source_code.rstrip() + '\n'
48+
49+
source_file.seek(0)
50+
source_file.write(updated_source_code)
51+
source_file.truncate()

scripts/utils.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import pathlib
2+
import re
23

34
PACKAGE_NAME = 'apify_client'
45
REPO_ROOT = pathlib.Path(__file__).parent.resolve() / '..'
@@ -36,3 +37,13 @@ def set_current_package_version(version: str) -> None:
3637
version_file.seek(0)
3738
version_file.write('\n'.join(updated_version_file_lines))
3839
version_file.truncate()
40+
41+
42+
# Convert a docstring from a sync resource client method
43+
# into a docstring for its async resource client analogue
44+
def sync_to_async_docstring(docstring: str) -> str:
    """Convert a sync client method docstring into the docstring for its async analogue.

    Every name ending in `Client` is rewritten to end in `ClientAsync`. Names which already end in `ClientAsync`
    are left untouched, so the conversion is idempotent and a sync docstring mentioning an async client
    is not corrupted into `...ClientAsyncAsync`.

    Args:
        docstring (str): The docstring of the sync method

    Returns:
        str: The docstring expected on the corresponding async method
    """
    # (pattern, replacement) pairs, applied in order.
    # The negative lookahead skips occurrences that are already followed by `Async`.
    substitutions = [(r'Client(?!Async)', r'ClientAsync')]
    res = docstring
    for (pattern, replacement) in substitutions:
        res = re.sub(pattern, replacement, res, flags=re.M)
    return res

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
'pre-commit ~= 2.20.0',
7171
'pytest ~= 7.2.0',
7272
'pytest-asyncio ~= 0.20.3',
73+
'redbaron ~= 0.9.2',
7374
'sphinx ~= 5.3.0',
7475
'sphinx-autodoc-typehints ~= 1.19.5',
7576
'sphinx-markdown-builder == 0.5.4', # pinned to 0.5.4, because 0.5.5 has a formatting bug

src/apify_client/_utils.py

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -222,23 +222,6 @@ def _maybe_extract_enum_member_value(maybe_enum_member: Any) -> Any:
222222
return maybe_enum_member
223223

224224

225-
BoundFunc = TypeVar('BoundFunc', bound=Callable[..., Any])
226-
227-
228-
def _make_async_docs(*, src: Callable) -> Callable[[BoundFunc], BoundFunc]:
229-
"""Copy docstring from another method, adjusting it to work in an async scenario."""
230-
substitutions = [(r'Client', r'ClientAsync')]
231-
232-
def decorator(dest: BoundFunc) -> BoundFunc:
233-
if not dest.__doc__ and src.__doc__:
234-
dest.__doc__ = src.__doc__
235-
for (pattern, replacement) in substitutions:
236-
dest.__doc__ = re.sub(pattern, replacement, dest.__doc__, flags=re.M)
237-
return dest
238-
239-
return decorator
240-
241-
242225
class ListPage(Generic[T]):
243226
"""A single page of items returned from a list() method."""
244227

src/apify_client/client.py

Lines changed: 81 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from typing import Dict, Optional, Union
22

33
from ._http_client import _HTTPClient, _HTTPClientAsync
4-
from ._utils import _make_async_docs
54
from .clients import (
65
ActorClient,
76
ActorClientAsync,
@@ -272,7 +271,6 @@ class ApifyClientAsync(_BaseApifyClient):
272271

273272
http_client: _HTTPClientAsync
274273

275-
@_make_async_docs(src=ApifyClient.__init__)
276274
def __init__(
277275
self,
278276
token: Optional[str] = None,
@@ -282,6 +280,16 @@ def __init__(
282280
min_delay_between_retries_millis: Optional[int] = 500,
283281
timeout_secs: Optional[int] = 360,
284282
):
283+
"""Initialize the ApifyClientAsync.
284+
285+
Args:
286+
token (str, optional): The Apify API token
287+
api_url (str, optional): The URL of the Apify API server to which to connect to. Defaults to https://api.apify.com
288+
max_retries (int, optional): How many times to retry a failed request at most
289+
min_delay_between_retries_millis (int, optional): How long will the client wait between retrying requests
290+
(increases exponentially from this value)
291+
timeout_secs (int, optional): The socket timeout of the HTTP requests sent to the Apify API
292+
"""
285293
super().__init__(
286294
token,
287295
api_url=api_url,
@@ -297,90 +305,139 @@ def __init__(
297305
timeout_secs=self.timeout_secs,
298306
)
299307

300-
@_make_async_docs(src=ApifyClient.actor)
301308
def actor(self, actor_id: str) -> ActorClientAsync:
309+
"""Retrieve the sub-client for manipulating a single actor.
310+
311+
Args:
312+
actor_id (str): ID of the actor to be manipulated
313+
"""
302314
return ActorClientAsync(resource_id=actor_id, **self._options())
303315

304-
@_make_async_docs(src=ApifyClient.actors)
305316
def actors(self) -> ActorCollectionClientAsync:
317+
"""Retrieve the sub-client for manipulating actors."""
306318
return ActorCollectionClientAsync(**self._options())
307319

308-
@_make_async_docs(src=ApifyClient.build)
309320
def build(self, build_id: str) -> BuildClientAsync:
321+
"""Retrieve the sub-client for manipulating a single actor build.
322+
323+
Args:
324+
build_id (str): ID of the actor build to be manipulated
325+
"""
310326
return BuildClientAsync(resource_id=build_id, **self._options())
311327

312-
@_make_async_docs(src=ApifyClient.builds)
313328
def builds(self) -> BuildCollectionClientAsync:
329+
"""Retrieve the sub-client for querying multiple builds of a user."""
314330
return BuildCollectionClientAsync(**self._options())
315331

316-
@_make_async_docs(src=ApifyClient.run)
317332
def run(self, run_id: str) -> RunClientAsync:
333+
"""Retrieve the sub-client for manipulating a single actor run.
334+
335+
Args:
336+
run_id (str): ID of the actor run to be manipulated
337+
"""
318338
return RunClientAsync(resource_id=run_id, **self._options())
319339

320-
@_make_async_docs(src=ApifyClient.runs)
321340
def runs(self) -> RunCollectionClientAsync:
341+
"""Retrieve the sub-client for querying multiple actor runs of a user."""
322342
return RunCollectionClientAsync(**self._options())
323343

324-
@_make_async_docs(src=ApifyClient.dataset)
325344
def dataset(self, dataset_id: str) -> DatasetClientAsync:
345+
"""Retrieve the sub-client for manipulating a single dataset.
346+
347+
Args:
348+
dataset_id (str): ID of the dataset to be manipulated
349+
"""
326350
return DatasetClientAsync(resource_id=dataset_id, **self._options())
327351

328-
@_make_async_docs(src=ApifyClient.datasets)
329352
def datasets(self) -> DatasetCollectionClientAsync:
353+
"""Retrieve the sub-client for manipulating datasets."""
330354
return DatasetCollectionClientAsync(**self._options())
331355

332-
@_make_async_docs(src=ApifyClient.key_value_store)
333356
def key_value_store(self, key_value_store_id: str) -> KeyValueStoreClientAsync:
357+
"""Retrieve the sub-client for manipulating a single key-value store.
358+
359+
Args:
360+
key_value_store_id (str): ID of the key-value store to be manipulated
361+
"""
334362
return KeyValueStoreClientAsync(resource_id=key_value_store_id, **self._options())
335363

336-
@_make_async_docs(src=ApifyClient.key_value_stores)
337364
def key_value_stores(self) -> KeyValueStoreCollectionClientAsync:
365+
"""Retrieve the sub-client for manipulating key-value stores."""
338366
return KeyValueStoreCollectionClientAsync(**self._options())
339367

340-
@_make_async_docs(src=ApifyClient.request_queue)
341368
def request_queue(self, request_queue_id: str, *, client_key: Optional[str] = None) -> RequestQueueClientAsync:
369+
"""Retrieve the sub-client for manipulating a single request queue.
370+
371+
Args:
372+
request_queue_id (str): ID of the request queue to be manipulated
373+
client_key (str): A unique identifier of the client accessing the request queue
374+
"""
342375
return RequestQueueClientAsync(resource_id=request_queue_id, client_key=client_key, **self._options())
343376

344-
@_make_async_docs(src=ApifyClient.request_queues)
345377
def request_queues(self) -> RequestQueueCollectionClientAsync:
378+
"""Retrieve the sub-client for manipulating request queues."""
346379
return RequestQueueCollectionClientAsync(**self._options())
347380

348-
@_make_async_docs(src=ApifyClient.webhook)
349381
def webhook(self, webhook_id: str) -> WebhookClientAsync:
382+
"""Retrieve the sub-client for manipulating a single webhook.
383+
384+
Args:
385+
webhook_id (str): ID of the webhook to be manipulated
386+
"""
350387
return WebhookClientAsync(resource_id=webhook_id, **self._options())
351388

352-
@_make_async_docs(src=ApifyClient.webhooks)
353389
def webhooks(self) -> WebhookCollectionClientAsync:
390+
"""Retrieve the sub-client for querying multiple webhooks of a user."""
354391
return WebhookCollectionClientAsync(**self._options())
355392

356-
@_make_async_docs(src=ApifyClient.webhook_dispatch)
357393
def webhook_dispatch(self, webhook_dispatch_id: str) -> WebhookDispatchClientAsync:
394+
"""Retrieve the sub-client for accessing a single webhook dispatch.
395+
396+
Args:
397+
webhook_dispatch_id (str): ID of the webhook dispatch to access
398+
"""
358399
return WebhookDispatchClientAsync(resource_id=webhook_dispatch_id, **self._options())
359400

360-
@_make_async_docs(src=ApifyClient.webhook_dispatches)
361401
def webhook_dispatches(self) -> WebhookDispatchCollectionClientAsync:
402+
"""Retrieve the sub-client for querying multiple webhook dispatches of a user."""
362403
return WebhookDispatchCollectionClientAsync(**self._options())
363404

364-
@_make_async_docs(src=ApifyClient.schedule)
365405
def schedule(self, schedule_id: str) -> ScheduleClientAsync:
406+
"""Retrieve the sub-client for manipulating a single schedule.
407+
408+
Args:
409+
schedule_id (str): ID of the schedule to be manipulated
410+
"""
366411
return ScheduleClientAsync(resource_id=schedule_id, **self._options())
367412

368-
@_make_async_docs(src=ApifyClient.schedules)
369413
def schedules(self) -> ScheduleCollectionClientAsync:
414+
"""Retrieve the sub-client for manipulating schedules."""
370415
return ScheduleCollectionClientAsync(**self._options())
371416

372-
@_make_async_docs(src=ApifyClient.log)
373417
def log(self, build_or_run_id: str) -> LogClientAsync:
418+
"""Retrieve the sub-client for retrieving logs.
419+
420+
Args:
421+
build_or_run_id (str): ID of the actor build or run for which to access the log
422+
"""
374423
return LogClientAsync(resource_id=build_or_run_id, **self._options())
375424

376-
@_make_async_docs(src=ApifyClient.task)
377425
def task(self, task_id: str) -> TaskClientAsync:
426+
"""Retrieve the sub-client for manipulating a single task.
427+
428+
Args:
429+
task_id (str): ID of the task to be manipulated
430+
"""
378431
return TaskClientAsync(resource_id=task_id, **self._options())
379432

380-
@_make_async_docs(src=ApifyClient.tasks)
381433
def tasks(self) -> TaskCollectionClientAsync:
434+
"""Retrieve the sub-client for manipulating tasks."""
382435
return TaskCollectionClientAsync(**self._options())
383436

384-
@_make_async_docs(src=ApifyClient.user)
385437
def user(self, user_id: Optional[str] = None) -> UserClientAsync:
438+
"""Retrieve the sub-client for querying users.
439+
440+
Args:
441+
user_id (str, optional): ID of user to be queried. If None, queries the user belonging to the token supplied to the client
442+
"""
386443
return UserClientAsync(resource_id=user_id, **self._options())

0 commit comments

Comments
 (0)