-
Notifications
You must be signed in to change notification settings - Fork 66
Expand file tree
/
Copy pathjson_builder.py
More file actions
317 lines (254 loc) · 8.98 KB
/
json_builder.py
File metadata and controls
317 lines (254 loc) · 8.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
# (C) 2023 GoodData Corporation
import importlib
import inspect
import json
import os
import pkgutil
import re
import sys
from types import FunctionType, ModuleType
from typing import Any
import cattrs
import docstring_parser
from attr import define
from docstring_parser import parse
from docstring_parser.common import Docstring, DocstringStyle
# Object definitions
@define
class ParamsData:
    """Documentation of a single parameter, copied from a parsed docstring entry."""

    arg_name: str
    default: str | None
    is_optional: bool | None
    type_name: str | None
    description: str | None

    @classmethod
    def from_docstr_parameter(cls, docstr_parameter):
        """
        Build a ParamsData from one parsed docstring parameter

        Args:
            docstr_parameter: object exposing the attributes `arg_name`, `default`,
                `is_optional`, `type_name` and `description`

        Returns:
            ParamsData: instance holding the five copied attributes
        """
        # The field names are identical on both sides, so copy them by name
        copied_attrs = ("arg_name", "default", "is_optional", "type_name", "description")
        return cls(**{attr: getattr(docstr_parameter, attr) for attr in copied_attrs})
@define
class ReturnData:
    """Documentation of a return value, copied from a parsed docstring."""

    type_name: str | None
    description: str | None
    return_name: str | None

    @classmethod
    def from_parsed_docstr(cls, parsed_docstr: Docstring):
        """
        Build a ReturnData from the `returns` entry of a parsed docstring

        Args:
            parsed_docstr (Docstring): parsed docstring; its `returns` attribute is
                dereferenced without a None check, so the caller has to make sure
                it is set

        Returns:
            ReturnData: type name, description and return name of the entry
        """
        returns_entry = parsed_docstr.returns
        return cls(
            type_name=returns_entry.type_name,
            description=returns_entry.description,
            return_name=returns_entry.return_name,
        )
@define
class DocstringData:
    """Structured content of one parsed docstring."""

    params: list[ParamsData]
    long_description: str | None
    short_description: str | None
    examples: str
    returns: ReturnData | None = None

    @classmethod
    def from_parsed_docstr(cls, parsed_docstr: Docstring):
        """
        Build a DocstringData from a parsed docstring

        Args:
            parsed_docstr (Docstring): parsed docstring to convert

        Returns:
            DocstringData: converted data; `returns` stays None when the parsed
                docstring has no truthy `returns` entry
        """
        # NOTE(review): `examples` is stored as str() of whatever the parser exposes;
        # if that is a list of objects, the text is their repr - confirm this is intended
        return cls(
            params=list(map(ParamsData.from_docstr_parameter, parsed_docstr.params)),
            long_description=parsed_docstr.long_description,
            short_description=parsed_docstr.short_description,
            examples=str(parsed_docstr.examples),
            returns=ReturnData.from_parsed_docstr(parsed_docstr) if parsed_docstr.returns else None,
        )
@define
class SignatureData:
    """Stringified view of a callable's signature."""

    # One (parameter text, annotation text) pair per parameter
    params: list[tuple[str, str]]
    # Text form of the return annotation
    return_annotation: str
@define
class FunctionData:
    """Everything collected about one function or property accessor."""

    # Raw docstring text, None when there is none
    docstring: str | None
    # Parameters and return annotation of the callable
    signature: SignatureData
    # True when the callable is an accessor of a property
    is_property: bool = False
    # Structured docstring, None when it is missing or could not be parsed
    docstring_parsed: DocstringData | None = None
    # Discriminator written to the serialized output
    kind: str = "function"
@define
class ClassData:
    """Everything collected about one class."""

    # Raw docstring text, None when there is none
    docstring: str | None
    # Collected callables of the class, keyed by member name
    functions: dict[str, FunctionData]
    # Structured docstring, None when there is no docstring
    docstring_parsed: DocstringData | None = None
    # Discriminator written to the serialized output
    kind: str = "class"
# Pre-compiled pattern used by `docstring_fixes`:
# an "Args:" header followed only by newlines/spaces and the word "None"
docstr_fix_none_pattern = re.compile(r"Args:[\n ]*None")


def docstring_fixes(docstr: str) -> str:
    """
    Repair known formatting problems that the docstring parser cannot handle

    Currently a single repair is applied: an `Args:` section whose only
    content is `None` is removed, because it is not valid.

    Args:
        docstr: docstring to fix

    Returns:
        str: fixed docstring
    """
    return docstr_fix_none_pattern.sub("", docstr)
def docstring_data(docstr: str | None) -> DocstringData | None:
    """
    Parse a Google style docstring into a DocstringData object

    Args:
        docstr (str | None): docstring to parse

    Returns:
        DocstringData | None: parsed docstring data, None when `docstr` is None

    Raises:
        ValueError: if the parser rejects the docstring, or if the docstring
            contains a ":param" field marker
    """
    if docstr is None:
        return None

    docstr = docstring_fixes(docstr)

    try:
        parsed_docstr = parse(docstr, style=DocstringStyle.GOOGLE)
    except docstring_parser.common.ParseError as err:
        # Chain the parser error so the root cause stays visible in the traceback
        raise ValueError(f"Invalid docstring: {docstr}") from err

    if ":param" in docstr:
        # Docstrings written with ":param" field markers (labelled "numpy" in the
        # message below) can be parsed without throwing an error while the parsed
        # data is invalid; this is a quick way to detect those
        raise ValueError(f"Invalid docstring (numpy): {docstr}")

    return DocstringData.from_parsed_docstr(parsed_docstr)
def signature_data(sig: inspect.Signature) -> SignatureData:
    """
    Convert a signature object into its stringified SignatureData form

    Args:
        sig: Signature object to be analysed

    Returns:
        SignatureData: one (parameter text, annotation text) pair per parameter
            except `self`, plus the return annotation text; a missing
            annotation is rendered as the text "None"
    """
    missing = inspect.Parameter.empty

    def annotation_text(annotation) -> str:
        # A missing annotation is reported as None before being stringified
        return str(None if annotation == missing else annotation)

    collected = [
        (str(parameter), annotation_text(parameter.annotation))
        for parameter in sig.parameters.values()
        if parameter.name != "self"  # `self` carries no information for the docs
    ]
    return SignatureData(params=collected, return_annotation=annotation_text(sig.return_annotation))
def function_data(func: FunctionType, is_property: bool = False) -> FunctionData:
    """
    Collect docstring and signature information about a function

    An unparsable docstring is not fatal: a warning is printed and the
    parsed docstring is left as None.

    Args:
        func: Function object to be analysed
        is_property: Whether the function is a property

    Returns:
        FunctionData: parsed function data
    """
    raw_doc = inspect.getdoc(func)

    try:
        parsed_doc = docstring_data(raw_doc)
    except ValueError:
        print(f"WARN: Invalid docstring in func {inspect.getmodule(func)}: {str(func)}")
        parsed_doc = None

    signature = signature_data(inspect.signature(func))
    return FunctionData(
        docstring=raw_doc,
        signature=signature,
        is_property=is_property,
        docstring_parsed=parsed_doc,
    )
def class_data(obj: type) -> ClassData:
    """
    Collect docstring information about a class and its functions and properties

    Args:
        obj(type): class object to be analysed

    Returns:
        ClassData: parsed class data; `functions` holds one entry per plain
            function and one entry per property, keyed by member name

    Raises:
        ValueError: if the docstring of the class itself is invalid
    """
    members = dict(inspect.getmembers(obj))
    class_doc = inspect.getdoc(obj)
    ret = ClassData(
        docstring=class_doc,
        docstring_parsed=docstring_data(class_doc),
        functions={},
    )

    for key, value in members.items():
        if isinstance(value, FunctionType):
            ret.functions[key] = function_data(value)
        elif isinstance(value, property):
            # A property yields a single entry under its name. Use the first
            # accessor that exists, getter first, so that a setter or deleter
            # never replaces the documented getter.
            accessor = next(
                (func for func in (value.fget, value.fset, value.fdel) if func is not None),
                None,
            )
            if accessor is not None:
                ret.functions[key] = function_data(accessor, is_property=True)

    return ret
def module_data(module: ModuleType, module_name: str) -> dict:
"""
Parse a module object and return formatted docstring data about its contents
Args:
module (ModuleType): module object to be analysed
Returns:
dict: parsed module data
"""
data: dict[str, Any] = {"kind": "module"}
objects = vars(module) if hasattr(module, "__dict__") else {}
for name, obj in objects.items():
obj_module = inspect.getmodule(obj)
if obj_module is None:
continue
if isinstance(obj, type):
# Filter out non-gooddata libraries
if module_name in obj_module.__name__:
data[name] = class_data(obj)
elif isinstance(obj, ModuleType) and module_name in obj_module.__name__:
data[name] = module_data(obj)
return data
def parse_package(obj: ModuleType, module_name: str = None) -> dict:
"""
Parse the package and its submodules into a dict object, that
can be converted into a json
Args:
obj (ModuleType): package object
module_name: name of the module
Returns:
dict: data of package
Example:
{
"submodule1": {
"file1": {
"class_name" : class_data (see `object_data`)
}
}
}
"""
data = {"kind": "module"}
if not isinstance(obj, ModuleType):
return data
iterator = pkgutil.iter_modules(obj.__path__)
for item in iterator:
if item.name not in data:
data[item.name] = {}
if item.ispkg and item.name in vars(obj):
data[item.name].update(parse_package(vars(obj)[item.name], module_name))
else:
if item.name in vars(obj):
module = vars(obj)[item.name]
data[item.name].update(module_data(module, module_name))
return data
def import_submodules(pkg_name: str) -> dict[str, ModuleType]:
    """
    Import all submodules of a package, enabling their parsing

    Args:
        pkg_name (str): package name

    Returns:
        dict[str, ModuleType]: imported modules keyed by their name relative
            to the package (e.g. `sub.module` for `pkg_name.sub.module`)
    """
    # `import importlib` alone does not guarantee that `importlib.util` is loaded
    import importlib.util

    # Returns the already imported package, or imports it when it is not loaded yet
    package = importlib.import_module(pkg_name)

    # The prefix makes `walk_packages` yield fully qualified names, which it
    # needs in order to import nested packages and descend into them
    prefix = f"{pkg_name}."
    imported: dict[str, ModuleType] = {}
    for _finder, full_name, _is_pkg in pkgutil.walk_packages(package.__path__, prefix):
        if importlib.util.find_spec(full_name) is not None:
            imported[full_name.removeprefix(prefix)] = importlib.import_module(full_name)
    return imported
if __name__ == "__main__":
    import gooddata_pandas
    import gooddata_sdk

    # Load every submodule first, only imported modules can be inspected
    for package_name in ("gooddata_sdk", "gooddata_pandas"):
        import_submodules(package_name)

    # Merge both packages into one mapping; on equal keys the gooddata_sdk data wins
    output_json: dict = {}
    output_json.update(cattrs.unstructure(parse_package(gooddata_pandas, "gooddata_pandas")))
    output_json.update(cattrs.unstructure(parse_package(gooddata_sdk, "gooddata_sdk")))

    with open("data.json", "w") as f:
        serialized = json.dumps(output_json)
        f.write(serialized)

    print(f"Saved the .json file: `data.json` to {os.getcwd()}")