-
-
Notifications
You must be signed in to change notification settings - Fork 163
Expand file tree
/
Copy pathgen_python.py
More file actions
executable file
·254 lines (211 loc) · 8.37 KB
/
gen_python.py
File metadata and controls
executable file
·254 lines (211 loc) · 8.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import codecs, os, re, subprocess, sys, yaml
# File names that are skipped while walking the docs tree (the index page is
# not read). This is a tuple on purpose: the walker tests
# `item not in IGNORED_FILES`, and against a plain string that expression is
# a substring test, which would also skip files named e.g. "index" or "md".
IGNORED_FILES = ("index.md",)
# Dotted prefix of the class that all ReQL objects share
query = 'rethinkdb.ast.RqlQuery.'

# Dotted Python prefix associated with each type name used in the docs.
# Every plain ReQL value type resolves to the common query class ...
parents = dict.fromkeys(
    (
        'sequence', 'query', 'stream', 'grouped_stream', 'singleSelection',
        'array', 'number', 'bool', 'value', 'string', 'time', 'any',
        'geometry', 'point', 'line', 'polygon',
    ),
    query
)
# ... while the remaining types each have their own prefix.
parents.update({
    None: '',
    'r': 'rethinkdb.',
    'connection': 'rethinkdb.net.Connection.',
    'cursor': 'rethinkdb.net.Cursor.',
    'db': 'rethinkdb.ast.DB.',
    'table': 'rethinkdb.ast.Table.',
    'set_loop_type': 'rethinkdb.',
})

# Real Python names for the command names used in the docs.
# Each value is a list of (prefix, attribute) pairs that all receive the same
# documentation; an empty list means nothing is written for that command.
# A value may also be a callable that takes the parent prefix and returns
# such a list.
tags = {
    # Indexing
    '[] (bracket)': [(query, '__getitem__')],
    'nth, []': [(query, 'nth')],
    'slice, []': [(query, 'slice')],

    # Arithmetic operators
    '+': [(query, '__add__'), ('rethinkdb.', 'add')],
    '-': [(query, '__sub__'), ('rethinkdb.', 'sub')],
    '*': [(query, '__mul__'), ('rethinkdb.', 'mul')],
    '/': [(query, '__div__'), ('rethinkdb.', 'div')],
    '%': [(query, '__mod__'), ('rethinkdb.', 'mod')],

    # Logical operators
    '&, and_': [(query, '__and__'), ('rethinkdb.', 'and_')],
    '|, or_': [(query, '__or__'), ('rethinkdb.', 'or_')],
    '~, not_': [
        (query, '__invert__'),
        (query, 'not_'),
        ('rethinkdb.', 'not_'),
    ],

    # Comparison operators
    '==, eq': [(query, '__eq__'), (query, 'eq')],
    '!=, ne': [(query, '__ne__'), (query, 'ne')],
    '<, lt': [(query, '__lt__'), (query, 'lt')],
    '>, gt': [(query, '__gt__'), (query, 'gt')],
    '<=, le': [(query, '__le__'), (query, 'le')],
    '>=, ge': [(query, '__ge__'), (query, 'ge')],

    # Everything else
    'r': [('', 'rethinkdb')],
    'rethinkdb': [('', 'rethinkdb')],
    'repl': [('rethinkdb.net.Connection.', 'repl')],
    # No entry is produced for `count` when its parent is the top-level module
    'count': lambda parent: [] if parent == 'rethinkdb.' else [(query, 'count')],
    'to_json_string, to_json': [(query, 'to_json_string'), (query, 'to_json')],
    'for': [],
    'list': [],
    'set_loop_type': [('rethinkdb.', 'set_loop_type')],
}
# Write the header of the docs.py file
def write_header(file):
    """Write the opening lines of the generated docs.py to `file`.

    The header records the output of `git log -n 1` for the current working
    directory and opens the `docsSource` list that the per-command entries
    are appended to afterwards.

    file -- writable text file object receiving the header.
    """
    # NOTE(review): the command is run without a shell, so the double quotes
    # in the --pretty argument look like they end up in the written hash;
    # confirm whether that is intended.
    git_log = subprocess.Popen(
        ['git', 'log', '-n', '1', '--pretty=format:"%H"'],
        stdout=subprocess.PIPE
    )
    raw_output = git_log.communicate()[0]
    commit = raw_output.decode('utf-8')

    header = '''# -*- coding: utf-8 -*-
# This file was generated by _scripts/gen_python.py from the rethinkdb documentation in http://github.com/rethinkdb/docs
# hash: %s
import rethinkdb
docsSource = [
''' % commit
    file.write(header)
def write_footer(file):
    """Write the ending of the generated docs.py to `file`.

    The footer closes the `docsSource` list and appends the loop that, when
    the generated module is imported, attaches each documentation text to
    its function as `__doc__` (through `__func__` when the object has one).

    The loop body is emitted with indentation: it is Python source code, and
    without the indentation the generated file cannot be compiled.

    file -- writable text file object receiving the footer.
    """
    file.write('''
]
for function, text in docsSource:
    try:
        text = str(text.decode('utf-8'))
    except UnicodeEncodeError:
        pass
    if hasattr(function, "__func__"):
        function.__func__.__doc__ = text
    else:
        function.__doc__ = text
''')
# Browse all the docs
def browse_files(base, result_file):
    """Walk the directory `base` and add the documentation of every file found.

    Entries are visited in sorted name order, which keeps the output order
    stable. Entries whose name starts with a dot or is matched by
    IGNORED_FILES are skipped. All files of a directory are handled before
    any of its sub-directories is descended into.

    base        -- directory to scan.
    result_file -- writable file object handed on to add_doc().
    """
    pending_dirs = []
    for entry in sorted(os.listdir(base)):
        if entry[0] == '.' or entry in IGNORED_FILES:
            continue
        path = os.path.join(base, entry)
        if not os.path.isfile(path):
            # Anything that is not a regular file is explored later
            pending_dirs.append(path)
            continue
        add_doc(path, result_file)

    for directory in pending_dirs:
        browse_files(directory, result_file)
# Look up the Python prefix of a command from its JavaScript doc page
def _js_parent_prefix(file_name, limiter_yaml):
    """Return the `parents` prefix for the command documented in `file_name`.

    The io data is read from the JavaScript version of the page, whose path
    is obtained by replacing 'python' with 'javascript' in `file_name`. The
    first input type of the first io entry is mapped through `parents`.

    This is best effort: if the JavaScript page is missing (for repl for
    example) or its header cannot be used, an empty string is returned.

    file_name    -- path of the Python doc page.
    limiter_yaml -- compiled pattern matching the '---' header delimiters.
    """
    # NOTE(review): this replaces every occurrence of 'python' in the path,
    # including in parent directories -- confirm that this is acceptable.
    file_name_js = file_name.replace('python', 'javascript')
    header_lines = []
    in_header = False
    try:
        with codecs.open(file_name_js, "r", "utf-8") as details_file_js:
            for line in details_file_js:
                if limiter_yaml.match(line) is not None:
                    if in_header:
                        # Second delimiter: the header is complete
                        break
                    in_header = True
                elif in_header:
                    header_lines.append(line)
        # safe_load: the header is plain data, and yaml.load() without an
        # explicit Loader is rejected by recent PyYAML releases.
        yaml_data_js = yaml.safe_load("".join(header_lines))
        return parents[yaml_data_js['io'][0][0]]
    except Exception:
        # Deliberately broad, but unlike a bare `except:` it lets
        # KeyboardInterrupt and SystemExit through.
        return ""


# Add docs in result for one file
def add_doc(file_name, result_file):
    """Convert one Python doc page into docsSource entries in `result_file`.

    The page's YAML header provides the command name. The remaining text is
    cleaned up (see the comments in the loop below), encoded to UTF-8 and
    written as one `(<prefix><name>, <bytes literal>),` entry for every
    Python name the command maps to: the pairs listed in `tags` when the
    command has an entry there, otherwise the single prefix taken from the
    JavaScript page's io data.

    file_name   -- path of the Python doc page.
    result_file -- writable text file object receiving the entries.

    Raises ValueError if the page has no YAML header, and AssertionError if
    a command that is not in `tags` has no usable parent prefix.
    """
    limiter_yaml = re.compile(r'---\s*')

    # Reading the JS file to extract the io data
    parent = _js_parent_prefix(file_name, limiter_yaml)

    # Used to skip the titles like Description, Related commands etc.
    ignore_pattern = re.compile(r"#.*#.*|<img.*/>")

    # Delimiters of the body (command syntax)
    start_body_pattern = re.compile(r"{%\s*apibody\s*%}\s*")
    end_body_pattern = re.compile(r"{%\s*endapibody\s*%}\s*")
    parsing_body = False

    # Used to skip info boxes
    start_infobox_pattern = re.compile(r"{%\s*infobox( alert|)\s*%}\s*")
    end_infobox_pattern = re.compile(r"{%\s*endinfobox\s*%}\s*")
    parsing_infobox = False

    # Used to convert relative Markdown links to absolute
    link_match_pattern = re.compile(r'\[(.*?)\]\(/')
    link_replace_pattern = r'[\1](http://rethinkdb.com/'

    # Used to remove reference-style links (we just keep their titles)
    ref_match_pattern = re.compile(r'\[(.*?)\]\[[^\]]*\]')
    ref_replace_pattern = r'\1'
    ref_link_match_pattern = re.compile(r'^\[.*?\]:.+$')

    # Track if we are parsing some code
    example_code_start_pattern = re.compile("```py")
    example_code_end_pattern = re.compile("```")
    parsing_example_code = False

    # Tracking the yaml header, we need it for the command name
    is_yaml = False
    yaml_header_py = ""
    name = None

    chunks = []
    with codecs.open(file_name, "r", "utf-8") as details_file:
        for line in details_file:
            # Ignore titles
            if ignore_pattern.match(line) is not None:
                continue

            if limiter_yaml.match(line) is not None:
                if not is_yaml:
                    is_yaml = True
                else:
                    # End of the header: it holds the command name
                    yaml_data_py = yaml.safe_load(yaml_header_py)
                    name = yaml_data_py["command"]
                    is_yaml = False
            elif is_yaml:
                yaml_header_py += line
            elif start_body_pattern.match(line) is not None:
                parsing_body = True
            elif end_body_pattern.match(line) is not None:
                parsing_body = False
            elif start_infobox_pattern.match(line) is not None:
                parsing_infobox = True
            elif end_infobox_pattern.match(line) is not None:
                parsing_infobox = False
            elif parsing_infobox:
                continue
            elif parsing_body:
                # Command syntax lines are kept, with an ASCII arrow
                chunks.append(line.replace('→', '->'))
            elif example_code_start_pattern.match(line) is not None:
                parsing_example_code = True
            elif example_code_end_pattern.match(line) is not None:
                parsing_example_code = False
            elif parsing_example_code:
                # NOTE(review): example code is prefixed with one space here;
                # a Markdown code block normally uses four -- confirm.
                chunks.append(" " + line)
            elif ref_link_match_pattern.match(line) is None:
                # Regular text; reference link definitions are dropped
                line = link_match_pattern.sub(link_replace_pattern, line)
                line = ref_match_pattern.sub(ref_replace_pattern, line)
                chunks.append(line)

    if name is None:
        raise ValueError('No YAML header found in %s' % file_name)

    # Collapse runs of blank lines, drop leading ones, normalize the
    # "Example" markers
    text = re.sub("\n{2,}", "\n\n", "".join(chunks))
    text = re.sub("^\n+", "", text)
    text = re.sub("(__Example:__)|(__Example__:)", "*Example*", text)

    encoded = repr(text.encode('utf-8'))
    if not encoded.startswith('b'):
        # Add the binary marker for Python 3 when generating from Python 2.6
        encoded = 'b' + encoded

    if name in tags:
        # The command has multiple names / parents
        names = tags[name]
        if callable(names):
            names = names(parent)
        for target_parent, target_name in names:
            result_file.write(
                "\n\t(" + target_parent + target_name + ", " + encoded + '),')
    else:
        # The command has just one name and one parent.
        # Raised explicitly so that the check also runs under `python -O`.
        if parent in (None, ''):
            raise AssertionError(
                'Missing the parent entry for: %s in %s' % (name, file_name))
        result_file.write("\n\t(" + parent + name + ", " + encoded + '),')
if __name__ == "__main__":
    # Generate docs.py next to this script from the pages found under
    # ../api/python/ (relative to the script).
    script_path = os.path.dirname(os.path.realpath(__file__))
    result_file = codecs.open(script_path+"/docs.py", "w", "utf-8")
    try:
        write_header(result_file)
        browse_files(script_path+"/../api/python/", result_file)
        write_footer(result_file)
    finally:
        # Close (and flush) the output even when the generation fails
        result_file.close()