Skip to content

Commit 5ceef65

Browse files
committed
新增表单解析
1 parent 0c47e5c commit 5ceef65

File tree

1 file changed

+30
-0
lines changed

1 file changed

+30
-0
lines changed

tools/html.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,36 @@ def test_replace_all():
173173
print replace_all('ffff\n\r\ngggg\ndfdf', replacements)
174174

175175

176+
def get_form(html, form_index=0, filter_tag_name_list=None, skip_tag_name_list=None):
    """
    Extract the fields of one form in an HTML document as a dict.

    :param html: HTML source handed to ``lxml.html.fromstring``.
    :param form_index: index of the wanted form in the document's ``forms``
        list (default ``0``, the first form).
    :param filter_tag_name_list: field names to keep; when empty or ``None``
        every field that is not skipped is kept.
    :param skip_tag_name_list: field names to drop; checked before
        ``filter_tag_name_list``, so a name present in both is dropped.
    :return: dict mapping field name to field value, with ``None`` values
        replaced by ``''``.
    :raises IndexError: if the document has no form at ``form_index``.
    """
    # Function-scope import: lxml is only loaded when this function is called.
    from lxml.html import fromstring

    forms = fromstring(html).forms
    try:
        form = forms[form_index]
    except IndexError:
        # Same exception type as before, but say which index failed and why.
        raise IndexError(
            'form_index %r is out of range: document has %d form(s)'
            % (form_index, len(forms))
        )

    data = {}
    # .items() instead of .iteritems(): works on both Python 2 and Python 3.
    for name, value in form.fields.items():
        # Skip list wins over the filter list.
        if skip_tag_name_list and name in skip_tag_name_list:
            continue
        # An empty/None filter list means "no filtering".
        if filter_tag_name_list and name not in filter_tag_name_list:
            continue
        # Normalise missing values to an empty string.
        data[name] = '' if value is None else value
    return data
204+
205+
176206
if __name__ == '__main__':
177207
test_html = '''<h2>多云</h2> '''
178208
print replace_html(test_html)

0 commit comments

Comments
 (0)