beautifulsoup 测试

damnit1989 · damnit1989 · commit aa2a1e620f8c · 2017-10-18T22:36:48.000-07:00
diff --git a/beautifulsoup_test.py b/beautifulsoup_test.py
@@ -2,9 +2,20 @@
 # -*- coding: utf-8 -*-
 # BeautifulSoup  爬虫练习
 
-from bs4 import BeautifulSoup
+# 安装BeautifulSoup
+# 方法一：easy_install beautifulsoup4
+# 方法二：pip install beautifulsoup4
+
 
+# Beautiful Soup支持Python标准库中的HTML解析器(html.parser),
+# 还支持一些第三方的解析器(lxml,html5lib等),如果我们不安装它,则 Python 会使用 Python默认的解析器,lxml 解析器更加强大,速度更快,推荐安装
 
+
+# 导入模块
+from bs4 import BeautifulSoup
+import urllib
+
+# 测试html
 html = """
 <html><head><title>The Dormouse's story</title></head>
 <body>
@@ -17,8 +28,7 @@
 <p class="story">...</p>
 """
 
-
-# soup = BeautifulSoup(html)
+# soup = BeautifulSoup(html,'lxml')
 soup = BeautifulSoup(html)
 print soup.name
 print type(soup.head)
@@ -57,21 +67,17 @@
 # print soup.a.string
 # if type(soup.a.string) == 'bs4.element.Comment':
     # print type(soup.a.string)
-    
 # for content in soup.p.contents:
     # print content
-
 # for child in soup.body.children:
     # print child
-
 # for child in soup.descendants:
     # print child
-    
 # for str in soup.stripped_strings :
     # print str
-if __name__ == '__main__':
-    # pass
-    import urllib
+
+# 小小的测试
+def test():
     url_handle = urllib.urlopen('https://www.python.org/events/python-events/')
     html = url_handle.read()
     soup = BeautifulSoup(html)
@@ -81,4 +87,9 @@
         print '&#20250;&#35758;:',x.h3.get_text()
         print '&#26102;&#38388;:',x.p.time.get_text()
         print '&#20869;&#23481;',x.p.select('span[class="event-location"]')[0].get_text()
+
+        
+if __name__ == '__main__':
+    test()
+