File tree Expand file tree Collapse file tree 1 file changed +17
-3
lines changed
Expand file tree Collapse file tree 1 file changed +17
-3
lines changed Original file line number Diff line number Diff line change 1414url_list = [root_url ] # 爬虫待访问url列表
1515url_visited_list = [] # 爬虫已访问url列表
1616
17+ file_path = 'sohu.txt'
18+ url_file = open (file_path , 'a' )
19+
1720
1821def read_src (src_url = None ):
1922 """
@@ -46,25 +49,36 @@ def print_static(src):
4649 if src .startswith ('http://' ) and src .startswith ('/' ) and (src .endswith ('.jpg' ) or src .endswith ('.gif' ) or src .endswith ('.png' )):
4750 if src .startswith ('/' ):
4851 src = '' .join ([root_url , src ])
49- print '图片:%s' % src
52+ url_file .write (src )
53+ url_file .write ('\n ' )
54+ print '图片:%s' % src
5055 if src .endswith ('.js' ):
5156 if src .startswith ('/' ):
5257 src = '' .join ([root_url , src ])
53- print 'js文件:%s' % src
58+ url_file .write (src )
59+ url_file .write ('\n ' )
60+ print 'js文件:%s' % src
5461 if src .endswith ('.css' ):
5562 if src .startswith ('/' ):
5663 src = '' .join ([root_url , src ])
57- print 'css文件:%s' % src
64+ url_file .write (src )
65+ url_file .write ('\n ' )
66+ print 'css文件:%s' % src
5867
5968
def run():
    """
    Main program: crawl until the pending-URL queue is empty.

    Repeatedly pops the URL at the front of the module-level ``url_list``
    and passes it to ``read_src``. The output file ``url_file`` is flushed
    after every 10 processed URLs and is always closed when the loop ends,
    whether the queue ran dry, the user pressed Ctrl-C, or ``read_src``
    raised an exception.

    Exceptions other than ``KeyboardInterrupt`` still propagate to the
    caller after the file has been closed.
    """
    processed = 0
    try:
        while url_list:
            read_src(url_list.pop(0))
            processed += 1
            # Flush periodically so an abrupt kill loses at most the
            # entries written since the last multiple of 10.
            if processed % 10 == 0:
                url_file.flush()
    except KeyboardInterrupt:
        # Ctrl-C is the expected way to stop the crawl early.
        # Parenthesised so the statement parses on both Python 2 and 3;
        # with a single argument the output is identical.
        print('程序退出')
    finally:
        # Close on every exit path, not just on Ctrl-C, so buffered URLs
        # are written out when the queue is exhausted or an error escapes.
        url_file.close()
6983
7084
You can’t perform that action at this time.
0 commit comments