Skip to content

Commit 6f52cb7

Browse files
committed
更新sohu静态文件保存
1 parent 5499c10 commit 6f52cb7

File tree

1 file changed

+17
-3
lines changed

1 file changed

+17
-3
lines changed

fuck/sohu.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414
url_list = [root_url] # 爬虫待访问url列表
1515
url_visited_list = [] # 爬虫已访问url列表
1616

17+
file_path = 'sohu.txt'
18+
url_file = open(file_path, 'a')
19+
1720

1821
def read_src(src_url=None):
1922
"""
@@ -46,25 +49,36 @@ def print_static(src):
4649
if src.startswith('http://') and src.startswith('/') and (src.endswith('.jpg') or src.endswith('.gif') or src.endswith('.png')):
4750
if src.startswith('/'):
4851
src = ''.join([root_url, src])
49-
print '图片:%s' % src
52+
url_file.write(src)
53+
url_file.write('\n')
54+
print '图片:%s' % src
5055
if src.endswith('.js'):
5156
if src.startswith('/'):
5257
src = ''.join([root_url, src])
53-
print 'js文件:%s' % src
58+
url_file.write(src)
59+
url_file.write('\n')
60+
print 'js文件:%s' % src
5461
if src.endswith('.css'):
5562
if src.startswith('/'):
5663
src = ''.join([root_url, src])
57-
print 'css文件:%s' % src
64+
url_file.write(src)
65+
url_file.write('\n')
66+
print 'css文件:%s' % src
5867

5968

6069
def run():
    """
    Main program: drain the crawl queue.

    Pops URLs from the front of the module-level ``url_list`` and passes
    each one to ``read_src`` until the list is empty. The module-level
    ``url_file`` is flushed after every 10 processed URLs so that an
    abrupt stop loses at most a small batch of buffered lines.

    A ``KeyboardInterrupt`` (Ctrl-C) stops the loop and prints an exit
    message. ``url_file`` is closed on every exit path: normal completion,
    Ctrl-C, or any other exception (which still propagates to the caller).
    """
    count = 0
    try:
        while url_list:
            read_src(url_list.pop(0))
            count += 1
            # Periodic flush: push buffered output to disk every 10 URLs.
            if count % 10 == 0:
                url_file.flush()
    except KeyboardInterrupt:
        print('程序退出')
    finally:
        # Previously the file was closed only on Ctrl-C; closing here also
        # covers a normally drained queue and unexpected errors.
        url_file.close()
6983

7084

0 commit comments

Comments
 (0)