forked from looly/python_script
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathimgDownloader.py
More file actions
executable file
·70 lines (59 loc) · 1.84 KB
/
imgDownloader.py
File metadata and controls
executable file
·70 lines (59 loc) · 1.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/env python
# -*-coding:utf8-*-
#
# Author:Looly
# Batch-downloads images from a list of URLs
import sys
import os
import urllib
import hashlib
#----------------------------------------------------
# File extension appended to every locally saved file
EXT = '.jpg'
# Local directory under which downloaded files are saved
DIR = './images'
#----------------------------------------------------
def readUrls(path):
    '''Return every line of the URL list file at ``path``.

    Lines are returned exactly as stored in the file, trailing newline
    characters included; callers are expected to strip them.
    '''
    with open(path) as urlFile:
        lines = list(urlFile)
    return lines
def checkFile(finishedBlock, blockSize, totalSize):
    '''Print download progress; used as the ``urlretrieve`` report hook.

    Args:
        finishedBlock: number of blocks transferred so far.
        blockSize: size of one block.
        totalSize: total size of the download; zero or negative when the
            size is not known.
    '''
    finishedSize = finishedBlock * blockSize
    if totalSize <= 0:
        # Without a usable total a percentage cannot be computed (and
        # dividing by zero would abort the download), so report only the
        # amount transferred.
        print('Finished: %s, total: unknown' % finishedSize)
        return
    # The last block is usually only partly filled, so cap at the total.
    finishedSize = min(finishedSize, totalSize)
    # 100.0 forces true division; Python 2 would otherwise truncate.
    percent = 100.0 * finishedSize / totalSize
    print('%.2f%%, Finished: %s, total: %s' % (percent, finishedSize, totalSize))
def buildPath(url, dir):
    '''Build the local file path for ``url`` and create its directory.

    The file name is the MD5 hex digest of the URL followed by ``EXT``.
    Files are spread over two levels of sub-directories named after the
    first and the second character of that digest.

    Args:
        url: URL the file name is derived from (text or bytes).
        dir: base directory; an empty string means the current directory.

    Returns:
        The path of the local file, using ``/`` as separator.
    '''
    if '' == dir:
        base = './'
    elif '/' != dir[-1]:
        base = dir + '/'
    else:
        base = dir
    # hashlib only accepts bytes: encode text URLs, and pass byte strings
    # (including Python 2 ``str``) through unchanged.
    key = url if isinstance(url, bytes) else url.encode('utf-8')
    fileName = hashlib.md5(key).hexdigest() + EXT
    # Shard the files by the first and second character of the MD5 value.
    targetDir = base + fileName[0] + '/' + fileName[1] + '/'
    mkdirs(targetDir)
    return targetDir + fileName
def download(url, dir=DIR):
    '''Download ``url`` into the directory tree rooted at ``dir``.

    The target path comes from ``buildPath`` and progress is reported
    through ``checkFile``.

    Args:
        url: URL of the file to fetch.
        dir: base directory for the saved file; defaults to ``DIR``.
    '''
    # urlretrieve lives in urllib.request on Python 3 and directly in
    # urllib on Python 2; resolve it locally so both interpreters work.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve
    path = buildPath(url, dir)
    print('Save file to ' + path)
    # TODO: this call raises on an invalid path and cannot recognise
    # 404 pages; to be improved later.
    urlretrieve(url, path, checkFile)
def start():
    '''Download every URL listed in the file named by ``sys.argv[1]``.

    Prints an error message and returns when no file path was given on
    the command line. The file is read as one URL per line; surrounding
    whitespace is stripped and blank lines are skipped.
    '''
    if len(sys.argv) < 2 or '' == sys.argv[1]:
        print('ERROR: Please provide url path as first argument!')
        return
    path = sys.argv[1]
    for line in readUrls(path):
        line = line.strip()
        if not line:
            # A blank line is not a URL; passing '' to download() fails.
            continue
        print('Download ' + line)
        download(line)
def mkdirs(path):
    '''Create ``path`` and any missing parent directories.

    A directory that already exists is left untouched.

    Args:
        path: directory path to create.

    Raises:
        OSError: if the directory cannot be created and no directory
            exists at ``path`` afterwards.
    '''
    try:
        os.makedirs(path)
    except OSError:
        # Creating first and checking afterwards avoids the race between
        # an existence test and the creation. Only swallow the error when
        # the directory really is there.
        if not os.path.isdir(path):
            raise
# Script entry point: run the downloader only when executed directly.
if __name__ == "__main__":
    start()