Skip to content

Commit 69b5c9b

Browse files
committed
Automate the build artefact downloading from github and appveyor.
1 parent 61432a8 commit 69b5c9b

File tree

1 file changed

+136
-0
lines changed

1 file changed

+136
-0
lines changed

download_artefacts.py

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
#!/usr/bin/python3
2+
3+
import itertools
4+
import json
5+
import logging
6+
import re
7+
import shutil
8+
import datetime
9+
10+
from concurrent.futures import ProcessPoolExecutor as Pool, as_completed
11+
from pathlib import Path
12+
from urllib.request import urlopen
13+
from urllib.parse import urljoin
14+
15+
logger = logging.getLogger()
16+
17+
PARALLEL_DOWNLOADS = 6
18+
GITHUB_PACKAGE_URL = "https://github.com/lxml/lxml-wheels"
19+
APPVEYOR_PACKAGE_URL = "https://ci.appveyor.com/api/projects/scoder/lxml"
20+
APPVEYOR_BUILDJOBS_URL = "https://ci.appveyor.com/api/buildjobs"
21+
22+
23+
def find_github_files(version, base_package_url=GITHUB_PACKAGE_URL):
    """Yield the wheel download URLs for a release tag on GitHub.

    Fetches the release page for ``lxml-<version>`` and extracts every
    ``.whl`` href, deduplicated and in sorted order.
    """
    url = f"{base_package_url}/releases/tag/lxml-{version}"
    with urlopen(url) as p:
        page = p.read().decode()

    # sorted(set(...)) deduplicates directly; the original used
    # itertools.groupby over a sorted list to achieve the same effect.
    for wheel_url in sorted(set(re.findall(r'href="([^"]+\.whl)"', page))):
        yield urljoin(base_package_url, wheel_url)
30+
31+
32+
def find_appveyor_files(version, base_package_url=APPVEYOR_PACKAGE_URL, base_job_url=APPVEYOR_BUILDJOBS_URL):
    """Yield artifact download URLs from the appveyor build for a release tag."""
    tag = f"lxml-{version}"

    history_url = f"{base_package_url}/history?recordsNumber=20"
    with urlopen(history_url) as response:
        recent_builds = json.load(response)["builds"]

    # Find the most recent tagged build matching this release.
    build_id = None
    for candidate in recent_builds:
        if candidate['isTag'] and candidate['tag'] == tag:
            build_id = candidate['buildId']
            break
    if build_id is None:
        logger.warning(f"No appveyor build found for tag '{tag}'")
        return

    with urlopen(f"{base_package_url}/builds/{build_id}") as response:
        build_jobs = json.load(response)["build"]["jobs"]

    # Each job exposes its own artifact listing endpoint.
    for build_job in build_jobs:
        artifacts_url = f"{base_job_url}/{build_job['jobId']}/artifacts/"
        with urlopen(artifacts_url) as response:
            for artifact in json.load(response):
                yield urljoin(artifacts_url, artifact['fileName'])
56+
57+
58+
def download1(wheel_url, dest_dir):
    """Download a single wheel into *dest_dir* and return its file name.

    Skips the download when a local file of the expected size already
    exists (matched against the Content-Length header).
    """
    wheel_name = wheel_url.rsplit("/", 1)[1]
    logger.info(f"Downloading {wheel_url} ...")
    with urlopen(wheel_url) as w:
        file_path = dest_dir / wheel_name
        if (file_path.exists()
                and "Content-Length" in w.headers
                and file_path.stat().st_size == int(w.headers["Content-Length"])):
            logger.info(f"Already have {wheel_name}")
        else:
            try:
                with open(file_path, "wb") as f:
                    shutil.copyfileobj(w, f)
            except BaseException:
                # Explicit BaseException instead of a bare `except:` —
                # same semantics (a partial file is removed even on
                # KeyboardInterrupt so a re-run won't mistake it for a
                # complete download), but lint-clean and intentional.
                if file_path.exists():
                    file_path.unlink()
                raise
            else:
                logger.info(f"Finished downloading {wheel_name}")
    return wheel_name
78+
79+
80+
def download(urls, dest_dir, jobs=PARALLEL_DOWNLOADS):
    """Download *urls* into *dest_dir* using a pool of worker processes,
    yielding each wheel's file name as its download completes.
    """
    with Pool(max_workers=jobs) as pool:
        pending = [pool.submit(download1, wheel_url, dest_dir) for wheel_url in urls]
        try:
            for finished in as_completed(pending):
                yield finished.result()
        except KeyboardInterrupt:
            # Cancel anything still queued before propagating the interrupt.
            for task in pending:
                task.cancel()
            raise
91+
92+
93+
def roundrobin(*iterables):
    "roundrobin('ABC', 'D', 'EF') --> A D E B F C"
    # Recipe credited to George Sakkis
    from itertools import cycle, islice
    num_active = len(iterables)
    # Cycle over the bound __next__ methods of each iterator.
    nexts = cycle(iter(it).__next__ for it in iterables)
    while num_active:
        try:
            # Renamed from `next` — the original shadowed the builtin.
            for get_next in nexts:
                yield get_next()
        except StopIteration:
            # Remove the iterator we just exhausted from the cycle.
            num_active -= 1
            nexts = cycle(islice(nexts, num_active))
107+
108+
109+
def main(*args):
    """Download all wheels for the version given as the first argument
    into dist/<version>/, pulling from GitHub and appveyor in parallel.
    """
    if not args:
        print("Please pass the version to download")
        return

    version = args[0]
    dest_dir = Path("dist") / version
    # parents=True also creates "dist" on a fresh checkout; the original
    # bare mkdir() raised FileNotFoundError when "dist" was missing.
    dest_dir.mkdir(parents=True, exist_ok=True)

    start_time = datetime.datetime.now().replace(microsecond=0)
    # Interleave the two sources so both servers are queried evenly.
    urls = roundrobin(
        find_github_files(version),
        find_appveyor_files(version),
    )
    # Drain the download generator; the redundant enumerate() is gone.
    count = sum(1 for _ in download(urls, dest_dir))
    duration = datetime.datetime.now().replace(microsecond=0) - start_time
    logger.info(f"Downloaded {count} files in {duration}.")
127+
128+
129+
if __name__ == "__main__":
    import sys
    # Log INFO and above to stderr with timestamps; the download
    # functions report all progress through the module logger.
    logging.basicConfig(
        stream=sys.stderr,
        level=logging.INFO,
        format="%(asctime)-15s %(message)s",
    )
    # Forward command-line arguments (expects the version as argv[1]).
    main(*sys.argv[1:])

0 commit comments

Comments
 (0)