-
Notifications
You must be signed in to change notification settings - Fork 18
Expand file tree
/
Copy pathdocker_build.py
More file actions
276 lines (241 loc) · 9.86 KB
/
docker_build.py
File metadata and controls
276 lines (241 loc) · 9.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
import logging
import re
import traceback
import docker
import docker.errors
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
from typing import Any
from commit0.harness.constants import (
BASE_IMAGE_BUILD_DIR,
REPO_IMAGE_BUILD_DIR,
)
from commit0.harness.spec import get_specs_from_dataset
from commit0.harness.utils import setup_logger, close_logger
# Matches ANSI CSI escape sequences: ESC "[" followed by any parameter bytes
# (0x30-0x3F), any intermediate bytes (0x20-0x2F) and one final byte (0x40-0x7E).
# Used to strip terminal control codes from docker build output before logging.
ansi_escape = re.compile(r"\x1B\[[0-?]*[ -/]*[@-~]")
class BuildImageError(Exception):
    """Raised when building a docker image fails.

    Instance attributes (all set in ``__init__``):
        image_name: name of the image whose build failed.
        super_str: the bare error message as rendered by ``Exception.__str__``.
        log_path: path of the build log, or ``""`` when it cannot be determined.
        logger: the logger that was recording the build.
    """

    def __init__(self, image_name: str, message: str, logger: logging.Logger):
        super().__init__(message)
        self.super_str = super().__str__()
        self.image_name = image_name
        # Resolve the path eagerly: the caller may close the logger (and drop its
        # handlers) right after raising, at which point the path is no longer
        # recoverable from the logger itself.
        self.log_path = self._find_log_path(logger)
        self.logger = logger

    @staticmethod
    def _find_log_path(logger: logging.Logger) -> str:
        """Return the file ``logger`` writes to, or ``""`` if none can be found."""
        # An explicit ``log_file`` attribute attached to the logger wins.
        explicit = getattr(logger, "log_file", None)
        if explicit:
            return str(explicit)
        # Otherwise fall back to the first file handler registered on the logger.
        for handler in getattr(logger, "handlers", ()):
            if isinstance(handler, logging.FileHandler):
                return handler.baseFilename
        return ""

    def __str__(self) -> str:
        return (
            f"Error building image {self.image_name}: {self.super_str}\n"
            f"Check ({self.log_path}) for more information."
        )
def build_image(
    image_name: str,
    setup_scripts: dict,
    dockerfile: str,
    platform: str,
    client: docker.DockerClient,
    build_dir: Path,
    nocache: bool = False,
) -> None:
    """Builds a docker image with the given name, setup scripts, dockerfile, and platform.

    Args:
    ----
        image_name (str): Name of the image to build
        setup_scripts (dict): Dictionary of setup script names to setup script contents
        dockerfile (str): Contents of the Dockerfile
        platform (str): Platform to build the image for
        client (docker.DockerClient): Docker client to use for building the image
        build_dir (Path): Directory for the build context (will also contain logs, scripts, and artifacts)
        nocache (bool): If True, build without using the docker build cache

    Raises:
    ------
        BuildImageError: If writing the build context fails, the docker API
            call fails, or the build stream reports an error.

    """
    # The build context must exist before anything is written into it; this is
    # a no-op when the directory is already there.
    build_dir.mkdir(parents=True, exist_ok=True)

    # Create a logger for the build process
    logger = setup_logger(image_name, build_dir / "build_image.log")
    logger.info(
        f"Building image {image_name}\n"
        f"Using dockerfile:\n{dockerfile}\n"
        f"Adding ({len(setup_scripts)}) setup scripts to image build repo"
    )
    for setup_script_name, setup_script in setup_scripts.items():
        logger.info(f"[SETUP SCRIPT] {setup_script_name}:\n{setup_script}")

    try:
        # Write the setup scripts to the build directory
        for setup_script_name, setup_script in setup_scripts.items():
            setup_script_path = build_dir / setup_script_name
            with open(setup_script_path, "w", encoding="utf-8") as f:
                f.write(setup_script)
            if setup_script_name not in dockerfile:
                logger.warning(
                    f"Setup script {setup_script_name} may not be used in Dockerfile"
                )

        # Write the dockerfile to the build directory
        dockerfile_path = build_dir / "Dockerfile"
        with open(dockerfile_path, "w", encoding="utf-8") as f:
            f.write(dockerfile)

        # Build the image
        logger.info(
            f"Building docker image {image_name} in {build_dir} with platform {platform}"
        )
        response = client.api.build(
            path=str(build_dir),
            tag=image_name,
            rm=True,
            forcerm=True,
            decode=True,
            platform=platform,
            nocache=nocache,
        )

        # Log the build process continuously
        build_log = []
        for chunk in response:
            if "stream" in chunk:
                # Remove ANSI escape sequences from the log
                chunk_stream = ansi_escape.sub("", chunk["stream"]).strip()
                logger.info(chunk_stream)
                build_log.append(chunk_stream)
            elif "errorDetail" in chunk or "error" in chunk:
                # The low-level build API reports a failed build as a chunk in
                # the stream instead of raising, so it has to be surfaced here;
                # otherwise a broken build would be reported as a success.
                detail = chunk.get("errorDetail") or {}
                reason = ansi_escape.sub(
                    "", detail.get("message") or str(chunk.get("error", ""))
                )
                logger.error(f"Error: {reason}")
                raise docker.errors.BuildError(reason, build_log)
        logger.info("Image built successfully!")
    except docker.errors.APIError as e:
        logger.error(f"docker.errors.APIError during {image_name}: {e}")
        raise BuildImageError(image_name, str(e), logger) from e
    except Exception as e:
        logger.error(f"Error building image {image_name}: {e}")
        raise BuildImageError(image_name, str(e), logger) from e
    finally:
        close_logger(logger)  # functions that create loggers should close them
def build_base_images(
    client: docker.DockerClient, dataset: list, dataset_type: str
) -> None:
    """Build every base image the dataset needs that is not already present.

    Args:
    ----
        client (docker.DockerClient): Docker client to use for building the images
        dataset (list): List of test specs or dataset to build images for
        dataset_type(str): The type of dataset. Choices are commit0 and swebench

    """
    specs = get_specs_from_dataset(dataset, dataset_type, absolute=True)

    # Collapse the specs to one (dockerfile, platform) entry per base image
    # name; when several specs share a name the last one seen is kept.
    wanted = {}
    for spec in specs:
        key = spec.base_image_key
        wanted[key] = (spec.base_dockerfile, spec.platform)

    for image_name, recipe in wanted.items():
        dockerfile, platform = recipe

        # Probe the docker daemon: only a missing image needs building.
        try:
            client.images.get(image_name)
        except docker.errors.ImageNotFound:
            already_present = False
        else:
            already_present = True

        if already_present:
            print(f"Base image {image_name} already exists, skipping build.")
            continue

        print(f"Building base image ({image_name})")
        # ":" is replaced so the image tag can be used as a directory name.
        target_dir = BASE_IMAGE_BUILD_DIR / image_name.replace(":", "__")
        build_image(
            image_name=image_name,
            setup_scripts={},
            dockerfile=dockerfile,
            platform=platform,
            client=client,
            build_dir=target_dir,
        )

    print("Base images built successfully.")
def get_repo_configs_to_build(
    client: docker.DockerClient, dataset: list, dataset_type: str
) -> dict[str, Any]:
    """Returns a dictionary of image names to build scripts and dockerfiles for repo images.
    Returns only the repo images that need to be built.

    Args:
    ----
        client (docker.DockerClient): Docker client to use for building the images
        dataset (list): List of test specs or dataset to build images for
        dataset_type(str): The type of dataset. Choices are commit0 and swebench

    Returns:
    -------
        dict[str, Any]: Maps each missing repo image key to a dict with the
            keys ``setup_script``, ``dockerfile`` and ``platform``.

    Raises:
    ------
        Exception: If the base image of any spec has not been built yet.

    """
    image_scripts: dict[str, Any] = {}
    # Base images already confirmed to exist; many specs share one base image,
    # so this avoids asking the docker daemon about the same image repeatedly.
    verified_base_images: set[str] = set()
    test_specs = get_specs_from_dataset(dataset, dataset_type, absolute=True)

    for test_spec in test_specs:
        # Check if the base image exists
        base_image_key = test_spec.base_image_key
        if base_image_key not in verified_base_images:
            try:
                client.images.get(base_image_key)
            except docker.errors.ImageNotFound as e:
                raise Exception(
                    f"Base image {base_image_key} not found for {test_spec.repo_image_key}.\n"
                    "Please build the base images first."
                ) from e
            verified_base_images.add(base_image_key)

        # Check if the repo image exists
        try:
            client.images.get(test_spec.repo_image_key)
            continue  # already built, nothing to schedule
        except docker.errors.ImageNotFound:
            pass

        # Add the repo image to the list of images to build
        image_scripts[test_spec.repo_image_key] = {
            "setup_script": test_spec.setup_script,
            "dockerfile": test_spec.repo_dockerfile,
            "platform": test_spec.platform,
        }
    return image_scripts
def build_repo_images(
    client: docker.DockerClient,
    dataset: list,
    dataset_type: str,
    max_workers: int = 4,
    verbose: int = 1,
) -> tuple[list[str], list[str]]:
    """Build, in parallel, every repo image of the dataset that does not exist yet.

    Base images are built first; the missing repo images are then built
    concurrently on a thread pool.

    Args:
    ----
        client (docker.DockerClient): Docker client to use for building the images
        dataset (list): List of test specs or dataset to build images for
        dataset_type(str): The type of dataset. Choices are commit0 and swebench
        max_workers (int): Maximum number of workers to use for building images
        verbose (int): Level of verbosity (not read by this function)

    Return:
    ------
        successful: a list of docker image keys for which build were successful
        failed: a list of docker image keys for which build failed

    """
    build_base_images(client, dataset, dataset_type)
    pending = get_repo_configs_to_build(client, dataset, dataset_type)
    if not pending:
        print("No repo images need to be built.")
        return [], []
    print(f"Total repo images to build: {len(pending)}")

    successful: list[str] = []
    failed: list[str] = []
    with tqdm(
        total=len(pending), smoothing=0, desc="Building repo images"
    ) as pbar, ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Schedule one build per image, remembering which image each job is for
        image_for_job = {}
        for image_name, config in pending.items():
            job = executor.submit(
                build_image,
                image_name,
                {"setup.sh": config["setup_script"]},
                config["dockerfile"],
                config["platform"],
                client,
                REPO_IMAGE_BUILD_DIR / image_name.replace(":", "__"),
            )
            image_for_job[job] = image_name

        # Collect the outcomes in completion order
        for job in as_completed(image_for_job):
            pbar.update(1)
            finished_image = image_for_job[job]
            try:
                # result() re-raises whatever the build raised in its worker thread
                job.result()
            except BuildImageError as e:
                print(f"BuildImageError {e.image_name}")
                traceback.print_exc()
                failed.append(finished_image)
            except Exception:
                print("Error building image")
                traceback.print_exc()
                failed.append(finished_image)
            else:
                successful.append(finished_image)

    # Report the overall outcome
    if failed:
        print(f"{len(failed)} repo images failed to build.")
    else:
        print("All repo images built successfully.")

    return successful, failed
# Empty export list: a wildcard import of this module exposes no names, so
# callers must import the functions they need explicitly.
__all__ = []