-
Notifications
You must be signed in to change notification settings - Fork 17
Expand file tree
/
Copy pathdocker_utils.py
More file actions
349 lines (285 loc) · 11.1 KB
/
docker_utils.py
File metadata and controls
349 lines (285 loc) · 11.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
from __future__ import annotations
import docker
import logging
import os
import signal
import tarfile
import threading
import time
import traceback
from pathlib import Path
from io import BytesIO
from typing import Optional, List
import docker.errors
from docker.models.containers import Container
# Terminator line for the shell heredoc built in write_to_container.
# Chosen to differ from the heredoc delimiters that appear in the dataset.
HEREDOC_DELIMITER = "EOF_1399519320" # different from dataset HEREDOC_DELIMITERs!
def copy_to_container(container: Container, src: Path, dst: Path) -> None:
    """Copy a file from local to a docker container

    The file is packed into an in-memory tar archive and handed to
    ``container.put_archive``, which extracts it into the parent directory of
    ``dst``. The archive member is named after ``dst`` so the file ends up at
    ``dst`` even when ``src`` and ``dst`` have different file names. No
    temporary file is created next to ``src``.

    Args:
    ----
        container (Container): Docker container to copy to
        src (Path): Source file path
        dst (Path): Destination file path in the container

    Raises:
    ------
        ValueError: If ``dst`` has no parent directory component.
    """
    # Accept plain strings as well, like copy_from_container does.
    if not isinstance(src, Path):
        src = Path(src)
    if not isinstance(dst, Path):
        dst = Path(dst)

    # Check if destination path is valid
    if os.path.dirname(dst) == "":
        raise ValueError(
            f"Destination path parent directory cannot be empty!, dst: {dst}"
        )

    # Build the archive in memory: writing it to src.with_suffix(".tar") could
    # overwrite (and later delete) an unrelated file, or src itself when src
    # already ends in ".tar".
    archive = BytesIO()
    with tarfile.open(fileobj=archive, mode="w") as tar:
        # Use the destination name so the extracted file is exactly `dst`.
        tar.add(src, arcname=dst.name)

    # Make directory if necessary. The list form keeps a path containing
    # whitespace as a single argument.
    container.exec_run(["mkdir", "-p", str(dst.parent)])

    # put_archive extracts the archive inside the container, so there is no
    # "<dst>.tar" left behind to unpack or remove afterwards.
    container.put_archive(os.path.dirname(dst), archive.getvalue())
def copy_from_container(container: Container, src: Path, dst: Path) -> None:
    """Copy a file from a docker container to local

    The archive returned by ``container.get_archive`` is buffered in memory,
    every member is checked to resolve inside ``dst.parent``, and the archive
    is then extracted there. If the extracted file name (``src.name``) differs
    from ``dst``, the file is renamed to ``dst``.

    Args:
    ----
        container (Container): Docker container to copy from
        src (Path): Source file path in the container
        dst (Path): Destination file path locally

    Raises:
    ------
        Exception: If an archive member would be extracted outside of the
            destination directory.
    """
    if not isinstance(src, Path):
        src = Path(src)
    if not isinstance(dst, Path):
        dst = Path(dst)

    # Ensure destination directory exists (no exists()/makedirs() race)
    dst.parent.mkdir(parents=True, exist_ok=True)

    # Copy the file out of the container
    stream, _stat = container.get_archive(str(src))

    # Buffer the streamed tar archive in memory
    tar_stream = BytesIO()
    for chunk in stream:
        tar_stream.write(chunk)
    tar_stream.seek(0)

    dest_dir = os.path.abspath(dst.parent)
    with tarfile.open(fileobj=tar_stream, mode="r") as tar:
        for member in tar.getmembers():
            member_path = os.path.abspath(os.path.join(dest_dir, member.name))
            # commonpath compares whole path components. commonprefix is a
            # character-wise comparison and would accept "/x/out_evil" as being
            # inside "/x/out".
            try:
                inside = os.path.commonpath([dest_dir, member_path]) == dest_dir
            except ValueError:
                # Raised e.g. for paths on different drives: not inside.
                inside = False
            if not inside:
                raise Exception("Attempted Path Traversal in Tar File")
        tar.extractall(dest_dir)

    # Move the extracted file to desired dst path if tar extraction gives src.name
    extracted_file_path = dst.parent / src.name
    if extracted_file_path != dst:
        extracted_file_path.rename(dst)
def write_to_container(container: Container, data: str, dst: Path) -> None:
    """Write a string to a file in a docker container

    The text is fed to ``cat`` through a quoted shell heredoc whose output is
    redirected to ``dst``. The command is run through ``/bin/sh -c`` because
    heredocs and redirections are shell syntax.

    Args:
    ----
        container (Container): Docker container to write into
        data (str): Text to write
        dst (Path): Destination file path in the container

    Raises:
    ------
        ValueError: If a line of ``data`` equals the heredoc delimiter, which
            would end the heredoc early.
    """
    import shlex  # local import: only needed here

    # A line equal to the delimiter would terminate the heredoc and the rest of
    # `data` would then be interpreted by the shell as commands.
    if HEREDOC_DELIMITER in data.split("\n"):
        raise ValueError(
            f"data contains the heredoc delimiter line {HEREDOC_DELIMITER!r}"
        )

    # echo with heredoc to file
    command = (
        f"cat <<'{HEREDOC_DELIMITER}' > {shlex.quote(str(dst))}\n"
        f"{data}\n{HEREDOC_DELIMITER}"
    )
    # NOTE(review): assumes the image provides /bin/sh - confirm for minimal images.
    # A plain string handed to exec_run is split into argv and executed without
    # a shell, so the heredoc/redirect would never be interpreted.
    container.exec_run(["/bin/sh", "-c", command])
def cleanup_container(
    client: docker.DockerClient,
    container: Container,
    logger: logging.Logger,
) -> None:
    """Kill a Docker container and then remove it.

    If killing through the python API raises, the container's PID is read from
    ``client.api.inspect_container`` and, when positive, sent ``SIGKILL`` with
    ``os.kill``. Removal (``force=True``) is attempted afterwards in either case.

    Args:
    ----
        client (docker.DockerClient): Docker client.
        container (docker.Container): Container to remove. A falsy value makes
            the call a no-op.
        logger (logging.Logger): Logger used for progress and error messages.

    Raises:
    ------
        Exception: If the forceful-kill fallback fails or the container cannot
            be removed.
    """
    # Nothing to clean up
    if not container:
        return

    container_id = container.id

    # First choice: kill through the SDK
    try:
        logger.info(f"Attempting to stop container {container.name}...")
        container.kill()
    except Exception as stop_error:
        logger.error(
            f"Failed to stop container {container.name}: {stop_error}. Trying to forcefully kill..."
        )
        try:
            # Fallback: look up the container PID and signal it directly
            assert container_id is not None
            state = client.api.inspect_container(container_id)["State"]
            pid = state.get("Pid", 0)

            has_pid = pid > 0
            if not has_pid:
                logger.error(
                    f"PID for container {container.name}: {pid} - not killing."
                )
            else:
                logger.info(
                    f"Forcefully killing container {container.name} with PID {pid}..."
                )
                os.kill(pid, signal.SIGKILL)
        except Exception as kill_error:
            raise Exception(
                f"Failed to forcefully kill container {container.name}: {kill_error}\n"
                f"{traceback.format_exc()}"
            )

    # Second step: remove the container
    try:
        logger.info(f"Attempting to remove container {container.name}...")
        container.remove(force=True)
        logger.info(f"Container {container.name} removed.")
    except Exception as remove_error:
        raise Exception(
            f"Failed to remove container {container.name}: {remove_error}\n"
            f"{traceback.format_exc()}"
        )
def image_exists_locally(
    client: docker.DockerClient, image_name: str, tag: str, logger: logging.Logger
) -> bool:
    """Report whether ``image_name:tag`` is among the locally available images.

    Args:
    ----
        client (docker.DockerClient): Docker client instance.
        image_name (str): The name of the Docker image.
        tag (str): Tag of the Docker image.
        logger (logging.Logger): Logger instance.

    Returns:
    -------
        bool: True if the image exists locally, False otherwise.
    """
    candidates = client.images.list(name=image_name)
    # any() stops at the first image carrying the requested tag
    found = any(f"{image_name}:{tag}" in candidate.tags for candidate in candidates)
    if found:
        logger.info(f"Using {image_name}:{tag} found locally.")
    else:
        logger.info(f"{image_name}:{tag} cannot be found locally")
    return found
def pull_image_from_docker_hub(
    client: docker.DockerClient, image_name: str, tag: str, logger: logging.Logger
) -> None:
    """Pull a Docker image from Docker Hub.

    Args:
    ----
        client (docker.DockerClient): Docker client instance.
        image_name (str): The name of the Docker image.
        tag (str): Tag of the Docker image.
        logger (logging.Logger): Logger instance.

    Returns:
    -------
        None: The pulled image is not returned.

    Raises:
    ------
        Exception: If the pull raises ``docker.errors.ImageNotFound`` or
            ``docker.errors.APIError``. The docker exception is attached as
            ``__cause__``.
    """
    # Keep the try body to the one call that can raise the docker errors.
    try:
        client.images.pull(image_name, tag=tag)
    except docker.errors.ImageNotFound as e:
        # ImageNotFound is handled before the broader APIError.
        raise Exception(f"Image {image_name}:{tag} not found on Docker Hub.") from e
    except docker.errors.APIError as e:
        raise Exception(f"Error pulling image: {e}") from e
    logger.info(f"Loaded {image_name}:{tag} from Docker Hub.")
def create_container(
    client: docker.DockerClient,
    image_name: str,
    container_name: str,
    logger: logging.Logger,
    user: Optional[str] = None,
    command: Optional[str] = "tail -f /dev/null",
    nano_cpus: Optional[int] = None,
) -> Container:
    """Start a Docker container using the specified image.

    The image is pulled first when it is not available locally.

    Args:
    ----
        client (docker.DockerClient): Docker client.
        image_name (str): The name of the Docker image, optionally followed by
            ``:tag``. Without a tag, ``latest`` is used for the local lookup
            and the pull.
        container_name (str): Name for the Docker container.
        logger (logging.Logger): Logger used for container creation messages.
        user (str, optional): Log in as which user. Defaults to None.
        command (str, optional): Command to run in the container. Defaults to
            ``"tail -f /dev/null"``.
        nano_cpus (int, optional): Passed to ``client.containers.run`` as
            ``nano_cpus``. Defaults to None.

    Returns:
    -------
        docker.models.containers.Container: The started Docker container.

    Raises:
    ------
        Exception: Whatever ``client.containers.run`` (or the preceding pull)
            raised. The original error is re-raised after logging.
    """
    # Split on the LAST colon only. A colon followed by a "/" belongs to a
    # registry host:port prefix ("localhost:5000/img"), not to a tag.
    repository, separator, tag = image_name.rpartition(":")
    if not separator or "/" in tag:
        repository, tag = image_name, "latest"

    if not image_exists_locally(client, repository, tag, logger):
        pull_image_from_docker_hub(client, repository, tag, logger)

    container = None
    try:
        logger.info(f"Creating container for {image_name}...")
        container = client.containers.run(
            image=image_name,
            name=container_name,
            user=user,
            command=command,
            nano_cpus=nano_cpus,
            detach=True,
        )
        logger.info(f"Container for {image_name} created: {container.id}")
        return container
    except Exception as e:
        # If an error occurs, clean up the container and raise an exception
        logger.error(f"Error creating container for {image_name}: {e}")
        logger.info(traceback.format_exc())
        # `container` is still None when run() itself failed. Asserting on it
        # here would replace the real error with an AssertionError.
        if container is not None:
            cleanup_container(client, container, logger)
        raise
def exec_run_with_timeout(
    container: Container, cmd: str, timeout: Optional[int] = 60
) -> tuple[str, bool, float]:
    """Run a command in a container with a timeout.

    The command is executed in a worker thread. The caller waits at most
    ``timeout`` seconds for it. If the worker is still running after that, the
    exec process is sent ``kill -TERM`` and the output collected so far is
    returned.

    Args:
    ----
        container (Container): Container to run the command in.
        cmd (str): Command to run.
        timeout (int): Timeout in seconds. ``None`` waits without limit.

    Returns:
    -------
        tuple[str, bool, float]: The decoded output, whether the command timed
            out, and the elapsed wall-clock time in seconds.

    Raises:
    ------
        Exception: If creating or starting the exec fails in the worker thread.
    """
    # State shared with the worker thread
    chunks: list[bytes] = []
    exec_id = None
    failure = None
    timed_out = False

    # Wrapper function to run the command
    def run_command() -> None:
        nonlocal exec_id, failure
        try:
            exec_id = container.client.api.exec_create(container=container.id, cmd=cmd)[  # pyright: ignore
                "Id"
            ]
            exec_stream = container.client.api.exec_start(exec_id=exec_id, stream=True)  # pyright: ignore
            for chunk in exec_stream:
                chunks.append(chunk)
        except docker.errors.APIError as e:
            # An exception raised inside a thread never reaches the caller, so
            # it is stored here and re-raised after join().
            failure = Exception(f"Container {container.id} cannot execute {cmd}.\n{str(e)}")
            failure.__cause__ = e
        except Exception as e:
            failure = e

    # Start the command in a separate thread
    thread = threading.Thread(target=run_command)
    start_time = time.time()
    thread.start()
    thread.join(timeout)

    if failure is not None:
        raise failure

    # If the thread is still alive, the command timed out
    if thread.is_alive():
        if exec_id is not None:
            exec_pid = container.client.api.exec_inspect(exec_id=exec_id)["Pid"]  # pyright: ignore
            # NOTE(review): confirm this PID is valid inside the container's
            # PID namespace, where the kill command runs.
            container.exec_run(f"kill -TERM {exec_pid}", detach=True)
        timed_out = True
    end_time = time.time()

    # Decode once over the joined bytes: decoding chunk by chunk corrupts a
    # multi-byte UTF-8 character that is split across two chunks.
    exec_result = b"".join(tuple(chunks)).decode("utf-8", errors="replace")
    return exec_result, timed_out, end_time - start_time
# An empty __all__ means a star-import of this module binds no names;
# the helpers above have to be imported explicitly by name.
__all__ = []