-
Notifications
You must be signed in to change notification settings - Fork 18
Expand file tree
/
Copy pathbuild.py
More file actions
55 lines (47 loc) · 1.57 KB
/
build.py
File metadata and controls
55 lines (47 loc) · 1.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import logging
import docker
from datasets import load_dataset
from typing import Iterator, Union
from commit0.harness.constants import RepoInstance, SimpleInstance, SPLIT
from commit0.harness.docker_build import build_repo_images
from commit0.harness.spec import make_spec
# Set up root logging at import time: INFO threshold, each record rendered as
# "timestamp - logger name - level - message". basicConfig leaves the root
# logger alone if it already has handlers.
_LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Logger named after this module.
logger = logging.getLogger(__name__)
def main(
    dataset_name: str,
    dataset_split: str,
    split: str,
    num_workers: int,
    verbose: int,
) -> None:
    """Build Docker images for the dataset examples selected by ``split``.

    Args:
        dataset_name: Name passed to ``load_dataset``. Its lower-cased form
            also decides the dataset type: a name containing "swe" is
            "swebench"; one containing "humaneval", "mbpp", "bigcodebench"
            or "codecontests" is "simple"; anything else is "commit0".
        dataset_split: Passed to ``load_dataset`` as its ``split`` argument.
        split: Which examples to build. "all" keeps every example.
            Otherwise "swebench"/"simple" examples are kept when ``split``
            is a substring of their ``instance_id``, and "commit0" examples
            are kept when the last "/"-separated component of their
            ``repo`` is in ``SPLIT[split]``.
        num_workers: Forwarded unchanged to ``build_repo_images``.
        verbose: Forwarded unchanged to ``build_repo_images``.
    """
    dataset: Iterator[Union[RepoInstance, SimpleInstance]] = load_dataset(
        dataset_name, split=dataset_split
    )  # type: ignore

    # The dataset type is inferred from the (case-insensitive) dataset name.
    dataset_name = dataset_name.lower()
    if "swe" in dataset_name:
        dataset_type = "swebench"
    elif any(
        tag in dataset_name
        for tag in ("humaneval", "mbpp", "bigcodebench", "codecontests")
    ):
        dataset_type = "simple"
    else:
        dataset_type = "commit0"

    # Both filter decisions are loop-invariant, so compute them once.
    # "swebench" and "simple" examples are matched on their instance id;
    # "commit0" examples are matched on their repository name.
    keep_all = split == "all"
    match_on_instance_id = dataset_type != "commit0"

    specs = []
    for example in dataset:
        if not keep_all:
            if match_on_instance_id:
                if split not in example["instance_id"]:
                    continue
            else:
                repo_name = example["repo"].split("/")[-1]
                # SPLIT[split] is looked up lazily so an unknown split name
                # only matters when a commit0 example is actually filtered.
                if repo_name not in SPLIT[split]:
                    continue
        specs.append(make_spec(example, dataset_type, absolute=True))

    client = docker.from_env()
    try:
        build_repo_images(client, specs, dataset_type, num_workers, verbose)
    finally:
        # Close the client's underlying session even when the build raises,
        # so connections are not leaked.
        client.close()
# Empty export list: `from <this module> import *` brings in no names.
__all__ = []