Skip to content

Commit e4aace1

Browse files
fix datasets dependency for snapshot download (#1581)
1 parent a7f50d4 commit e4aace1

File tree

5 files changed

+15
-6
lines changed

5 files changed

+15
-6
lines changed

modelscope/hub/api.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,8 @@
8282
DownloadChannel, DownloadMode,
8383
Frameworks, ModelFile, Tasks,
8484
VirgoDatasetConfig)
85-
from modelscope.utils.file_utils import get_file_hash, get_file_size
85+
from modelscope.utils.file_utils import (get_file_hash, get_file_size,
86+
is_relative_path)
8687
from modelscope.utils.logger import get_logger
8788
from modelscope.utils.repo_utils import (DATASET_LFS_SUFFIX,
8889
DEFAULT_IGNORE_PATTERNS,
@@ -1461,7 +1462,6 @@ def list_repo_commits(self,
14611462
>>> for commit in commit_history.commits:
14621463
... print(f"{commit.short_id}: {commit.title}")
14631464
"""
1464-
from datasets.utils.file_utils import is_relative_path
14651465

14661466
if is_relative_path(repo_id) and repo_id.count('/') == 1:
14671467
_owner, _dataset_name = repo_id.split('/')
@@ -1520,7 +1520,6 @@ def get_dataset_files(self,
15201520
List: The response containing the dataset repository tree information.
15211521
e.g. [{'CommitId': None, 'CommitMessage': '...', 'Size': 0, 'Type': 'tree'}, ...]
15221522
"""
1523-
from datasets.utils.file_utils import is_relative_path
15241523

15251524
if is_relative_path(repo_id) and repo_id.count('/') == 1:
15261525
_owner, _dataset_name = repo_id.split('/')

modelscope/msdatasets/download/download_manager.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,11 @@
33
from datasets.download.download_manager import DownloadManager
44
from datasets.download.streaming_download_manager import \
55
StreamingDownloadManager
6-
from datasets.utils.file_utils import cached_path, is_relative_path
6+
from datasets.utils.file_utils import cached_path
77

88
from modelscope.msdatasets.download.download_config import DataDownloadConfig
99
from modelscope.msdatasets.utils.oss_utils import OssUtilities
10+
from modelscope.utils.file_utils import is_relative_path
1011

1112

1213
class DataDownloadManager(DownloadManager):

modelscope/msdatasets/ms_dataset.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
from datasets import (Dataset, DatasetDict, Features, IterableDataset,
1010
IterableDatasetDict)
1111
from datasets.packaged_modules import _PACKAGED_DATASETS_MODULES
12-
from datasets.utils.file_utils import is_relative_path
1312

1413
from modelscope.hub.repository import DatasetRepository
1514
from modelscope.msdatasets.context.dataset_context_config import \
@@ -32,6 +31,7 @@
3231
REPO_TYPE_DATASET, ConfigFields,
3332
DatasetFormations, DownloadMode, Hubs,
3433
ModeKeys, Tasks, UploadMode)
34+
from modelscope.utils.file_utils import is_relative_path
3535
from modelscope.utils.import_utils import is_tf_available, is_torch_available
3636
from modelscope.utils.logger import get_logger
3737

modelscope/msdatasets/utils/hf_datasets_util.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@
4646
from datasets.utils import file_utils
4747
from datasets.utils.file_utils import (_raise_if_offline_mode_is_enabled,
4848
cached_path, is_local_path,
49-
is_relative_path,
5049
relative_to_absolute_path)
5150
from datasets.utils.info_utils import is_small_dataset
5251
from datasets.utils.metadata import MetadataConfigs
@@ -68,6 +67,7 @@
6867
from modelscope.utils.config_ds import MS_DATASETS_CACHE
6968
from modelscope.utils.constant import DEFAULT_DATASET_REVISION, REPO_TYPE_DATASET
7069
from modelscope.utils.import_utils import has_attr_in_class
70+
from modelscope.utils.file_utils import is_relative_path
7171
from modelscope.utils.logger import get_logger
7272

7373
logger = get_logger()

modelscope/utils/file_utils.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from pathlib import Path
77
from shutil import Error, copy2, copystat
88
from typing import BinaryIO, Optional, Union
9+
from urllib.parse import urlparse
910

1011

1112
# TODO: remove this api, unify to flattened args
@@ -274,3 +275,11 @@ def get_file_hash(
274275
'chunk_nums': len(chunk_hash_list),
275276
'chunk_hash_list': chunk_hash_list,
276277
}
278+
279+
280+
def is_relative_path(url_or_filename: str) -> bool:
    """Return whether ``url_or_filename`` is a relative local path.

    A value counts as relative when it carries no URL scheme (so it is not
    something like ``https://...``) and ``os.path.isabs`` does not consider
    it absolute.

    Args:
        url_or_filename: A URL or a file path. ``os.PathLike`` objects such
            as ``pathlib.Path`` are accepted and converted with
            ``os.fspath`` before being inspected.

    Returns:
        bool: True if the value has no scheme and is not an absolute path,
        False otherwise.
    """
    # urlparse() only understands str/bytes; unwrap path objects first so
    # callers may pass pathlib.Path instead of getting an error.
    if isinstance(url_or_filename, os.PathLike):
        url_or_filename = os.fspath(url_or_filename)
    # Anything with a scheme is a URL, never a relative path.
    if urlparse(url_or_filename).scheme != '':
        return False
    return not os.path.isabs(url_or_filename)

0 commit comments

Comments
 (0)