Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
refactor: remove special cases
Signed-off-by: jupyterjazz <[email protected]>
  • Loading branch information
jupyterjazz committed Jun 26, 2023
commit d438dc626d3014887c17615a4fb43a327ec4ad11
35 changes: 8 additions & 27 deletions docarray/typing/url/any_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,7 @@

T = TypeVar('T', bound='AnyUrl')

mime_types_path = os.path.join(
os.path.dirname(os.path.realpath(__file__)), '..', 'resources', 'mime.types'
)
mimetypes.init([mime_types_path])
mimetypes.init([])


@_register_proto(proto_type_name='any_url')
Expand Down Expand Up @@ -68,25 +65,12 @@ def is_extension_allowed(cls, value: Any) -> bool:
return True
mimetype, _ = mimetypes.guess_type(value.split("?")[0])
print('mimetype for value', mimetype, value, value.split("?")[0])
if mimetype:
return mimetype.startswith(cls.mime_type())
else:
# check if the extension is among the extra extensions of that class
print('extra extensions for value', value, cls.extra_extensions())
return any(
value.endswith(ext) or value.split("?")[0].endswith(ext)
for ext in cls.extra_extensions()
)

@classmethod
def is_special_case(cls, value: Any) -> bool:
    """
    Tell whether the given url should be treated as a special case.

    This implementation does not inspect `value`; it answers False for
    every input.

    :param value: url to the file
    :return: always False in this implementation
    """
    return False
if mimetype and mimetype.startswith(cls.mime_type()):
return True
return any(
value.endswith(ext) or value.split("?")[0].endswith(ext)
for ext in cls.extra_extensions()
)

@classmethod
def validate(
Expand All @@ -112,10 +96,7 @@ def validate(
url = super().validate(abs_path, field, config) # basic url validation

if not cls.is_extension_allowed(value):
if not cls.is_special_case(value): # check for special cases
raise ValueError(
f'file {value} is not a valid file format for class {cls}'
)
raise ValueError(f'file {value} is not a valid file format for class {cls}')

return cls(str(value if input_is_relative_path else url), scheme=None)

Expand Down
15 changes: 0 additions & 15 deletions docarray/typing/url/text_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,21 +24,6 @@ def extra_extensions(cls) -> List[str]:
"""
return ['.md']

@classmethod
def is_special_case(cls, value: 'AnyUrl') -> bool:
    """
    Check if the url is a special case that needs to be handled differently.

    A url is a special case when it starts with ``http`` and the last
    segment of its path (query string excluded) has no file extension,
    e.g. https://de.wikipedia.org/wiki/Brixen

    :param value: url to the file
    :return: True if the url is a special case, False otherwise
    """
    # Any 'https...' string also starts with 'http', so one prefix test
    # covers both schemes.
    if not value.startswith('http'):
        return False
    # Drop the query string first so that dots inside query parameters
    # (e.g. '?v=1.2') are not mistaken for a file extension.
    last_segment = value.split('?')[0].split('/')[-1]
    # NOTE(review): for a bare host such as http://jina.ai the last segment
    # is the host name itself, so its dot makes this return False.
    return '.' not in last_segment

def load(self, charset: str = 'utf-8', timeout: Optional[float] = None) -> str:
"""
Load the text file into a string.
Expand Down
2 changes: 1 addition & 1 deletion tests/integrations/typing/test_typing_proto.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class Mymmdoc(BaseDoc):
embedding=np.zeros((100, 1)),
any_url='http://jina.ai',
image_url='http://jina.ai/bla.jpg',
text_url='http://jina.ai',
text_url='http://jina.ai/jina.txt',
mesh_url='http://jina.ai/mesh.obj',
point_cloud_url='http://jina.ai/mesh.obj',
)
Expand Down