Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
bebc9d4
feat: add audio url class
Dec 14, 2022
6025c2f
fix: typos
Dec 14, 2022
9a599e5
test: add tests for audio and audio url
Dec 15, 2022
04abdae
feat: add audio url and audio predefined class
Dec 15, 2022
f8d700d
Merge remote-tracking branch 'origin/feat-rewrite-v2' into feat-add-a…
Dec 21, 2022
d58f804
chore: add types-request
Dec 22, 2022
bdf8e88
feat: add audio tensors torch and ndarray
Dec 22, 2022
6572df8
fix: mypy type hints
Dec 22, 2022
9cd4baa
test: empty test file
Dec 22, 2022
b3c1948
test: add more unit and integration tests
Dec 28, 2022
7774181
fix: update audio tensors and audio url
Dec 28, 2022
af840d4
fix: remove print statements
Dec 28, 2022
797f488
docs: add documentation
Dec 28, 2022
8b48a77
refactor: rename test audio py to test audio tensor py
Dec 28, 2022
e135438
fix: typo in torch tensor py
Dec 28, 2022
14fcf6b
feat: add proto stuff to audio tensors
Dec 28, 2022
c623a13
test: add tests for proto and set tensors
Dec 28, 2022
1be8e3f
fix: set tensor to tensor int, since no inplace change
Dec 28, 2022
17786eb
refactor: rename to save to wav file
Dec 28, 2022
97355f7
docs: fix typo
Dec 28, 2022
20e2344
docs: fix docs for save tensor to wav file
Dec 28, 2022
7fc06e1
Merge branch 'feat-rewrite-v2' into feat-add-audio-v2
Dec 28, 2022
b34d783
fix: apply suggestions from code review
Dec 28, 2022
130d8ab
fix: apply suggestions from code review
Dec 29, 2022
2954351
test: fix assertions
Dec 29, 2022
61cb103
fix: move max int multiplication to abstract class
Dec 29, 2022
5943c0f
feat: add ndim method to abstract tensor class and concrete classes
Dec 29, 2022
131c5ff
fix: ndim
Dec 29, 2022
83ef649
fix: revert ndim in abstract tensor and torch tensor and ndarray
Dec 29, 2022
eecca41
fix: mypy checks
Dec 29, 2022
4762c3c
docs: add docstring to n dim
Dec 29, 2022
6948122
refactor: move n dim to abstract tensor and subclasses
Dec 29, 2022
d174087
refactor: make to protobuf abstract, change node to protobuf signature
Dec 29, 2022
3a52303
fix: remove not needed methods
Dec 29, 2022
a0be12e
fix: change remote audio file to file from github
Dec 30, 2022
9623d29
fix: raw content from remote file
Dec 30, 2022
6efdcf2
fix: path to github remote file
Dec 30, 2022
5026543
refactor: tensor field name to proto field name
Jan 2, 2023
703de43
test: remove redundant test in test audio tensor
Jan 2, 2023
83ece31
fix: load audio url to audio ndarray instead of np ndarray
Jan 2, 2023
de079e2
refactor: move n dim to computational backend
Jan 2, 2023
2ef1350
docs: update docstrings for audio tensors
Jan 3, 2023
d51d38e
feat: make dtype in audiourl load optional
Jan 3, 2023
3901cfa
Merge branch 'feat-rewrite-v2' into feat-add-audio-v2
Jan 3, 2023
a571898
test: fix document refactor and ndarray import
Jan 3, 2023
71af630
fix: fix mypy check
Jan 3, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions docarray/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@

from docarray.array.array import DocumentArray
from docarray.document.document import BaseDocument
from docarray.predefined_document import Image, Mesh3D, PointCloud3D, Text
from docarray.predefined_document import Audio, Image, Mesh3D, PointCloud3D, Text

__all__ = ['BaseDocument', 'DocumentArray', 'Image', 'Text', 'Mesh3D', 'PointCloud3D']
__all__ = [
'BaseDocument',
'DocumentArray',
'Image',
'Audio',
'Text',
'Mesh3D',
'PointCloud3D',
]
5 changes: 5 additions & 0 deletions docarray/computation/abstract_comp_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ def stack(
"""
...

@staticmethod
@abstractmethod
def n_dim(array: 'TTensor') -> int:
    """
    Get the number of dimensions of the given array.

    Concrete computational backends implement this for their own tensor type.

    :param array: tensor whose dimensions are counted
    :return: number of dimensions of `array`
    """
    ...

class Retrieval(ABC, typing.Generic[TTensorRetrieval]):
"""
Abstract class for retrieval and ranking functionalities
Expand Down
4 changes: 4 additions & 0 deletions docarray/computation/numpy_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ def stack(
) -> 'np.ndarray':
return np.stack(tensors, axis=dim)

@staticmethod
def n_dim(array: 'np.ndarray') -> int:
return array.ndim

class Retrieval(AbstractComputationalBackend.Retrieval[np.ndarray]):
"""
Abstract class for retrieval and ranking functionalities
Expand Down
4 changes: 4 additions & 0 deletions docarray/computation/torch_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ def stack(
) -> 'torch.Tensor':
return torch.stack(tensors, dim=dim)

@staticmethod
def n_dim(array: 'torch.Tensor') -> int:
return array.ndim

class Retrieval(AbstractComputationalBackend.Retrieval[torch.Tensor]):
"""
Abstract class for retrieval and ranking functionalities
Expand Down
3 changes: 2 additions & 1 deletion docarray/predefined_document/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from docarray.predefined_document.audio import Audio
from docarray.predefined_document.image import Image
from docarray.predefined_document.mesh import Mesh3D
from docarray.predefined_document.point_cloud import PointCloud3D
from docarray.predefined_document.text import Text

__all__ = ['Text', 'Image', 'Mesh3D', 'PointCloud3D']
__all__ = ['Text', 'Image', 'Audio', 'Mesh3D', 'PointCloud3D']
77 changes: 77 additions & 0 deletions docarray/predefined_document/audio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
from typing import Optional, TypeVar

from docarray.document import BaseDocument
from docarray.typing import AudioUrl, Embedding
from docarray.typing.tensor.audio.audio_tensor import AudioTensor

T = TypeVar('T', bound='Audio')


class Audio(BaseDocument):
    """
    Document for handling audios.

    The Audio Document can contain an AudioUrl (`Audio.url`), an AudioTensor
    (`Audio.tensor`), and an Embedding (`Audio.embedding`). All three fields are
    optional.

    EXAMPLE USAGE:

    You can use this Document directly:

    .. code-block:: python

        from docarray import Audio

        # use it directly
        audio = Audio(
            url='https://github.com/docarray/docarray/tree/feat-add-audio-v2/tests/toydata/hello.wav?raw=true'
        )
        audio.tensor = audio.url.load()
        model = MyEmbeddingModel()
        audio.embedding = model(audio.tensor)

    You can extend this Document:

    .. code-block:: python

        from docarray import Audio, Text
        from typing import Optional

        # extend it
        class MyAudio(Audio):
            name: Optional[Text]


        audio = MyAudio(
            url='https://github.com/docarray/docarray/tree/feat-add-audio-v2/tests/toydata/hello.wav?raw=true'
        )
        audio.tensor = audio.url.load()
        model = MyEmbeddingModel()
        audio.embedding = model(audio.tensor)
        audio.name = 'my first audio'


    You can use this Document for composition:

    .. code-block:: python

        from docarray import BaseDocument, Audio, Text

        # compose it
        class MultiModalDoc(BaseDocument):
            audio: Audio
            text: Text


        mmdoc = MultiModalDoc(
            audio=Audio(
                url='https://github.com/docarray/docarray/tree/feat-add-audio-v2/tests/toydata/hello.wav?raw=true'
            ),
            text=Text(text='hello world, how are you doing?'),
        )
        mmdoc.audio.tensor = mmdoc.audio.url.load()
    """

    # Location of the audio file; the examples above call `url.load()` on it.
    url: Optional[AudioUrl]
    # Audio data as a tensor (AudioNdArray, or AudioTorchTensor if torch is installed).
    tensor: Optional[AudioTensor]
    # Embedding of the audio, e.g. the output of a model applied to `tensor`.
    embedding: Optional[Embedding]
5 changes: 5 additions & 0 deletions docarray/proto/docarray.proto
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ message NodeProto {

string point_cloud_url = 13;

string audio_url = 14;

NdArrayProto audio_ndarray = 15;

NdArrayProto audio_torch_tensor = 16;

}

Expand Down
28 changes: 14 additions & 14 deletions docarray/proto/pb2/docarray_pb2.py

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 13 additions & 2 deletions docarray/typing/__init__.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,23 @@
from docarray.typing.id import ID
from docarray.typing.tensor.audio import AudioNdArray
from docarray.typing.tensor.embedding.embedding import Embedding
from docarray.typing.tensor.ndarray import NdArray
from docarray.typing.tensor.tensor import AnyTensor
from docarray.typing.url import AnyUrl, ImageUrl, Mesh3DUrl, PointCloud3DUrl, TextUrl
from docarray.typing.url import (
AnyUrl,
AudioUrl,
ImageUrl,
Mesh3DUrl,
PointCloud3DUrl,
TextUrl,
)

__all__ = [
'AudioNdArray',
'NdArray',
'Embedding',
'ImageUrl',
'AudioUrl',
'TextUrl',
'Mesh3DUrl',
'PointCloud3DUrl',
Expand All @@ -22,5 +32,6 @@
pass
else:
from docarray.typing.tensor import TorchEmbedding, TorchTensor # noqa: F401
from docarray.typing.tensor.audio.audio_torch_tensor import AudioTorchTensor # noqa

__all__.extend(['TorchEmbedding', 'TorchTensor'])
__all__.extend(['AudioTorchTensor', 'TorchEmbedding', 'TorchTensor'])
1 change: 1 addition & 0 deletions docarray/typing/tensor/abstract_tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
class AbstractTensor(AbstractType, Generic[ShapeT], ABC):

__parametrized_meta__ = type
_PROTO_FIELD_NAME: str

@classmethod
@abc.abstractmethod
Expand Down
12 changes: 12 additions & 0 deletions docarray/typing/tensor/audio/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from docarray.typing.tensor.audio.audio_ndarray import AudioNdArray

__all__ = ['AudioNdArray']

# AudioTorchTensor is only exported when torch can be imported; without torch
# the package exposes AudioNdArray alone.
try:
    import torch  # noqa: F401
except ImportError:
    pass
else:
    from docarray.typing.tensor.audio.audio_torch_tensor import AudioTorchTensor  # noqa

    __all__.extend(['AudioTorchTensor'])
39 changes: 39 additions & 0 deletions docarray/typing/tensor/audio/abstract_audio_tensor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import wave
from abc import ABC, abstractmethod
from typing import BinaryIO, TypeVar, Union

from docarray.typing.tensor.abstract_tensor import AbstractTensor

T = TypeVar('T', bound='AbstractAudioTensor')


class AbstractAudioTensor(AbstractTensor, ABC):
    """
    Base class for tensors that hold audio data.

    Subclasses supply the byte encoding through `to_audio_bytes`; this class
    uses it to write the tensor to a .wav file.
    """

    @abstractmethod
    def to_audio_bytes(self):
        """
        Convert audio tensor to bytes.

        The result is handed unchanged to the wave writer in `save_to_wav_file`.
        """
        ...

    def save_to_wav_file(
        self: 'T',
        file_path: Union[str, BinaryIO],
        sample_rate: int = 44100,
        sample_width: int = 2,
    ) -> None:
        """
        Write this audio tensor to a .wav file, keeping mono or stereo layout.

        :param file_path: target .wav file. A string is opened as a file of that
            name; anything else is treated as a file-like object.
        :param sample_rate: sampling frequency
        :param sample_width: sample width in bytes
        """
        backend = self.get_comp_backend()

        # NOTE(review): every tensor with more than one dimension is written as
        # two channels, whatever the size of its channel axis - confirm that
        # callers only pass mono (1-d) or stereo (2-d) data.
        if backend.n_dim(array=self) > 1:  # type: ignore
            n_channels = 2
        else:
            n_channels = 1

        with wave.open(file_path, 'w') as wav_writer:
            wav_writer.setnchannels(n_channels)
            wav_writer.setsampwidth(sample_width)
            wav_writer.setframerate(sample_rate)
            wav_writer.writeframes(self.to_audio_bytes())
59 changes: 59 additions & 0 deletions docarray/typing/tensor/audio/audio_ndarray.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from typing import TypeVar

from docarray.typing.tensor.audio.abstract_audio_tensor import AbstractAudioTensor
from docarray.typing.tensor.ndarray import NdArray

MAX_INT_16 = 2**15

T = TypeVar('T', bound='AudioNdArray')


class AudioNdArray(AbstractAudioTensor, NdArray):
    """
    Subclass of NdArray, to represent an audio tensor.
    Adds audio-specific features to the tensor.


    EXAMPLE USAGE

    .. code-block:: python

        from typing import Optional

        from pydantic import parse_obj_as

        from docarray import BaseDocument
        from docarray.typing import AudioNdArray, AudioUrl
        import numpy as np


        class MyAudioDoc(BaseDocument):
            title: str
            audio_tensor: Optional[AudioNdArray]
            url: Optional[AudioUrl]


        # from tensor
        doc_1 = MyAudioDoc(
            title='my_first_audio_doc',
            audio_tensor=np.random.rand(1000, 2),
        )

        doc_1.audio_tensor.save_to_wav_file(file_path='path/to/file_1.wav')

        # from url
        doc_2 = MyAudioDoc(
            title='my_second_audio_doc',
            url='https://www.kozco.com/tech/piano2.wav',
        )

        doc_2.audio_tensor = parse_obj_as(AudioNdArray, doc_2.url.load())
        doc_2.audio_tensor.save_to_wav_file(file_path='path/to/file_2.wav')

    """

    _PROTO_FIELD_NAME = 'audio_ndarray'

    def to_audio_bytes(self) -> bytes:
        """
        Convert audio tensor to bytes of little-endian 16-bit integer samples.

        Every value is multiplied by `MAX_INT_16` (2**15), clipped to the signed
        16-bit range and cast to `'<h'`. The output is always 2 bytes per
        sample.

        NOTE(review): presumably the samples are floats in [-1, 1] - confirm
        against what `AudioUrl.load()` produces.

        :return: the samples as raw bytes, in the array's C order
        """
        scaled = self * MAX_INT_16
        # A full-scale sample (1.0 -> 32768) does not fit into int16 and would
        # wrap around to a large negative value; saturate instead.
        saturated = scaled.clip(-MAX_INT_16, MAX_INT_16 - 1)
        return saturated.astype('<h').tobytes()
13 changes: 13 additions & 0 deletions docarray/typing/tensor/audio/audio_tensor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from typing import Union

from docarray.typing.tensor.audio.audio_ndarray import AudioNdArray

# `AudioTensor` is the annotation used for audio tensor fields. Which types it
# covers depends on whether torch can be imported.
try:
    import torch  # noqa: F401
except ImportError:
    # Without torch only the numpy-backed audio tensor exists.
    AudioTensor = AudioNdArray

else:
    from docarray.typing.tensor.audio.audio_torch_tensor import AudioTorchTensor

    # With torch available an audio tensor may be either flavour.
    AudioTensor = Union[AudioNdArray, AudioTorchTensor]  # type: ignore
Loading