fix: update audio tensors and audio url

Signed-off-by: anna-charlotte <[email protected]>
docarray · JoanFM · Jan 3, 2023 · Dec 14, 2022 · Dec 14, 2022 · Dec 15, 2022
commit 7774181d6c9768d31c8a8b400e67a5fca6156d49
diff --git a/docarray/typing/__init__.py b/docarray/typing/__init__.py
@@ -1,5 +1,6 @@
 from docarray.typing.id import ID
 from docarray.typing.tensor import NdArray, Tensor
+from docarray.typing.tensor.audio import AudioNdArray
 from docarray.typing.tensor.embedding import Embedding
 from docarray.typing.url import (
     AnyUrl,
@@ -11,6 +12,7 @@
 )
 
 __all__ = [
+    'AudioNdArray',
     'NdArray',
     'Embedding',
     'ImageUrl',
@@ -29,5 +31,6 @@
     pass
 else:
     from docarray.typing.tensor import TorchEmbedding, TorchTensor  # noqa: F401
+    from docarray.typing.tensor.audio.audio_torch_tensor import AudioTorchTensor  # noqa
 
-    __all__.extend(['TorchEmbedding', 'TorchTensor'])
+    __all__.extend(['AudioTorchTensor', 'TorchEmbedding', 'TorchTensor'])
diff --git a/docarray/typing/tensor/audio/__init__.py b/docarray/typing/tensor/audio/__init__.py
@@ -0,0 +1,12 @@
+from docarray.typing.tensor.audio.audio_ndarray import AudioNdArray
+
+__all__ = ['AudioNdArray']
+
+try:
+    import torch  # noqa: F401
+except ImportError:
+    pass
+else:
+    from docarray.typing.tensor.audio.audio_torch_tensor import AudioTorchTensor  # noqa
+
+    __all__.extend(['AudioTorchTensor'])
diff --git a/docarray/typing/tensor/audio/audio_ndarray.py b/docarray/typing/tensor/audio/audio_ndarray.py
@@ -1,14 +1,29 @@
 import wave
-from typing import BinaryIO, TypeVar, Union
+from typing import TYPE_CHECKING, BinaryIO, TypeVar, Union
 
 from docarray.typing import NdArray
 
 T = TypeVar('T', bound='AudioNdArray')
 
+if TYPE_CHECKING:
+    from docarray.proto import NodeProto
+
 
 class AudioNdArray(NdArray):
     """ """
 
+    def _to_node_protobuf(self: T, field: str = 'ndarray') -> 'NodeProto':
+        """Convert itself into a NodeProto protobuf message. This function should
+        be called when the Document is nested into another Document that needs to be
+        converted into a protobuf
+        :param field: field in which to store the content in the node proto
+        :return: the nested item protobuf message
+        """
+        from docarray.proto import NodeProto
+
+        nd_proto = self.to_protobuf()
+        return NodeProto(**{field: nd_proto})
+
     def save_audio_tensor_to_file(
         self: 'T',
         file_path: Union[str, BinaryIO],
@@ -26,7 +41,11 @@ def save_audio_tensor_to_file(
 
         # Convert to (little-endian) 16 bit integers.
         max_int16 = 2**15
+        print(f"self = {self}")
+        print(f"self.__class__ = {self.__class__}")
+
         tensor = (self * max_int16).astype('<h')
+        print(f"tensor = {tensor}")
         n_channels = 2 if self.ndim > 1 else 1
 
         with wave.open(file_path, 'w') as f:
@@ -35,4 +54,6 @@ def save_audio_tensor_to_file(
             # 2 bytes per sample.
             f.setsampwidth(sample_width)
             f.setframerate(sample_rate)
+            print(f"tensor = {tensor}")
+            print(f"tensor.tobytes() = {tensor.tobytes()}")
             f.writeframes(tensor.tobytes())
diff --git a/docarray/typing/tensor/audio/audio_torch_tensor.py b/docarray/typing/tensor/audio/audio_torch_tensor.py
@@ -1,10 +1,7 @@
 import wave
 from typing import BinaryIO, TypeVar, Union
 
-import numpy as np
-
-from docarray.typing import TorchTensor
-from docarray.typing.tensor.torch_tensor import metaTorchAndNode
+from docarray.typing.tensor.torch_tensor import TorchTensor, metaTorchAndNode
 
 T = TypeVar('T', bound='AudioTorchTensor')
 
@@ -26,17 +23,16 @@ def save_audio_tensor_to_file(
         :param sample_rate: sampling frequency
         :param sample_width: sample width in bytes
         """
-        np_self: np.ndarray = self.cpu().detach().numpy()
+        import torch
 
-        # Convert to (little-endian) 16 bit integers.
         max_int16 = 2**15
-        tensor = (np_self * max_int16).astype('<h')
-        n_channels = 2 if np_self.ndim > 1 else 1
+        tensor = torch.tensor(self * max_int16, dtype=torch.int16)
+        n_channels = 2 if self.ndim > 1 else 1
 
         with wave.open(file_path, 'w') as f:
             # 2 Channels.
             f.setnchannels(n_channels)
             # 2 bytes per sample.
             f.setsampwidth(sample_width)
             f.setframerate(sample_rate)
-            f.writeframes(tensor.tobytes())
+            f.writeframes(tensor.cpu().detach().numpy().tobytes())
diff --git a/docarray/typing/url/audio_url.py b/docarray/typing/url/audio_url.py
@@ -1,15 +1,20 @@
 import wave
-from typing import TYPE_CHECKING, TypeVar, Union
+from typing import TYPE_CHECKING, Any, Type, TypeVar, Union
 
 import numpy as np
 
 from docarray.typing.url.any_url import AnyUrl
 
 if TYPE_CHECKING:
+    from pydantic import BaseConfig
+    from pydantic.fields import ModelField
+
     from docarray.proto import NodeProto
 
 T = TypeVar('T', bound='AudioUrl')
 
+AUDIO_FILE_FORMATS = ['wav']
+
 
 class AudioUrl(AnyUrl):
     """
@@ -28,9 +33,25 @@ def _to_node_protobuf(self: T) -> 'NodeProto':
 
         return NodeProto(audio_url=str(self))
 
+    @classmethod
+    def validate(
+        cls: Type[T],
+        value: Union[T, np.ndarray, Any],
+        field: 'ModelField',
+        config: 'BaseConfig',
+    ) -> T:
+        url = super().validate(value, field, config)  # basic url validation
+        has_audio_extension = any(url.endswith(ext) for ext in AUDIO_FILE_FORMATS)
+        if not has_audio_extension:
+            raise ValueError(
+                f'Audio URL must have one of the following extensions:'
+                f'{AUDIO_FILE_FORMATS}'
+            )
+        return cls(str(url), scheme=None)
+
     def load(self: T) -> np.ndarray:
         """
-        Load the data from the url into a numpy.ndarray audio tensor.
+        Load the data from the url into a numpy.ndarray.
 
         EXAMPLE USAGE
 
@@ -46,7 +67,7 @@ class MyDoc(Document):
                 audio_url: AudioUrl
 
 
-            doc = MyDoc(mesh_url="toydata/hello.wav")
+            doc = MyDoc(audio_url="toydata/hello.wav")
 
             audio_tensor = doc.audio_url.load()
             assert isinstance(audio_tensor, np.ndarray)