
Commit 9888d93

Author: Charlotte Gerhaher
feat: add video support (#972)
* feat: add video url and tensors to proto
* feat: add video url and video ndarray
* feat: add video torch tensor and tests
* fix: mypy checks
* chore: add av to video extra
* fix: allow dim 3
* test: wip video load and save
* refactor: move to numpy to computational backend
* fix: video load and save
* test: adjust tests
* fix: video load and save and add docstrings
* fix: fix some imports after merging
* docs: add doc strings and fix example urls
* docs: small fixes in docs
* refactor: rename save to mp4 file to save
* feat: add shape method to comp backend
* refactor: move validate shape to video tensor mixin
* refactor: extract private load and make separate methods for frames
* fix: use torch shape instead of size method
* fix: add typehint to shape in comp backend
* docs: add supported strings for skip type
* fix: apply suggestions from code review
* fix: small change to trigger ci again
* fix: extract shape var
* fix: introduce compbackendinterface
* fix: revert previous pr and fix for mypy

Signed-off-by: anna-charlotte <[email protected]>
1 parent 29b4254 commit 9888d93
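In short, the commit adds a `Video` document type plus the video URL/tensor typing and proto plumbing it needs. A minimal usage sketch, taken from the docstring added in `docarray/documents/video.py` below (the toy-data URL is the one used in that docstring):

```python
from docarray.documents import Video

# construct a Video document from a remote .mp4 file
vid = Video(
    url='https://github.com/docarray/docarray/tree/feat-add-video-v2/tests/toydata/mov_bbb.mp4?raw=true'
)

# loading fills the audio tensor, the video frames and the key-frame indices
vid.audio.tensor, vid.video_tensor, vid.key_frame_indices = vid.url.load()
```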

File tree

24 files changed: +906 −25 lines changed


docarray/computation/abstract_comp_backend.py

Lines changed: 15 additions & 1 deletion
@@ -1,6 +1,9 @@
 import typing
 from abc import ABC, abstractmethod
-from typing import List, Optional, Tuple, TypeVar, Union, overload
+from typing import TYPE_CHECKING, List, Optional, Tuple, TypeVar, Union, overload
+
+if TYPE_CHECKING:
+    import numpy as np

 # In practice all of the below will be the same type
 TTensor = TypeVar('TTensor')
@@ -30,6 +33,17 @@ def stack(
     @staticmethod
     @abstractmethod
     def n_dim(array: 'TTensor') -> int:
+        """
+        Get the number of the array dimensions.
+        """
+        ...
+
+    @staticmethod
+    @abstractmethod
+    def to_numpy(array: 'TTensor') -> 'np.ndarray':
+        """
+        Convert array to np.ndarray.
+        """
         ...

     @staticmethod
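For context, every concrete computational backend now has to provide these two static methods. A hypothetical, partial sketch of what that looks like for a third backend; `ListBackend` and the use of nested Python lists are illustrative assumptions, not part of docarray:

```python
from typing import List

import numpy as np

from docarray.computation.abstract_comp_backend import AbstractComputationalBackend


class ListBackend(AbstractComputationalBackend):
    """Partial, illustrative backend over nested Python lists.

    Only the two methods touched in the diff above are shown; the other
    abstract methods are omitted, so this class is not instantiable.
    """

    @staticmethod
    def n_dim(array: List) -> int:
        # number of dimensions of the (rectangular) nested list
        return np.asarray(array).ndim

    @staticmethod
    def to_numpy(array: List) -> 'np.ndarray':
        # convert the nested list to a plain numpy array
        return np.asarray(array)
```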

docarray/computation/numpy_backend.py

Lines changed: 4 additions & 0 deletions
@@ -64,6 +64,10 @@ def to_device(
     def n_dim(array: 'np.ndarray') -> int:
         return array.ndim

+    @staticmethod
+    def to_numpy(array: 'np.ndarray') -> 'np.ndarray':
+        return array
+
     @staticmethod
     def empty(shape: Tuple[int, ...]) -> 'np.ndarray':
         return np.empty(shape)
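For the numpy backend the conversion is a no-op, so callers can invoke `to_numpy` uniformly regardless of tensor type. A quick illustration, assuming the backend class defined in this module is named `NumpyCompBackend` (that name is an assumption, not shown in the diff):

```python
import numpy as np

# assumption: the class in docarray/computation/numpy_backend.py is NumpyCompBackend
from docarray.computation.numpy_backend import NumpyCompBackend

arr = np.zeros((2, 3))
# the numpy backend returns the input array unchanged
assert NumpyCompBackend.to_numpy(arr) is arr
```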

docarray/computation/torch_backend.py

Lines changed: 5 additions & 0 deletions
@@ -1,5 +1,6 @@
 from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Union, overload

+import numpy as np
 import torch

 from docarray.computation.abstract_comp_backend import AbstractComputationalBackend
@@ -68,6 +69,10 @@ def empty(shape: Tuple[int, ...]) -> torch.Tensor:
     def n_dim(array: 'torch.Tensor') -> int:
         return array.ndim

+    @staticmethod
+    def to_numpy(array: 'torch.Tensor') -> 'np.ndarray':
+        return array.cpu().detach().numpy()
+
     @staticmethod
     def none_value() -> Any:
         """Provide a compatible value that represents None in torch."""

docarray/documents/__init__.py

Lines changed: 2 additions & 1 deletion
@@ -3,5 +3,6 @@
 from docarray.documents.mesh import Mesh3D
 from docarray.documents.point_cloud import PointCloud3D
 from docarray.documents.text import Text
+from docarray.documents.video import Video

-__all__ = ['Text', 'Image', 'Audio', 'Mesh3D', 'PointCloud3D']
+__all__ = ['Text', 'Image', 'Audio', 'Mesh3D', 'PointCloud3D', 'Video']

docarray/documents/audio.py

Lines changed: 4 additions & 4 deletions
@@ -24,7 +24,7 @@ class Audio(BaseDocument):

        # use it directly
        audio = Audio(
-            url='https://github.com/docarray/docarray/tree/feat-add-audio-v2/tests/toydata/hello.wav?raw=true'
+            url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/hello.wav?raw=true'
        )
        audio.tensor = audio.url.load()
        model = MyEmbeddingModel()
@@ -43,12 +43,12 @@ class MyAudio(Audio):


        audio = MyAudio(
-            url='https://github.com/docarray/docarray/tree/feat-add-audio-v2/tests/toydata/hello.wav?raw=true'
+            url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/hello.wav?raw=true'
        )
        audio.tensor = audio.url.load()
        model = MyEmbeddingModel()
        audio.embedding = model(audio.tensor)
-        audio.name = 'my first audio'
+        audio.name = Text(text='my first audio')


    You can use this Document for composition:
@@ -66,7 +66,7 @@ class MultiModalDoc(Document):

        mmdoc = MultiModalDoc(
            audio=Audio(
-                url='https://github.com/docarray/docarray/tree/feat-add-audio-v2/tests/toydata/hello.wav?raw=true'
+                url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/hello.wav?raw=true'
            ),
            text=Text(text='hello world, how are you doing?'),
        )

docarray/documents/video.py

Lines changed: 85 additions & 0 deletions
@@ -0,0 +1,85 @@
+from typing import Optional, TypeVar
+
+from docarray.base_document import BaseDocument
+from docarray.documents import Audio
+from docarray.typing import AnyEmbedding, AnyTensor
+from docarray.typing.tensor.video.video_tensor import VideoTensor
+from docarray.typing.url.video_url import VideoUrl
+
+T = TypeVar('T', bound='Video')
+
+
+class Video(BaseDocument):
+    """
+    Document for handling video.
+    The Video Document can contain a VideoUrl (`Video.url`), an Audio Document
+    (`Video.audio`), a VideoTensor (`Video.video_tensor`), an AnyTensor representing
+    the indices of the video's key frames (`Video.key_frame_indices`) and an
+    AnyEmbedding (`Video.embedding`).
+
+    EXAMPLE USAGE:
+
+    You can use this Document directly:
+
+    .. code-block:: python
+
+        from docarray.documents import Video
+
+        # use it directly
+        vid = Video(
+            url='https://github.com/docarray/docarray/tree/feat-add-video-v2/tests/toydata/mov_bbb.mp4?raw=true'
+        )
+        vid.audio.tensor, vid.video_tensor, vid.key_frame_indices = vid.url.load()
+        model = MyEmbeddingModel()
+        vid.embedding = model(vid.video_tensor)
+
+    You can extend this Document:
+
+    .. code-block:: python
+
+        from typing import Optional
+
+        from docarray.documents import Text, Video
+
+
+        # extend it
+        class MyVideo(Video):
+            name: Optional[Text]
+
+
+        video = MyVideo(
+            url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/mov_bbb.mp4?raw=true'
+        )
+        video.video_tensor = video.url.load_key_frames()
+        model = MyEmbeddingModel()
+        video.embedding = model(video.video_tensor)
+        video.name = Text(text='my first video')
+
+    You can use this Document for composition:
+
+    .. code-block:: python
+
+        from docarray import BaseDocument
+        from docarray.documents import Text, Video
+
+
+        # compose it
+        class MultiModalDoc(BaseDocument):
+            video: Video
+            text: Text
+
+
+        mmdoc = MultiModalDoc(
+            video=Video(
+                url='https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/mov_bbb.mp4?raw=true'
+            ),
+            text=Text(text='hello world, how are you doing?'),
+        )
+        mmdoc.video.video_tensor = mmdoc.video.url.load_key_frames()
+    """
+
+    url: Optional[VideoUrl]
+    audio: Optional[Audio] = Audio()
+    video_tensor: Optional[VideoTensor]
+    key_frame_indices: Optional[AnyTensor]
+    embedding: Optional[AnyEmbedding]

docarray/proto/docarray.proto

Lines changed: 6 additions & 0 deletions
@@ -69,6 +69,12 @@ message NodeProto {

    NdArrayProto audio_torch_tensor = 16;

+    string video_url = 17;
+
+    NdArrayProto video_ndarray = 18;
+
+    NdArrayProto video_torch_tensor = 19;
+
  }

}
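The three new fields let video data travel through NodeProto alongside the existing audio fields. A hedged round-trip sketch, assuming `BaseDocument` exposes the `to_protobuf`/`from_protobuf` helpers used elsewhere in docarray's proto layer (not shown in this diff):

```python
from docarray.documents import Video

vid = Video(
    url='https://github.com/docarray/docarray/tree/feat-add-video-v2/tests/toydata/mov_bbb.mp4?raw=true'
)

# assumption: BaseDocument provides to_protobuf / from_protobuf
proto = vid.to_protobuf()
vid_roundtrip = Video.from_protobuf(proto)
assert vid_roundtrip.url == vid.url
```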

docarray/proto/pb2/docarray_pb2.py

Lines changed: 14 additions & 14 deletions
Some generated files are not rendered by default.

docarray/typing/__init__.py

Lines changed: 9 additions & 2 deletions
@@ -3,24 +3,28 @@
 from docarray.typing.tensor.embedding.embedding import AnyEmbedding
 from docarray.typing.tensor.ndarray import NdArray
 from docarray.typing.tensor.tensor import AnyTensor
+from docarray.typing.tensor.video import VideoNdArray
 from docarray.typing.url import (
     AnyUrl,
     AudioUrl,
     ImageUrl,
     Mesh3DUrl,
     PointCloud3DUrl,
     TextUrl,
+    VideoUrl,
 )

 __all__ = [
-    'AudioNdArray',
     'NdArray',
+    'AudioNdArray',
+    'VideoNdArray',
     'AnyEmbedding',
     'ImageUrl',
     'AudioUrl',
     'TextUrl',
     'Mesh3DUrl',
     'PointCloud3DUrl',
+    'VideoUrl',
     'AnyUrl',
     'ID',
     'AnyTensor',
@@ -33,5 +37,8 @@
 else:
     from docarray.typing.tensor import TorchEmbedding, TorchTensor  # noqa: F401
     from docarray.typing.tensor.audio.audio_torch_tensor import AudioTorchTensor  # noqa
+    from docarray.typing.tensor.video.video_torch_tensor import VideoTorchTensor  # noqa

-    __all__.extend(['AudioTorchTensor', 'TorchEmbedding', 'TorchTensor'])
+    __all__.extend(
+        ['AudioTorchTensor', 'TorchEmbedding', 'TorchTensor', 'VideoTorchTensor']
+    )
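With these exports in place, the new types are importable from the top-level typing package (`VideoTorchTensor` only in environments where torch is installed), as the diff above shows:

```python
from docarray.typing import VideoNdArray, VideoUrl

# only available when torch is installed:
from docarray.typing import VideoTorchTensor
```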
Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
+from docarray.typing.tensor.video.video_ndarray import VideoNdArray
+
+__all__ = ['VideoNdArray']
+
+try:
+    import torch  # noqa: F401
+except ImportError:
+    pass
+else:
+    from docarray.typing.tensor.video.video_torch_tensor import VideoTorchTensor  # noqa
+
+    __all__.extend(['VideoTorchTensor'])
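As a rough illustration of how these tensor types are meant to be used for validation: the snippet below is a sketch, and both the pydantic `parse_obj_as` entry point and the `(frames, height, width, channels)` shape with 3 channels are assumptions based on the shape-validation commits listed above, not something shown in this diff:

```python
import numpy as np
from pydantic import parse_obj_as

from docarray.typing import VideoNdArray

# assumed shape convention: (frames, height, width, channels) with 3 channels
arr = np.zeros((10, 224, 224, 3), dtype=np.uint8)
video = parse_obj_as(VideoNdArray, arr)
print(video.shape)  # (10, 224, 224, 3)
```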
