Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
a452268
feat: add video url and tensors to proto
Jan 3, 2023
3ccb697
feat: add video url and video ndarray
Jan 3, 2023
dc957d1
feat: add video torch tensor and tests
Jan 4, 2023
fc86920
fix: mypy checks
Jan 4, 2023
8a55e0b
chore: add av to video extra
Jan 4, 2023
5cb098a
fix: allow dim 3
Jan 4, 2023
3ba1f78
test: wip video load and save
Jan 5, 2023
be63926
refactor: move to numpy to computational backend
Jan 6, 2023
395a495
fix: video load and save
Jan 11, 2023
406ec80
test: adjust tests
Jan 11, 2023
091e79a
fix: video load and save and add docstrings
Jan 11, 2023
dee1146
Merge remote-tracking branch 'origin/feat-rewrite-v2' into feat-add-v…
Jan 11, 2023
e4106a8
fix: fix some imports after merging
Jan 11, 2023
23ee930
docs: add doc strings and fix example urls
Jan 11, 2023
7ab8dbd
docs: small fixes in docs
Jan 11, 2023
ecf01d8
Merge remote-tracking branch 'origin/feat-rewrite-v2' into feat-add-v…
Jan 11, 2023
5295dd1
refactor: rename save to mp4 file to save
Jan 11, 2023
b3f2ccb
feat: add shape method to comp backend
Jan 16, 2023
20ecf2c
refactor: move validate shape to video tensor mixin
Jan 16, 2023
711d105
refactor: extract private load and make separate methods for frames
Jan 16, 2023
0c9c1fd
fix: use torch shape instead of size method
Jan 16, 2023
e3a465c
fix: add typehint to shape in comp backend
Jan 16, 2023
40eac93
docs: add supported strings for skip type
Jan 16, 2023
a700f30
fix: apply suggestions from code review
Jan 17, 2023
94572fd
Merge remote-tracking branch 'origin/feat-rewrite-v2' into feat-add-v…
Jan 17, 2023
07ceae8
fix: small change to trigger ci again
Jan 17, 2023
c2e129d
fix: extract shape var
Jan 17, 2023
d50ae67
fix: introduce compbackendinterface
Jan 17, 2023
2e365e6
fix: revert previous pr and fix for mypy
Jan 17, 2023
c44a035
Merge remote-tracking branch 'origin/feat-rewrite-v2' into feat-add-v…
Jan 17, 2023
95b0b81
Merge remote-tracking branch 'origin/feat-rewrite-v2' into feat-add-v…
Jan 17, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
feat: add video torch tensor and tests
Signed-off-by: anna-charlotte <[email protected]>
  • Loading branch information
anna-charlotte committed Jan 4, 2023
commit dc957d19bc20bf7072f0dda8591b5de1d668406e
3 changes: 2 additions & 1 deletion docarray/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from docarray.array.array import DocumentArray
from docarray.document.document import BaseDocument
from docarray.predefined_document import Audio, Image, Mesh3D, PointCloud3D, Text
from docarray.predefined_document import Audio, Image, Mesh3D, PointCloud3D, Text, Video

__all__ = [
'BaseDocument',
Expand All @@ -12,4 +12,5 @@
'Text',
'Mesh3D',
'PointCloud3D',
'Video',
]
3 changes: 2 additions & 1 deletion docarray/predefined_document/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@
from docarray.predefined_document.mesh import Mesh3D
from docarray.predefined_document.point_cloud import PointCloud3D
from docarray.predefined_document.text import Text
from docarray.predefined_document.video import Video

__all__ = ['Text', 'Image', 'Audio', 'Mesh3D', 'PointCloud3D']
__all__ = ['Text', 'Image', 'Audio', 'Mesh3D', 'PointCloud3D', 'Video']
31 changes: 31 additions & 0 deletions docarray/predefined_document/video.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from typing import Optional, TypeVar

from docarray.document import BaseDocument
from docarray.typing import AnyTensor, Embedding
from docarray.typing.tensor.video.video_tensor import VideoTensor
from docarray.typing.url.video_url import VideoUrl

T = TypeVar('T', bound='Video')


class Video(BaseDocument):
    """
    Document for handling video.
    The Video Document can contain a VideoUrl (`Video.url`), a VideoTensor
    (`Video.tensor`), an AnyTensor (`Video.key_frame_indices`), and an Embedding
    (`Video.embedding`).

    EXAMPLE USAGE:

    TODO(review): the example sections below are placeholders — fill them in.

    You can use this Document directly:

    You can extend this Document:

    You can use this Document for composition:

    """

    # remote or local location of the video file
    url: Optional[VideoUrl]
    # the video frames, e.g. the first value returned by `url.load()`
    tensor: Optional[VideoTensor]
    # indices of the key frames within `tensor` (second value returned by
    # `url.load()` when `only_keyframes` is False)
    key_frame_indices: Optional[AnyTensor]
    # embedding vector representing the whole video
    embedding: Optional[Embedding]
12 changes: 12 additions & 0 deletions docarray/typing/tensor/video/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Public entry point for video tensor types. VideoNdArray is always available;
# VideoTorchTensor is exported only when the optional torch dependency is
# installed.
from docarray.typing.tensor.video.video_ndarray import VideoNdArray

__all__ = ['VideoNdArray']

try:
    import torch  # noqa: F401
except ImportError:
    # torch is optional: silently skip the torch-backed tensor type
    pass
else:
    from docarray.typing.tensor.video.video_torch_tensor import VideoTorchTensor  # noqa

    __all__.extend(['VideoTorchTensor'])
65 changes: 65 additions & 0 deletions docarray/typing/tensor/video/abstract_video_tensor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from abc import ABC, abstractmethod
from typing import BinaryIO, Dict, Generator, Optional, Tuple, Type, TypeVar, Union

import numpy as np

from docarray.typing.tensor.abstract_tensor import AbstractTensor

T = TypeVar('T', bound='AbstractVideoTensor')


class AbstractVideoTensor(AbstractTensor, ABC):
    """Abstract mixin that adds video-specific behaviour to a tensor type."""

    @abstractmethod
    def to_numpy(self) -> np.ndarray:
        """
        Convert video tensor to numpy.ndarray.
        """
        ...

    def save_to_file(
        self: 'T',
        file_path: Union[str, BinaryIO],
        frame_rate: int = 30,
        codec: str = 'h264',
    ) -> None:
        """
        Save video tensor to a video file using the `av` library.

        NOTE(review): a previous version of this docstring said '.wav file /
        Mono/stereo is preserved' — that was copy-pasted from the audio mixin;
        this method encodes video frames into a video container (e.g. .mp4).

        :param file_path: path of the output file. If file is a string, open the
            file by that name, otherwise treat it as a file-like object.
        :param frame_rate: frames per second.
        :param codec: the name of a decoder/encoder.
        """
        np_tensor = self.to_numpy()

        # Clip to the 8-bit range and swap axes 1 and 2 before encoding.
        # NOTE(review): stream.width/height below are taken from the
        # *un-swapped* np_tensor while frames come from the swapped
        # video_tensor — assumes input shape (frames, dim1, dim2, 3);
        # confirm the intended frame layout.
        video_tensor = np.moveaxis(np.clip(np_tensor, 0, 255), 1, 2).astype('uint8')

        import av  # imported lazily: av is an optional dependency

        with av.open(file_path, mode='w') as container:
            stream = container.add_stream(codec, rate=frame_rate)
            stream.width = np_tensor.shape[1]
            stream.height = np_tensor.shape[2]
            stream.pix_fmt = 'yuv420p'

            # encode frame by frame; each frame is an (H, W, 3) uint8 RGB array
            for b in video_tensor:
                frame = av.VideoFrame.from_ndarray(b, format='rgb24')
                for packet in stream.encode(frame):
                    container.mux(packet)

            # flush any packets still buffered inside the encoder
            for packet in stream.encode():
                container.mux(packet)

    @classmethod
    def generator_from_webcam(
        cls: Type['T'],
        height_width: Optional[Tuple[int, int]] = None,
        show_window: bool = True,
        window_title: str = 'webcam',
        fps: int = 30,
        exit_key: int = 27,
        exit_event=None,
        tags: Optional[Dict] = None,
    ) -> Generator['T', None, None]:
        """Yield video tensors captured from a webcam.

        NOTE(review): stub — no implementation in this commit.
        """
        ...
54 changes: 19 additions & 35 deletions docarray/typing/tensor/video/video_ndarray.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import TypeVar
from typing import TYPE_CHECKING, Any, List, Tuple, Type, TypeVar, Union

import numpy as np

Expand All @@ -7,53 +7,37 @@

T = TypeVar('T', bound='VideoNdArray')

if TYPE_CHECKING:
from pydantic import BaseConfig
from pydantic.fields import ModelField


class VideoNdArray(AbstractVideoTensor, NdArray):
"""
Subclass of NdArray, to represent a video tensor.

Additionally, this allows storing such a tensor as a .wav audio file.
Adds video-specific features to the tensor.

EXAMPLE USAGE

.. code-block:: python

from typing import Optional
from pydantic import parse_obj_as
from docarray import Document
from docarray.typing import AudioNdArray, AudioUrl
import numpy as np


class MyAudioDoc(Document):
title: str
audio_tensor: Optional[AudioNdArray]
url: Optional[AudioUrl]


# from tensor
doc_1 = MyAudioDoc(
title='my_first_audio_doc',
audio_tensor=np.random.rand(1000, 2),
)
doc_1.audio_tensor.save_to_wav_file(file_path='path/to/file_1.wav')
# from url
doc_2 = MyAudioDoc(
title='my_second_audio_doc',
url='https://github.com/docarray/docarray/tree/feat-add-audio-v2/tests/toydata/hello.wav',
)
doc_2.audio_tensor = parse_obj_as(AudioNdArray, doc_2.url.load())
doc_2.audio_tensor.save_to_wav_file(file_path='path/to/file_2.wav')
"""

_PROTO_FIELD_NAME = 'video_ndarray'

def check_shape(self) -> None:
if self.ndim != 4 or self.shape[-1] != 3 or self.dtype != np.uint8:
@classmethod
def validate(
cls: Type[T],
value: Union[T, np.ndarray, List[Any], Tuple[Any], Any],
field: 'ModelField',
config: 'BaseConfig',
) -> T:
array = super().validate(value=value, field=field, config=config)
if array.ndim not in [3, 4] or array.shape[-1] != 3:
raise ValueError(
f'expects `` with dtype=uint8 and ndim=4 and the last dimension is 3, '
f'but receiving {self.shape} in {self.dtype}'
f'Expects tensor with 3 or 4 dimensions and the last dimension equal'
f' to 3, but received {array.shape} in {array.dtype}'
)
else:
return array

def to_numpy(self) -> np.ndarray:
return self
13 changes: 13 additions & 0 deletions docarray/typing/tensor/video/video_tensor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from typing import Union

from docarray.typing.tensor.video.video_ndarray import VideoNdArray

try:
    import torch  # noqa: F401
except ImportError:
    # torch not installed: VideoTensor is just the numpy-backed type
    VideoTensor = VideoNdArray

else:
    from docarray.typing.tensor.video.video_torch_tensor import VideoTorchTensor

    # torch available: VideoTensor accepts either backend
    VideoTensor = Union[VideoNdArray, VideoTorchTensor]  # type: ignore
43 changes: 43 additions & 0 deletions docarray/typing/tensor/video/video_torch_tensor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from typing import TYPE_CHECKING, Any, List, Tuple, Type, TypeVar, Union

import numpy as np

from docarray.typing.tensor.torch_tensor import TorchTensor, metaTorchAndNode
from docarray.typing.tensor.video.abstract_video_tensor import AbstractVideoTensor

T = TypeVar('T', bound='VideoTorchTensor')

if TYPE_CHECKING:
from pydantic import BaseConfig
from pydantic.fields import ModelField


class VideoTorchTensor(AbstractVideoTensor, TorchTensor, metaclass=metaTorchAndNode):
    """
    Subclass of TorchTensor, to represent a video tensor.
    Adds video-specific features to the tensor.

    EXAMPLE USAGE

    """

    _PROTO_FIELD_NAME = 'video_torch_tensor'

    @classmethod
    def validate(
        cls: Type[T],
        value: Union[T, np.ndarray, List[Any], Tuple[Any], Any],
        field: 'ModelField',
        config: 'BaseConfig',
    ) -> T:
        # Run the generic TorchTensor validation first, then enforce the
        # video-specific shape contract: 3 or 4 dims with a trailing
        # channel dimension of size 3 (RGB).
        validated = super().validate(value=value, field=field, config=config)
        shape_ok = validated.ndim in (3, 4) and validated.shape[-1] == 3
        if not shape_ok:
            raise ValueError(
                f'Expects tensor with 3 or 4 dimensions and the last dimension '
                f'equal to 3, but received {validated.shape} in {validated.dtype}'
            )
        return validated

    def to_numpy(self) -> np.ndarray:
        """Detach the tensor, move it to CPU, and return it as a numpy array."""
        return self.cpu().detach().numpy()
13 changes: 6 additions & 7 deletions docarray/typing/url/video_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,15 +50,17 @@ def validate(
return cls(str(url), scheme=None)

def load(
self: T, only_keyframes: bool = False, **kwargs
) -> Union[VideoNdArray, Tuple[VideoNdArray, VideoNdArray]]:
self: T, only_keyframes: bool = False, dtype: str = 'int32', **kwargs
) -> Union[VideoNdArray, Tuple[VideoNdArray, np.ndarray]]:
"""
Load the data from the url into a numpy.ndarray.
Load the data from the url into a VideoNdArray or Tuple of VideoNdArray and
np.ndarray.



:param only_keyframes: if True keep only the keyframes, if False keep all frames
and store the indices of the keyframes in :attr:`.tags`
:param dtype: Data-type of the returned array; default: int32.
:param kwargs: supports all keyword arguments that are being supported by
av.open() as described in:
https://pyav.org/docs/stable/api/_globals.html?highlight=open#av.open
Expand Down Expand Up @@ -86,7 +88,4 @@ def load(
if only_keyframes:
return frames
else:
indices = parse_obj_as(
VideoNdArray, np.ndarray(keyframe_indices, dtype=np.int32)
)
return frames, indices
return frames, np.ndarray(keyframe_indices, dtype=dtype)
43 changes: 43 additions & 0 deletions tests/integrations/predefined_document/test_video.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import os

import numpy as np
import pytest

from docarray import Video
from docarray.typing import VideoNdArray
from tests import TOYDATA_DIR

LOCAL_VIDEO_FILE = str(TOYDATA_DIR / 'mov_bbb.mp4')
REMOTE_VIDEO_FILE = 'https://github.com/docarray/docarray/blob/feat-rewrite-v2/tests/toydata/mov_bbb.mp4?raw=true' # noqa: E501


@pytest.mark.slow
@pytest.mark.internet
@pytest.mark.parametrize('file_url', [LOCAL_VIDEO_FILE, REMOTE_VIDEO_FILE])
def test_video(file_url):
    # Loading from a url yields a VideoNdArray of frames plus key-frame indices.
    video = Video(url=file_url)
    # load() returns (frames, key_frame_indices) when only_keyframes is False
    video.tensor, video.key_frame_indices = video.url.load()

    assert isinstance(video.tensor, np.ndarray)
    assert isinstance(video.tensor, VideoNdArray)
    assert isinstance(video.key_frame_indices, np.ndarray)


@pytest.mark.slow
@pytest.mark.internet
@pytest.mark.parametrize('file_url', [LOCAL_VIDEO_FILE, REMOTE_VIDEO_FILE])
def test_save_video_ndarray(file_url, tmpdir):
    # Round-trip: load a video, save its tensor to an mp4 file, reload it,
    # and check the reloaded frames match.
    tmp_file = str(tmpdir / 'tmp.mp4')

    video = Video(url=file_url)
    video.tensor, _ = video.url.load()

    assert isinstance(video.tensor, np.ndarray)
    assert isinstance(video.tensor, VideoNdArray)

    video.tensor.save_to_file(tmp_file)
    assert os.path.isfile(tmp_file)

    video_from_file = Video(url=tmp_file)
    # BUG FIX: url.load() returns a (frames, key_frame_indices) tuple by
    # default — the original assigned the whole tuple to .tensor, which made
    # the np.allclose below compare a tensor against a tuple. Unpack it,
    # matching the pattern used above.
    video_from_file.tensor, _ = video_from_file.url.load()
    assert np.allclose(video.tensor, video_from_file.tensor)
Loading