Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
943d636
feat: display mesh and pointcloud
Feb 9, 2023
e9acd12
chore: update poetry
Feb 9, 2023
c91ab6c
fix: mypy
Feb 9, 2023
e979c32
fix: add display from param to mesh and pc display
Feb 9, 2023
21fb8ab
fix: clean up
Feb 9, 2023
75b33d8
fix: mypy
Feb 9, 2023
7890194
fix: move display from url to mesh and pc url classes
Feb 10, 2023
0129c3a
chore: remove pyglet dependency
Feb 10, 2023
322a718
chore: update pyproject toml
Feb 10, 2023
05d8461
refactor: copy is notebook function from hubble sdk
Feb 10, 2023
0244816
fix: introduce vertices and faces doc
Feb 10, 2023
255795c
fix: introduce points and colors class for point cloud
Feb 10, 2023
db42712
fix: mypy and tests
Feb 10, 2023
dca04ce
docs: add display example to docs
Feb 10, 2023
788f834
fix: apply johannes suggestion from review
Feb 10, 2023
57fb1e1
fix: apply samis suggestion
Feb 10, 2023
3a8dc5e
docs: update docstring
Feb 14, 2023
38f771d
fix: only display in notebook
Feb 15, 2023
8c31318
docs: update docstring
Feb 15, 2023
bbef411
chore: get poetry lock file from feat rewrite v2
Feb 15, 2023
b376ddd
docs: update docstrings
Feb 15, 2023
016760d
feat: display image from img url and img tensor
Feb 10, 2023
f55d811
fix: display from image url and from image tensor
Feb 13, 2023
0a23b52
fix: use is notebook from utils instead of hubble
Feb 13, 2023
084921b
feat: audio from url
Feb 14, 2023
a949033
feat: display video and add pydub to pyproject toml
Feb 15, 2023
62b58ee
wip: remove non notebook
Feb 15, 2023
442dd29
fix: all except video tensor
Feb 16, 2023
b486b96
fix: mypy check for ipython display
Feb 16, 2023
f77d216
fix: mypy check for ipython display
Feb 16, 2023
ca948f9
feat: add videobytes
Feb 16, 2023
0dc606c
Merge remote-tracking branch 'origin/feat-rewrite-v2' into feat-displ…
Feb 16, 2023
f2c0cff
Merge remote-tracking branch 'origin/feat-rewrite-v2' into feat-displ…
Feb 16, 2023
48a85ba
chore: poetry lock
Feb 16, 2023
9df9df5
fix: clean up
Feb 16, 2023
e783c14
Merge branch 'feat-rewrite-v2' into feat-display-img-audio-vid
Feb 17, 2023
fe9d3bf
fix: mypy check
Feb 17, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
feat: add videobytes
Signed-off-by: anna-charlotte <[email protected]>
  • Loading branch information
anna-charlotte committed Feb 16, 2023
commit ca948f90f018047e81e4bccd28a0c3dc263075a1
112 changes: 112 additions & 0 deletions docarray/typing/bytes/video_bytes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
from io import BytesIO
from typing import TYPE_CHECKING, Any, NamedTuple, Type, TypeVar

import numpy as np
from pydantic import parse_obj_as
from pydantic.validators import bytes_validator

from docarray.typing import AudioNdArray, NdArray, VideoNdArray
from docarray.typing.abstract_type import AbstractType
from docarray.typing.proto_register import _register_proto

if TYPE_CHECKING:
from pydantic.fields import BaseConfig, ModelField

from docarray.proto import NodeProto

T = TypeVar('T', bound='VideoBytes')


class VideoLoadResult(NamedTuple):
    """Named result of loading a video: frames, soundtrack and key frame indices."""

    # stacked video frames; decoded as 'rgb24', so presumably
    # (n_frames, height, width, 3) — TODO confirm against av.to_ndarray
    video: VideoNdArray
    # stacked audio frames extracted from the video's soundtrack
    audio: AudioNdArray
    # indices into `video` marking the container's key frames
    key_frame_indices: NdArray


@_register_proto(proto_type_name='video_bytes')
class VideoBytes(bytes, AbstractType):
    """
    Bytes that store a video and that can be loaded into a video tensor.
    """

    @classmethod
    def validate(
        cls: Type[T],
        value: Any,
        field: 'ModelField',
        config: 'BaseConfig',
    ) -> T:
        # Delegate to pydantic's bytes validator, then wrap the result in
        # this subclass so downstream code sees a VideoBytes instance.
        value = bytes_validator(value)
        return cls(value)

    @classmethod
    def from_protobuf(cls: Type[T], pb_msg: T) -> T:
        """Read a VideoBytes instance from a protobuf message."""
        return parse_obj_as(cls, pb_msg)

    def _to_node_protobuf(self: T) -> 'NodeProto':
        """Pack these bytes into a NodeProto blob field."""
        from docarray.proto import NodeProto

        return NodeProto(blob=self, type=self._proto_type_name)

    def load(self, **kwargs) -> VideoLoadResult:
        """
        Load the video from the bytes into a VideoLoadResult object consisting of a
        VideoNdArray (`VideoLoadResult.video`), an AudioNdArray
        (`VideoLoadResult.audio`) and an NdArray containing the key frame indices
        (`VideoLoadResult.key_frame_indices`).

        EXAMPLE USAGE

        .. code-block:: python

            from docarray import BaseDocument
            from docarray.typing import VideoUrl
            import numpy as np


            class MyDoc(BaseDocument):
                video_url: VideoUrl


            doc = MyDoc(
                video_url="https://upload.wikimedia.org/wikipedia/commons/8/80/"
                "Dag_Sebastian_Ahlander_at_G%C3%B6teborg_Book_Fair_2012b.jpg"
            )

            video, audio, key_frame_indices = doc.video_url.load()
            assert isinstance(video, np.ndarray)
            assert isinstance(audio, np.ndarray)
            assert isinstance(key_frame_indices, np.ndarray)

        :param kwargs: supports all keyword arguments that are being supported by
            av.open() as described in:
            https://pyav.org/docs/stable/api/_globals.html?highlight=open#av.open
        :return: a VideoLoadResult instance with video, audio and keyframe indices
        """
        import av

        with av.open(BytesIO(self), **kwargs) as container:
            audio_frames = []
            video_frames = []
            keyframe_indices = []

            for frame in container.decode():
                if isinstance(frame, av.audio.frame.AudioFrame):
                    audio_frames.append(frame.to_ndarray())
                elif isinstance(frame, av.video.frame.VideoFrame):
                    if frame.key_frame == 1:
                        # Record the index BEFORE appending so it is the
                        # 0-based position of this key frame; recording after
                        # the append would point one frame past the key frame.
                        keyframe_indices.append(len(video_frames))
                    video_frames.append(frame.to_ndarray(format='rgb24'))

        if not audio_frames:
            # np.stack raises on an empty sequence; build an empty array instead
            # so videos without a soundtrack still load.
            audio = parse_obj_as(AudioNdArray, np.array(audio_frames))
        else:
            audio = parse_obj_as(AudioNdArray, np.stack(audio_frames))

        video = parse_obj_as(VideoNdArray, np.stack(video_frames))
        indices = parse_obj_as(NdArray, keyframe_indices)

        return VideoLoadResult(video=video, audio=audio, key_frame_indices=indices)
55 changes: 50 additions & 5 deletions docarray/typing/tensor/video/video_tensor_mixin.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import abc
from typing import BinaryIO, Optional, Type, TypeVar, Union
import warnings
from io import BytesIO
from typing import Optional, Type, TypeVar, Union

import numpy as np

from docarray.typing.tensor.abstract_tensor import AbstractTensor
from docarray.typing.tensor.audio.audio_tensor import AudioTensor
from docarray.utils.misc import is_notebook

T = TypeVar('T', bound='AbstractTensor')

Expand All @@ -24,7 +27,7 @@ def validate_shape(cls: Type['T'], value: 'T') -> 'T':

def save(
self: 'T',
file_path: Union[str, BinaryIO],
file_path: Union[str, BytesIO],
audio_tensor: Optional[AudioTensor] = None,
video_frame_rate: int = 24,
video_codec: str = 'h264',
Expand Down Expand Up @@ -77,7 +80,7 @@ class MyDoc(BaseDocument):
np_tensor = self.get_comp_backend().to_numpy(array=self)
video_tensor = np_tensor.astype('uint8')

with av.open(file_path, mode='w') as container:
with av.open(file_path, mode='w', format='mp4') as container:
if video_tensor.ndim == 3:
video_tensor = np.expand_dims(video_tensor, axis=0)

Expand Down Expand Up @@ -110,8 +113,50 @@ class MyDoc(BaseDocument):
for packet in stream_video.encode(None):
container.mux(packet)

def display(self) -> None:
def to_bytes(
self: 'T',
audio_tensor: Optional[AudioTensor] = None,
video_frame_rate: int = 24,
video_codec: str = 'h264',
audio_frame_rate: int = 48000,
audio_codec: str = 'aac',
audio_format: str = 'fltp',
) -> bytes:
"""
Convert video tensor to bytes.

:param audio_tensor: AudioTensor containing the video's soundtrack.
:param video_frame_rate: video frames per second.
:param video_codec: the name of a video decoder/encoder.
:param audio_frame_rate: audio frames per second.
:param audio_codec: the name of an audio decoder/encoder.
:param audio_format: the name of one of the audio formats supported by PyAV,
such as 'flt', 'fltp', 's16' or 's16p'.

:return: bytes
"""
bytes = BytesIO()
self.save(
Copy link
Contributor Author

@anna-charlotte anna-charlotte Feb 16, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here mypy complains because "T" has no attribute "save". Why does it no complain though in line 164 (b = self.to_bytes(audio_tensor=audio)), isn't to_bytes() just a method of VideoTensorMixin like save()?

file_path=bytes,
audio_tensor=audio_tensor,
video_frame_rate=video_frame_rate,
video_codec=video_codec,
audio_frame_rate=audio_frame_rate,
audio_codec=audio_codec,
audio_format=audio_format,
)
return bytes.getvalue()

def display(self, audio: Optional[AudioTensor] = None) -> None:
"""
Display video data from tensor in notebook.

:param audio: sound to play with video tensor
"""
raise NotImplementedError
if is_notebook():
from IPython.display import Video, display

b = self.to_bytes(audio_tensor=audio)
display(Video(data=b, embed=True, mimetype='video/mp4'))
else:
warnings.warn('Display of video is only possible in a notebook.')
48 changes: 9 additions & 39 deletions docarray/typing/url/video_url.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
import warnings
from typing import TYPE_CHECKING, Any, NamedTuple, Type, TypeVar, Union
from typing import TYPE_CHECKING, Any, Type, TypeVar, Union

import numpy as np
from pydantic.tools import parse_obj_as

from docarray.typing.bytes.video_bytes import VideoLoadResult
from docarray.typing.proto_register import _register_proto
from docarray.typing.tensor.audio.audio_ndarray import AudioNdArray
from docarray.typing.tensor.ndarray import NdArray
from docarray.typing.tensor.video import VideoNdArray
from docarray.typing.url.any_url import AnyUrl
from docarray.utils.misc import is_notebook

Expand All @@ -20,12 +17,6 @@
VIDEO_FILE_FORMATS = ['mp4']


class VideoLoadResult(NamedTuple):
video: VideoNdArray
audio: AudioNdArray
key_frame_indices: NdArray


@_register_proto(proto_type_name='video_url')
class VideoUrl(AnyUrl):
"""
Expand Down Expand Up @@ -106,46 +97,25 @@ class MyDoc(BaseDocument):
assert isinstance(key_frame_indices, NdArray)

"""
import av

with av.open(self, **kwargs) as container:
audio_frames = []
video_frames = []
keyframe_indices = []

for frame in container.decode():
if type(frame) == av.audio.frame.AudioFrame:
audio_frames.append(frame.to_ndarray())
elif type(frame) == av.video.frame.VideoFrame:
video_frames.append(frame.to_ndarray(format='rgb24'))

if frame.key_frame == 1:
curr_index = len(video_frames)
keyframe_indices.append(curr_index)

if len(audio_frames) == 0:
audio = parse_obj_as(AudioNdArray, np.array(audio_frames))
else:
audio = parse_obj_as(AudioNdArray, np.stack(audio_frames))

video = parse_obj_as(VideoNdArray, np.stack(video_frames))
indices = parse_obj_as(NdArray, keyframe_indices)
from docarray.typing.bytes.video_bytes import VideoBytes

return VideoLoadResult(video=video, audio=audio, key_frame_indices=indices)
buffer = VideoBytes(self.load_bytes(**kwargs))
return buffer.load()

def display(self):
"""
Play video from url in notebook.
"""
if is_notebook():
remote_url = True if self.startswith('http') else False

from IPython.display import display

remote_url = True if self.startswith('http') else False

if remote_url:
from IPython.display import Video

display(Video(data=self))
b = self.load_bytes()
display(Video(data=b, embed=True, mimetype='video/mp4'))
else:
import os

Expand Down
14 changes: 14 additions & 0 deletions tests/units/typing/tensor/test_video_tensor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
from io import BytesIO

import numpy as np
import pytest
Expand Down Expand Up @@ -130,6 +131,19 @@ def test_save_video_tensor_to_file(video_tensor, tmpdir):
assert os.path.isfile(tmp_file)


@pytest.mark.parametrize(
    'video_tensor',
    [
        parse_obj_as(VideoTorchTensor, torch.zeros(1, 224, 224, 3)),
        parse_obj_as(VideoNdArray, np.zeros((1, 224, 224, 3))),
    ],
)
def test_save_video_tensor_to_bytes(video_tensor, tmpdir):
    """Saving into an in-memory buffer should produce non-empty encoded data."""
    b = BytesIO()
    video_tensor.save(b)
    # The original line was `isinstance(b, BytesIO)` with no `assert` — a
    # no-op expression that is vacuously true anyway, since `b` was just
    # constructed as a BytesIO. Assert on the actual encoded output instead.
    assert len(b.getvalue()) > 0


@pytest.mark.tensorflow
def test_save_video_tensorflow_tensor_to_file(tmpdir):
tmp_file = str(tmpdir / 'tmp.mp4')
Expand Down