Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
a452268
feat: add video url and tensors to proto
Jan 3, 2023
3ccb697
feat: add video url and video ndarray
Jan 3, 2023
dc957d1
feat: add video torch tensor and tests
Jan 4, 2023
fc86920
fix: mypy checks
Jan 4, 2023
8a55e0b
chore: add av to video extra
Jan 4, 2023
5cb098a
fix: allow dim 3
Jan 4, 2023
3ba1f78
test: wip video load and save
Jan 5, 2023
be63926
refactor: move to numpy to computational backend
Jan 6, 2023
395a495
fix: video load and save
Jan 11, 2023
406ec80
test: adjust tests
Jan 11, 2023
091e79a
fix: video load and save and add docstrings
Jan 11, 2023
dee1146
Merge remote-tracking branch 'origin/feat-rewrite-v2' into feat-add-v…
Jan 11, 2023
e4106a8
fix: fix some imports after merging
Jan 11, 2023
23ee930
docs: add doc strings and fix example urls
Jan 11, 2023
7ab8dbd
docs: small fixes in docs
Jan 11, 2023
ecf01d8
Merge remote-tracking branch 'origin/feat-rewrite-v2' into feat-add-v…
Jan 11, 2023
5295dd1
refactor: rename save to mp4 file to save
Jan 11, 2023
b3f2ccb
feat: add shape method to comp backend
Jan 16, 2023
20ecf2c
refactor: move validate shape to video tensor mixin
Jan 16, 2023
711d105
refactor: extract private load and make separate methods for frames
Jan 16, 2023
0c9c1fd
fix: use torch shape instead of size method
Jan 16, 2023
e3a465c
fix: add typehint to shape in comp backend
Jan 16, 2023
40eac93
docs: add supported strings for skip type
Jan 16, 2023
a700f30
fix: apply suggestions from code review
Jan 17, 2023
94572fd
Merge remote-tracking branch 'origin/feat-rewrite-v2' into feat-add-v…
Jan 17, 2023
07ceae8
fix: small change to trigger ci again
Jan 17, 2023
c2e129d
fix: extract shape var
Jan 17, 2023
d50ae67
fix: introduce compbackendinterface
Jan 17, 2023
2e365e6
fix: revert previous pr and fix for mypy
Jan 17, 2023
c44a035
Merge remote-tracking branch 'origin/feat-rewrite-v2' into feat-add-v…
Jan 17, 2023
95b0b81
Merge remote-tracking branch 'origin/feat-rewrite-v2' into feat-add-v…
Jan 17, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix: video load and save
Signed-off-by: anna-charlotte <[email protected]>
  • Loading branch information
anna-charlotte committed Jan 11, 2023
commit 395a495aaf09e452b9ac7e663dc1f234eaadcbd9
10 changes: 6 additions & 4 deletions docarray/predefined_document/video.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from docarray.document import BaseDocument
from docarray.typing import AnyTensor, Embedding
from docarray.typing.tensor.audio.audio_tensor import AudioTensor
from docarray.typing.tensor.video.video_tensor import VideoTensor
from docarray.typing.url.video_url import VideoUrl

Expand All @@ -11,9 +12,9 @@
class Video(BaseDocument):
"""
Document for handling video.
The Video Document can contain a VideoUrl (`Video.url`), a VideoTensor
(`Video.tensor`), an AnyTensor (`Video.key_frame_indices`), and an Embedding
(`Video.embedding`).
The Video Document can contain a VideoUrl (`Video.url`), an AudioTensor
(`Video.audio_tensor`), a VideoTensor (`Video.video_tensor`), an AnyTensor
(`Video.key_frame_indices`), and an Embedding (`Video.embedding`).

EXAMPLE USAGE:

Expand All @@ -26,6 +27,7 @@ class Video(BaseDocument):
"""

url: Optional[VideoUrl]
tensor: Optional[VideoTensor]
audio_tensor: Optional[AudioTensor]
video_tensor: Optional[VideoTensor]
key_frame_indices: Optional[AnyTensor]
embedding: Optional[Embedding]
45 changes: 33 additions & 12 deletions docarray/typing/tensor/video/abstract_video_tensor.py
Original file line number Diff line number Diff line change
@@ -1,44 +1,65 @@
from abc import ABC
from typing import BinaryIO, TypeVar, Union
from typing import BinaryIO, Optional, TypeVar, Union

import numpy as np

from docarray.typing.tensor.abstract_tensor import AbstractTensor
from docarray.typing.tensor.audio.audio_tensor import AudioTensor

T = TypeVar('T', bound='AbstractVideoTensor')


class AbstractVideoTensor(AbstractTensor, ABC):
def save_to_file(
def save_to_mp4_file(
self: 'T',
file_path: Union[str, BinaryIO],
frame_rate: int = 24,
codec: str = 'h264',
audio_tensor: Optional[AudioTensor] = None,
video_frame_rate: int = 30,
video_codec: str = 'h264',
audio_frame_rate: int = 48000,
audio_codec: str = 'aac',
audio_format: str = 'fltp',
) -> None:
"""
Save video tensor to a .mp4 file.

:param file_path: path to a .mp4 file. If file is a string, open the file by
that name, otherwise treat it as a file-like object.
:param frame_rate: frames per second.
:param codec: the name of a decoder/encoder.
:param video_frame_rate: frames per second.
:param video_codec: the name of a decoder/encoder.
"""
import av

np_tensor = self.get_comp_backend().to_numpy(array=self) # type: ignore
video_tensor = np_tensor.astype('uint8')
import av

with av.open(file_path, mode='w') as container:
if video_tensor.ndim == 3:
video_tensor = np.expand_dims(video_tensor, axis=0)

stream = container.add_stream(codec, rate=frame_rate)
stream.height = video_tensor.shape[-3]
stream.width = video_tensor.shape[-2]
stream_video = container.add_stream(video_codec, rate=video_frame_rate)
stream_video.height = video_tensor.shape[-3]
stream_video.width = video_tensor.shape[-2]

if audio_tensor is not None:
stream_audio = container.add_stream(audio_codec)
audio_np = audio_tensor.get_comp_backend().to_numpy(array=audio_tensor)
audio_layout = 'stereo' if audio_np.shape[-2] == 2 else 'mono'

for i, audio in enumerate(audio_np):
frame = av.AudioFrame.from_ndarray(
array=audio, format=audio_format, layout=audio_layout
)
frame.rate = audio_frame_rate
for packet in stream_audio.encode(frame):
container.mux(packet)

for vid in video_tensor:
frame = av.VideoFrame.from_ndarray(vid, format='rgb24')
for packet in stream.encode(frame):
for packet in stream_video.encode(frame):
container.mux(packet)

for packet in stream.encode(None):
for packet in stream_audio.encode(None):
container.mux(packet)
for packet in stream_video.encode(None):
container.mux(packet)
38 changes: 23 additions & 15 deletions docarray/typing/url/video_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import numpy as np
from pydantic.tools import parse_obj_as

from docarray.typing import AudioNdArray, NdArray
from docarray.typing.tensor.video import VideoNdArray
from docarray.typing.url.any_url import AnyUrl

Expand Down Expand Up @@ -50,19 +51,20 @@ def validate(
return cls(str(url), scheme=None)

def load(
self: T, only_keyframes: bool = False, **kwargs
) -> Union[VideoNdArray, Tuple[VideoNdArray, np.ndarray]]:
self: T, only_keyframes: bool = False, audio_format: str = 'fltp', **kwargs
) -> Union[VideoNdArray, Tuple[AudioNdArray, VideoNdArray, NdArray]]:
"""
Load the data from the url into a VideoNdArray or Tuple of VideoNdArray and
np.ndarray.
Load the data from the url into a VideoNdArray or Tuple of AudioNdArray,
VideoNdArray and NdArray.

:param only_keyframes: if True keep only the keyframes, if False keep all frames
and store the indices of the keyframes in :attr:`.tags`
:param kwargs: supports all keyword arguments that are being supported by
av.open() as described in:
https://pyav.org/docs/stable/api/_globals.html?highlight=open#av.open
:return: np.ndarray representing the audio file content, list of key frame
indices if only_keyframe False.
:return: AudioNdArray representing the audio content, VideoNdArray representing
    the images of the video, and NdArray of key frame indices if
    only_keyframes is False; otherwise only a VideoNdArray representing the
    keyframes.
"""
import av

Expand All @@ -71,19 +73,25 @@ def load(
stream = container.streams.video[0]
stream.codec_context.skip_frame = 'NONKEY'

frames = []
audio_frames = []
video_frames = []
keyframe_indices = []

for i, frame in enumerate(container.decode(video=0)):
for frame in container.decode():
if type(frame) == av.audio.frame.AudioFrame:
audio_frames.append(frame.to_ndarray(format=audio_format))
elif type(frame) == av.video.frame.VideoFrame:
video_frames.append(frame.to_ndarray(format='rgb24'))

frame_np = frame.to_ndarray(format='rgb24')
frames.append(frame_np)
if not only_keyframes and frame.key_frame == 1:
keyframe_indices.append(i)
if not only_keyframes and frame.key_frame == 1:
curr_index = len(video_frames)
keyframe_indices.append(curr_index)

frames_vid = parse_obj_as(VideoNdArray, np.stack(frames))
video = parse_obj_as(VideoNdArray, np.stack(video_frames))

if only_keyframes:
return frames_vid
return video
else:
return frames_vid, np.array(keyframe_indices)
audio = parse_obj_as(AudioNdArray, np.stack(audio_frames))
indices = parse_obj_as(NdArray, keyframe_indices)
return audio, video, indices