Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
6e9817c
feat: add rich display for doc and da
Jan 17, 2023
2941269
fix: wip plot
Jan 18, 2023
5949e7c
fix: wip plot
Jan 18, 2023
05fa0fa
fix: wip plot
Jan 19, 2023
718fe52
feat: add math package and minmax normalize
Jan 19, 2023
3669de1
fix: summary for document
Jan 19, 2023
c56e975
chore: update poetry lock after rebase
Jan 19, 2023
b0ba3f3
fix: move all from plotmixin to base document
Jan 19, 2023
bd8cf3b
feat: add docs schema summary
Jan 20, 2023
25be9cc
feat: add document array summary
Jan 20, 2023
b7a915b
fix: display doc within doc
Jan 20, 2023
c6ee8ec
fix: in notebook print docs summary
Jan 20, 2023
d45988a
fix: move summary from da to abstract da
Jan 23, 2023
40c8eea
fix: get schema for doc
Jan 23, 2023
3bdb9d0
fix: wip doc summary
Jan 23, 2023
ea12600
fix: wip clean up
Jan 23, 2023
9321c0b
test: add test for da pretty print
Jan 23, 2023
189c33c
docs: update note
Jan 23, 2023
93046af
docs: add some documentation
Jan 23, 2023
fc0deec
fix: apply Sami's suggestion
Jan 23, 2023
c8f3849
fix: mypy checks
Jan 23, 2023
15b94fc
fix: move to plot mixin
Jan 23, 2023
58229aa
fix: remove redundant line
Jan 24, 2023
e55ba3b
fix: remove comments
Jan 24, 2023
147742d
feat: add schema highlighter
Jan 24, 2023
59bd3a6
fix: add plotmixin to mixin init
Jan 24, 2023
fd26a43
fix: adjust da summary
Jan 24, 2023
675b5c5
fix: move minmaxnormalize to comp backend
Jan 24, 2023
a375d19
fix: remove redundant lines
Jan 24, 2023
c3b44bd
fix: add squeeze and detach to comp backend
Jan 24, 2023
0d5653c
fix: apply suggestion from code review
Jan 24, 2023
6d479ab
refactor: rename iterable attrs
Jan 24, 2023
a1c4678
fix: clean up
Jan 24, 2023
3aac1c9
fix: import
Jan 24, 2023
eb75060
fix: iterate over fields instead of annotations
Jan 24, 2023
3cc1b55
fix: remove math package since moved to comp backends
Jan 24, 2023
ab585eb
refactor: use single quotes
Jan 24, 2023
b838ec9
fix: apply suggestions from code review
Jan 24, 2023
c56aa6e
fix: extract summary to doc summary class
Jan 24, 2023
b2b5bdd
fix: add pretty print for base document
Jan 25, 2023
7aa7e58
fix: use rich capture instead of string io
Jan 25, 2023
2ae8d6a
fix: add colors for optional and union and use only single quotes
Jan 25, 2023
0b881b1
fix: extract display classes to display package
Jan 25, 2023
6ba4eff
fix: make da not optional in da summary
Jan 25, 2023
a70142a
fix: set _console instead of initializing a new one every time in __str__
Jan 25, 2023
2a6bd5c
fix: put console at module level
Jan 25, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions docarray/array/abstract_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import TYPE_CHECKING, Any, Generic, List, Sequence, Type, TypeVar, Union

from docarray.base_document import BaseDocument
from docarray.display.document_array_summary import DocumentArraySummary
from docarray.typing import NdArray
from docarray.typing.abstract_type import AbstractType

Expand All @@ -17,6 +18,9 @@ class AnyDocumentArray(Sequence[BaseDocument], Generic[T_doc], AbstractType):
document_type: Type[BaseDocument]
tensor_type: Type['AbstractTensor'] = NdArray

def __repr__(self):
return f'<{self.__class__.__name__} (length={len(self)})>'

def __class_getitem__(cls, item: Type[BaseDocument]):
if not issubclass(item, BaseDocument):
raise ValueError(
Expand Down Expand Up @@ -209,3 +213,10 @@ def _flatten_one_level(sequence: List[Any]) -> List[Any]:
return sequence
else:
return [item for sublist in sequence for item in sublist]

def summary(self):
"""
Print a summary of this DocumentArray object and a summary of the schema of its
Document type.
"""
DocumentArraySummary(self).summary()
13 changes: 11 additions & 2 deletions docarray/base_document/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,18 @@

import orjson
from pydantic import BaseModel, Field, parse_obj_as
from rich.console import Console

from docarray.base_document.abstract_document import AbstractDocument
from docarray.base_document.base_node import BaseNode
from docarray.base_document.io.json import orjson_dumps, orjson_dumps_and_decode
from docarray.base_document.mixins import ProtoMixin
from docarray.base_document.mixins import PlotMixin, ProtoMixin
from docarray.typing import ID

_console: Console = Console()

class BaseDocument(BaseModel, ProtoMixin, AbstractDocument, BaseNode):

class BaseDocument(BaseModel, PlotMixin, ProtoMixin, AbstractDocument, BaseNode):
"""
The base class for Document
"""
Expand All @@ -34,3 +37,9 @@ def _get_field_type(cls, field: str) -> Type['BaseDocument']:
:return:
"""
return cls.__fields__[field].outer_type_

def __str__(self):
with _console.capture() as capture:
_console.print(self)

return capture.get().strip()
3 changes: 2 additions & 1 deletion docarray/base_document/mixins/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from docarray.base_document.mixins.plot import PlotMixin
from docarray.base_document.mixins.proto import ProtoMixin

__all__ = ['ProtoMixin']
__all__ = ['PlotMixin', 'ProtoMixin']
17 changes: 17 additions & 0 deletions docarray/base_document/mixins/plot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from docarray.base_document.abstract_document import AbstractDocument
from docarray.display.document_summary import DocumentSummary


class PlotMixin(AbstractDocument):
def summary(self) -> None:
"""Print non-empty fields and nested structure of this Document object."""
DocumentSummary(doc=self).summary()

@classmethod
def schema_summary(cls) -> None:
"""Print a summary of the Documents schema."""
DocumentSummary.schema_summary(cls)

def _ipython_display_(self):
"""Displays the object in IPython as a summary"""
self.summary()
46 changes: 46 additions & 0 deletions docarray/computation/abstract_comp_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,14 @@ def n_dim(array: 'TTensor') -> int:
"""
...

@staticmethod
@abstractmethod
def squeeze(tensor: 'TTensor') -> 'TTensor':
"""
Returns a tensor with all the dimensions of tensor of size 1 removed.
"""
...

@staticmethod
@abstractmethod
def to_numpy(array: 'TTensor') -> 'np.ndarray':
Expand Down Expand Up @@ -85,6 +93,44 @@ def reshape(tensor: 'TTensor', shape: Tuple[int, ...]) -> 'TTensor':
"""
...

@staticmethod
@abstractmethod
def detach(tensor: 'TTensor') -> 'TTensor':
"""
Returns the tensor detached from its current graph.

:param tensor: tensor to be detached
:return: a detached tensor with the same data.
"""
...

@staticmethod
@abstractmethod
def minmax_normalize(
tensor: 'TTensor',
t_range: Tuple = (0, 1),
x_range: Optional[Tuple] = None,
eps: float = 1e-7,
):
"""
Normalize values in `tensor` into `t_range`.

`tensor` can be a 1D array or a 2D array. When `tensor` is a 2D array, then
normalization is row-based.

.. note::
- with `t_range=(0, 1)` will normalize the min-value of data to 0, max to 1;
- with `t_range=(1, 0)` will normalize the min-value of data to 1, max value
of the data to 0.

:param tensor: the data to be normalized
:param t_range: a tuple represents the target range.
:param x_range: a tuple represents tensors range.
:param eps: a small jitter to avoid divide by zero
:return: normalized data in `t_range`
"""
...

class Retrieval(ABC, typing.Generic[TTensorRetrieval]):
"""
Abstract class for retrieval and ranking functionalities
Expand Down
49 changes: 49 additions & 0 deletions docarray/computation/numpy_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,13 @@ def to_device(tensor: 'np.ndarray', device: str) -> 'np.ndarray':
def n_dim(array: 'np.ndarray') -> int:
return array.ndim

@staticmethod
def squeeze(tensor: 'np.ndarray') -> 'np.ndarray':
"""
Returns a tensor with all the dimensions of tensor of size 1 removed.
"""
return tensor.squeeze()

@staticmethod
def to_numpy(array: 'np.ndarray') -> 'np.ndarray':
return array
Expand Down Expand Up @@ -85,6 +92,48 @@ def reshape(array: 'np.ndarray', shape: Tuple[int, ...]) -> 'np.ndarray':
"""
return array.reshape(shape)

@staticmethod
def detach(tensor: 'np.ndarray') -> 'np.ndarray':
"""
Returns the tensor detached from its current graph.

:param tensor: tensor to be detached
:return: a detached tensor with the same data.
"""
return tensor

@staticmethod
def minmax_normalize(
tensor: 'np.ndarray',
t_range: Tuple = (0, 1),
x_range: Optional[Tuple] = None,
eps: float = 1e-7,
):
"""
Normalize values in `tensor` into `t_range`.

`tensor` can be a 1D array or a 2D array. When `tensor` is a 2D array, then
normalization is row-based.

.. note::
- with `t_range=(0, 1)` will normalize the min-value of data to 0, max to 1;
- with `t_range=(1, 0)` will normalize the min-value of data to 1, max value
of the data to 0.

:param tensor: the data to be normalized
:param t_range: a tuple represents the target range.
:param x_range: a tuple represents tensors range.
:param eps: a small jitter to avoid divide by zero
:return: normalized data in `t_range`
"""
a, b = t_range

min_d = x_range[0] if x_range else np.min(tensor, axis=-1, keepdims=True)
max_d = x_range[1] if x_range else np.max(tensor, axis=-1, keepdims=True)
r = (b - a) * (tensor - min_d) / (max_d - min_d + eps) + a

return np.clip(r, *((a, b) if a < b else (b, a)))

class Retrieval(AbstractComputationalBackend.Retrieval[np.ndarray]):
"""
Abstract class for retrieval and ranking functionalities
Expand Down
54 changes: 54 additions & 0 deletions docarray/computation/torch_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,13 @@ def empty(
def n_dim(array: 'torch.Tensor') -> int:
return array.ndim

@staticmethod
def squeeze(tensor: 'torch.Tensor') -> 'torch.Tensor':
"""
Returns a tensor with all the dimensions of tensor of size 1 removed.
"""
return torch.squeeze(tensor)

@staticmethod
def to_numpy(array: 'torch.Tensor') -> 'np.ndarray':
return array.cpu().detach().numpy()
Expand All @@ -89,6 +96,53 @@ def reshape(tensor: 'torch.Tensor', shape: Tuple[int, ...]) -> 'torch.Tensor':
"""
return tensor.reshape(shape)

@staticmethod
def detach(tensor: 'torch.Tensor') -> 'torch.Tensor':
"""
Returns the tensor detached from its current graph.

:param tensor: tensor to be detached
:return: a detached tensor with the same data.
"""
return tensor.detach()

@staticmethod
def minmax_normalize(
tensor: 'torch.Tensor',
t_range: Tuple = (0, 1),
x_range: Optional[Tuple] = None,
eps: float = 1e-7,
):
"""
Normalize values in `tensor` into `t_range`.

`tensor` can be a 1D array or a 2D array. When `tensor` is a 2D array, then
normalization is row-based.

.. note::
- with `t_range=(0, 1)` will normalize the min-value of data to 0, max to 1;
- with `t_range=(1, 0)` will normalize the min-value of data to 1, max value
of the data to 0.

:param tensor: the data to be normalized
:param t_range: a tuple represents the target range.
:param x_range: a tuple represents tensors range.
:param eps: a small jitter to avoid divide by zero
:return: normalized data in `t_range`
"""
a, b = t_range

min_d = (
x_range[0] if x_range else torch.min(tensor, dim=-1, keepdim=True).values
)
max_d = (
x_range[1] if x_range else torch.max(tensor, dim=-1, keepdim=True).values
)
r = (b - a) * (tensor - min_d) / (max_d - min_d + eps) + a

normalized = torch.clip(r, *((a, b) if a < b else (b, a)))
return normalized.to(tensor.dtype)

class Retrieval(AbstractComputationalBackend.Retrieval[torch.Tensor]):
"""
Abstract class for retrieval and ranking functionalities
Expand Down
Empty file added docarray/display/__init__.py
Empty file.
27 changes: 27 additions & 0 deletions docarray/display/document_array_summary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from docarray.array.abstract_array import AnyDocumentArray


class DocumentArraySummary:
def __init__(self, da: 'AnyDocumentArray'):
self.da = da

def summary(self) -> None:
"""
Print a summary of this DocumentArray object and a summary of the schema of its
Document type.
"""
from rich import box
from rich.console import Console
from rich.panel import Panel
from rich.table import Table

table = Table(box=box.SIMPLE, highlight=True)
table.show_header = False
table.add_row('Type', self.da.__class__.__name__)
table.add_row('Length', str(len(self.da)))

Console().print(Panel(table, title='DocumentArray Summary', expand=False))
self.da.document_type.schema_summary()
Loading