Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
6e9817c
feat: add rich display for doc and da
Jan 17, 2023
2941269
fix: wip plot
Jan 18, 2023
5949e7c
fix: wip plot
Jan 18, 2023
05fa0fa
fix: wip plot
Jan 19, 2023
718fe52
feat: add math package and minmax normalize
Jan 19, 2023
3669de1
fix: summary for document
Jan 19, 2023
c56e975
chore: update poetry lock after rebase
Jan 19, 2023
b0ba3f3
fix: move all from plotmixin to base document
Jan 19, 2023
bd8cf3b
feat: add docs schema summary
Jan 20, 2023
25be9cc
feat: add document array summary
Jan 20, 2023
b7a915b
fix: display doc within doc
Jan 20, 2023
c6ee8ec
fix: in notebook print docs summary
Jan 20, 2023
d45988a
fix: move summary from da to abstract da
Jan 23, 2023
40c8eea
fix: get schema for doc
Jan 23, 2023
3bdb9d0
fix: wip doc summary
Jan 23, 2023
ea12600
fix: wip clean up
Jan 23, 2023
9321c0b
test: add test for da pretty print
Jan 23, 2023
189c33c
docs: update note
Jan 23, 2023
93046af
docs: add some documentation
Jan 23, 2023
fc0deec
fix: apply Sami's suggestion
Jan 23, 2023
c8f3849
fix: mypy checks
Jan 23, 2023
15b94fc
fix: move to plot mixin
Jan 23, 2023
58229aa
fix: remove redundant line
Jan 24, 2023
e55ba3b
fix: remove comments
Jan 24, 2023
147742d
feat: add schema highlighter
Jan 24, 2023
59bd3a6
fix: add plotmixin to mixin init
Jan 24, 2023
fd26a43
fix: adjust da summary
Jan 24, 2023
675b5c5
fix: move minmaxnormalize to comp backend
Jan 24, 2023
a375d19
fix: remove redundant lines
Jan 24, 2023
c3b44bd
fix: add squeeze and detach to comp backend
Jan 24, 2023
0d5653c
fix: apply suggestion from code review
Jan 24, 2023
6d479ab
refactor: rename iterable attrs
Jan 24, 2023
a1c4678
fix: clean up
Jan 24, 2023
3aac1c9
fix: import
Jan 24, 2023
eb75060
fix: iterate over fields instead of annotations
Jan 24, 2023
3cc1b55
fix: remove math package since moved to comp backends
Jan 24, 2023
ab585eb
refactor: use single quotes
Jan 24, 2023
b838ec9
fix: apply suggestions from code review
Jan 24, 2023
c56aa6e
fix: extract summary to doc summary class
Jan 24, 2023
b2b5bdd
fix: add pretty print for base document
Jan 25, 2023
7aa7e58
fix: use rich capture instead of string io
Jan 25, 2023
2ae8d6a
fix: add colors for optional and union and use only single quotes
Jan 25, 2023
0b881b1
fix: extract display classes to display package
Jan 25, 2023
6ba4eff
fix: make da not optional in da summary
Jan 25, 2023
a70142a
fix: set _console instead of initializing a new one every time in __str__
Jan 25, 2023
2a6bd5c
fix: put console at module level
Jan 25, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions docarray/array/abstract_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import TYPE_CHECKING, Any, Generic, List, Sequence, Type, TypeVar, Union

from docarray.base_document import BaseDocument
from docarray.display.document_array_summary import DocumentArraySummary
from docarray.typing import NdArray
from docarray.typing.abstract_type import AbstractType

Expand All @@ -17,6 +18,9 @@ class AnyDocumentArray(Sequence[BaseDocument], Generic[T_doc], AbstractType):
document_type: Type[BaseDocument]
tensor_type: Type['AbstractTensor'] = NdArray

def __repr__(self):
return f'<{self.__class__.__name__} (length={len(self)})>'

def __class_getitem__(cls, item: Type[BaseDocument]):
if not issubclass(item, BaseDocument):
raise ValueError(
Expand Down Expand Up @@ -209,3 +213,10 @@ def _flatten_one_level(sequence: List[Any]) -> List[Any]:
return sequence
else:
return [item for sublist in sequence for item in sublist]

def summary(self):
"""
Print a summary of this DocumentArray object and a summary of the schema of its
Document type.
"""
DocumentArraySummary(self).summary()
13 changes: 11 additions & 2 deletions docarray/base_document/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,18 @@

import orjson
from pydantic import BaseModel, Field, parse_obj_as
from rich.console import Console

from docarray.base_document.abstract_document import AbstractDocument
from docarray.base_document.base_node import BaseNode
from docarray.base_document.io.json import orjson_dumps, orjson_dumps_and_decode
from docarray.base_document.mixins import ProtoMixin
from docarray.base_document.mixins import PlotMixin, ProtoMixin
from docarray.typing import ID

_console: Console = Console()

class BaseDocument(BaseModel, ProtoMixin, AbstractDocument, BaseNode):

class BaseDocument(BaseModel, PlotMixin, ProtoMixin, AbstractDocument, BaseNode):
"""
The base class for Document
"""
Expand All @@ -34,3 +37,9 @@ def _get_field_type(cls, field: str) -> Type['BaseDocument']:
:return:
"""
return cls.__fields__[field].outer_type_

def __str__(self):
with _console.capture() as capture:
_console.print(self)

return capture.get().strip()
3 changes: 2 additions & 1 deletion docarray/base_document/mixins/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from docarray.base_document.mixins.plot import PlotMixin
from docarray.base_document.mixins.proto import ProtoMixin

__all__ = ['ProtoMixin']
__all__ = ['PlotMixin', 'ProtoMixin']
17 changes: 17 additions & 0 deletions docarray/base_document/mixins/plot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from docarray.base_document.abstract_document import AbstractDocument
from docarray.display.document_summary import DocumentSummary


class PlotMixin(AbstractDocument):
def summary(self) -> None:
"""Print non-empty fields and nested structure of this Document object."""
DocumentSummary(doc=self).summary()

@classmethod
def schema_summary(cls) -> None:
"""Print a summary of the Documents schema."""
DocumentSummary.schema_summary(cls)

def _ipython_display_(self):
"""Displays the object in IPython as a summary"""
self.summary()
46 changes: 46 additions & 0 deletions docarray/computation/abstract_comp_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,14 @@ def n_dim(array: 'TTensor') -> int:
"""
...

@staticmethod
@abstractmethod
def squeeze(tensor: 'TTensor') -> 'TTensor':
"""
Returns a tensor with all the dimensions of tensor of size 1 removed.
"""
...

@staticmethod
@abstractmethod
def to_numpy(array: 'TTensor') -> 'np.ndarray':
Expand Down Expand Up @@ -85,6 +93,44 @@ def reshape(tensor: 'TTensor', shape: Tuple[int, ...]) -> 'TTensor':
"""
...

@staticmethod
@abstractmethod
def detach(tensor: 'TTensor') -> 'TTensor':
"""
Returns the tensor detached from its current graph.

:param tensor: tensor to be detached
:return: a detached tensor with the same data.
"""
...

@staticmethod
@abstractmethod
def minmax_normalize(
tensor: 'TTensor',
t_range: Tuple = (0, 1),
x_range: Optional[Tuple] = None,
eps: float = 1e-7,
):
"""
Normalize values in `tensor` into `t_range`.

`tensor` can be a 1D array or a 2D array. When `tensor` is a 2D array, then
normalization is row-based.

.. note::
- with `t_range=(0, 1)` will normalize the min-value of data to 0, max to 1;
- with `t_range=(1, 0)` will normalize the min-value of data to 1, max value
of the data to 0.

:param tensor: the data to be normalized
:param t_range: a tuple represents the target range.
:param x_range: a tuple represents tensors range.
:param eps: a small jitter to avoid divide by zero
:return: normalized data in `t_range`
"""
...

class Retrieval(ABC, typing.Generic[TTensorRetrieval]):
"""
Abstract class for retrieval and ranking functionalities
Expand Down
49 changes: 49 additions & 0 deletions docarray/computation/numpy_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,13 @@ def to_device(tensor: 'np.ndarray', device: str) -> 'np.ndarray':
def n_dim(array: 'np.ndarray') -> int:
return array.ndim

@staticmethod
def squeeze(tensor: 'np.ndarray') -> 'np.ndarray':
"""
Returns a tensor with all the dimensions of tensor of size 1 removed.
"""
return tensor.squeeze()

@staticmethod
def to_numpy(array: 'np.ndarray') -> 'np.ndarray':
return array
Expand Down Expand Up @@ -85,6 +92,48 @@ def reshape(array: 'np.ndarray', shape: Tuple[int, ...]) -> 'np.ndarray':
"""
return array.reshape(shape)

@staticmethod
def detach(tensor: 'np.ndarray') -> 'np.ndarray':
"""
Returns the tensor detached from its current graph.

:param tensor: tensor to be detached
:return: a detached tensor with the same data.
"""
return tensor

@staticmethod
def minmax_normalize(
tensor: 'np.ndarray',
t_range: Tuple = (0, 1),
x_range: Optional[Tuple] = None,
eps: float = 1e-7,
):
"""
Normalize values in `tensor` into `t_range`.

`tensor` can be a 1D array or a 2D array. When `tensor` is a 2D array, then
normalization is row-based.

.. note::
- with `t_range=(0, 1)` will normalize the min-value of data to 0, max to 1;
- with `t_range=(1, 0)` will normalize the min-value of data to 1, max value
of the data to 0.

:param tensor: the data to be normalized
:param t_range: a tuple represents the target range.
:param x_range: a tuple represents tensors range.
:param eps: a small jitter to avoid divide by zero
:return: normalized data in `t_range`
"""
a, b = t_range

min_d = x_range[0] if x_range else np.min(tensor, axis=-1, keepdims=True)
max_d = x_range[1] if x_range else np.max(tensor, axis=-1, keepdims=True)
r = (b - a) * (tensor - min_d) / (max_d - min_d + eps) + a

return np.clip(r, *((a, b) if a < b else (b, a)))

class Retrieval(AbstractComputationalBackend.Retrieval[np.ndarray]):
"""
Abstract class for retrieval and ranking functionalities
Expand Down
54 changes: 54 additions & 0 deletions docarray/computation/torch_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,13 @@ def empty(
def n_dim(array: 'torch.Tensor') -> int:
return array.ndim

@staticmethod
def squeeze(tensor: 'torch.Tensor') -> 'torch.Tensor':
"""
Returns a tensor with all the dimensions of tensor of size 1 removed.
"""
return torch.squeeze(tensor)

@staticmethod
def to_numpy(array: 'torch.Tensor') -> 'np.ndarray':
return array.cpu().detach().numpy()
Expand All @@ -89,6 +96,53 @@ def reshape(tensor: 'torch.Tensor', shape: Tuple[int, ...]) -> 'torch.Tensor':
"""
return tensor.reshape(shape)

@staticmethod
def detach(tensor: 'torch.Tensor') -> 'torch.Tensor':
"""
Returns the tensor detached from its current graph.

:param tensor: tensor to be detached
:return: a detached tensor with the same data.
"""
return tensor.detach()

@staticmethod
def minmax_normalize(
tensor: 'torch.Tensor',
t_range: Tuple = (0, 1),
x_range: Optional[Tuple] = None,
eps: float = 1e-7,
):
"""
Normalize values in `tensor` into `t_range`.

`tensor` can be a 1D array or a 2D array. When `tensor` is a 2D array, then
normalization is row-based.

.. note::
- with `t_range=(0, 1)` will normalize the min-value of data to 0, max to 1;
- with `t_range=(1, 0)` will normalize the min-value of data to 1, max value
of the data to 0.

:param tensor: the data to be normalized
:param t_range: a tuple represents the target range.
:param x_range: a tuple represents tensors range.
:param eps: a small jitter to avoid divide by zero
:return: normalized data in `t_range`
"""
a, b = t_range

min_d = (
x_range[0] if x_range else torch.min(tensor, dim=-1, keepdim=True).values
)
max_d = (
x_range[1] if x_range else torch.max(tensor, dim=-1, keepdim=True).values
)
r = (b - a) * (tensor - min_d) / (max_d - min_d + eps) + a

normalized = torch.clip(r, *((a, b) if a < b else (b, a)))
return normalized.to(tensor.dtype)

class Retrieval(AbstractComputationalBackend.Retrieval[torch.Tensor]):
"""
Abstract class for retrieval and ranking functionalities
Expand Down
Empty file added docarray/display/__init__.py
Empty file.
27 changes: 27 additions & 0 deletions docarray/display/document_array_summary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from docarray.array.abstract_array import AnyDocumentArray


class DocumentArraySummary:
def __init__(self, da: 'AnyDocumentArray'):
self.da = da

def summary(self) -> None:
"""
Print a summary of this DocumentArray object and a summary of the schema of its
Document type.
"""
from rich import box
from rich.console import Console
from rich.panel import Panel
from rich.table import Table

table = Table(box=box.SIMPLE, highlight=True)
table.show_header = False
table.add_row('Type', self.da.__class__.__name__)
table.add_row('Length', str(len(self.da)))

Console().print(Panel(table, title='DocumentArray Summary', expand=False))
self.da.document_type.schema_summary()
Loading