Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
6e9817c
feat: add rich display for doc and da
Jan 17, 2023
2941269
fix: wip plot
Jan 18, 2023
5949e7c
fix: wip plot
Jan 18, 2023
05fa0fa
fix: wip plot
Jan 19, 2023
718fe52
feat: add math package and minmax normalize
Jan 19, 2023
3669de1
fix: summary for document
Jan 19, 2023
c56e975
chore: update poetry lock after rebase
Jan 19, 2023
b0ba3f3
fix: move all from plotmixin to base document
Jan 19, 2023
bd8cf3b
feat: add docs schema summary
Jan 20, 2023
25be9cc
feat: add document array summary
Jan 20, 2023
b7a915b
fix: display doc within doc
Jan 20, 2023
c6ee8ec
fix: in notebook print docs summary
Jan 20, 2023
d45988a
fix: move summary from da to abstract da
Jan 23, 2023
40c8eea
fix: get schema for doc
Jan 23, 2023
3bdb9d0
fix: wip doc summary
Jan 23, 2023
ea12600
fix: wip clean up
Jan 23, 2023
9321c0b
test: add test for da pretty print
Jan 23, 2023
189c33c
docs: update note
Jan 23, 2023
93046af
docs: add some documentation
Jan 23, 2023
fc0deec
fix: apply Sami's suggestion
Jan 23, 2023
c8f3849
fix: mypy checks
Jan 23, 2023
15b94fc
fix: move to plot mixin
Jan 23, 2023
58229aa
fix: remove redundant line
Jan 24, 2023
e55ba3b
fix: remove comments
Jan 24, 2023
147742d
feat: add schema highlighter
Jan 24, 2023
59bd3a6
fix: add plotmixin to mixin init
Jan 24, 2023
fd26a43
fix: adjust da summary
Jan 24, 2023
675b5c5
fix: move minmaxnormalize to comp backend
Jan 24, 2023
a375d19
fix: remove redundant lines
Jan 24, 2023
c3b44bd
fix: add squeeze and detach to comp backend
Jan 24, 2023
0d5653c
fix: apply suggestion from code review
Jan 24, 2023
6d479ab
refactor: rename iterable attrs
Jan 24, 2023
a1c4678
fix: clean up
Jan 24, 2023
3aac1c9
fix: import
Jan 24, 2023
eb75060
fix: iterate over fields instead of annotations
Jan 24, 2023
3cc1b55
fix: remove math package since moved to comp backends
Jan 24, 2023
ab585eb
refactor: use single quotes
Jan 24, 2023
b838ec9
fix: apply suggestions from code review
Jan 24, 2023
c56aa6e
fix: extract summary to doc summary class
Jan 24, 2023
b2b5bdd
fix: add pretty print for base document
Jan 25, 2023
7aa7e58
fix: use rich capture instead of string io
Jan 25, 2023
2ae8d6a
fix: add colors for optional and union and use only single quotes
Jan 25, 2023
0b881b1
fix: extract display classes to display package
Jan 25, 2023
6ba4eff
fix: make da not optional in da summary
Jan 25, 2023
a70142a
fix: set _console instead of initializing a new one every time in __str__
Jan 25, 2023
2a6bd5c
fix: put console at module level
Jan 25, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix: move minmaxnormalize to comp backend
Signed-off-by: anna-charlotte <[email protected]>
  • Loading branch information
anna-charlotte committed Jan 25, 2023
commit 675b5c5f5038441520a3c929c1506ad826402743
6 changes: 3 additions & 3 deletions docarray/base_document/mixins/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
from typing_inspect import is_optional_type, is_union_type

from docarray.base_document.abstract_document import AbstractDocument
from docarray.math.helper import minmax_normalize
from docarray.typing import ID
from docarray.typing.tensor.abstract_tensor import AbstractTensor

if TYPE_CHECKING:
from rich.console import Console, ConsoleOptions, RenderResult
Expand Down Expand Up @@ -176,8 +176,8 @@ class ColorBoxArray:
Rich representation of an array as coloured blocks.
"""

def __init__(self, array):
self._array = minmax_normalize(array, (0, 5))
def __init__(self, array: AbstractTensor):
    # Normalize the raw tensor into the fixed range (0, 5) using the tensor's
    # own computation backend, so the rich renderer can bucket values into a
    # small number of color blocks regardless of the input's scale.
    self._array = array.get_comp_backend().minmax_normalize(array, (0, 5))

def __rich_console__(
self, console: 'Console', options: 'ConsoleOptions'
Expand Down
27 changes: 27 additions & 0 deletions docarray/computation/abstract_comp_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,33 @@ def reshape(tensor: 'TTensor', shape: Tuple[int, ...]) -> 'TTensor':
"""
...

@staticmethod
@abstractmethod
def minmax_normalize(
tensor: 'TTensor',
t_range: Tuple = (0, 1),
x_range: Optional[Tuple] = None,
eps: float = 1e-7,
):
"""
Normalize values in `tensor` into `t_range`.

`tensor` can be a 1D array or a 2D array. When `tensor` is a 2D array, then
normalization is row-based.

.. note::
- with `t_range=(0, 1)` will normalize the min-value of data to 0, max to 1;
- with `t_range=(1, 0)` will normalize the min-value of data to 1, max value
of the data to 0.

:param tensor: the data to be normalized
:param t_range: a tuple represents the target range.
:param x_range: a tuple represents tensors range.
:param eps: a small jitter to avoid divide by zero
:return: normalized data in `t_range`
"""
...

class Retrieval(ABC, typing.Generic[TTensorRetrieval]):
"""
Abstract class for retrieval and ranking functionalities
Expand Down
32 changes: 32 additions & 0 deletions docarray/computation/numpy_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,38 @@ def reshape(array: 'np.ndarray', shape: Tuple[int, ...]) -> 'np.ndarray':
"""
return array.reshape(shape)

@staticmethod
def minmax_normalize(
tensor: 'np.ndarray',
t_range: Tuple = (0, 1),
x_range: Optional[Tuple] = None,
eps: float = 1e-7,
):
"""
Normalize values in `tensor` into `t_range`.

`tensor` can be a 1D array or a 2D array. When `tensor` is a 2D array, then
normalization is row-based.

.. note::
- with `t_range=(0, 1)` will normalize the min-value of data to 0, max to 1;
- with `t_range=(1, 0)` will normalize the min-value of data to 1, max value
of the data to 0.

:param tensor: the data to be normalized
:param t_range: a tuple represents the target range.
:param x_range: a tuple represents tensors range.
:param eps: a small jitter to avoid divide by zero
:return: normalized data in `t_range`
"""
a, b = t_range

min_d = x_range[0] if x_range else np.min(tensor, axis=-1, keepdims=True)
max_d = x_range[1] if x_range else np.max(tensor, axis=-1, keepdims=True)
r = (b - a) * (tensor - min_d) / (max_d - min_d + eps) + a

return np.clip(r, *((a, b) if a < b else (b, a)))

class Retrieval(AbstractComputationalBackend.Retrieval[np.ndarray]):
"""
Abstract class for retrieval and ranking functionalities
Expand Down
39 changes: 39 additions & 0 deletions docarray/computation/torch_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,45 @@ def reshape(tensor: 'torch.Tensor', shape: Tuple[int, ...]) -> 'torch.Tensor':
"""
return tensor.reshape(shape)

@staticmethod
def minmax_normalize(
tensor: 'torch.Tensor',
t_range: Tuple = (0, 1),
x_range: Optional[Tuple] = None,
eps: float = 1e-7,
):
"""
Normalize values in `tensor` into `t_range`.

`tensor` can be a 1D array or a 2D array. When `tensor` is a 2D array, then
normalization is row-based.

.. note::
- with `t_range=(0, 1)` will normalize the min-value of data to 0, max to 1;
- with `t_range=(1, 0)` will normalize the min-value of data to 1, max value
of the data to 0.

:param tensor: the data to be normalized
:param t_range: a tuple represents the target range.
:param x_range: a tuple represents tensors range.
:param eps: a small jitter to avoid divide by zero
:return: normalized data in `t_range`
"""
a, b = t_range

min_d = (
x_range[0] if x_range else torch.min(tensor, dim=-1, keepdim=True).values
)
max_d = (
x_range[1] if x_range else torch.max(tensor, dim=-1, keepdim=True).values
)
r = (b - a) * (tensor - min_d) / (max_d - min_d + eps) + a

dtype = tensor.dtype
x = torch.clip(r, *((a, b) if a < b else (b, a)))
z = x.to(dtype)
return z

class Retrieval(AbstractComputationalBackend.Retrieval[torch.Tensor]):
"""
Abstract class for retrieval and ranking functionalities
Expand Down
20 changes: 20 additions & 0 deletions tests/units/computation_backends/numpy_backend/test_basics.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,23 @@ def test_empty_dtype():
def test_empty_device():
with pytest.raises(NotImplementedError):
NumpyCompBackend.empty((10, 3), device='meta')


@pytest.mark.parametrize(
    'array,t_range,x_range,result',
    [
        # 1D input scaled from its own (0, 5) range into (0, 10)
        (np.array([0, 1, 2, 3, 4, 5]), (0, 10), None, np.array([0, 2, 4, 6, 8, 10])),
        # explicit x_range equal to t_range: identity mapping
        (np.array([0, 1, 2, 3, 4, 5]), (0, 10), (0, 10), np.array([0, 1, 2, 3, 4, 5])),
        # 2D input: normalization is applied per row
        (
            np.array([[0.0, 1.0], [0.0, 1.0]]),
            (0, 10),
            None,
            np.array([[0.0, 10.0], [0.0, 10.0]]),
        ),
        # reversed target range: documented behavior maps min to 1, max to 0
        (
            np.array([0, 1, 2, 3, 4, 5]),
            (1, 0),
            None,
            np.array([1.0, 0.8, 0.6, 0.4, 0.2, 0.0]),
        ),
    ],
)
def test_minmax_normalize(array, t_range, x_range, result):
    """minmax_normalize maps `array` from `x_range` (or its own min/max) into `t_range`."""
    output = NumpyCompBackend.minmax_normalize(
        tensor=array, t_range=t_range, x_range=x_range
    )
    assert np.allclose(output, result)
30 changes: 30 additions & 0 deletions tests/units/computation_backends/torch_backend/test_basics.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,33 @@ def test_empty_device():
tensor = TorchCompBackend.empty((10, 3), device='meta')
assert tensor.shape == (10, 3)
assert tensor.device == torch.device('meta')


@pytest.mark.parametrize(
    'array,t_range,x_range,result',
    [
        # 1D input scaled from its own (0, 5) range into (0, 10)
        (
            torch.tensor([0, 1, 2, 3, 4, 5]),
            (0, 10),
            None,
            torch.tensor([0, 2, 4, 6, 8, 10]),
        ),
        # explicit x_range equal to t_range: identity mapping
        (
            torch.tensor([0, 1, 2, 3, 4, 5]),
            (0, 10),
            (0, 10),
            torch.tensor([0, 1, 2, 3, 4, 5]),
        ),
        # 2D input: normalization is applied per row
        (
            torch.tensor([[0.0, 1.0], [0.0, 1.0]]),
            (0, 10),
            None,
            torch.tensor([[0.0, 10.0], [0.0, 10.0]]),
        ),
        # reversed target range: documented behavior maps min to 1, max to 0
        # (float input so the dtype round-trip does not truncate the result)
        (
            torch.tensor([0.0, 1.0, 2.0, 3.0, 4.0, 5.0]),
            (1, 0),
            None,
            torch.tensor([1.0, 0.8, 0.6, 0.4, 0.2, 0.0]),
        ),
    ],
)
def test_minmax_normalize(array, t_range, x_range, result):
    """minmax_normalize maps `array` from `x_range` (or its own min/max) into `t_range`."""
    output = TorchCompBackend.minmax_normalize(
        tensor=array, t_range=t_range, x_range=x_range
    )
    assert torch.allclose(output, result)
Empty file removed tests/units/math/__init__.py
Empty file.
17 changes: 0 additions & 17 deletions tests/units/math/test_helper.py

This file was deleted.