test(distance): unit tests for distance (#216)

azayz · web-flow · commit 3f4774afce0c · 2022-03-21T22:02:56.000+01:00
diff --git a/tests/unit/math/distance/test_numpy.py b/tests/unit/math/distance/test_numpy.py
@@ -0,0 +1,146 @@
+import numpy as np
+import pytest
+from scipy.sparse import csr_matrix
+
+from docarray.math.distance.numpy import (
+    cosine,
+    euclidean,
+    sparse_cosine,
+    sparse_euclidean,
+    sparse_sqeuclidean,
+    sqeuclidean,
+)
+
+# assert_allclose accepts |actual - desired| <= atol + rtol * |desired|, so a
+# purely relative tolerance degenerates to exact equality wherever the expected
+# distance is 0. The small absolute tolerance stops those entries from pinning
+# platform-dependent rounding noise.
+RTOL = 1e-5
+ATOL = 1e-6
+
+
+@pytest.mark.parametrize(
+    'x_mat, y_mat, result',
+    (
+        (
+            np.array([[1, 2, 3], [4, 5, 6]]),
+            np.array([[1, 2, 3], [4, 5, 6]]),
+            # Identical rows: distance is 0 up to rounding, absorbed by ATOL.
+            np.array([[0, 2.53681537e-02], [2.53681537e-02, 0]]),
+        ),
+        (np.array([[1, 2, 3]]), np.array([[1, 2, 3]]), np.array([[0]])),
+        # All-zero rows: the dense implementation is expected to return 0.
+        (np.array([[0, 0, 0]]), np.array([[0, 0, 0]]), np.array([[0]])),
+        (np.array([[1, 2, 3]]), np.array([[19, 53, 201]]), np.array([[0.06788693]])),
+    ),
+)
+def test_cosine(x_mat, y_mat, result):
+    """Check dense ``cosine`` against reference pairwise distance matrices."""
+    actual = cosine(x_mat, y_mat)
+    np.testing.assert_allclose(actual, result, rtol=RTOL, atol=ATOL)
+
+
+@pytest.mark.parametrize(
+    'x_mat, y_mat, result',
+    (
+        (
+            csr_matrix([[1, 2, 3], [4, 5, 6]]),
+            csr_matrix([[1, 2, 3], [4, 5, 6]]),
+            # Identical rows: distance is 0 up to rounding, absorbed by ATOL.
+            np.array([[0, 2.53681537e-02], [2.53681537e-02, 0]]),
+        ),
+        (csr_matrix([[1, 2, 3]]), csr_matrix([[1, 2, 3]]), np.array([[0]])),
+        # All-zero rows: the sparse implementation is expected to return NaN
+        # (assert_allclose treats NaNs in matching positions as equal).
+        (csr_matrix([[0, 0, 0]]), csr_matrix([[0, 0, 0]]), np.array([[np.nan]])),
+        (
+            csr_matrix([[1, 2, 3]]),
+            csr_matrix([[19, 53, 201]]),
+            np.array([[0.06788693]]),
+        ),
+    ),
+)
+def test_sparse_cosine(x_mat, y_mat, result):
+    """Check ``sparse_cosine`` on CSR inputs against reference distances."""
+    actual = sparse_cosine(x_mat, y_mat)
+    np.testing.assert_allclose(actual, result, rtol=RTOL, atol=ATOL)
+
+
+@pytest.mark.parametrize(
+    'x_mat, y_mat, result',
+    (
+        (
+            np.array([[1, 2, 3], [4, 5, 6]]),
+            np.array([[1, 2, 3], [4, 5, 6]]),
+            np.array([[0, 27], [27, 0]]),
+        ),
+        (np.array([[1, 2, 3]]), np.array([[1, 2, 3]]), np.array([[0]])),
+        (np.array([[0, 0, 0]]), np.array([[0, 0, 0]]), np.array([[0]])),
+        (np.array([[1, 2, 3]]), np.array([[19, 53, 201]]), np.array([[42129]])),
+    ),
+)
+def test_sqeuclidean(x_mat, y_mat, result):
+    """Check dense ``sqeuclidean`` against reference squared distances."""
+    actual = sqeuclidean(x_mat, y_mat)
+    np.testing.assert_allclose(actual, result, rtol=RTOL, atol=ATOL)
+
+
+@pytest.mark.parametrize(
+    'x_mat, y_mat, result',
+    (
+        (
+            csr_matrix([[1, 2, 3], [4, 5, 6]]),
+            csr_matrix([[1, 2, 3], [4, 5, 6]]),
+            np.array([[0, 27], [27, 0]]),
+        ),
+        (csr_matrix([[1, 2, 3]]), csr_matrix([[1, 2, 3]]), np.array([[0]])),
+        (csr_matrix([[0, 0, 0]]), csr_matrix([[0, 0, 0]]), np.array([[0]])),
+        (csr_matrix([[1, 2, 3]]), csr_matrix([[19, 53, 201]]), np.array([[42129]])),
+    ),
+)
+def test_sparse_sqeuclidean(x_mat, y_mat, result):
+    """Check ``sparse_sqeuclidean`` on CSR inputs against reference values."""
+    actual = sparse_sqeuclidean(x_mat, y_mat)
+    np.testing.assert_allclose(actual, result, rtol=RTOL, atol=ATOL)
+
+
+@pytest.mark.parametrize(
+    'x_mat, y_mat, result',
+    (
+        (
+            np.array([[1, 2, 3], [4, 5, 6]]),
+            np.array([[1, 2, 3], [4, 5, 6]]),
+            np.array([[0, 5.19615242], [5.19615242, 0]]),
+        ),
+        (np.array([[1, 2, 3]]), np.array([[1, 2, 3]]), np.array([[0]])),
+        (np.array([[0, 0, 0]]), np.array([[0, 0, 0]]), np.array([[0]])),
+        (np.array([[1, 2, 3]]), np.array([[19, 53, 201]]), np.array([[205.2535018]])),
+    ),
+)
+def test_euclidean(x_mat, y_mat, result):
+    """Check dense ``euclidean`` against reference distances."""
+    actual = euclidean(x_mat, y_mat)
+    np.testing.assert_allclose(actual, result, rtol=RTOL, atol=ATOL)
+
+
+@pytest.mark.parametrize(
+    'x_mat, y_mat, result',
+    (
+        (
+            csr_matrix([[1, 2, 3], [4, 5, 6]]),
+            csr_matrix([[1, 2, 3], [4, 5, 6]]),
+            np.array([[0, 5.19615242], [5.19615242, 0]]),
+        ),
+        (csr_matrix([[1, 2, 3]]), csr_matrix([[1, 2, 3]]), np.array([[0]])),
+        (csr_matrix([[0, 0, 0]]), csr_matrix([[0, 0, 0]]), np.array([[0]])),
+        (
+            csr_matrix([[1, 2, 3]]),
+            csr_matrix([[19, 53, 201]]),
+            np.array([[205.2535018]]),
+        ),
+    ),
+)
+def test_sparse_euclidean(x_mat, y_mat, result):
+    """Check ``sparse_euclidean`` on CSR inputs against reference distances."""
+    actual = sparse_euclidean(x_mat, y_mat)
+    np.testing.assert_allclose(actual, result, rtol=RTOL, atol=ATOL)
diff --git a/tests/unit/math/distance/test_paddle.py b/tests/unit/math/distance/test_paddle.py
@@ -0,0 +1,107 @@
+import numpy as np
+import paddle
+import pytest
+
+from docarray.math.distance.paddle import cosine, euclidean, sqeuclidean
+
+# assert_allclose accepts |actual - desired| <= atol + rtol * |desired|, so a
+# purely relative tolerance degenerates to exact equality wherever the expected
+# distance is 0. The small absolute tolerance stops those entries from pinning
+# float32 rounding noise.
+RTOL = 1e-5
+ATOL = 1e-6
+
+
+@pytest.mark.parametrize(
+    'x_mat, y_mat, result',
+    (
+        (
+            paddle.to_tensor([[1, 2, 3], [4, 5, 6]], dtype='float32'),
+            paddle.to_tensor([[1, 2, 3], [4, 5, 6]], dtype='float32'),
+            # Identical rows: distance is 0 up to rounding, absorbed by ATOL.
+            np.array([[0, 2.53681537e-02], [2.53681537e-02, 0]]),
+        ),
+        (
+            paddle.to_tensor([[1, 2, 3]], dtype='float32'),
+            paddle.to_tensor([[1, 2, 3]], dtype='float32'),
+            np.array([[0]]),
+        ),
+        # All-zero rows: this backend is expected to return 1 rather than 0.
+        (
+            paddle.to_tensor([[0, 0, 0]], dtype='float32'),
+            paddle.to_tensor([[0, 0, 0]], dtype='float32'),
+            np.array([[1]]),
+        ),
+        (
+            paddle.to_tensor([[1, 2, 3]], dtype='float32'),
+            paddle.to_tensor([[19, 53, 201]], dtype='float32'),
+            np.array([[0.06788693]]),
+        ),
+    ),
+)
+def test_cosine(x_mat, y_mat, result):
+    """Check paddle ``cosine`` against reference pairwise distance matrices."""
+    actual = cosine(x_mat, y_mat)
+    np.testing.assert_allclose(actual, result, rtol=RTOL, atol=ATOL)
+
+
+@pytest.mark.parametrize(
+    'x_mat, y_mat, result',
+    (
+        (
+            paddle.to_tensor([[1, 2, 3], [4, 5, 6]], dtype='float32'),
+            paddle.to_tensor([[1, 2, 3], [4, 5, 6]], dtype='float32'),
+            np.array([[0, 27], [27, 0]]),
+        ),
+        (
+            paddle.to_tensor([[1, 2, 3]], dtype='float32'),
+            paddle.to_tensor([[1, 2, 3]], dtype='float32'),
+            np.array([[0]]),
+        ),
+        (
+            paddle.to_tensor([[0, 0, 0]], dtype='float32'),
+            paddle.to_tensor([[0, 0, 0]], dtype='float32'),
+            np.array([[0]]),
+        ),
+        (
+            paddle.to_tensor([[1, 2, 3]], dtype='float32'),
+            paddle.to_tensor([[19, 53, 201]], dtype='float32'),
+            np.array([[42129]]),
+        ),
+    ),
+)
+def test_sqeuclidean(x_mat, y_mat, result):
+    """Check paddle ``sqeuclidean`` against reference squared distances."""
+    actual = sqeuclidean(x_mat, y_mat)
+    np.testing.assert_allclose(actual, result, rtol=RTOL, atol=ATOL)
+
+
+@pytest.mark.parametrize(
+    'x_mat, y_mat, result',
+    (
+        (
+            paddle.to_tensor([[1, 2, 3], [4, 5, 6]], dtype='float32'),
+            paddle.to_tensor([[1, 2, 3], [4, 5, 6]], dtype='float32'),
+            np.array([[0, 5.19615242], [5.19615242, 0]]),
+        ),
+        (
+            paddle.to_tensor([[1, 2, 3]], dtype='float32'),
+            paddle.to_tensor([[1, 2, 3]], dtype='float32'),
+            np.array([[0]]),
+        ),
+        (
+            paddle.to_tensor([[0, 0, 0]], dtype='float32'),
+            paddle.to_tensor([[0, 0, 0]], dtype='float32'),
+            np.array([[0]]),
+        ),
+        (
+            paddle.to_tensor([[1, 2, 3]], dtype='float32'),
+            paddle.to_tensor([[19, 53, 201]], dtype='float32'),
+            np.array([[205.2535018]]),
+        ),
+    ),
+)
+def test_euclidean(x_mat, y_mat, result):
+    """Check paddle ``euclidean`` against reference distances."""
+    actual = euclidean(x_mat, y_mat)
+    np.testing.assert_allclose(actual, result, rtol=RTOL, atol=ATOL)
diff --git a/tests/unit/math/distance/test_tensorflow.py b/tests/unit/math/distance/test_tensorflow.py
@@ -0,0 +1,107 @@
+import numpy as np
+import pytest
+import tensorflow as tf
+
+from docarray.math.distance.tensorflow import cosine, euclidean, sqeuclidean
+
+# assert_allclose accepts |actual - desired| <= atol + rtol * |desired|, so a
+# purely relative tolerance degenerates to exact equality wherever the expected
+# distance is 0. The small absolute tolerance stops those entries from pinning
+# float32 rounding noise.
+RTOL = 1e-5
+ATOL = 1e-6
+
+
+@pytest.mark.parametrize(
+    'x_mat, y_mat, result',
+    (
+        (
+            tf.constant([[1, 2, 3], [4, 5, 6]], dtype=tf.float32),
+            tf.constant([[1, 2, 3], [4, 5, 6]], dtype=tf.float32),
+            # Identical rows: distance is 0 up to rounding, absorbed by ATOL.
+            np.array([[0, 2.53681537e-02], [2.53681537e-02, 0]]),
+        ),
+        (
+            tf.constant([[1, 2, 3]], dtype=tf.float32),
+            tf.constant([[1, 2, 3]], dtype=tf.float32),
+            np.array([[0]]),
+        ),
+        # All-zero rows: this backend is expected to return 1 rather than 0.
+        (
+            tf.constant([[0, 0, 0]], dtype=tf.float32),
+            tf.constant([[0, 0, 0]], dtype=tf.float32),
+            np.array([[1]]),
+        ),
+        (
+            tf.constant([[1, 2, 3]], dtype=tf.float32),
+            tf.constant([[19, 53, 201]], dtype=tf.float32),
+            np.array([[0.06788693]]),
+        ),
+    ),
+)
+def test_cosine(x_mat, y_mat, result):
+    """Check TensorFlow ``cosine`` against reference pairwise distances."""
+    actual = cosine(x_mat, y_mat)
+    np.testing.assert_allclose(actual, result, rtol=RTOL, atol=ATOL)
+
+
+@pytest.mark.parametrize(
+    'x_mat, y_mat, result',
+    (
+        (
+            tf.constant([[1, 2, 3], [4, 5, 6]], dtype=tf.float32),
+            tf.constant([[1, 2, 3], [4, 5, 6]], dtype=tf.float32),
+            np.array([[0, 27], [27, 0]]),
+        ),
+        (
+            tf.constant([[1, 2, 3]], dtype=tf.float32),
+            tf.constant([[1, 2, 3]], dtype=tf.float32),
+            np.array([[0]]),
+        ),
+        (
+            tf.constant([[0, 0, 0]], dtype=tf.float32),
+            tf.constant([[0, 0, 0]], dtype=tf.float32),
+            np.array([[0]]),
+        ),
+        (
+            tf.constant([[1, 2, 3]], dtype=tf.float32),
+            tf.constant([[19, 53, 201]], dtype=tf.float32),
+            np.array([[42129]]),
+        ),
+    ),
+)
+def test_sqeuclidean(x_mat, y_mat, result):
+    """Check TensorFlow ``sqeuclidean`` against reference squared distances."""
+    actual = sqeuclidean(x_mat, y_mat)
+    np.testing.assert_allclose(actual, result, rtol=RTOL, atol=ATOL)
+
+
+@pytest.mark.parametrize(
+    'x_mat, y_mat, result',
+    (
+        (
+            tf.constant([[1, 2, 3], [4, 5, 6]], dtype=tf.float32),
+            tf.constant([[1, 2, 3], [4, 5, 6]], dtype=tf.float32),
+            np.array([[0, 5.19615242], [5.19615242, 0]]),
+        ),
+        (
+            tf.constant([[1, 2, 3]], dtype=tf.float32),
+            tf.constant([[1, 2, 3]], dtype=tf.float32),
+            np.array([[0]]),
+        ),
+        (
+            tf.constant([[0, 0, 0]], dtype=tf.float32),
+            tf.constant([[0, 0, 0]], dtype=tf.float32),
+            np.array([[0]]),
+        ),
+        (
+            tf.constant([[1, 2, 3]], dtype=tf.float32),
+            tf.constant([[19, 53, 201]], dtype=tf.float32),
+            np.array([[205.2535018]]),
+        ),
+    ),
+)
+def test_euclidean(x_mat, y_mat, result):
+    """Check TensorFlow ``euclidean`` against reference distances."""
+    actual = euclidean(x_mat, y_mat)
+    np.testing.assert_allclose(actual, result, rtol=RTOL, atol=ATOL)
diff --git a/tests/unit/math/distance/test_torch.py b/tests/unit/math/distance/test_torch.py