refactor: derive the default Redis index name from the schema class name

Signed-off-by: jupyterjazz <[email protected]>
docarray · JoanFM · Jul 10, 2023 · May 17, 2023 · Jun 12, 2023 · Jun 15, 2023
commit 7a5ed5e1ab400f02b01271ae1f29886b26d3e947
diff --git a/docarray/index/backends/redis.py b/docarray/index/backends/redis.py
@@ -75,7 +75,6 @@
 class RedisDocumentIndex(BaseDocIndex, Generic[TSchema]):
     def __init__(self, db_config=None, **kwargs):
         """Initialize RedisDocumentIndex"""
-        self._index_name = None
         super().__init__(db_config=db_config, **kwargs)
         self._db_config = cast(RedisDocumentIndex.DBConfig, self._db_config)
 
@@ -175,12 +174,20 @@ def _check_index_exists(self, index_name: str) -> bool:
 
     @property
     def index_name(self):
-        if not self._index_name:
-            self._index_name = index_name = (
-                self._db_config.index_name or 'index_name__' + self._random_name()
+        default_index_name = (
+            self._schema.__name__.lower() if self._schema is not None else None
+        )
+        if default_index_name is None:
+            err_msg = (
+                'A RedisDocumentIndex must be typed with a Document type. '
+                'To do so, use the syntax: RedisDocumentIndex[DocumentType]'
             )
-            self._logger.debug(f'Retrieved index name: {index_name}')
-        return self._index_name
+
+            self._logger.error(err_msg)
+            raise ValueError(err_msg)
+        index_name = self._db_config.index_name or default_index_name
+        self._logger.debug(f'Retrieved index name: {index_name}')
+        return index_name
 
     @property
     def out_schema(self) -> Type[BaseDoc]:

diff --git a/tests/index/redis/fixtures.py b/tests/index/redis/fixtures.py
@@ -2,7 +2,6 @@
 import time
 import uuid
 import pytest
-import redis
 
 
 @pytest.fixture(scope='session', autouse=True)
@@ -18,19 +17,5 @@ def start_redis():
 
 
 @pytest.fixture(scope='function')
-def tmp_collection_name():
+def tmp_index_name():
     return uuid.uuid4().hex
-
-
-@pytest.fixture
-def redis_client():
-    """This fixture provides a Redis client"""
-    client = redis.Redis(host='localhost', port=6379)
-    yield client
-    client.flushall()
-
-
-@pytest.fixture
-def redis_config(redis_client):
-    """This fixture provides the Redis client and flushes all data after each test case"""
-    return redis_client
diff --git a/tests/index/redis/test_configurations.py b/tests/index/redis/test_configurations.py
@@ -5,7 +5,7 @@
 from docarray import BaseDoc
 from docarray.index import RedisDocumentIndex
 from docarray.typing import NdArray
-from tests.index.redis.fixtures import start_redis  # noqa: F401
+from tests.index.redis.fixtures import start_redis, tmp_index_name  # noqa: F401
 
 
 pytestmark = [pytest.mark.slow, pytest.mark.index]
@@ -23,16 +23,16 @@ class Schema(BaseDoc):
     assert index.num_docs() == 10
 
 
-def test_configure_index():
+def test_configure_index(tmp_index_name):
     class Schema(BaseDoc):
         tens: NdArray[100] = Field(space='cosine')
         title: str
         year: int
 
     types = {'id': 'TAG', 'tens': 'VECTOR', 'title': 'TEXT', 'year': 'NUMERIC'}
-    index = RedisDocumentIndex[Schema](host='localhost')
+    index = RedisDocumentIndex[Schema](host='localhost', index_name=tmp_index_name)
 
-    attr_bytes = index._client.ft(index._index_name).info()['attributes']
+    attr_bytes = index._client.ft(index.index_name).info()['attributes']
     attr = [[byte.decode() for byte in sublist] for sublist in attr_bytes]
 
     assert len(Schema.__fields__) == len(attr)

diff --git a/tests/index/redis/test_find.py b/tests/index/redis/test_find.py
@@ -8,7 +8,7 @@
 from docarray import BaseDoc, DocList
 from docarray.index import RedisDocumentIndex
 from docarray.typing import NdArray, TorchTensor
-from tests.index.redis.fixtures import start_redis  # noqa: F401
+from tests.index.redis.fixtures import start_redis, tmp_index_name  # noqa: F401
 
 pytestmark = [pytest.mark.slow, pytest.mark.index]
 
@@ -27,9 +27,9 @@ class TorchDoc(BaseDoc):
 
 
 @pytest.mark.parametrize('space', ['cosine', 'l2', 'ip'])
-def test_find_simple_schema(space):
+def test_find_simple_schema(space, tmp_index_name):
     schema = get_simple_schema(space=space)
-    db = RedisDocumentIndex[schema](host='localhost')
+    db = RedisDocumentIndex[schema](host='localhost', index_name=tmp_index_name)
 
     index_docs = [schema(tens=np.random.rand(N_DIM)) for _ in range(10)]
     index_docs.append(schema(tens=np.ones(N_DIM)))
@@ -68,8 +68,8 @@ def test_find_limit_larger_than_index():
 
 
 @pytest.mark.parametrize('space', ['cosine', 'l2', 'ip'])
-def test_find_torch(space):
-    db = RedisDocumentIndex[TorchDoc](host='localhost')
+def test_find_torch(space, tmp_index_name):
+    db = RedisDocumentIndex[TorchDoc](host='localhost', index_name=tmp_index_name)
     index_docs = [TorchDoc(tens=np.random.rand(N_DIM)) for _ in range(10)]
     index_docs.append(TorchDoc(tens=np.ones(N_DIM, dtype=np.float32)))
     db.index(index_docs)
@@ -91,13 +91,13 @@ def test_find_torch(space):
 
 @pytest.mark.tensorflow
 @pytest.mark.parametrize('space', ['cosine', 'l2', 'ip'])
-def test_find_tensorflow(space):
+def test_find_tensorflow(space, tmp_index_name):
     from docarray.typing import TensorFlowTensor
 
     class TfDoc(BaseDoc):
         tens: TensorFlowTensor[10]
 
-    db = RedisDocumentIndex[TfDoc](host='localhost')
+    db = RedisDocumentIndex[TfDoc](host='localhost', index_name=tmp_index_name)
 
     index_docs = [TfDoc(tens=np.random.rand(N_DIM)) for _ in range(10)]
     index_docs.append(TfDoc(tens=np.ones(10)))
@@ -121,12 +121,12 @@ class TfDoc(BaseDoc):
 
 
 @pytest.mark.parametrize('space', ['cosine', 'l2', 'ip'])
-def test_find_flat_schema(space):
+def test_find_flat_schema(space, tmp_index_name):
     class FlatSchema(BaseDoc):
         tens_one: NdArray = Field(dim=N_DIM, space=space)
         tens_two: NdArray = Field(dim=50, space=space)
 
-    index = RedisDocumentIndex[FlatSchema](host='localhost')
+    index = RedisDocumentIndex[FlatSchema](host='localhost', index_name=tmp_index_name)
 
     index_docs = [
         FlatSchema(tens_one=np.random.rand(N_DIM), tens_two=np.random.rand(50))
@@ -156,7 +156,7 @@ class FlatSchema(BaseDoc):
 
 
 @pytest.mark.parametrize('space', ['cosine', 'l2', 'ip'])
-def test_find_nested_schema(space):
+def test_find_nested_schema(space, tmp_index_name):
     class SimpleDoc(BaseDoc):
         tens: NdArray[N_DIM] = Field(space=space)
 
@@ -168,7 +168,9 @@ class DeepNestedDoc(BaseDoc):
         d: NestedDoc
         tens: NdArray = Field(space=space, dim=N_DIM)
 
-    index = RedisDocumentIndex[DeepNestedDoc](host='localhost')
+    index = RedisDocumentIndex[DeepNestedDoc](
+        host='localhost', index_name=tmp_index_name
+    )
 
     index_docs = [
         DeepNestedDoc(
@@ -243,12 +245,12 @@ class MyDoc(BaseDoc):
         assert q.id == matches[0].id
 
 
-def test_query_builder():
+def test_query_builder(tmp_index_name):
     class SimpleSchema(BaseDoc):
         tensor: NdArray[N_DIM] = Field(space='cosine')
         price: int
 
-    db = RedisDocumentIndex[SimpleSchema](host='localhost')
+    db = RedisDocumentIndex[SimpleSchema](host='localhost', index_name=tmp_index_name)
 
     index_docs = [
         SimpleSchema(tensor=np.array([i + 1] * 10), price=i + 1) for i in range(10)
@@ -269,7 +271,7 @@ class SimpleSchema(BaseDoc):
         assert doc.price <= 3
 
 
-def test_text_search():
+def test_text_search(tmp_index_name):
     class SimpleSchema(BaseDoc):
         description: str
         some_field: Optional[int]
@@ -286,15 +288,15 @@ class SimpleSchema(BaseDoc):
 
     docs = [SimpleSchema(description=text) for text in texts_to_index]
 
-    db = RedisDocumentIndex[SimpleSchema](host='localhost')
+    db = RedisDocumentIndex[SimpleSchema](host='localhost', index_name=tmp_index_name)
     db.index(docs)
 
     docs, _ = db.text_search(query=query_string, search_field='description')
 
     assert docs[0].description == texts_to_index[0]
 
 
-def test_filter():
+def test_filter(tmp_index_name):
     class SimpleSchema(BaseDoc):
         description: str
         price: int
@@ -304,7 +306,7 @@ class SimpleSchema(BaseDoc):
     doc3 = SimpleSchema(description='Random book', price=40)
     docs = [doc1, doc2, doc3]
 
-    db = RedisDocumentIndex[SimpleSchema](host='localhost')
+    db = RedisDocumentIndex[SimpleSchema](host='localhost', index_name=tmp_index_name)
     db.index(docs)
 
     # filter on price < 45

diff --git a/tests/index/redis/test_index_get_del.py b/tests/index/redis/test_index_get_del.py
@@ -5,7 +5,7 @@
 from docarray import BaseDoc
 from docarray.index import RedisDocumentIndex
 from docarray.typing import NdArray
-from tests.index.redis.fixtures import start_redis  # noqa: F401
+from tests.index.redis.fixtures import start_redis, tmp_index_name  # noqa: F401
 
 pytestmark = [pytest.mark.slow, pytest.mark.index]
 
@@ -39,8 +39,8 @@ def test_num_docs(ten_simple_docs):
     assert index.num_docs() == 10
 
 
-def test_get_single(ten_simple_docs):
-    index = RedisDocumentIndex[SimpleDoc](host='localhost')
+def test_get_single(ten_simple_docs, tmp_index_name):
+    index = RedisDocumentIndex[SimpleDoc](host='localhost', index_name=tmp_index_name)
     index.index(ten_simple_docs)
 
     assert index.num_docs() == 10
@@ -54,9 +54,9 @@ def test_get_single(ten_simple_docs):
         index['some_id']
 
 
-def test_get_multiple(ten_simple_docs):
+def test_get_multiple(ten_simple_docs, tmp_index_name):
     docs_to_get_idx = [0, 2, 4, 6, 8]
-    index = RedisDocumentIndex[SimpleDoc](host='localhost')
+    index = RedisDocumentIndex[SimpleDoc](host='localhost', index_name=tmp_index_name)
     index.index(ten_simple_docs)
 
     assert index.num_docs() == 10
@@ -68,8 +68,8 @@ def test_get_multiple(ten_simple_docs):
         assert np.allclose(d_out.tens, d_in.tens)
 
 
-def test_del_single(ten_simple_docs):
-    index = RedisDocumentIndex[SimpleDoc](host='localhost')
+def test_del_single(ten_simple_docs, tmp_index_name):
+    index = RedisDocumentIndex[SimpleDoc](host='localhost', index_name=tmp_index_name)
     index.index(ten_simple_docs)
     assert index.num_docs() == 10
 
@@ -82,10 +82,10 @@ def test_del_single(ten_simple_docs):
         index[doc_id]
 
 
-def test_del_multiple(ten_simple_docs):
+def test_del_multiple(ten_simple_docs, tmp_index_name):
     docs_to_del_idx = [0, 2, 4, 6, 8]
 
-    index = RedisDocumentIndex[SimpleDoc](host='localhost')
+    index = RedisDocumentIndex[SimpleDoc](host='localhost', index_name=tmp_index_name)
     index.index(ten_simple_docs)
 
     assert index.num_docs() == 10
@@ -101,8 +101,8 @@ def test_del_multiple(ten_simple_docs):
             assert np.allclose(index[doc.id].tens, doc.tens)
 
 
-def test_contains(ten_simple_docs):
-    index = RedisDocumentIndex[SimpleDoc](host='localhost')
+def test_contains(ten_simple_docs, tmp_index_name):
+    index = RedisDocumentIndex[SimpleDoc](host='localhost', index_name=tmp_index_name)
     index.index(ten_simple_docs)
 
     for doc in ten_simple_docs:

diff --git a/tests/index/redis/test_persist_data.py b/tests/index/redis/test_persist_data.py
@@ -5,7 +5,7 @@
 from docarray import BaseDoc
 from docarray.index import RedisDocumentIndex
 from docarray.typing import NdArray
-from tests.index.redis.fixtures import start_redis  # noqa: F401
+from tests.index.redis.fixtures import start_redis, tmp_index_name  # noqa: F401
 
 
 pytestmark = [pytest.mark.slow, pytest.mark.index]
@@ -15,12 +15,11 @@ class SimpleDoc(BaseDoc):
     tens: NdArray[10] = Field(dim=1000)
 
 
-def test_persist():
+def test_persist(tmp_index_name):
     query = SimpleDoc(tens=np.random.random((10,)))
 
     # create index
-    index = RedisDocumentIndex[SimpleDoc](host='localhost')
-    index_name = index._index_name
+    index = RedisDocumentIndex[SimpleDoc](host='localhost', index_name=tmp_index_name)
 
     assert index.num_docs() == 0
 
@@ -29,7 +28,7 @@ def test_persist():
     find_results_before = index.find(query, search_field='tens', limit=5)
 
     # load existing index
-    index = RedisDocumentIndex[SimpleDoc](host='localhost', index_name=index_name)
+    index = RedisDocumentIndex[SimpleDoc](host='localhost', index_name=tmp_index_name)
     assert index.num_docs() == 10
     find_results_after = index.find(query, search_field='tens', limit=5)
     for doc_before, doc_after in zip(find_results_before[0], find_results_after[0]):

diff --git a/tests/index/redis/test_subindex.py b/tests/index/redis/test_subindex.py
@@ -22,22 +22,22 @@ class ListDoc(BaseDoc):
     list_tens: NdArray[20] = Field(space='l2')
 
 
-class MyDoc(BaseDoc):
+class NestedDoc(BaseDoc):
     docs: DocList[SimpleDoc]
     list_docs: DocList[ListDoc]
     my_tens: NdArray[30] = Field(space='l2')
 
 
 @pytest.fixture(scope='session')
 def index():
-    index = RedisDocumentIndex[MyDoc](host='localhost')
+    index = RedisDocumentIndex[NestedDoc](host='localhost')
     return index
 
 
 @pytest.fixture(scope='session')
 def data():
     my_docs = [
-        MyDoc(
+        NestedDoc(
             id=f'{i}',
             docs=DocList[SimpleDoc](
                 [
@@ -99,7 +99,7 @@ def test_subindex_index(index, data):
 def test_subindex_get(index, data):
     index.index(data)
     doc = index['1']
-    assert type(doc) == MyDoc
+    assert type(doc) == NestedDoc
     assert doc.id == '1'
     assert len(doc.docs) == 5
     assert type(doc.docs[0]) == SimpleDoc
@@ -158,7 +158,7 @@ def test_subindex_contain(index, data):
     assert not index.subindex_contains(empty_doc)
 
     # Empty index
-    empty_index = RedisDocumentIndex[MyDoc](host='localhost')
+    empty_index = RedisDocumentIndex[NestedDoc](host='localhost')
     assert empty_doc not in empty_index
 
 
@@ -174,7 +174,7 @@ def test_find_subindex(index, data):
     root_docs, docs, scores = index.find_subindex(
         query, subindex='docs', search_field='simple_tens', limit=5
     )
-    assert type(root_docs[0]) == MyDoc
+    assert type(root_docs[0]) == NestedDoc
     assert type(docs[0]) == SimpleDoc
     assert len(scores) == 5
     for root_doc, doc in zip(root_docs, docs):
@@ -188,7 +188,7 @@ def test_find_subindex(index, data):
     )
     assert len(docs) == 5
     assert len(scores) == 5
-    assert type(root_docs[0]) == MyDoc
+    assert type(root_docs[0]) == NestedDoc
     assert type(docs[0]) == SimpleDoc
     for root_doc, doc in zip(root_docs, docs):
         assert np.allclose(doc.simple_tens, np.ones(10))