Merged

Changes from 1 commit (35 commits total):
- caa00fd chore: first pr (jupyterjazz, Jun 28, 2023)
- b45e3a6 docs: modify hnsw (jupyterjazz, Jul 6, 2023)
- cad4e60 Merge branch 'main' into docs-self-contained-indices (jupyterjazz, Jul 6, 2023)
- 11bda62 docs: rough versions of inmemory and hnsw (jupyterjazz, Jul 6, 2023)
- 96319ca chore: update branch (jupyterjazz, Jul 6, 2023)
- f5825f8 docs: weaviate v1 (jupyterjazz, Jul 6, 2023)
- 8aaedbe docs: elastic v1 (jupyterjazz, Jul 17, 2023)
- 4a3e25c docs: introduction page (jupyterjazz, Jul 17, 2023)
- db77beb docs: redis v1 (jupyterjazz, Jul 17, 2023)
- 82afb99 docs: qdrant v1 (jupyterjazz, Jul 17, 2023)
- befc786 docs: validate intro inmemory and hnsw examples (jupyterjazz, Jul 17, 2023)
- 9bdb0dc docs: validate elastic and qdrant examples (jupyterjazz, Jul 17, 2023)
- 64f83bf docs: validate code examples for redis and weaviate (jupyterjazz, Jul 18, 2023)
- 759900c Merge branch 'main' into docs-self-contained-indices (jupyterjazz, Jul 19, 2023)
- 60cd4d4 chore: merge recent updates (jupyterjazz, Jul 19, 2023)
- ca25feb docs: milvus v1 (jupyterjazz, Jul 19, 2023)
- 7fef5d8 Merge branch 'main' into docs-self-contained-indices (jupyterjazz, Jul 24, 2023)
- fe572da docs: validate milvus code (jupyterjazz, Jul 24, 2023)
- 10bc14b docs: make redis and milvus visible (jupyterjazz, Jul 24, 2023)
- 6199a2a docs: refine vol1 (jupyterjazz, Jul 26, 2023)
- fa8f919 Merge branch 'main' into docs-self-contained-indices (jupyterjazz, Jul 26, 2023)
- c257a4e docs: refine vol2 (jupyterjazz, Jul 26, 2023)
- ccf17e1 chore: pull recent updates (jupyterjazz, Jul 26, 2023)
- f3ca77c docs: update api reference (jupyterjazz, Jul 27, 2023)
- 21e3ad2 Merge branch 'main' into docs-self-contained-indices (jupyterjazz, Jul 27, 2023)
- e6ef9c4 docs: apply suggestions (jupyterjazz, Jul 31, 2023)
- 19045ec docs: separate nested data section (jupyterjazz, Jul 31, 2023)
- 5736334 Merge branch 'main' into docs-self-contained-indices (jupyterjazz, Jul 31, 2023)
- 41c7307 docs: apply suggestions vol2 (jupyterjazz, Jul 31, 2023)
- a32a1e5 fix: nested data imports (jupyterjazz, Jul 31, 2023)
- 8a8aa33 Merge branch 'main' into docs-self-contained-indices (jupyterjazz, Aug 1, 2023)
- ef0b7ef docs: apply johannes suggestions (jupyterjazz, Aug 1, 2023)
- 6818688 chore: merge conflicts (jupyterjazz, Aug 1, 2023)
- 9268161 docs: apply suggestions (jupyterjazz, Aug 1, 2023)
- b402802 docs: app sgg (jupyterjazz, Aug 1, 2023)
docs: validate intro inmemory and hnsw examples
Signed-off-by: jupyterjazz <[email protected]>
jupyterjazz committed Jul 17, 2023
commit befc786c43f5b61a9cb3edf835b5b7c27eb59306
21 changes: 10 additions & 11 deletions docs/user_guide/storing/docindex.md
@@ -38,12 +38,12 @@ Currently, DocArray supports the following vector databases:
- [Qdrant](https://qdrant.tech/) | [Docs](index_qdrant.md)
- [Elasticsearch](https://www.elastic.co/elasticsearch/) v7 and v8 | [Docs](index_elastic.md)
- [HNSWlib](https://github.com/nmslib/hnswlib) | [Docs](index_hnswlib.md)
-- InMemoryExactNNSearch | [Docs](index_in_memory.md)
+- InMemoryExactNNIndex | [Docs](index_in_memory.md)


## Basic Usage

-For this user guide you will use the [InMemoryExactNNSearch][docarray.index.backends.in_memory.InMemoryExactNNSearch]
+For this user guide you will use the [InMemoryExactNNIndex][docarray.index.backends.in_memory.InMemoryExactNNIndex]
because it doesn't require you to launch a database server. Instead, it will store your data locally.

!!! note "Using a different vector database"
@@ -52,14 +52,13 @@ because it doesn't require you to launch a database server. Instead, it will sto

!!! note "InMemory-specific settings"
The following sections explain the general concept of Document Index by using
-[InMemoryExactNNSearch][docarray.index.backends.in_memory.InMemoryExactNNSearch] as an example.
-For InMemory-specific settings, check out the [InMemoryExactNNSearch][docarray.index.backends.in_memory.InMemoryExactNNSearch] documentation
+`InMemoryExactNNIndex` as an example.
+For InMemory-specific settings, check out the `InMemoryExactNNIndex` documentation
[here](index_in_memory.md).


```python
from docarray import BaseDoc, DocList
-from docarray.index import HnswDocumentIndex
+from docarray.index import InMemoryExactNNIndex
from docarray.typing import NdArray
import numpy as np

@@ -72,13 +71,13 @@ class MyDoc(BaseDoc):
# Create documents (using dummy/random vectors)
docs = DocList[MyDoc](MyDoc(title=f'title #{i}', price=i, embedding=np.random.rand(128)) for i in range(10))

-# Initialize a new HnswDocumentIndex instance and add the documents to the index.
-doc_index = HnswDocumentIndex[MyDoc](workdir='./my_index')
+# Initialize a new InMemoryExactNNIndex instance and add the documents to the index.
+doc_index = InMemoryExactNNIndex[MyDoc]()
doc_index.index(docs)

# Perform a vector search.
query = np.ones(128)
-retrieved_docs = doc_index.find(query, search_field='embedding', limit=10)
+retrieved_docs, scores = doc_index.find(query, search_field='embedding', limit=10)

# Perform filtering (price < 5)
query = {'price': {'$lt': 5}}
@@ -87,9 +86,9 @@ filtered_docs = doc_index.filter(query, limit=10)
# Perform a hybrid search - combining vector search with filtering
query = (
doc_index.build_query() # get empty query object
-.find(np.ones(128), search_field='embedding') # add vector similarity search
+.find(query=np.ones(128), search_field='embedding') # add vector similarity search
.filter(filter_query={'price': {'$gte': 2}}) # add filter search
.build() # build the query
)
-results = doc_index.execute_query(query)
+retrieved_docs, scores = doc_index.execute_query(query)
```
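To make the corrected `find` return signature above concrete, here is a minimal sketch of what an exact nearest-neighbor ("brute force") index does under the hood: score every stored vector against the query and return the top-k hits along with their scores. This is an illustrative toy in plain Python, not the DocArray implementation; the names `cosine_sim` and `exact_nn_find` are assumptions for this sketch.

```python
# Toy exact-NN search: brute-force cosine similarity plus top-k selection.
import math


def cosine_sim(a, b):
    # Cosine similarity between two equal-length vectors.
    dot = sum(x * y for x, y in zip(a, b))
    na = math.sqrt(sum(x * x for x in a))
    nb = math.sqrt(sum(x * x for x in b))
    return dot / (na * nb)


def exact_nn_find(vectors, query, limit=10):
    # Score every stored vector, then keep the `limit` most similar ones.
    scored = [(i, cosine_sim(v, query)) for i, v in enumerate(vectors)]
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:limit]


vectors = [[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]]
hits = exact_nn_find(vectors, query=[1.0, 0.0], limit=2)
print(hits[0])  # (0, 1.0): the identical vector is the best match
```

This also shows why the real API returns a `(documents, scores)` pair: the ranking and the similarity values are produced together.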
Review comment (Member):
I think we should here again add a big fat link to all the backend documentation pages and tell people that they can get more detailed information there

16 changes: 8 additions & 8 deletions docs/user_guide/storing/index_hnswlib.md
@@ -38,7 +38,7 @@ class MyDoc(BaseDoc):
docs = DocList[MyDoc](MyDoc(title=f'title #{i}', embedding=np.random.rand(128)) for i in range(10))

# Initialize a new HnswDocumentIndex instance and add the documents to the index.
-doc_index = HnswDocumentIndex[MyDoc](workdir='./my_index')
+doc_index = HnswDocumentIndex[MyDoc](work_dir='./my_index')
doc_index.index(docs)

# Perform a vector search.
@@ -326,8 +326,8 @@ query = (
)

# execute the combined query and return the results
-results = db.execute_query(query)
-print(f'{results=}')
+retrieved_docs, scores = db.execute_query(query)
+print(f'{retrieved_docs=}')
```

In the example above you can see how to form a hybrid query that combines vector similarity search and filtered search
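The hybrid pattern described above can be sketched as a metadata pre-filter followed by exact vector search over the surviving candidates. This is a plain-Python illustration of the idea, not the HNSWlib or DocArray implementation; `hybrid_find` and `predicate` are names assumed for this sketch.

```python
# Toy hybrid search: filter on metadata first, then rank by similarity.
import math


def cosine_sim(a, b):
    dot = sum(x * y for x, y in zip(a, b))
    na = math.sqrt(sum(x * x for x in a))
    nb = math.sqrt(sum(x * x for x in b))
    return dot / (na * nb)


def hybrid_find(docs, query_vec, predicate, limit=10):
    # 1) filter step: keep only docs whose metadata passes the predicate.
    candidates = [d for d in docs if predicate(d)]
    # 2) vector step: rank the survivors by similarity to the query.
    candidates.sort(key=lambda d: cosine_sim(d["embedding"], query_vec), reverse=True)
    return candidates[:limit]


docs = [{"price": p, "embedding": [p, 1.0]} for p in range(5)]
results = hybrid_find(docs, query_vec=[1.0, 0.0], predicate=lambda d: d["price"] >= 2)
assert all(d["price"] >= 2 for d in results)  # no filtered-out doc can rank
```

Real engines often interleave these two steps for speed, but the observable contract is the same: every returned hit satisfies both the filter and the ranking.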
@@ -534,7 +534,7 @@ class YouTubeVideoDoc(BaseDoc):


# create a Document Index
-doc_index = HnswDocumentIndex[YouTubeVideoDoc](work_dir='/tmp2')
+doc_index = HnswDocumentIndex[YouTubeVideoDoc](work_dir='./tmp2')

# create some data
index_docs = [
@@ -611,7 +611,7 @@ class MyDoc(BaseDoc):


# create a Document Index
-doc_index = HnswDocumentIndex[MyDoc](work_dir='/tmp3')
+doc_index = HnswDocumentIndex[MyDoc](work_dir='./tmp3')

# create some data
index_docs = [
@@ -676,7 +676,7 @@ Now we can instantiate our Index and index some data.

```python
docs = DocList[MyDoc](
-[MyDoc(embedding=np.random.rand(10), text=f'I am the first version of Document {i}') for i in range(100)]
+[MyDoc(embedding=np.random.rand(128), text=f'I am the first version of Document {i}') for i in range(100)]
)
index = HnswDocumentIndex[MyDoc]()
index.index(docs)
@@ -686,7 +686,7 @@ assert index.num_docs() == 100
Now we can find relevant documents

```python
-res = index.find(query=docs[0], search_field='tens', limit=100)
+res = index.find(query=docs[0], search_field='embedding', limit=100)
assert len(res.documents) == 100
for doc in res.documents:
assert 'I am the first version' in doc.text
@@ -705,7 +705,7 @@ assert index.num_docs() == 100
When we retrieve them again we can see that their text attribute has been updated accordingly

```python
-res = index.find(query=docs[0], search_field='tens', limit=100)
+res = index.find(query=docs[0], search_field='embedding', limit=100)
assert len(res.documents) == 100
for doc in res.documents:
assert 'I am the second version' in doc.text
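The reindex-to-update behavior exercised in the assertions above can be illustrated with a toy index: many document indexes implement "update" as an upsert keyed by document id, so indexing a document with an existing id overwrites the stored copy instead of adding a new one. This `TinyIndex` class is a hypothetical sketch, not DocArray code.

```python
# Toy upsert-by-id index: re-indexing a doc with the same id overwrites it.
class TinyIndex:
    def __init__(self):
        self._store = {}  # id -> document dict

    def index(self, docs):
        for doc in docs:
            self._store[doc["id"]] = doc  # same id -> overwrite (upsert)

    def num_docs(self):
        return len(self._store)


idx = TinyIndex()
docs = [{"id": str(i), "text": f"first version of Document {i}"} for i in range(100)]
idx.index(docs)
assert idx.num_docs() == 100

# Mutate the documents and re-index them under the same ids.
for doc in docs:
    doc["text"] = doc["text"].replace("first", "second")
idx.index(docs)

assert idx.num_docs() == 100  # count unchanged: updates, not duplicates
```

This is why the doc count stays at 100 after re-indexing in the example above, while the `text` attribute changes.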
88 changes: 18 additions & 70 deletions docs/user_guide/storing/index_in_memory.md
@@ -39,7 +39,7 @@ doc_index.index(docs)

# Perform a vector search.
query = np.ones(128)
-retrieved_docs = doc_index.find(query, search_field='embedding', limit=10)
+retrieved_docs, scores = doc_index.find(query, search_field='embedding', limit=10)
```

## Initialize
@@ -99,7 +99,7 @@ You can work around this problem by subclassing the predefined Document and addi
embedding: NdArray[128]


-db = InMemoryExactNNIndex[MyDoc](work_dir='test_db')
+db = InMemoryExactNNIndex[MyDoc]()
```

=== "Using Field()"
@@ -114,7 +114,7 @@ You can work around this problem by subclassing the predefined Document and addi
embedding: AnyTensor = Field(dim=128)


-db = InMemoryExactNNIndex[MyDoc](work_dir='test_db3')
+db = InMemoryExactNNIndex[MyDoc]()
```

Once the schema of your Document Index is defined in this way, the data that you are indexing can be either of the
@@ -126,11 +126,11 @@ The [next section](#index) goes into more detail about data indexing, but note t
from docarray import DocList

# data of type TextDoc
-data = DocList[TextDoc](
+data = DocList[MyDoc](
[
-TextDoc(text='hello world', embedding=np.random.rand(128)),
-TextDoc(text='hello world', embedding=np.random.rand(128)),
-TextDoc(text='hello world', embedding=np.random.rand(128)),
+MyDoc(text='hello world', embedding=np.random.rand(128)),
+MyDoc(text='hello world', embedding=np.random.rand(128)),
+MyDoc(text='hello world', embedding=np.random.rand(128)),
]
)

@@ -338,8 +338,8 @@ query = (
)

# execute the combined query and return the results
-results = db.execute_query(query)
-print(f'{results=}')
+retrieved_docs, scores = db.execute_query(query)
+print(f'{retrieved_docs=}')
```

In the example above you can see how to form a hybrid query that combines vector similarity search and filtered search
@@ -403,7 +403,7 @@ If you want to set configurations globally, i.e. for all vector fields in your D

```python
from collections import defaultdict
-from docarray.typing import AbstractTensor
+from docarray.typing.tensor.abstract_tensor import AbstractTensor
new_doc_index = InMemoryExactNNIndex[MyDoc](
default_column_config=defaultdict(
dict,
@@ -461,24 +461,24 @@ from docarray.typing import ImageUrl, VideoUrl, AnyTensor
# define a nested schema
class ImageDoc(BaseDoc):
url: ImageUrl
-tensor: AnyTensor = Field(space='cosine', dim=64)
+tensor: AnyTensor = Field(space='cosine_sim', dim=64)


class VideoDoc(BaseDoc):
url: VideoUrl
-tensor: AnyTensor = Field(space='cosine', dim=128)
+tensor: AnyTensor = Field(space='cosine_sim', dim=128)


class YouTubeVideoDoc(BaseDoc):
title: str
description: str
thumbnail: ImageDoc
video: VideoDoc
-tensor: AnyTensor = Field(space='cosine', dim=256)
+tensor: AnyTensor = Field(space='cosine_sim', dim=256)


# create a Document Index
-doc_index = InMemoryExactNNIndex[YouTubeVideoDoc](work_dir='/tmp2')
+doc_index = InMemoryExactNNIndex[YouTubeVideoDoc]()

# create some data
index_docs = [
@@ -540,18 +540,18 @@ The `MyDoc` contains a `DocList` of `VideoDoc`, which contains a `DocList` of `I
```python
class ImageDoc(BaseDoc):
url: ImageUrl
-tensor_image: AnyTensor = Field(space='cosine', dim=64)
+tensor_image: AnyTensor = Field(space='cosine_sim', dim=64)


class VideoDoc(BaseDoc):
url: VideoUrl
images: DocList[ImageDoc]
-tensor_video: AnyTensor = Field(space='cosine', dim=128)
+tensor_video: AnyTensor = Field(space='cosine_sim', dim=128)


class MyDoc(BaseDoc):
docs: DocList[VideoDoc]
-tensor: AnyTensor = Field(space='cosine', dim=256)
+tensor: AnyTensor = Field(space='cosine_sim', dim=256)


# create a Document Index
@@ -601,56 +601,4 @@ root_docs, sub_docs, scores = doc_index.find_subindex(
root_docs, sub_docs, scores = doc_index.find_subindex(
np.ones(64), subindex='docs__images', search_field='tensor_image', limit=3
)
```
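The subindex search ending above (`find_subindex` on `docs__images`) can be illustrated with a toy version: flatten the nested documents into their own searchable collection while remembering which root each one came from, so a match on a nested field maps back to its root document. This is a hedged plain-Python sketch, not the DocArray implementation; `find_subindex`, `roots`, and the dict layout are assumptions of this sketch.

```python
# Toy subindex: nested images are searched directly, roots recovered by id.
import math


def cosine_sim(a, b):
    dot = sum(x * y for x, y in zip(a, b))
    na = math.sqrt(sum(x * x for x in a))
    nb = math.sqrt(sum(x * x for x in b))
    return dot / (na * nb)


def find_subindex(roots, query, limit=3):
    # Flatten: every nested image becomes a (root_id, image) entry.
    flat = [(root["id"], img) for root in roots for img in root["images"]]
    # Rank the flattened entries by similarity of the nested tensor.
    flat.sort(key=lambda pair: cosine_sim(pair[1]["tensor"], query), reverse=True)
    top = flat[:limit]
    root_ids = [rid for rid, _ in top]
    sub_docs = [img for _, img in top]
    return root_ids, sub_docs


roots = [
    {"id": "a", "images": [{"tensor": [1.0, 0.0]}, {"tensor": [0.0, 1.0]}]},
    {"id": "b", "images": [{"tensor": [1.0, 1.0]}]},
]
root_ids, sub_docs = find_subindex(roots, query=[1.0, 0.0], limit=2)
print(root_ids)  # ['a', 'b']
```

The key design point is the stored back-reference: the subindex returns both the matched sub-documents and the ids needed to fetch their roots.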
-
-### Update elements
-In order to update a Document inside the index, you only need to reindex it with the updated attributes.
-
-First lets create a schema for our Index
-```python
-import numpy as np
-from docarray import BaseDoc, DocList
-from docarray.typing import NdArray
-from docarray.index import InMemoryExactNNIndex
-class MyDoc(BaseDoc):
-    text: str
-    embedding: NdArray[128]
-```
-Now we can instantiate our Index and index some data.
-
-```python
-docs = DocList[MyDoc](
-    [MyDoc(embedding=np.random.rand(10), text=f'I am the first version of Document {i}') for i in range(100)]
-)
-index = InMemoryExactNNIndex[MyDoc]()
-index.index(docs)
-assert index.num_docs() == 100
-```
-
-Now we can find relevant documents
-
-```python
-res = index.find(query=docs[0], search_field='tens', limit=100)
-assert len(res.documents) == 100
-for doc in res.documents:
-    assert 'I am the first version' in doc.text
-```
-
-and update all of the text of this documents and reindex them
-
-```python
-for i, doc in enumerate(docs):
-    doc.text = f'I am the second version of Document {i}'
-
-index.index(docs)
-assert index.num_docs() == 100
-```
-
-When we retrieve them again we can see that their text attribute has been updated accordingly
-
-```python
-res = index.find(query=docs[0], search_field='tens', limit=100)
-assert len(res.documents) == 100
-for doc in res.documents:
-    assert 'I am the second version' in doc.text
-```