Skip to content

Commit c566401

Browse files
authored
fix: slow hnsw by caching num docs (#1706)
Signed-off-by: jupyterjazz <[email protected]>
1 parent b306c80 commit c566401

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

docarray/index/backends/hnswlib.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -127,6 +127,7 @@ def __init__(self, db_config=None, **kwargs):
127 127
self._sqlite_cursor = self._sqlite_conn.cursor()
128 128
self._create_docs_table()
129 129
self._sqlite_conn.commit()
130 +
self._num_docs = self._get_num_docs_sqlite()
130 131
self._logger.info(f'{self.__class__.__name__} has been initialized')
131 132

132 133
@property
@@ -259,6 +260,7 @@ def index(self, docs: Union[BaseDoc, Sequence[BaseDoc]], **kwargs):
259 260

260 261
self._send_docs_to_sqlite(docs_validated)
261 262
self._sqlite_conn.commit()
263 +
self._num_docs = self._get_num_docs_sqlite()
262 264

263 265
def execute_query(self, query: List[Tuple[str, Dict]], *args, **kwargs) -> Any:
264 266
"""
@@ -379,6 +381,7 @@ def _del_items(self, doc_ids: Sequence[str]):
379 381

380 382
self._delete_docs_from_sqlite(doc_ids)
381 383
self._sqlite_conn.commit()
384 +
self._num_docs = self._get_num_docs_sqlite()
382 385

383 386
def _get_items(self, doc_ids: Sequence[str], out: bool = True) -> Sequence[TSchema]:
384 387
"""Get Documents from the hnswlib index, by `id`.
@@ -403,7 +406,7 @@ def num_docs(self) -> int:
403 406
"""
404 407
Get the number of documents.
405 408
"""
406 -
return self._get_num_docs_sqlite()
409 +
return self._num_docs
407 410

408 411
###############################################
409 412
# Helpers #

0 commit comments

Comments
 (0)