Skip to content

Commit a045b37

Browse files
authored
add docstr for filter function (#177)
* docs(filter): init docstr * fix: add regex filter test
1 parent fbde062 commit a045b37

File tree

3 files changed

+41
-1
lines changed

3 files changed

+41
-1
lines changed

docarray/array/mixins/find.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,35 @@ def _filter(
167167
limit: Optional[int] = None,
168168
only_id: bool = False,
169169
) -> 'DocumentArray':
170+
"""Returns a subset of documents by filtering by the given query.
171+
The query language currently provided follows the
172+
[MongoDB](https://docs.mongodb.com/manual/reference/operator/query/) query language. For example::
173+
174+
>>> docs._filter({'text': {'$eq': 'hello'}})
175+
176+
The above will return a `DocumentArray` in which each document has doc.text == 'hello'. A placeholder
177+
format is also supported, using the following syntax::
178+
179+
>>> docs._filter({'text': {'$eq': '{tags__name}'}})
180+
181+
The above will return a `DocumentArray` in which each document has doc.text == doc.tags['name'].
182+
183+
Currently, only a subset of MongoDB's query operators is supported:
184+
- `$eq` - Equal to (number, string)
185+
- `$ne` - Not equal to (number, string)
186+
- `$gt` - Greater than (number)
187+
- `$gte` - Greater than or equal to (number)
188+
- `$lt` - Less than (number)
189+
- `$lte` - Less than or equal to (number)
190+
- `$in` - Included in an array
191+
- `$nin` - Not included in an array
192+
- `$regex` - Match a specified regular expression
193+
194+
:param query: the input query dictionary.
195+
:param limit: the maximum number of matches; optional, with a signature default of ``None``.
196+
:param only_id: if set, the returned documents will only contain ``id``
197+
:return: a `DocumentArray` containing the `Document` objects that match the query.
198+
"""
170199
from ... import DocumentArray
171200
from ..queryset import QueryParser
172201

tests/unit/array/mixins/test_filter.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def test_empty_filter(docs):
2222
assert len(result) == 5
2323

2424

25-
def test_sample_filter(docs):
25+
def test_simple_filter(docs):
2626
result = docs.find({'text': {'$eq': 'hello'}})
2727
assert len(result) == 1
2828
assert result[0].text == 'hello'
@@ -31,6 +31,14 @@ def test_sample_filter(docs):
3131
assert len(result) == 1
3232
assert result[0].tags['x'] == 0.8
3333

34+
result = docs.find({'tags__name': {'$regex': '^h'}})
35+
assert len(result) == 2
36+
assert result[1].id == docs[1].id
37+
38+
result = docs.find({'text': {'$regex': '^h'}})
39+
assert len(result) == 1
40+
assert result[0].id == docs[0].id
41+
3442

3543
def test_logic_filter(docs):
3644
result = docs.find({'$or': {'tags__x': {'$gte': 0.1}, 'tags__y': {'$gte': 0.5}}})

tests/unit/array/test_lookup.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ def test_lookup_ops(doc):
3131
assert lookup('tags__x__lte', 0.1, doc)
3232
assert not lookup('tags__y__lt', 1.5, doc)
3333

34+
assert lookup('text__regex', '^test', doc)
35+
assert not lookup('text__regex', '^est', doc)
36+
3437

3538
def test_lookup_pl(doc):
3639
from docarray.array.queryset.lookup import lookup

0 commit comments

Comments
 (0)