Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
3d45f57
feat: add apply function
Feb 28, 2023
9a07808
test: add benchmark tests
Feb 28, 2023
5709f97
fix: apply
Feb 28, 2023
366b6df
fix: benchmark test
Mar 1, 2023
6e8ff7c
test: benchmark
Mar 2, 2023
89eaf62
fix: apply
Mar 2, 2023
545d9ec
fix: clean up
Mar 2, 2023
d871363
chore: remove benchmark tests from general tests
Mar 2, 2023
8a436ae
chore: fix ci
Mar 2, 2023
c581b58
feat: add threading option and benchmark test
Mar 2, 2023
8bf57fb
test: use both backend options in tests
Mar 2, 2023
0c3524c
feat: add batching to abstract array
Mar 2, 2023
c335895
feat: add apply_batch and _map_batch and tests
Mar 2, 2023
cc23e4e
test: fix load from da
Mar 2, 2023
73c0d84
docs: update docstrings
Mar 2, 2023
b7c2cae
docs: add example for apply
Mar 2, 2023
3eb0c30
fix: mypy
Mar 2, 2023
7c6cb2f
refactor: clean up
Mar 2, 2023
afa5837
refactor: make batch method private
Mar 2, 2023
c69585a
fix: apply
Mar 2, 2023
8a3437a
Test: add for apply batch
Mar 2, 2023
66b78b3
fix: benchmark test increase ndocs
Mar 3, 2023
35e090a
test: clean up
Mar 3, 2023
3019522
test: try to fix
Mar 3, 2023
313d318
test: try to fix test
Mar 3, 2023
0afd5bd
fix: test
Mar 3, 2023
fdcfa23
fix: test
Mar 3, 2023
fc91dbf
fix: apply suggestions from code review
Mar 3, 2023
0d7cd1b
fix: remove print statemetns
Mar 3, 2023
b4c672b
fix: apply samis suggestion
Mar 3, 2023
18a377b
fix: add tests for func da to doc and da to other len da
Mar 3, 2023
245283f
fix: revert last commit
Mar 3, 2023
76fe8b7
test: add len assert
Mar 3, 2023
34b7f9c
test: add assertions
Mar 3, 2023
c7a968d
test: add test to for da extend in batch apply
Mar 3, 2023
6cf8ed2
test: extend with only one doc
Mar 3, 2023
5dc9e6d
test: fix
Mar 3, 2023
d3fc203
fix: test
Mar 3, 2023
45cdc4a
fix: test
Mar 3, 2023
9839602
fix: set docs in apply
Mar 3, 2023
87a93ff
fix: indices
Mar 3, 2023
eeb7fae
fix: indices
Mar 3, 2023
72aaf21
fix: indices
Mar 3, 2023
c0f8029
fix: indices
Mar 3, 2023
9b83c1f
fix:test
Mar 3, 2023
7638d86
fix: mypy
Mar 3, 2023
4a3a290
fix: type hint
Mar 3, 2023
38aae7a
fix: remove apply, only keep map
Mar 3, 2023
01900c9
refactor: map to map_docs
Mar 3, 2023
f6921e0
fix: apply suggestion
Mar 3, 2023
c3fb041
docs: add example usage
Mar 3, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 28 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ jobs:
- name: Test
id: test
run: |
poetry run pytest -m "not tensorflow" ${{ matrix.test-path }}
poetry run pytest -m "not (tensorflow or benchmark)" ${{ matrix.test-path }}
timeout-minutes: 30
# env:
# JINA_AUTH_TOKEN: "${{ secrets.JINA_AUTH_TOKEN }}"
Expand Down Expand Up @@ -162,7 +162,7 @@ jobs:
- name: Test
id: test
run: |
poetry run pytest -m "not tensorflow" ${{ matrix.test-path }}
poetry run pytest -m "not (tensorflow or benchmark)" ${{ matrix.test-path }}
timeout-minutes: 30


Expand Down Expand Up @@ -222,10 +222,35 @@ jobs:
poetry run pytest -m 'tensorflow' tests
timeout-minutes: 30

docarray-test-benchmarks:
needs: [lint-ruff, check-black, import-test]
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: [3.7]
steps:
- uses: actions/checkout@v2.5.0
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Prepare environment
run: |
python -m pip install --upgrade pip
python -m pip install poetry
poetry install --all-extras

- name: Test
id: test
run: |
poetry run pytest -m 'benchmark' tests
timeout-minutes: 30


# just for blocking the merge until all parallel core-test are successful
success-all-test:
needs: [docarray-test, docarray-test-proto3, docarray-test-tensorflow, import-test, check-black, check-mypy, lint-ruff]
needs: [docarray-test, docarray-test-proto3, docarray-test-tensorflow, docarray-test-benchmarks, import-test, check-black, check-mypy, lint-ruff]
if: always()
runs-on: ubuntu-latest
steps:
Expand Down
67 changes: 67 additions & 0 deletions docarray/array/abstract_array.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,23 @@
import random
from abc import abstractmethod
from typing import (
TYPE_CHECKING,
Any,
Dict,
Generator,
Generic,
Iterable,
List,
Sequence,
Type,
TypeVar,
Union,
cast,
overload,
)

import numpy as np

from docarray.base_document import BaseDocument
from docarray.display.document_array_summary import DocumentArraySummary
from docarray.typing import NdArray
Expand All @@ -24,6 +30,7 @@

T = TypeVar('T', bound='AnyDocumentArray')
T_doc = TypeVar('T_doc', bound=BaseDocument)
IndexIterType = Union[slice, Iterable[int], Iterable[bool], None]


class AnyDocumentArray(Sequence[T_doc], Generic[T_doc], AbstractType):
Expand Down Expand Up @@ -79,6 +86,30 @@ def _setter(self, value):

return cls.__typed_da__[cls][item]

# Typing overload: indexing with a single `int` yields one document (`T_doc`).
@overload
def __getitem__(self: T, item: int) -> T_doc:
...

# Typing overload: indexing with an `IndexIterType` (a slice, an iterable of
# ints, an iterable of bools, or None) yields an array of the same type (`T`).
@overload
def __getitem__(self: T, item: IndexIterType) -> T:
...

# Abstract declaration covering both overloads; it has no implementation here
# (body is `...`), so concrete subclasses have to provide the indexing logic.
@abstractmethod
def __getitem__(self, item: Union[int, IndexIterType]) -> Union[T_doc, T]:
...

# Typing overload: assigning at a single `int` key takes one document (`T_doc`).
@overload
def __setitem__(self: T, key: int, value: T_doc):
...

# Typing overload: assigning at an `IndexIterType` key (a slice, an iterable of
# ints, an iterable of bools, or None) takes an array of the same type (`T`).
@overload
def __setitem__(self: T, key: IndexIterType, value: T):
...

# Abstract declaration covering both overloads; it has no implementation here
# (body is `...`), so concrete subclasses have to provide the assignment logic.
@abstractmethod
def __setitem__(self: T, key: Union[int, IndexIterType], value: Union[T, T_doc]):
...

@abstractmethod
def _get_array_attribute(
self: T,
Expand Down Expand Up @@ -249,3 +280,39 @@ def summary(self):
Document type.
"""
DocumentArraySummary(self).summary()

def _batch(
    self: T,
    batch_size: int,
    shuffle: bool = False,
    show_progress: bool = False,
) -> Generator[T, None, None]:
    """
    Lazily split this array into consecutive batches of at most `batch_size`
    Documents. Every batch holds exactly `batch_size` Documents except
    possibly the last one, which holds whatever remains.

    :param batch_size: Number of Documents per batch; must be a positive integer.
    :param shuffle: If set, the Documents are visited in a random order
        instead of their stored order.
    :param show_progress: If set, display a progress bar while batches are
        being produced.
    :yield: the batches one after another, each obtained by indexing this
        array with a list of positions.
    :raises ValueError: if `batch_size` is not a positive integer. As this is
        a generator, the error surfaces when the first batch is requested.
    """
    from rich.progress import track

    if not isinstance(batch_size, int) or batch_size <= 0:
        raise ValueError(
            f'`batch_size` should be a positive integer, received: {batch_size}'
        )

    # Positions of all Documents, in the order they will be handed out.
    order = list(range(len(self)))
    if shuffle:
        random.shuffle(order)

    # One start offset per batch; the length of this range is the number of
    # batches, which is what the progress bar counts.
    batch_starts = range(0, len(order), batch_size)
    for start in track(
        batch_starts,
        description='Batching documents',
        disable=not show_progress,
    ):
        yield self[order[start : start + batch_size]]
4 changes: 2 additions & 2 deletions docarray/array/array/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,11 +177,11 @@ def __getitem__(self, item):
raise TypeError(f'Invalid type {type(head)} for indexing')

@overload
def __setitem__(self: T, key: IndexIterType, value: T):
def __setitem__(self: T, key: int, value: T_doc):
...

@overload
def __setitem__(self: T, key: int, value: T_doc):
def __setitem__(self: T, key: IndexIterType, value: T):
...

def __setitem__(self: T, key: Union[int, IndexIterType], value: Union[T, T_doc]):
Expand Down
4 changes: 1 addition & 3 deletions docarray/display/tensor_display.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,7 @@ def __rich_console__(
from rich.segment import Segment
from rich.style import Style

tensor_normalized = comp_be.minmax_normalize(
comp_be.detach(self.tensor), (0, 5)
)
tensor_normalized = comp_be.minmax_normalize(t_squeezed, (0, 5))

hue = 0.75
saturation = 1.0
Expand Down
14 changes: 13 additions & 1 deletion docarray/helper.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Type
from types import LambdaType
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Type

if TYPE_CHECKING:
from docarray import BaseDocument
Expand Down Expand Up @@ -138,3 +139,14 @@ def _get_field_type_by_access_path(
return None
else:
return None


def _is_lambda_or_partial_or_local_function(func: Callable[[Any], Any]) -> bool:
    """
    Tell whether `func` is a lambda, a callable without a qualified name, or
    a function defined inside another function.

    :param func: the callable to inspect.
    :return: True if `func` is a function object named '<lambda>', if it has
        no `__qualname__` attribute (presumably how partial objects are meant
        to be recognised), or if its `__qualname__` contains '<locals>';
        False otherwise.
    """
    # Lambdas are plain function objects whose name is the '<lambda>' marker.
    if isinstance(func, LambdaType) and func.__name__ == '<lambda>':
        return True

    try:
        qualified_name = func.__qualname__
    except AttributeError:
        # No qualified name at all.
        return True

    # Functions nested in another function carry '<locals>' in their path.
    return '<locals>' in qualified_name
Loading