Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 0 additions & 52 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -138,58 +138,6 @@ jobs:
flags: ${{ steps.test.outputs.codecov_flag }}
fail_ci_if_error: false



docarray-test-jac:
needs: [import-test]
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: [3.8]
pydantic-version: ["pydantic-v2", "pydantic-v1"]
steps:
- uses: actions/[email protected]
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Prepare environment
run: |
python -m pip install --upgrade pip
python -m pip install poetry
poetry install --all-extras
./scripts/install_pydantic_v2.sh ${{ matrix.pydantic-version }}
poetry run pip install elasticsearch==8.6.2
poetry run pip uninstall -y torch
poetry run pip install torch
sudo apt-get update
sudo apt-get install --no-install-recommends ffmpeg

- name: Test
id: test
run: |
poetry run pytest -m "not (tensorflow or benchmark or index or jax)" --cov=docarray --cov-report=xml tests/integrations/store/test_jac.py
echo "flag it as docarray for codeoverage"
echo "codecov_flag=docarray" >> $GITHUB_OUTPUT
timeout-minutes: 30
env:
JINA_AUTH_TOKEN: "${{ secrets.JINA_AUTH_TOKEN }}"
- name: Check codecov file
id: check_files
uses: andstor/file-existence-action@v1
with:
files: "coverage.xml"
- name: Upload coverage from test to Codecov
uses: codecov/[email protected]
if: steps.check_files.outputs.files_exists == 'true' && ${{ matrix.python-version }} == '3.8'
with:
file: coverage.xml
name: benchmark-test-codecov
flags: ${{ steps.test.outputs.codecov_flag }}
fail_ci_if_error: false


docarray-test-proto3:
needs: [import-test]
runs-on: ubuntu-latest
Expand Down
28 changes: 6 additions & 22 deletions docarray/array/doc_list/pushpull.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
Dict,
Iterable,
Iterator,
Optional,
Tuple,
Type,
TypeVar,
Expand All @@ -15,7 +14,7 @@
from typing_extensions import Literal
from typing_inspect import get_args

PUSH_PULL_PROTOCOL = Literal['jac', 's3', 'file']
PUSH_PULL_PROTOCOL = Literal['s3', 'file']
SUPPORTED_PUSH_PULL_PROTOCOLS = get_args(PUSH_PULL_PROTOCOL)

if TYPE_CHECKING: # pragma: no cover
Expand Down Expand Up @@ -55,18 +54,13 @@ def get_pushpull_backend(
"""
Get the backend for the given protocol.

:param protocol: the protocol to use, e.g. 'jac', 'file', 's3'
:param protocol: the protocol to use, e.g. 'file', 's3'
:return: the backend class
"""
if protocol in cls.__backends__:
return cls.__backends__[protocol]

if protocol == 'jac':
from docarray.store.jac import JACDocStore

cls.__backends__[protocol] = JACDocStore
logging.debug('Loaded Jina AI Cloud backend')
elif protocol == 'file':
if protocol == 'file':
from docarray.store.file import FileDocStore

cls.__backends__[protocol] = FileDocStore
Expand All @@ -84,46 +78,36 @@ def get_pushpull_backend(
def push(
self,
url: str,
public: bool = True,
show_progress: bool = False,
branding: Optional[Dict] = None,
**kwargs,
) -> Dict:
"""Push this `DocList` object to the specified url.

:param url: url specifying the protocol and save name of the `DocList`. Should be of the form ``protocol://namespace/name``. e.g. ``s3://bucket/path/to/namespace/name``, ``file:///path/to/folder/name``
:param public: Only used by ``jac`` protocol. If true, anyone can pull a `DocList` if they know its name.
Setting this to false will restrict access to only the creator.
:param show_progress: If true, a progress bar will be displayed.
:param branding: Only used by ``jac`` protocol. A dictionary of branding information to be sent to Jina AI Cloud. {"icon": "emoji", "background": "#fff"}
"""
logging.info(f'Pushing {len(self)} docs to {url}')
protocol, name = self.__class__.resolve_url(url)
return self.__class__.get_pushpull_backend(protocol).push(
self, name, public, show_progress, branding # type: ignore
self, name, show_progress # type: ignore
)

@classmethod
def push_stream(
cls: Type[SelfPushPullMixin],
docs: Iterator['BaseDoc'],
url: str,
public: bool = True,
show_progress: bool = False,
branding: Optional[Dict] = None,
) -> Dict:
"""Push a stream of documents to the specified url.

:param docs: a stream of documents
:param url: url specifying the protocol and save name of the `DocList`. Should be of the form ``protocol://namespace/name``. e.g. ``s3://bucket/path/to/namespace/name``, ``file:///path/to/folder/name``
:param public: Only used by ``jac`` protocol. If true, anyone can pull a `DocList` if they know its name.
:param show_progress: If true, a progress bar will be displayed.
:param branding: Only used by ``jac`` protocol. A dictionary of branding information to be sent to Jina AI Cloud. {"icon": "emoji", "background": "#fff"}
"""
logging.info(f'Pushing stream to {url}')
protocol, name = cls.resolve_url(url)
return cls.get_pushpull_backend(protocol).push_stream(
docs, name, public, show_progress, branding
)
return cls.get_pushpull_backend(protocol).push_stream(docs, name, show_progress)

@classmethod
def pull(
Expand Down
6 changes: 1 addition & 5 deletions docarray/store/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,14 @@
)

if TYPE_CHECKING:
from docarray.store.jac import JACDocStore # noqa: F401
from docarray.store.s3 import S3DocStore # noqa: F401

__all__ = ['FileDocStore']


def __getattr__(name: str):
lib: types.ModuleType
if name == 'JACDocStore':
import_library('hubble', raise_error=True)
import docarray.store.jac as lib
elif name == 'S3DocStore':
if name == 'S3DocStore':
import_library('smart_open', raise_error=True)
import_library('botocore', raise_error=True)
import_library('boto3', raise_error=True)
Expand Down
10 changes: 1 addition & 9 deletions docarray/store/abstract_doc_store.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from abc import ABC, abstractmethod
from typing import Dict, Iterator, List, Optional, Type
from typing import Dict, Iterator, List, Type

from typing_extensions import TYPE_CHECKING

Expand Down Expand Up @@ -35,17 +35,13 @@ def delete(name: str, missing_ok: bool) -> bool:
def push(
docs: 'DocList',
name: str,
public: bool,
show_progress: bool,
branding: Optional[Dict],
) -> Dict:
"""Push this DocList to the specified name.

:param docs: The DocList to push
:param name: The name to push to
:param public: Whether the DocList should be publicly accessible
:param show_progress: If true, a progress bar will be displayed.
:param branding: Branding information to be stored with the DocList
"""
...

Expand All @@ -54,17 +50,13 @@ def push(
def push_stream(
docs: Iterator['BaseDoc'],
url: str,
public: bool = True,
show_progress: bool = False,
branding: Optional[Dict] = None,
) -> Dict:
"""Push a stream of documents to the specified name.

:param docs: a stream of documents
:param url: The name to push to
:param public: Whether the DocList should be publicly accessible
:param show_progress: If true, a progress bar will be displayed.
:param branding: Branding information to be stored with the DocList
"""
...

Expand Down
15 changes: 2 additions & 13 deletions docarray/store/file.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
from pathlib import Path
from typing import Dict, Iterator, List, Optional, Type, TypeVar
from typing import Dict, Iterator, List, Type, TypeVar

from typing_extensions import TYPE_CHECKING

Expand Down Expand Up @@ -98,40 +98,29 @@ def push(
cls: Type[SelfFileDocStore],
docs: 'DocList',
name: str,
public: bool,
show_progress: bool,
branding: Optional[Dict],
) -> Dict:
"""Push this [`DocList`][docarray.DocList] object to the specified file path.

:param docs: The `DocList` to push.
:param name: The file path to push to.
:param public: Not used by the ``file`` protocol.
:param show_progress: If true, a progress bar will be displayed.
:param branding: Not used by the ``file`` protocol.
"""
return cls.push_stream(iter(docs), name, public, show_progress, branding)
return cls.push_stream(iter(docs), name, show_progress)

@classmethod
def push_stream(
cls: Type[SelfFileDocStore],
docs: Iterator['BaseDoc'],
name: str,
public: bool = True,
show_progress: bool = False,
branding: Optional[Dict] = None,
) -> Dict:
"""Push a stream of documents to the specified file path.

:param docs: a stream of documents
:param name: The file path to push to.
:param public: Not used by the ``file`` protocol.
:param show_progress: If true, a progress bar will be displayed.
:param branding: Not used by the ``file`` protocol.
"""
if branding is not None:
logging.warning('branding is not supported for "file" protocol')

source = _to_binary_stream(
docs, protocol='protobuf', compress='gzip', show_progress=show_progress
)
Expand Down
Loading