Skip to content

Commit 6b07992

Browse files
committed
perf: optimize document getter
1 parent 80c186a commit 6b07992

File tree

6 files changed

+118
-146
lines changed

6 files changed

+118
-146
lines changed

docarray/array/storage/base/seqlike.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,15 @@ def insert(self, index: int, value: 'Document'):
1313
:param index: Position of the insertion.
1414
:param value: The doc needs to be inserted.
1515
"""
16-
self._set_doc_by_id(value.id, value)
16+
self._set_doc_by_id(None, value)
1717
self._offset2ids.insert(index, value.id)
1818

1919
def append(self, value: 'Document'):
2020
"""Append `doc` to the end of the array.
2121
2222
:param value: The doc needs to be appended.
2323
"""
24-
self._set_doc_by_id(value.id, value)
24+
self._set_doc_by_id(None, value)
2525
self._offset2ids.append(value.id)
2626

2727
@abstractmethod

docarray/array/storage/memory/getsetdel.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,10 @@ def _del_doc_by_id(self, _id: str):
1616
del self._data[_id]
1717

1818
def _set_doc_by_id(self, _id: str, value: 'Document'):
19-
if _id != value.id:
19+
_vid = value.id
20+
if _id is not None and _id != _vid:
2021
del self._data[_id]
21-
self._data[value.id] = value
22+
self._data[_vid] = value
2223

2324
def _set_doc_value_pairs(
2425
self, docs: Iterable['Document'], values: Sequence['Document']

docarray/document/data.py

Lines changed: 23 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -1,60 +1,36 @@
1-
import mimetypes
21
import os
3-
from collections import defaultdict
42
from dataclasses import dataclass, field, fields
5-
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
3+
from typing import TYPE_CHECKING, Dict, List, Tuple, Union
64

75
if TYPE_CHECKING:
86
from ..score import NamedScore
97
from .. import DocumentArray, Document
108
from ..types import ArrayType, StructValueType, DocumentContentType
119

12-
default_values = dict(
13-
granularity=0,
14-
adjacency=0,
15-
parent_id='',
16-
blob=b'',
17-
text='',
18-
weight=0.0,
19-
uri='',
20-
mime_type='',
21-
tags=dict,
22-
offset=0.0,
23-
location=list,
24-
modality='',
25-
evaluations='Dict[str, NamedScore]',
26-
scores='Dict[str, NamedScore]',
27-
chunks='ChunkArray',
28-
matches='MatchArray',
29-
timestamps=dict,
30-
)
31-
32-
_all_mime_types = set(mimetypes.types_map.values())
33-
3410

3511
@dataclass(unsafe_hash=True)
3612
class DocumentData:
3713
_reference_doc: 'Document' = field(hash=False, compare=False)
3814
id: str = field(default_factory=lambda: os.urandom(16).hex())
39-
parent_id: Optional[str] = None
40-
granularity: Optional[int] = None
41-
adjacency: Optional[int] = None
42-
blob: Optional[bytes] = None
43-
tensor: Optional['ArrayType'] = field(default=None, hash=False, compare=False)
44-
mime_type: Optional[str] = None # must be put in front of `text` `content`
45-
text: Optional[str] = None
46-
content: Optional['DocumentContentType'] = None
47-
weight: Optional[float] = None
48-
uri: Optional[str] = None
49-
tags: Optional[Dict[str, 'StructValueType']] = None
50-
offset: Optional[float] = None
51-
location: Optional[List[float]] = None
52-
embedding: Optional['ArrayType'] = field(default=None, hash=False, compare=False)
53-
modality: Optional[str] = None
54-
evaluations: Optional[Dict[str, Union['NamedScore', Dict]]] = None
55-
scores: Optional[Dict[str, Union['NamedScore', Dict]]] = None
56-
chunks: Optional['DocumentArray'] = None
57-
matches: Optional['DocumentArray'] = None
15+
parent_id: str = ''
16+
granularity: int = 0
17+
adjacency: int = 0
18+
blob: bytes = b''
19+
tensor: 'ArrayType' = field(default=None, hash=False, compare=False)
20+
mime_type: str = '' # must be put in front of `text` `content`
21+
text: str = ''
22+
content: 'DocumentContentType' = None
23+
weight: float = 0.0
24+
uri: str = ''
25+
tags: Dict[str, 'StructValueType'] = field(default_factory=dict)
26+
offset: float = 0.0
27+
location: List[float] = field(default_factory=list)
28+
embedding: 'ArrayType' = field(default=None, hash=False, compare=False)
29+
modality: str = ''
30+
evaluations: Dict[str, Union['NamedScore', Dict]] = None
31+
scores: Dict[str, Union['NamedScore', Dict]] = None
32+
chunks: 'DocumentArray' = None
33+
matches: 'DocumentArray' = None
5834

5935
@property
6036
def _non_empty_fields(self) -> Tuple[str]:
@@ -64,48 +40,9 @@ def _non_empty_fields(self) -> Tuple[str]:
6440
if not f_name.startswith('_'):
6541
v = getattr(self, f_name)
6642
if v is not None:
67-
if f_name not in default_values:
43+
if f_name in ('embedding', 'tensor'):
44+
r.append(f_name)
45+
elif v:
6846
r.append(f_name)
69-
else:
70-
dv = default_values[f_name]
71-
if dv in (
72-
'ChunkArray',
73-
'MatchArray',
74-
'DocumentArray',
75-
list,
76-
dict,
77-
'Dict[str, NamedScore]',
78-
):
79-
if v:
80-
r.append(f_name)
81-
elif v != dv:
82-
r.append(f_name)
8347

8448
return tuple(r)
85-
86-
def _set_default_value_if_none(self, key):
87-
if getattr(self, key) is None:
88-
v = default_values.get(key, None)
89-
if v is not None:
90-
if v == 'DocumentArray':
91-
from .. import DocumentArray
92-
93-
setattr(self, key, DocumentArray())
94-
elif v == 'ChunkArray':
95-
from ..array.chunk import ChunkArray
96-
97-
setattr(
98-
self, key, ChunkArray(None, reference_doc=self._reference_doc)
99-
)
100-
elif v == 'MatchArray':
101-
from ..array.match import MatchArray
102-
103-
setattr(
104-
self, key, MatchArray(None, reference_doc=self._reference_doc)
105-
)
106-
elif v == 'Dict[str, NamedScore]':
107-
from ..score import NamedScore
108-
109-
setattr(self, key, defaultdict(NamedScore))
110-
else:
111-
setattr(self, key, v() if callable(v) else v)

docarray/document/mixins/_property.py

Lines changed: 22 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# auto-generated from /Users/hanxiao/Documents/docarray/scripts/gen_doc_property_mixin.py
2-
from typing import TYPE_CHECKING, Dict, List, Optional
2+
from typing import TYPE_CHECKING, Dict, List, Union
33

44
if TYPE_CHECKING:
55
from ...score import NamedScore
@@ -12,178 +12,158 @@
1212
class _PropertyMixin:
1313
@property
1414
def id(self) -> str:
15-
self._data._set_default_value_if_none('id')
1615
return self._data.id
1716

1817
@id.setter
1918
def id(self, value: str):
2019
self._data.id = value
2120

2221
@property
23-
def parent_id(self) -> Optional[str]:
24-
self._data._set_default_value_if_none('parent_id')
22+
def parent_id(self) -> str:
2523
return self._data.parent_id
2624

2725
@parent_id.setter
2826
def parent_id(self, value: str):
2927
self._data.parent_id = value
3028

3129
@property
32-
def granularity(self) -> Optional[int]:
33-
self._data._set_default_value_if_none('granularity')
30+
def granularity(self) -> int:
3431
return self._data.granularity
3532

3633
@granularity.setter
3734
def granularity(self, value: int):
3835
self._data.granularity = value
3936

4037
@property
41-
def adjacency(self) -> Optional[int]:
42-
self._data._set_default_value_if_none('adjacency')
38+
def adjacency(self) -> int:
4339
return self._data.adjacency
4440

4541
@adjacency.setter
4642
def adjacency(self, value: int):
4743
self._data.adjacency = value
4844

4945
@property
50-
def blob(self) -> Optional[bytes]:
51-
self._data._set_default_value_if_none('blob')
46+
def blob(self) -> bytes:
5247
return self._data.blob
5348

5449
@blob.setter
5550
def blob(self, value: bytes):
5651
self._data.blob = value
5752

5853
@property
59-
def tensor(self) -> Optional['ArrayType']:
60-
self._data._set_default_value_if_none('tensor')
54+
def tensor(self) -> 'ArrayType':
6155
return self._data.tensor
6256

6357
@tensor.setter
6458
def tensor(self, value: 'ArrayType'):
6559
self._data.tensor = value
6660

6761
@property
68-
def mime_type(self) -> Optional[str]:
69-
self._data._set_default_value_if_none('mime_type')
62+
def mime_type(self) -> str:
7063
return self._data.mime_type
7164

7265
@mime_type.setter
7366
def mime_type(self, value: str):
7467
self._data.mime_type = value
7568

7669
@property
77-
def text(self) -> Optional[str]:
78-
self._data._set_default_value_if_none('text')
70+
def text(self) -> str:
7971
return self._data.text
8072

8173
@text.setter
8274
def text(self, value: str):
8375
self._data.text = value
8476

8577
@property
86-
def content(self) -> Optional['DocumentContentType']:
87-
self._data._set_default_value_if_none('content')
78+
def content(self) -> 'DocumentContentType':
8879
return self._data.content
8980

9081
@content.setter
9182
def content(self, value: 'DocumentContentType'):
9283
self._data.content = value
9384

9485
@property
95-
def weight(self) -> Optional[float]:
96-
self._data._set_default_value_if_none('weight')
86+
def weight(self) -> float:
9787
return self._data.weight
9888

9989
@weight.setter
10090
def weight(self, value: float):
10191
self._data.weight = value
10292

10393
@property
104-
def uri(self) -> Optional[str]:
105-
self._data._set_default_value_if_none('uri')
94+
def uri(self) -> str:
10695
return self._data.uri
10796

10897
@uri.setter
10998
def uri(self, value: str):
11099
self._data.uri = value
111100

112101
@property
113-
def tags(self) -> Optional[Dict[str, 'StructValueType']]:
114-
self._data._set_default_value_if_none('tags')
102+
def tags(self) -> Dict[str, 'StructValueType']:
115103
return self._data.tags
116104

117105
@tags.setter
118106
def tags(self, value: Dict[str, 'StructValueType']):
119107
self._data.tags = value
120108

121109
@property
122-
def offset(self) -> Optional[float]:
123-
self._data._set_default_value_if_none('offset')
110+
def offset(self) -> float:
124111
return self._data.offset
125112

126113
@offset.setter
127114
def offset(self, value: float):
128115
self._data.offset = value
129116

130117
@property
131-
def location(self) -> Optional[List[float]]:
132-
self._data._set_default_value_if_none('location')
118+
def location(self) -> List[float]:
133119
return self._data.location
134120

135121
@location.setter
136122
def location(self, value: List[float]):
137123
self._data.location = value
138124

139125
@property
140-
def embedding(self) -> Optional['ArrayType']:
141-
self._data._set_default_value_if_none('embedding')
126+
def embedding(self) -> 'ArrayType':
142127
return self._data.embedding
143128

144129
@embedding.setter
145130
def embedding(self, value: 'ArrayType'):
146131
self._data.embedding = value
147132

148133
@property
149-
def modality(self) -> Optional[str]:
150-
self._data._set_default_value_if_none('modality')
134+
def modality(self) -> str:
151135
return self._data.modality
152136

153137
@modality.setter
154138
def modality(self, value: str):
155139
self._data.modality = value
156140

157141
@property
158-
def evaluations(self) -> Optional[Dict[str, 'NamedScore']]:
159-
self._data._set_default_value_if_none('evaluations')
142+
def evaluations(self) -> Dict[str, Union['NamedScore', Dict]]:
160143
return self._data.evaluations
161144

162145
@evaluations.setter
163-
def evaluations(self, value: Dict[str, 'NamedScore']):
146+
def evaluations(self, value: Dict[str, Union['NamedScore', Dict]]):
164147
self._data.evaluations = value
165148

166149
@property
167-
def scores(self) -> Optional[Dict[str, 'NamedScore']]:
168-
self._data._set_default_value_if_none('scores')
150+
def scores(self) -> Dict[str, Union['NamedScore', Dict]]:
169151
return self._data.scores
170152

171153
@scores.setter
172-
def scores(self, value: Dict[str, 'NamedScore']):
154+
def scores(self, value: Dict[str, Union['NamedScore', Dict]]):
173155
self._data.scores = value
174156

175157
@property
176-
def chunks(self) -> Optional['ChunkArray']:
177-
self._data._set_default_value_if_none('chunks')
158+
def chunks(self) -> 'ChunkArray':
178159
return self._data.chunks
179160

180161
@chunks.setter
181162
def chunks(self, value: 'DocumentArray'):
182163
self._data.chunks = value
183164

184165
@property
185-
def matches(self) -> Optional['MatchArray']:
186-
self._data._set_default_value_if_none('matches')
166+
def matches(self) -> 'MatchArray':
187167
return self._data.matches
188168

189169
@matches.setter

0 commit comments

Comments
 (0)