Skip to content

Commit 954344b

Browse files
k0ushal and palashkaria authored
Feature/new consolidation config (#22125)
* Bumped iresearch * Migrated to new consolidationPolicy properties (maxSkewThreshold, minDeletionRatio) * chore: update search view fields * Fixed IResearch unit tests * Fixed Arangosearch DDL tests * Bumped rta-makedata * Updated Changelog --------- Co-authored-by: Palash Karia <[email protected]>
1 parent 85bcf56 commit 954344b

File tree

12 files changed

+610
-476
lines changed

12 files changed

+610
-476
lines changed

CHANGELOG

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
devel
22
-----
33

4+
* COR-13: New ArangoSearch consolidation policy
5+
- new algorithm that uses maxSkewThreshold and minDeletionRatio
6+
- Bumped ArangoSearch
7+
48
* The query explainer now displays the vector index used by EnumerateNearVectorNode.
59

610
* FE-636: bump webpack-dev-middleware from 5.3.3 to 5.3.4.

arangod/IResearch/IResearchDataStoreMeta.cpp

Lines changed: 11 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -92,22 +92,6 @@ createConsolidationPolicy<irs::index_utils::ConsolidateTier>(
9292
irs::index_utils::ConsolidateTier options;
9393
VPackBuilder properties;
9494

95-
{
96-
// optional size_t
97-
constexpr std::string_view kFieldName = "segmentsBytesFloor";
98-
99-
auto field = slice.get(kFieldName);
100-
if (!field.isNone()) {
101-
if (!field.isNumber<size_t>()) {
102-
errorField = kFieldName;
103-
104-
return {};
105-
}
106-
107-
options.floor_segment_bytes = field.getNumber<size_t>();
108-
}
109-
}
110-
11195
{
11296
// optional size_t
11397
constexpr std::string_view kFieldName = "segmentsBytesMax";
@@ -125,60 +109,44 @@ createConsolidationPolicy<irs::index_utils::ConsolidateTier>(
125109
}
126110

127111
{
128-
// optional size_t
129-
constexpr std::string_view kFieldName = "segmentsMax";
130-
131-
auto field = slice.get(kFieldName);
132-
if (!field.isNone()) {
133-
if (!field.isNumber<size_t>()) {
134-
errorField = kFieldName;
135-
136-
return {};
137-
}
138-
139-
options.max_segments = field.getNumber<size_t>();
140-
}
141-
}
142-
143-
{
144-
// optional size_t
145-
constexpr std::string_view kFieldName = "segmentsMin";
112+
// optional double
113+
constexpr std::string_view kFieldName = "maxSkewThreshold";
146114

147115
auto field = slice.get(kFieldName);
148116
if (!field.isNone()) {
149-
if (!field.isNumber<size_t>()) {
117+
if (!field.isNumber<double>() || field.getNumber<double>() < 0. ||
118+
field.getNumber<double>() > 1.) {
150119
errorField = kFieldName;
151120

152121
return {};
153122
}
154123

155-
options.min_segments = field.getNumber<size_t>();
124+
options.max_skew_threshold = field.getNumber<double>();
156125
}
157126
}
158127

159128
{
160129
// optional double
161-
constexpr std::string_view kFieldName = "minScore";
130+
constexpr std::string_view kFieldName = "minDeletionRatio";
162131

163132
auto field = slice.get(kFieldName);
164133
if (!field.isNone()) {
165-
if (!field.isNumber<double>()) {
134+
if (!field.isNumber<double>() || field.getNumber<double>() < 0. ||
135+
field.getNumber<double>() > 1.) {
166136
errorField = kFieldName;
167137

168138
return {};
169139
}
170140

171-
options.min_score = field.getNumber<double>();
141+
options.min_deletion_ratio = field.getNumber<double>();
172142
}
173143
}
174144

175145
properties.openObject();
176146
properties.add("type", VPackValue(kPolicyTier));
177-
properties.add("segmentsBytesFloor", VPackValue(options.floor_segment_bytes));
178147
properties.add("segmentsBytesMax", VPackValue(options.max_segments_bytes));
179-
properties.add("segmentsMax", VPackValue(options.max_segments));
180-
properties.add("segmentsMin", VPackValue(options.min_segments));
181-
properties.add("minScore", VPackValue(options.min_score));
148+
properties.add("maxSkewThreshold", VPackValue(options.max_skew_threshold));
149+
properties.add("minDeletionRatio", VPackValue(options.min_deletion_ratio));
182150
properties.close();
183151

184152
return {irs::index_utils::MakePolicy(options), std::move(properties)};

js/apps/system/_admin/aardvark/APP/react/src/views/views/arangoSearchView/SearchJSONSchema.ts

Lines changed: 9 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -219,41 +219,25 @@ export const arangoSearchViewJSONSchema: JSONSchemaType<ArangoSearchViewProperti
219219
type: {
220220
const: "tier"
221221
},
222-
segmentsMin: {
223-
type: "integer",
224-
nullable: false,
225-
minimum: 0,
226-
maximum: {
227-
$data: "1/segmentsMax"
228-
},
229-
default:50
230-
},
231-
segmentsMax: {
232-
type: "integer",
233-
nullable: false,
234-
minimum: {
235-
$data: "1/segmentsMin"
236-
},
237-
default: 200
238-
},
239222
segmentsBytesMax: {
240223
type: "integer",
241224
nullable: false,
242225
minimum: 0,
243226
default: 8589934592
244227
},
245-
segmentsBytesFloor: {
246-
type: "integer",
228+
maxSkewThreshold: {
229+
type: "number",
247230
nullable: false,
248-
minimum: 0,
249-
default: 25165824
231+
minimum: 0.0,
232+
maximum: 1.0,
233+
default: 0.4
250234
},
251-
minScore: {
235+
minDeletionRatio: {
252236
type: "number",
253237
nullable: false,
254-
minimum: 0,
255-
maximum: 1,
256-
default: 0
238+
minimum: 0.0,
239+
maximum: 1.0,
240+
default: 0.5
257241
}
258242
},
259243
additionalProperties: false

js/apps/system/_admin/aardvark/APP/react/src/views/views/arangoSearchView/useArangoSearchFieldsData.ts

Lines changed: 9 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -118,32 +118,23 @@ const arangoSearchFields = [
118118
] as FieldType[];
119119

120120
const tierConsolidationPolicyFields = [
121-
{
122-
name: "consolidationPolicy.segmentsMin",
123-
label: "Segments Min",
124-
type: "number",
125-
tooltip:
126-
"The minimum number of segments that will be evaluated as candidates for consolidation."
127-
},
128-
{
129-
name: "consolidationPolicy.segmentsMax",
130-
label: "Segments Max",
131-
type: "number",
132-
tooltip:
133-
"The maximum number of segments that will be evaluated as candidates for consolidation."
134-
},
135121
{
136122
name: "consolidationPolicy.segmentsBytesMax",
137123
label: "Segments Bytes Max",
138124
type: "number",
139125
tooltip: "Maximum allowed size of all consolidated segments in bytes."
140126
},
141127
{
142-
name: "consolidationPolicy.segmentsBytesFloor",
143-
label: "Segments Bytes Floor",
128+
name: "consolidationPolicy.maxSkewThreshold",
129+
label: "Max Skew Threshold",
144130
type: "number",
145-
tooltip:
146-
"Defines the value (in bytes) to treat all smaller segments as equal for consolidation selection."
131+
tooltip: "The maximum allowed skew threshold for consolidation."
132+
},
133+
{
134+
name: "consolidationPolicy.minDeletionRatio",
135+
label: "Min Deletion Ratio",
136+
type: "number",
137+
tooltip: "The minimum allowed deletion ratio for consolidation."
147138
}
148139
];
149140
const bytesAccumConsolidationPolicyFields = [

tests/IResearch/IResearchInvertedIndexMetaTest.cpp

Lines changed: 16 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -825,14 +825,6 @@ TEST_F(IResearchInvertedIndexMetaTest, testDefaults) {
825825
auto type = typeSlice.copyString();
826826
ASSERT_EQ("tier", type);
827827
}
828-
{
829-
ASSERT_TRUE(propSlice.hasKey("segmentsBytesFloor"));
830-
auto valueSlice = propSlice.get("segmentsBytesFloor");
831-
ASSERT_TRUE(valueSlice.isNumber());
832-
size_t segmentsBytesFloor;
833-
ASSERT_TRUE(getNumber(segmentsBytesFloor, valueSlice));
834-
ASSERT_EQ(24 * (1ul << 20), segmentsBytesFloor);
835-
}
836828
{
837829
ASSERT_TRUE(propSlice.hasKey("segmentsBytesMax"));
838830
auto valueSlice = propSlice.get("segmentsBytesMax");
@@ -842,28 +834,27 @@ TEST_F(IResearchInvertedIndexMetaTest, testDefaults) {
842834
ASSERT_EQ(8 * (1ul << 30), segmentsBytesMax);
843835
}
844836
{
845-
ASSERT_TRUE(propSlice.hasKey("segmentsMax"));
846-
auto typeSlice = propSlice.get("segmentsMax");
847-
ASSERT_TRUE(typeSlice.isNumber());
848-
size_t segmentsMax;
849-
ASSERT_TRUE(getNumber(segmentsMax, typeSlice));
850-
ASSERT_EQ(200, segmentsMax);
837+
ASSERT_TRUE(propSlice.hasKey("maxSkewThreshold"));
838+
auto valueSlice = propSlice.get("maxSkewThreshold");
839+
ASSERT_TRUE(valueSlice.isNumber());
840+
double maxSkewThreshold;
841+
ASSERT_TRUE(getNumber<double>(maxSkewThreshold, valueSlice));
842+
ASSERT_EQ(0.4, maxSkewThreshold);
851843
}
852844
{
853-
ASSERT_TRUE(propSlice.hasKey("segmentsMin"));
854-
auto valueSlice = propSlice.get("segmentsMin");
845+
ASSERT_TRUE(propSlice.hasKey("minDeletionRatio"));
846+
auto valueSlice = propSlice.get("minDeletionRatio");
855847
ASSERT_TRUE(valueSlice.isNumber());
856-
size_t segmentsMin;
857-
ASSERT_TRUE(getNumber(segmentsMin, valueSlice));
858-
ASSERT_EQ(50, segmentsMin);
848+
double minDeletionRatio;
849+
ASSERT_TRUE(getNumber<double>(minDeletionRatio, valueSlice));
850+
ASSERT_EQ(0.5, minDeletionRatio);
859851
}
852+
// Old consolidationPolicy properties.
860853
{
861-
ASSERT_TRUE(propSlice.hasKey("minScore"));
862-
auto valueSlice = propSlice.get("minScore");
863-
ASSERT_TRUE(valueSlice.isNumber());
864-
size_t minScore;
865-
ASSERT_TRUE(getNumber(minScore, valueSlice));
866-
ASSERT_EQ(0, minScore);
854+
ASSERT_FALSE(propSlice.hasKey("segmentsBytesFloor"));
855+
ASSERT_FALSE(propSlice.hasKey("segmentsMax"));
856+
ASSERT_FALSE(propSlice.hasKey("segmentsMin"));
857+
ASSERT_FALSE(propSlice.hasKey("minScore"));
867858
}
868859
}
869860

tests/IResearch/IResearchLinkTest.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2470,6 +2470,7 @@ TEST_F(IResearchLinkMetricsTest, TimeConsolidate) {
24702470
if (consolidationTime1 > 0 || consolidationTime2 > 0) {
24712471
return;
24722472
}
2473+
insert(200000, 300000, 2);
24732474
}
24742475
auto start = std::chrono::steady_clock::now();
24752476
auto check = [&] {

0 commit comments

Comments
 (0)