Skip to content

Commit 31402d6

Browse files
revise collator
1 parent 355582b commit 31402d6

File tree

1 file changed

+66
-5
lines changed

1 file changed

+66
-5
lines changed

scriptum.js

Lines changed: 66 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2455,6 +2455,30 @@ A.histo = f => init => xs => {
24552455
};
24562456

24572457

2458+
//█████ Natural Language Processing ███████████████████████████████████████████
2459+
2460+
2461+
// retrieve the co-occurrence of words in a sentence
2462+
2463+
A.cooccur = words => {
  // Collects every ordered pair (earlier element, later element) of `words`.
  // Each entry is {fst, snd, distance}, where distance is the number of
  // elements lying between the two positions (0 for direct neighbours).
  // Returns an empty array when there are fewer than two elements.

  const total = words.length,
    acc = [];

  for (let left = 0; left + 1 < total; left++) {

    // `gap` counts the elements skipped between `left` and its partner
    for (let gap = 0; left + gap + 1 < total; gap++) {
      acc.push({
        fst: words[left],
        snd: words[left + gap + 1],
        distance: gap,
      });
    }
  }

  return acc;
};
2480+
2481+
24582482
/*█████████████████████████████████████████████████████████████████████████████
24592483
███████████████████████████████████████████████████████████████████████████████
24602484
███████████████████████████████████ BOOLEAN ███████████████████████████████████
@@ -11888,7 +11912,7 @@ Object.defineProperty(S.Norm, "equivalence", {
1188811912
});
1188911913

1189011914

11891-
S.Norm.latinise = ({inclAlpha}) => doc => {
11915+
S.Norm.latinize = ({inclAlpha}) => doc => {
1189211916
let s = "";
1189311917

1189411918
for (const c of doc) {
@@ -12049,7 +12073,10 @@ Sensitivity modes:
1204912073
• variant: normal
1205012074
• case: a !== A
1205112075
• accent: a === A, a !== ä
12052-
• base: a === Ä */
12076+
• base: a === Ä
12077+
12078+
All listed sensitivities assume usage to be "sort". Comparisons are more strict
12079+
with usage set to "search" (ae === Ä) */
1205312080

1205412081

1205512082
// collator
@@ -12060,19 +12087,37 @@ S.Ctor = {};
1206012087
// options
1206112088

1206212089

12063-
S.Ctor.case = {
12090+
// option preset: usage "sort" combined with sensitivity "case"
S.Ctor.caseSort = {usage: "sort", sensitivity: "case"};
12094+
12095+
12096+
// option preset: usage "search" combined with sensitivity "case"
S.Ctor.caseSearch = {usage: "search", sensitivity: "case"};
1206712100

1206812101

12069-
S.Ctor.accent = {
12102+
// option preset: usage "sort" combined with sensitivity "accent"
S.Ctor.accentSort = {usage: "sort", sensitivity: "accent"};
12106+
12107+
12108+
// option preset: usage "search" combined with sensitivity "accent"
S.Ctor.accentSearch = {usage: "search", sensitivity: "accent"};
1207312112

1207412113

12075-
S.Ctor.base = {
12114+
// option preset: usage "sort" combined with sensitivity "base"
S.Ctor.baseSort = {usage: "sort", sensitivity: "base"};
12118+
12119+
12120+
// option preset: usage "search" combined with sensitivity "base"
S.Ctor.baseSearch = {usage: "search", sensitivity: "base"};
@@ -12084,6 +12129,22 @@ S.Ctor.cmp = (locale, opt) =>
1208412129
new Intl.Collator(locale.slice(0, 2), opt).compare;
1208512130

1208612131

12132+
// tries sort and search as usages to achieve equality
12133+
12134+
S.Ctor.cmpBiased = (locale, opt) => {
  // Builds a comparator backed by two Intl.Collator instances that share
  // `opt` but differ in `usage` ("sort" and "search"). Only the first two
  // characters of `locale` are passed to the collator. `opt` is copied, not
  // mutated; any `usage` it carries is overridden.

  const collate = usage => {
    const merged = Object.assign({}, opt, {usage});
    return new Intl.Collator(locale.slice(0, 2), merged).compare;
  };

  const bySort = collate("sort"),
    bySearch = collate("search");

  // the "sort" verdict wins when it reports equality, otherwise the
  // "search" comparison decides the result
  return (x, y) => {
    const signum = bySort(x, y);
    return signum === ordering.eq ? signum : bySearch(x, y);
  };
};
12146+
12147+
1208712148
// pass key as option property k
1208812149

1208912150
S.Ctor.cmpObj = (locale, opt) => (o, p) =>

0 commit comments

Comments
 (0)