Skip to content

Commit 2aad8bd

Browse files
revise S.splitName + S.splitMergedWords
1 parent 759a439 commit 2aad8bd

File tree

1 file changed

+48
-6
lines changed

1 file changed

+48
-6
lines changed

scriptum.js

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1523,7 +1523,7 @@ A.retrieveDupes = xs => {
15231523
else uniqs.add(x);
15241524
}
15251525

1526-
return Array.from(dupes):
1526+
return Array.from(dupes);
15271527
};
15281528

15291529

@@ -9347,18 +9347,33 @@ S.splitAscii = s => {
93479347
};
93489348

93499349

9350-
S.splitName = s => {
9350+
S.splitName = titles => s => {
9351+
const titles2 = [];
93519352

9352-
// aslo split edge case "A.B.Foo" to ["A.", "B.", "Foo"]
9353+
for (const title of titles) {
9354+
if (R.iv(`\\b${R.escape(title)}( |\\b)`).test(s)) {
9355+
s = s.replaceAll(R(`\\b${R.escape(title)}( |\\b)`, "giv"), "");
9356+
titles2.push(title);
9357+
}
9358+
}
9359+
9360+
// "Bar, Foo" format
93539361

93549362
if (/,/.test(s)) {
93559363
const [lastName, firstName] = s.split(/, +/),
93569364
[firstName2, ...middleNames] = firstName.split(/[ \-]|(?<=\.)(?=\p{L})/v),
93579365
lastNames = lastName.split(/[ \-]/);
93589366

9359-
return {firstName: firstName2, middleNames, lastNames};
9367+
return {
9368+
title: titles2.join(" "),
9369+
firstName: firstName2,
9370+
middleNames,
9371+
lastNames
9372+
};
93609373
}
93619374

9375+
// "Foo Bar" format
9376+
93629377
else {
93639378
const compos = s.split(/ +/);
93649379

@@ -9369,12 +9384,39 @@ S.splitName = s => {
93699384
middleNames = compos2.slice(1),
93709385
lastNames = compos[compos.length - 1].split(/-/);
93719386

9372-
return {firstName, middleNames, lastNames};
9387+
return {
9388+
title: titles2.join(" "),
9389+
firstName,
9390+
middleNames,
9391+
lastNames
9392+
};
93739393
}
93749394
};
93759395

93769396

9377-
S.splitMergedWords = s => s => s.split(/(?<=\p{Ll})(?=\p{Lu})/v).join(" ");
9397+
S.splitMergedWords = exceptions => s => {
9398+
const xs = s.split(" ").reduce((acc, s2) =>
9399+
A.pushn(s2.split(/(?<=\p{Ll})(?=\p{Lu})/v)) (acc), []);
9400+
9401+
const ys = [];
9402+
9403+
for (let i = 0; i < xs.length; i++) {
9404+
for (const exception of exceptions) {
9405+
if (R.v(`\\b${R.escape(exception)}$`).test(xs[i])) {
9406+
if (i < xs.length - 1) {
9407+
ys.push(xs[i] + xs[i + 1]);
9408+
i++;
9409+
}
9410+
9411+
else ys.push(xs[i]);
9412+
}
9413+
9414+
else ys.push(xs[i]);
9415+
}
9416+
}
9417+
9418+
return ys.join(" ");
9419+
};
93789420

93799421

93809422
//█████ Retrieval █████████████████████████████████████████████████████████████

0 commit comments

Comments
 (0)