Last active
August 10, 2024 01:43
-
-
Save KBeDevel/25d608dfe32417067be337ab29a0c3a5 to your computer and use it in GitHub Desktop.
Similarity calculator for string inputs implemented in TypeScript
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Get the similarity between two strings using the Jaro-Winkler metric.
 * Based on {@link https://gist.github.com/sumn2u/0e0b5d9505ad096284928a987ace13fb#file-jaro-wrinker-js}
 *
 * Strings that are equal after trimming surrounding whitespace are treated as
 * identical. Otherwise the inputs are compared exactly as given: the
 * comparison is case-sensitive, whitespace counts, and characters are
 * compared per UTF-16 code unit.
 *
 * @param firstString - First string to compare.
 * @param secondString - Second string to compare.
 * @param config - Optional output settings.
 * @returns The similarity as a percentage in `[0, 100]`, or as a ratio in
 * `[0, 1]` when `config.asRatio` is true. Returns `0` when the strings share
 * no matching characters.
 *
 * @example
 * calculateStringsSimilarity('MARTHA', 'MARHTA', { asRatio: true }) // ≈ 0.9611
 */
export function calculateStringsSimilarity(
  firstString: string,
  secondString: string,
  config?: {
    /**
     * If true, the function will return the similarity as a float number between 0 and 1.
     * If false, the function will return the similarity percentage as a float between 0 and 100.
     */
    asRatio?: boolean
  },
): number {
  if (firstString.trim() === secondString.trim())
    return config?.asRatio ? 1 : 100

  const firstLength = firstString.length
  const secondLength = secondString.length

  // Two characters only count as a match when their positions are at most
  // `range` apart. Clamped at 0 so very short inputs do not yield a negative
  // window.
  const range = Math.max(
    0,
    Math.floor(Math.max(firstLength, secondLength) / 2) - 1,
  )
  const matchesInFirstString = new Array<boolean>(firstLength).fill(false)
  const matchesInSecondString = new Array<boolean>(secondLength).fill(false)
  let matchesFound = 0

  for (let firstIndex = 0; firstIndex < firstLength; ++firstIndex) {
    const low = Math.max(0, firstIndex - range)
    const high = Math.min(secondLength - 1, firstIndex + range)
    for (let secondIndex = low; secondIndex <= high; ++secondIndex) {
      if (
        matchesInSecondString[secondIndex] ||
        firstString.charAt(firstIndex) !== secondString.charAt(secondIndex)
      )
        continue
      // Each character may be used for at most one match; take the first
      // free candidate inside the window and move on.
      matchesInFirstString[firstIndex] = true
      matchesInSecondString[secondIndex] = true
      ++matchesFound
      break
    }
  }

  if (matchesFound === 0) return 0

  // Walk the matched characters of both strings in order and count the
  // positions where they disagree. Each transposition is seen twice (once
  // from each side), which is why the formula below halves the count.
  let outOfOrderMatches = 0
  let secondCursor = 0
  for (let firstIndex = 0; firstIndex < firstLength; ++firstIndex) {
    if (!matchesInFirstString[firstIndex]) continue
    // Skip to the next matched character of the second string. Both strings
    // hold the same number of matched characters, so the bound is only a
    // safety net.
    while (secondCursor < secondLength && !matchesInSecondString[secondCursor])
      ++secondCursor
    if (firstString.charAt(firstIndex) !== secondString.charAt(secondCursor))
      ++outOfOrderMatches
    ++secondCursor
  }

  // Jaro similarity.
  let weight =
    (matchesFound / firstLength +
      matchesFound / secondLength +
      (matchesFound - outOfOrderMatches / 2) / matchesFound) /
    3

  // Winkler adjustment: sufficiently similar strings get a bonus for a
  // shared prefix of up to `maxPrefixLength` characters.
  const boostThreshold = 0.7
  const scoreScalingFactor = 0.1
  const maxPrefixLength = 4
  if (weight > boostThreshold) {
    const prefixLimit = Math.min(maxPrefixLength, firstLength, secondLength)
    let lengthPrefix = 0
    while (
      lengthPrefix < prefixLimit &&
      firstString.charAt(lengthPrefix) === secondString.charAt(lengthPrefix)
    )
      ++lengthPrefix
    weight += lengthPrefix * scoreScalingFactor * (1 - weight)
  }

  return config?.asRatio ? weight : weight * 100
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment