Skip to content

Instantly share code, notes, and snippets.

@KBeDevel
Last active August 10, 2024 01:43
Show Gist options
  • Save KBeDevel/25d608dfe32417067be337ab29a0c3a5 to your computer and use it in GitHub Desktop.
Save KBeDevel/25d608dfe32417067be337ab29a0c3a5 to your computer and use it in GitHub Desktop.
Similarity calculator for string inputs implemented in TypeScript
/**
 * Get the Jaro-Winkler similarity between two strings.
 * Based on {@link https://gist.github.com/sumn2u/0e0b5d9505ad096284928a987ace13fb#file-jaro-wrinker-js}
 *
 * Strings that are equal once leading/trailing whitespace is trimmed
 * short-circuit to the maximum score. Otherwise the comparison runs on the
 * strings exactly as given, so inner/outer whitespace and letter case are
 * significant, and characters are compared per UTF-16 code unit.
 *
 * @param firstString - First string to compare.
 * @param secondString - Second string to compare.
 * @param config - Optional output settings.
 * @returns The similarity score: 0 when the strings share no matching
 * characters, the maximum (1 or 100, see `config.asRatio`) when they are equal
 * after trimming, and a value in between otherwise.
 */
export function calculateStringsSimilarity(
  firstString: string,
  secondString: string,
  config?: {
    /**
     * If true, the function will return the similarity as a float number between 0 and 1.
     * If false or omitted, the function will return the similarity as a float between 0 and 100.
     */
    asRatio?: boolean
  },
): number {
  const scale = config?.asRatio ? 1 : 100
  if (firstString.trim() === secondString.trim()) return scale

  const firstLength = firstString.length
  const secondLength = secondString.length

  // Two characters only count as a match when their positions are at most
  // `range` apart. Clamped at 0 so one-character inputs do not yield -1.
  const range = Math.max(
    0,
    Math.floor(Math.max(firstLength, secondLength) / 2) - 1,
  )
  const matchedInFirst = new Array<boolean>(firstLength).fill(false)
  const matchedInSecond = new Array<boolean>(secondLength).fill(false)

  // Phase 1: greedily pair each character of the first string with the
  // earliest unused equal character inside its window of the second string.
  let matchesFound = 0
  for (let i = 0; i < firstLength; ++i) {
    const low = Math.max(0, i - range)
    // Upper bound is the last valid index, never one past the end.
    const high = Math.min(i + range, secondLength - 1)
    for (let j = low; j <= high; ++j) {
      if (
        !matchedInSecond[j] &&
        firstString.charAt(i) === secondString.charAt(j)
      ) {
        matchedInFirst[i] = true
        matchedInSecond[j] = true
        ++matchesFound
        break
      }
    }
  }
  if (matchesFound === 0) return 0

  // Phase 2: walk the matched characters of both strings in order; every
  // position where the two sequences disagree is half a transposition.
  let secondCursor = 0
  let halfTranspositions = 0
  for (let i = 0; i < firstLength; ++i) {
    if (!matchedInFirst[i]) continue
    // Skip unmatched characters of the second string. Both strings hold the
    // same number of matched characters, so a matched one always remains;
    // the length check is only a safety net against running off the end.
    while (secondCursor < secondLength && !matchedInSecond[secondCursor])
      ++secondCursor
    if (firstString.charAt(i) !== secondString.charAt(secondCursor))
      ++halfTranspositions
    ++secondCursor
  }

  // Jaro similarity (the half-transposition count is halved, not floored,
  // as in the referenced implementation).
  let weight =
    (matchesFound / firstLength +
      matchesFound / secondLength +
      (matchesFound - halfTranspositions / 2) / matchesFound) /
    3

  // Winkler adjustment: scores above 0.7 are boosted by 0.1 per character of
  // common prefix, counting at most 4 prefix characters.
  const scoreScalingFactor = 0.1
  if (weight > 0.7) {
    const maxPrefix = Math.min(4, firstLength, secondLength)
    let lengthPrefix = 0
    while (
      lengthPrefix < maxPrefix &&
      firstString[lengthPrefix] === secondString[lengthPrefix]
    )
      ++lengthPrefix
    weight = weight + lengthPrefix * scoreScalingFactor * (1 - weight)
  }

  return weight * scale
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment