Pure TypeScript implementations of common clustering algorithms.
1. Interface | 2. Algorithms | 3. Utilities
import * as clA from "t-ski/clustering-algorithms"
type TVector = number[]
type TMatrix[] = number[][]
type TCluster = TVector[]
interface AClustering<O = TCluster> {
clusters: O[]
}
clA.<algorithm>(data: I[], ...args): AClustering<I>
clA.<algorithm>(data: number[]|TVector[], k: number = 2): AClustering
new clA.KMeans(DATA, 3).clusters;
O(n²) | Θ(n)
clA.KMeans(data, k?)
O(n²) | Θ(n)
clA.KMeansPP(data, k?)
O(n²) | Θ(n)
clA.KMedoids(data, k?)
clA.<algorithm>(data: number[]|TVector[], epsilon?: number): AClustering
If not defined, the epsilon parameter is estimated from the given data.
new clA.MeanShift(DATA, 2).clusters;
O = Θ(n²)
clA.MeanShift(data, epsilon?): AClustering & { noise: TCluster }
O(n²) | Θ(n log n)
clA.DBSCAN(data, epsilon?, minPoints: number = 4): AClustering & { noise: TCluster }
O(n²) | Θ(n log n)
clA.OPTICS(data, epsilon?, minPoints: number = 4): AClustering & { noise: TCluster }
clA.<algorithm>(adjacencyMatrix: TMatrix, k: number = 2): AClustering<number[]>
Input to graph-based clustering is a target graph's adjacency matrix. Output corresponds to clusters of related graph node indexes.
new clA.ConnectedComponents(DATA).clusters;
O = Θ(n³)
clA.ConnectedComponents(adjacencyMatrix)
O = Θ(n³)
clA.Divisive(adjacencyMatrix, k: number = 2)
O = Θ(n³)
clA.Markov(adjacencyMatrix, e: number = 2, r: number = 2)
clA.<algorithm>(data: number[]|TVector[], k: number = 2): AClustering
new clA.AverageLinkage(DATA, 4).clusters;
O = Θ(n³)
clA.AverageLinkage(data, k?)
O = Θ(n³)
clA.CentroidLinkage(data, k?)
O = Θ(n³)
clA.CompleteLinkage(data, k?)
O = Θ(n³)
clA.HausdorffLinkage(data, k?)
O = Θ(n³)
clA.MedianLinkage(data, k?)
O = Θ(n³)
clA.SingleLinkage(data, k?)
O = Θ(n³)
clA.Ward(data, k?)
f: (ℝ×…×ℝ)² ↦ ℕ
clA.util.distance.<metric>(p1: TVector, p2: TVector = []): number
clA.AClustering.setDistanceMetric(util.distance.manhattan);
const clusters = new clA.KMeans(DATA, 3).clusters;
clA.util.distance.euclidean(p1: TVector, p2: TVector = []): number
clA.util.distance.manhattan(p1: TVector, p2: TVector = []): number
clA.util.distance.chebyshev(p1: TVector, p2: TVector = []): number
clA.util.distance.cosine(p1: TVector, p2: TVector = []): number
clA.util.quality.<metric>(clusters: TCluster[]): number
const clusters = new clA.KMeans(DATA, 3).clusters;
const clusteringQuality = clA.util.quality.silhouetteCoefficient(clusters);
f: ℕ×(ℝ×…×ℝ) ↦ [-1,1] → 1
clA.util.quality.silhouetteCoefficient(clusters: TCluster[]): number
f: ℕ×(ℝ×…×ℝ) ↦ [0,∞) → ∞
clA.util.quality.dunnIndex(clusters: TCluster[]): number
f: ℕ×(ℝ×…×ℝ) ↦ [0,∞) → 0
clA.util.quality.daviesBouldinIndex(clusters: TCluster[]): number
The VectorDataMap
utility class helps with associating arbitrary entites with data points (view example).
type TDataPoint<T> = [ TVector, T ]
new clA.util.VectorDataMap<T>(data: TDataPoint<T>[]): {
vectors: TVector[];
getEntity: (vector: TVector) => T|T[];
getVector: (entity: T) => TVector;
getCluster: (clusters: TCluster[], vector: TVector) => number|number[];
getCluster: (clusters: TCluster[], entity: T) => number|number[];
}
© Thassilo Martin Schiepanski