Skip to content

Instantly share code, notes, and snippets.

@TurRil
Forked from MSHelm/my_kmeans.R
Created May 7, 2023 13:01
Show Gist options
  • Save TurRil/6883f26678e8ad5fdba8924ef2641bbf to your computer and use it in GitHub Desktop.
Save TurRil/6883f26678e8ad5fdba8924ef2641bbf to your computer and use it in GitHub Desktop.
#' K-means clustering (Lloyd's algorithm)
#'
#' Picks `k` distinct rows of `data` at random as the initial centers, then
#' repeatedly (1) assigns every row to its nearest center by Euclidean
#' distance and (2) moves each center to the mean of the rows assigned to it.
#' Because the initial centers are random, call `set.seed()` beforehand for
#' reproducible results.
#'
#' @param data Numeric matrix or data frame with one observation per row.
#'   Must not contain missing values.
#' @param k Number of clusters; between 1 and `nrow(data)`.
#' @param n_iterations Maximum number of assign/update rounds (at least 1).
#'   The loop stops early once the assignments no longer change.
#' @return Integer vector of length `nrow(data)` holding the cluster index
#'   (in `1..k`) of each row, as assigned in the last round performed.
my_kmeans <- function(data, k, n_iterations) {
  # Work on a numeric matrix: this allows whole-column arithmetic and rejects
  # non-numeric input up front instead of failing deep inside the loop.
  points <- as.matrix(data)
  stopifnot(
    is.numeric(points),
    !anyNA(points),
    is.numeric(k), length(k) == 1, k >= 1, k <= nrow(points),
    is.numeric(n_iterations), length(n_iterations) == 1, n_iterations >= 1
  )
  n_points <- nrow(points)

  # Helper: Euclidean distance from every row of `points` to one center.
  distance_to_center <- function(center) {
    sqrt(rowSums(sweep(points, 2, center)^2))
  }

  # Initialize centers randomly from distinct rows of the data.
  # drop = FALSE keeps `centers` a matrix even when k == 1.
  centers <- points[sample(n_points, k, replace = FALSE), , drop = FALSE]

  # Zero is never a valid cluster index, so the first round always counts as
  # a change for the convergence check below.
  cluster_id <- integer(n_points)

  for (iteration in seq_len(n_iterations)) {
    # Calculate distance of each point to each center (one column per center).
    distances <- matrix(Inf, nrow = n_points, ncol = k)
    for (j in seq_len(k)) {
      distances[, j] <- distance_to_center(centers[j, ])
    }

    # Assign each point to the closest center (ties go to the lowest index).
    new_cluster_id <- apply(distances, 1, which.min)

    # Unchanged assignments mean the centers would not move either, so any
    # further rounds would reproduce exactly the same result.
    if (identical(new_cluster_id, cluster_id)) {
      break
    }
    cluster_id <- new_cluster_id

    # Calculate new centers as the mean of each cluster's members.
    for (i in seq_len(k)) {
      members <- points[cluster_id == i, , drop = FALSE]
      # An empty cluster has no mean (colMeans would yield NaN); keep the
      # previous center so it can still attract points in a later round.
      if (nrow(members) > 0) {
        centers[i, ] <- colMeans(members)
      }
    }
  }

  cluster_id
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment