Perform k-means clustering on text embeddings.
Examples
if (FALSE) { # \dontrun{
  # Attach every package the example relies on up front:
  # dplyr supplies mutate(), ggplot2 the plotting layer, uwot supplies umap().
  library(dplyr)
  library(ggplot2)
  library(uwot)

  # Cluster the embedded documents into k = 3 groups.
  # `docs_embedded` must already exist; the steps below read the `embedding`
  # and `cluster` columns from the result.
  docs_clustered <- docs_embedded |>
    hf_cluster_texts(k = 3)

  # Stack the per-document embeddings row-wise into a single matrix
  # (assumes `embedding` is a list column of equal-length numeric vectors).
  emb_matrix <- do.call(rbind, docs_clustered$embedding)

  # Reduce dimensions with UMAP; the first two columns are used for plotting.
  coords <- umap(emb_matrix)

  # Attach the UMAP coordinates and plot the documents coloured by cluster.
  docs_clustered |>
    mutate(umap_1 = coords[, 1], umap_2 = coords[, 2]) |>
    ggplot(aes(umap_1, umap_2, color = factor(cluster))) +
    geom_point(size = 3)
} # }