Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updates to enable HDBSCAN #208

Merged
merged 40 commits into from
May 27, 2021
Merged
Show file tree
Hide file tree
Changes from 37 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
046f703
Allowing epilogue in knn graph connection function
cjnolet Apr 20, 2021
086b08a
Fixing style
cjnolet Apr 20, 2021
37d2e0d
Adding missing argument
cjnolet Apr 23, 2021
709c040
Fixing typename
cjnolet Apr 23, 2021
b1fbc63
Updates
cjnolet Apr 23, 2021
81b630a
Fixing style
cjnolet Apr 23, 2021
6c52542
Updating to get hdbscan to compile
cjnolet Apr 23, 2021
4be3d24
Some updates
cjnolet Apr 26, 2021
1aaa7d5
changes
cjnolet Apr 29, 2021
dd6e537
agglomerative labeling to accept device arrays directly
cjnolet Apr 30, 2021
e3085cb
Cleaning up inputs to some of the single linkage prims
cjnolet Apr 30, 2021
ec1cca5
removing deprecated rmm policy usage
divyegala May 4, 2021
d32677d
Changes
cjnolet May 5, 2021
43f7cf8
removing deprecated rmm policy usage
divyegala May 4, 2021
e0ef5b3
alpha to weight alteration for precision
divyegala May 10, 2021
d809630
merge
divyegala May 10, 2021
1678c66
resolving errors
divyegala May 10, 2021
8018ea5
merge again
divyegala May 10, 2021
404e5ce
Merge branch 'fea-020-hdbscan' of github.com:cjnolet/raft into fea-02…
divyegala May 10, 2021
5eda4aa
trying alpha for all
divyegala May 11, 2021
030b3a9
double precision weight alteration
divyegala May 12, 2021
635d018
Checking in
cjnolet May 14, 2021
26dff4d
Fixing style
cjnolet May 19, 2021
55f274c
Removing unused epilogue from mst
cjnolet May 19, 2021
eb69413
Removing mst epilogue functor
cjnolet May 19, 2021
9f39d69
Merge branch 'branch-0.20' into fea-020-hdbscan
cjnolet May 19, 2021
dfebf10
Merge branch 'branch-21.06' into fea-020-hdbscan
cjnolet May 19, 2021
50d1cdc
Getting test to build
cjnolet May 20, 2021
c654156
Removing mstepiloguenoop since it's no longer being used
cjnolet May 22, 2021
136529a
another template param for weight alteration
divyegala May 24, 2021
e4b0f91
merge upstream
divyegala May 24, 2021
d7e93d9
renaming confusing variable name
divyegala May 24, 2021
e51d08b
working through merge
divyegala May 26, 2021
beea020
merging mst template PR
divyegala May 26, 2021
86cbf42
removing unnecessary comments
divyegala May 26, 2021
eb92e26
Review feedback
cjnolet May 27, 2021
e337c0d
Merge branch 'branch-21.06' into fea-020-hdbscan
cjnolet May 27, 2021
8b1e344
Update cpp/include/raft/sparse/hierarchy/detail/agglomerative.cuh
cjnolet May 27, 2021
1933520
Fixing bad merge
cjnolet May 27, 2021
1bc3e68
Merge branch 'fea-020-hdbscan' of github.com:cjnolet/raft into fea-02…
cjnolet May 27, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 16 additions & 68 deletions cpp/include/raft/sparse/hierarchy/detail/agglomerative.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,15 @@ class UnionFind {
};

/**
* Standard single-threaded agglomerative labeling on host. This should work
* well for smaller sizes of m. This is a C++ port of the original reference
* implementation of HDBSCAN.
* Agglomerative labeling on host. This has not been found to be a bottleneck
* in the algorithm. A parallel version of this can be done using a parallel
* variant of Kruskal's MST algorithm
* (ref http://cucis.ece.northwestern.edu/publications/pdf/HenPat12.pdf),
* which breaks apart the sorted MST results into overlapping subsets and
* independently runs Kruskal's algorithm on each subset, merging them back
* together into a single hierarchy when complete. Unfortunately,
* this is nontrivial and the speedup wouldn't bit e useful until this
cjnolet marked this conversation as resolved.
Show resolved Hide resolved
* becomes a bottleneck.
*
* @tparam value_idx
* @tparam value_t
Expand All @@ -91,9 +97,8 @@ class UnionFind {
template <typename value_idx, typename value_t>
void build_dendrogram_host(const handle_t &handle, const value_idx *rows,
const value_idx *cols, const value_t *data,
size_t nnz, value_idx *children,
rmm::device_uvector<value_t> &out_delta,
rmm::device_uvector<value_idx> &out_size) {
size_t nnz, value_idx *children, value_t *out_delta,
value_idx *out_size) {
auto d_alloc = handle.get_device_allocator();
auto stream = handle.get_stream();

Expand All @@ -103,8 +108,6 @@ void build_dendrogram_host(const handle_t &handle, const value_idx *rows,
std::vector<value_idx> mst_dst_h(n_edges);
std::vector<value_t> mst_weights_h(n_edges);

printf("n_edges: %d\n", n_edges);

update_host(mst_src_h.data(), rows, n_edges, stream);
update_host(mst_dst_h.data(), cols, n_edges, stream);
update_host(mst_weights_h.data(), data, n_edges, stream);
Expand All @@ -113,12 +116,14 @@ void build_dendrogram_host(const handle_t &handle, const value_idx *rows,

std::vector<value_idx> children_h(n_edges * 2);
std::vector<value_idx> out_size_h(n_edges);
std::vector<value_t> out_delta_h(n_edges);

UnionFind<value_idx, value_t> U(nnz + 1);

for (value_idx i = 0; i < nnz; i++) {
value_idx a = mst_src_h[i];
value_idx b = mst_dst_h[i];
value_t delta = mst_weights_h[i];

value_idx aa = U.find(a);
value_idx bb = U.find(b);
Expand All @@ -127,72 +132,15 @@ void build_dendrogram_host(const handle_t &handle, const value_idx *rows,

children_h[children_idx] = aa;
children_h[children_idx + 1] = bb;
out_delta_h[i] = delta;
out_size_h[i] = U.size[aa] + U.size[bb];

U.perform_union(aa, bb);
}

out_size.resize(n_edges, stream);

printf("Finished dendrogram\n");

raft::update_device(children, children_h.data(), n_edges * 2, stream);
raft::update_device(out_size.data(), out_size_h.data(), n_edges, stream);
}

/**
* Parallel agglomerative labeling. This amounts to a parallel Kruskal's
* MST algorithm, which breaks apart the sorted MST results into overlapping
* subsets and independently runs Kruskal's algorithm on each subset,
* merging them back together into a single hierarchy when complete.
*
* This outputs the same format as the reference HDBSCAN, but as 4 separate
* arrays, rather than a single 2D array.
*
* Reference: http://cucis.ece.northwestern.edu/publications/pdf/HenPat12.pdf
*
* TODO: Investigate potential for the following end-to-end single-hierarchy batching:
* For each of k (independent) batches over the input:
* - Sample n elements from X
* - Compute mutual reachability graph of batch
* - Construct labels from batch
*
* The sampled datasets should have some overlap across batches. This will
* allow for the cluster hierarchies to be merged. Being able to batch
* will reduce the memory cost so that the full n^2 pairwise distances
* don't need to be materialized in memory all at once.
*
* @tparam value_idx
* @tparam value_t
* @param[in] handle the raft handle
* @param[in] rows src edges of the sorted MST
* @param[in] cols dst edges of the sorted MST
* @param[in] nnz the number of edges in the sorted MST
* @param[out] out_src parents of output
* @param[out] out_dst children of output
* @param[out] out_delta distances of output
* @param[out] out_size cluster sizes of output
* @param[in] k_folds number of folds for parallelizing label step
*/
template <typename value_idx, typename value_t>
void build_dendrogram_device(const handle_t &handle, const value_idx *rows,
const value_idx *cols, const value_t *data,
value_idx nnz, value_idx *children,
value_t *out_delta, value_idx *out_size,
value_idx k_folds) {
ASSERT(k_folds < nnz / 2, "k_folds must be < n_edges / 2");
/**
* divide (sorted) mst coo into overlapping subsets. Easiest way to do this is to
* break it into k-folds and iterate through two folds at a time.
*/

// 1. Generate ranges for the overlapping subsets

// 2. Run union-find in parallel for each pair of folds

// 3. Sort individual label hierarchies

// 4. Merge label hierarchies together
raft::update_device(out_size, out_size_h.data(), n_edges, stream);
raft::update_device(out_delta, out_delta_h.data(), n_edges, stream);
}

template <typename value_idx>
Expand Down
72 changes: 21 additions & 51 deletions cpp/include/raft/sparse/hierarchy/detail/mst.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <raft/cudart_utils.h>
#include <raft/cuda_utils.cuh>

#include <raft/sparse/op/sort.h>
#include <raft/mr/device/buffer.hpp>
#include <raft/sparse/mst/mst.cuh>
#include <raft/sparse/selection/connect_components.cuh>
Expand All @@ -35,29 +36,6 @@ namespace raft {
namespace hierarchy {
namespace detail {

/**
* Sorts a COO by its weight
* @tparam value_idx
* @tparam value_t
* @param[inout] rows source edges
* @param[inout] cols dest edges
* @param[inout] data edge weights
* @param[in] nnz number of edges in edge list
* @param[in] stream cuda stream for which to order cuda operations
*/
template <typename value_idx, typename value_t>
void sort_coo_by_data(value_idx *rows, value_idx *cols, value_t *data,
value_idx nnz, cudaStream_t stream) {
thrust::device_ptr<value_idx> t_rows = thrust::device_pointer_cast(rows);
thrust::device_ptr<value_idx> t_cols = thrust::device_pointer_cast(cols);
thrust::device_ptr<value_t> t_data = thrust::device_pointer_cast(data);

auto first = thrust::make_zip_iterator(thrust::make_tuple(rows, cols));

thrust::sort_by_key(thrust::cuda::par.on(stream), t_data, t_data + nnz,
first);
}

template <typename value_idx, typename value_t>
void merge_msts(raft::Graph_COO<value_idx, value_idx, value_t> &coo1,
raft::Graph_COO<value_idx, value_idx, value_t> &coo2,
Expand Down Expand Up @@ -95,19 +73,20 @@ void merge_msts(raft::Graph_COO<value_idx, value_idx, value_t> &coo1,
* @param[inout] color the color labels array returned from the mst invocation
* @return updated MST edge list
*/
template <typename value_idx, typename value_t>
template <typename value_idx, typename value_t, typename red_op>
void connect_knn_graph(const raft::handle_t &handle, const value_t *X,
raft::Graph_COO<value_idx, value_idx, value_t> &msf,
size_t m, size_t n, value_idx *color,
red_op reduction_op,
raft::distance::DistanceType metric =
raft::distance::DistanceType::L2SqrtExpanded) {
auto d_alloc = handle.get_device_allocator();
auto stream = handle.get_stream();

raft::sparse::COO<value_t, value_idx> connected_edges(d_alloc, stream);

raft::linkage::connect_components<value_idx, value_t>(handle, connected_edges,
X, color, m, n);
raft::linkage::connect_components<value_idx, value_t>(
handle, connected_edges, X, color, m, n, reduction_op);

rmm::device_uvector<value_idx> indptr2(m + 1, stream);
raft::sparse::convert::sorted_coo_to_csr(connected_edges.rows(),
Expand Down Expand Up @@ -147,38 +126,34 @@ void connect_knn_graph(const raft::handle_t &handle, const value_t *X,
* @param[in] max_iter maximum iterations to run knn graph connection. This
* argument is really just a safeguard against the potential for infinite loops.
*/
template <typename value_idx, typename value_t>
template <typename value_idx, typename value_t, typename red_op>
void build_sorted_mst(const raft::handle_t &handle, const value_t *X,
const value_idx *indptr, const value_idx *indices,
const value_t *pw_dists, size_t m, size_t n,
rmm::device_uvector<value_idx> &mst_src,
rmm::device_uvector<value_idx> &mst_dst,
rmm::device_uvector<value_t> &mst_weight,
const size_t nnz,
value_idx *mst_src, value_idx *mst_dst,
value_t *mst_weight, value_idx *color, size_t nnz,
red_op reduction_op,
cjnolet marked this conversation as resolved.
Show resolved Hide resolved
raft::distance::DistanceType metric =
raft::distance::DistanceType::L2SqrtExpanded,
int max_iter = 10) {
auto d_alloc = handle.get_device_allocator();
auto stream = handle.get_stream();

rmm::device_uvector<value_idx> color(m, stream);

// We want to have MST initialize colors on first call.
auto mst_coo = raft::mst::mst<value_idx, value_idx, value_t, double>(
handle, indptr, indices, pw_dists, (value_idx)m, nnz, color.data(), stream,
false, true);

int iters = 1;
int n_components =
linkage::get_n_components(color.data(), m, d_alloc, stream);
int n_components = linkage::get_n_components(color, m, d_alloc, stream);

while (n_components > 1 && iters < max_iter) {
connect_knn_graph<value_idx, value_t>(handle, X, mst_coo, m, n,
color.data());
connect_knn_graph<value_idx, value_t>(handle, X, mst_coo, m, n, color,
reduction_op);

iters++;

n_components = linkage::get_n_components(color.data(), m, d_alloc, stream);
n_components = linkage::get_n_components(color, m, d_alloc, stream);
}

/**
Expand All @@ -189,7 +164,7 @@ void build_sorted_mst(const raft::handle_t &handle, const value_t *X,
* 1. There is a bug in this code somewhere
* 2. Either the given KNN graph wasn't generated from X or the same metric is not being used
* to generate the 1-nn (currently only L2SqrtExpanded is supported).
* 3. max_iter was not large enough to connect the graph.
* 3. max_iter was not large enough to connect the graph (less likely).
*
* Note that a KNN graph generated from 50 random isotropic balls (with significant overlap)
* was able to be connected in a single iteration.
Expand All @@ -201,20 +176,15 @@ void build_sorted_mst(const raft::handle_t &handle, const value_t *X,
" or increase 'max_iter'",
max_iter);

sort_coo_by_data(mst_coo.src.data(), mst_coo.dst.data(),
mst_coo.weights.data(), mst_coo.n_edges, stream);

// TODO: be nice if we could pass these directly into the MST
mst_src.resize(mst_coo.n_edges, stream);
mst_dst.resize(mst_coo.n_edges, stream);
mst_weight.resize(mst_coo.n_edges, stream);
raft::sparse::op::coo_sort_by_weight(mst_coo.src.data(), mst_coo.dst.data(),
mst_coo.weights.data(), mst_coo.n_edges,
stream);

raft::copy_async(mst_src.data(), mst_coo.src.data(), mst_coo.n_edges, stream);
raft::copy_async(mst_dst.data(), mst_coo.dst.data(), mst_coo.n_edges, stream);
raft::copy_async(mst_weight.data(), mst_coo.weights.data(), mst_coo.n_edges,
stream);
raft::copy_async(mst_src, mst_coo.src.data(), mst_coo.n_edges, stream);
raft::copy_async(mst_dst, mst_coo.dst.data(), mst_coo.n_edges, stream);
raft::copy_async(mst_weight, mst_coo.weights.data(), mst_coo.n_edges, stream);
}

}; // namespace detail
}; // namespace hierarchy
}; // namespace raft
}; // namespace raft
15 changes: 9 additions & 6 deletions cpp/include/raft/sparse/hierarchy/single_linkage.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,16 +70,19 @@ void single_linkage(const raft::handle_t &handle, const value_t *X, size_t m,
detail::get_distance_graph<value_idx, value_t, dist_type>(
handle, X, m, n, metric, indptr, indices, pw_dists, c);

rmm::device_uvector<value_idx> mst_rows(EMPTY, stream);
rmm::device_uvector<value_idx> mst_cols(EMPTY, stream);
rmm::device_uvector<value_t> mst_data(EMPTY, stream);
rmm::device_uvector<value_idx> mst_rows(m - 1, stream);
rmm::device_uvector<value_idx> mst_cols(m - 1, stream);
rmm::device_uvector<value_t> mst_data(m - 1, stream);

/**
* 2. Construct MST, sorted by weights
*/
rmm::device_uvector<value_idx> color(m, stream);
raft::linkage::FixConnectivitiesRedOp<value_idx, value_t> op(color.data(), m);
detail::build_sorted_mst<value_idx, value_t>(
handle, X, indptr.data(), indices.data(), pw_dists.data(), m, n, mst_rows,
mst_cols, mst_data, indices.size(), metric);
handle, X, indptr.data(), indices.data(), pw_dists.data(), m, n,
mst_rows.data(), mst_cols.data(), mst_data.data(), color.data(),
indices.size(), op, metric);

pw_dists.release();

Expand All @@ -93,7 +96,7 @@ void single_linkage(const raft::handle_t &handle, const value_t *X, size_t m,
// Create dendrogram
detail::build_dendrogram_host<value_idx, value_t>(
handle, mst_rows.data(), mst_cols.data(), mst_data.data(), n_edges,
out->children, out_delta, out_size);
out->children, out_delta.data(), out_size.data());
detail::extract_flattened_clusters(handle, out->labels, out->children,
n_clusters, m);

Expand Down
4 changes: 2 additions & 2 deletions cpp/include/raft/sparse/op/reduce.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,8 @@ void max_duplicates(const raft::handle_t &handle,

compute_duplicates_mask(diff.data(), rows, cols, nnz, stream);

thrust::exclusive_scan(exec_policy, diff.data(), diff.data() + diff.size(),
diff.data());
thrust::exclusive_scan(thrust::cuda::par.on(stream), diff.data(),
diff.data() + diff.size(), diff.data());

// compute final size
value_idx size = 0;
Expand Down
23 changes: 23 additions & 0 deletions cpp/include/raft/sparse/op/sort.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,29 @@ void coo_sort(COO<T> *const in,
coo_sort<T>(in->n_rows, in->n_cols, in->nnz, in->rows(), in->cols(),
in->vals(), d_alloc, stream);
}

/**
* Sorts a COO by its weight
* @tparam value_idx
* @tparam value_t
* @param[inout] rows source edges
* @param[inout] cols dest edges
* @param[inout] data edge weights
* @param[in] nnz number of edges in edge list
* @param[in] stream cuda stream for which to order cuda operations
*/
template <typename value_idx, typename value_t>
void coo_sort_by_weight(value_idx *rows, value_idx *cols, value_t *data,
value_idx nnz, cudaStream_t stream) {
thrust::device_ptr<value_idx> t_rows = thrust::device_pointer_cast(rows);
thrust::device_ptr<value_idx> t_cols = thrust::device_pointer_cast(cols);
thrust::device_ptr<value_t> t_data = thrust::device_pointer_cast(data);

auto first = thrust::make_zip_iterator(thrust::make_tuple(rows, cols));

thrust::sort_by_key(thrust::cuda::par.on(stream), t_data, t_data + nnz,
first);
}
}; // namespace op
}; // end NAMESPACE sparse
}; // end NAMESPACE raft
Loading