Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updates to enable HDBSCAN #208

Merged
merged 40 commits into from
May 27, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
046f703
Allowing epilogue in knn graph connection function
cjnolet Apr 20, 2021
086b08a
Fixing style
cjnolet Apr 20, 2021
37d2e0d
Adding missing argument
cjnolet Apr 23, 2021
709c040
Fixing typename
cjnolet Apr 23, 2021
b1fbc63
Updates
cjnolet Apr 23, 2021
81b630a
Fixing style
cjnolet Apr 23, 2021
6c52542
Updating to get hdbscan to compile
cjnolet Apr 23, 2021
4be3d24
Some updates
cjnolet Apr 26, 2021
1aaa7d5
changes
cjnolet Apr 29, 2021
dd6e537
agglomerative labeling to accept device arrays directly
cjnolet Apr 30, 2021
e3085cb
Cleaning up inputs to some of the single linkage prims
cjnolet Apr 30, 2021
ec1cca5
removing deprecated rmm policy usage
divyegala May 4, 2021
d32677d
Changes
cjnolet May 5, 2021
43f7cf8
removing deprecated rmm policy usage
divyegala May 4, 2021
e0ef5b3
alpha to weight alteration for precision
divyegala May 10, 2021
d809630
merge
divyegala May 10, 2021
1678c66
resolving errors
divyegala May 10, 2021
8018ea5
merge again
divyegala May 10, 2021
404e5ce
Merge branch 'fea-020-hdbscan' of github.com:cjnolet/raft into fea-02…
divyegala May 10, 2021
5eda4aa
trying alpha for all
divyegala May 11, 2021
030b3a9
double precision weight alteration
divyegala May 12, 2021
635d018
Checking in
cjnolet May 14, 2021
26dff4d
Fixing style
cjnolet May 19, 2021
55f274c
Removing unused epilogue from mst
cjnolet May 19, 2021
eb69413
Removing mst epilogue functor
cjnolet May 19, 2021
9f39d69
Merge branch 'branch-0.20' into fea-020-hdbscan
cjnolet May 19, 2021
dfebf10
Merge branch 'branch-21.06' into fea-020-hdbscan
cjnolet May 19, 2021
50d1cdc
Getting test to build
cjnolet May 20, 2021
c654156
Removing mstepiloguenoop since it's no longer being used
cjnolet May 22, 2021
136529a
another template param for weight alteration
divyegala May 24, 2021
e4b0f91
merge upstream
divyegala May 24, 2021
d7e93d9
renaming confusing variable name
divyegala May 24, 2021
e51d08b
working through merge
divyegala May 26, 2021
beea020
merging mst template PR
divyegala May 26, 2021
86cbf42
removing unnecessary comments
divyegala May 26, 2021
eb92e26
Review feedback
cjnolet May 27, 2021
e337c0d
Merge branch 'branch-21.06' into fea-020-hdbscan
cjnolet May 27, 2021
8b1e344
Update cpp/include/raft/sparse/hierarchy/detail/agglomerative.cuh
cjnolet May 27, 2021
1933520
Fixing bad merge
cjnolet May 27, 2021
1bc3e68
Merge branch 'fea-020-hdbscan' of github.com:cjnolet/raft into fea-02…
cjnolet May 27, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
another template param for weight alteration
  • Loading branch information
divyegala committed May 24, 2021
commit 136529a91a439681c61dc381663240c1179cc29b
10 changes: 2 additions & 8 deletions cpp/include/raft/sparse/hierarchy/detail/mst.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ void connect_knn_graph(const raft::handle_t &handle, const value_t *X,

// On the second call, we hand the MST the original colors
// and the new set of edges and let it restart the optimization process
auto new_mst = raft::mst::mst<value_idx, value_idx, value_t>(
auto new_mst = raft::mst::mst<value_idx, value_idx, value_t, double>(
handle, indptr2.data(), connected_edges.cols(), connected_edges.vals(), m,
connected_edges.nnz, color, stream, false, false);

Expand Down Expand Up @@ -164,7 +164,7 @@ void build_sorted_mst(const raft::handle_t &handle, const value_t *X,
rmm::device_uvector<value_idx> color(m, stream);

// We want to have MST initialize colors on first call.
auto mst_coo = raft::mst::mst<value_idx, value_idx, value_t>(
auto mst_coo = raft::mst::mst<value_idx, value_idx, value_t, double>(
handle, indptr, indices, pw_dists, (value_idx)m, nnz, color.data(), stream,
false, true);

Expand Down Expand Up @@ -201,12 +201,6 @@ void build_sorted_mst(const raft::handle_t &handle, const value_t *X,
" or increase 'max_iter'",
max_iter);

RAFT_EXPECTS(mst_coo.n_edges == m - 1,
"n_edges should be %d but was %d. This"
"could be an indication of duplicate edges returned from the"
"MST or symmetrization stage.",
m - 1, mst_coo.n_edges);

sort_coo_by_data(mst_coo.src.data(), mst_coo.dst.data(),
mst_coo.weights.data(), mst_coo.n_edges, stream);

Expand Down
46 changes: 19 additions & 27 deletions cpp/include/raft/sparse/mst/detail/mst_kernels.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -27,22 +27,22 @@ namespace raft {
namespace mst {
namespace detail {

template <typename vertex_t, typename edge_t, typename weight_t>
template <typename vertex_t, typename edge_t, typename alteration_t>
__global__ void kernel_min_edge_per_vertex(
const edge_t* offsets, const vertex_t* indices, const weight_t* weights,
const edge_t* offsets, const vertex_t* indices, const alteration_t* weights,
const vertex_t* color, const vertex_t* color_index, edge_t* new_mst_edge,
const bool* mst_edge, weight_t* min_edge_color, const vertex_t v) {
const bool* mst_edge, alteration_t* min_edge_color, const vertex_t v) {
edge_t tid = threadIdx.x + blockIdx.x * blockDim.x;

unsigned warp_id = tid / 32;
unsigned lane_id = tid % 32;

__shared__ edge_t min_edge_index[32];
__shared__ weight_t min_edge_weight[32];
__shared__ alteration_t min_edge_weight[32];
__shared__ vertex_t min_color[32];

min_edge_index[lane_id] = std::numeric_limits<edge_t>::max();
min_edge_weight[lane_id] = std::numeric_limits<weight_t>::max();
min_edge_weight[lane_id] = std::numeric_limits<alteration_t>::max();
min_color[lane_id] = std::numeric_limits<vertex_t>::max();

__syncthreads();
Expand All @@ -61,7 +61,7 @@ __global__ void kernel_min_edge_per_vertex(
// assuming one warp per row
// find min for each thread in warp
for (edge_t e = row_start + lane_id; e < row_end; e += 32) {
weight_t curr_edge_weight = weights[e];
alteration_t curr_edge_weight = weights[e];
vertex_t successor_color_idx = color_index[indices[e]];
vertex_t successor_color = color[successor_color_idx];

Expand Down Expand Up @@ -92,7 +92,7 @@ __global__ void kernel_min_edge_per_vertex(

// min edge may now be found in first thread
if (lane_id == 0) {
if (min_edge_weight[0] != std::numeric_limits<weight_t>::max()) {
if (min_edge_weight[0] != std::numeric_limits<alteration_t>::max()) {
new_mst_edge[warp_id] = min_edge_index[0];

// atomically set min edge per color
Expand All @@ -102,12 +102,13 @@ __global__ void kernel_min_edge_per_vertex(
}
}

template <typename vertex_t, typename edge_t, typename weight_t>
template <typename vertex_t, typename edge_t, typename weight_t,
typename alteration_t>
__global__ void min_edge_per_supervertex(
const vertex_t* color, const vertex_t* color_index, edge_t* new_mst_edge,
bool* mst_edge, const vertex_t* indices, const weight_t* weights,
const weight_t* altered_weights, vertex_t* temp_src, vertex_t* temp_dst,
weight_t* temp_weights, const weight_t* min_edge_color, const vertex_t v,
const alteration_t* altered_weights, vertex_t* temp_src, vertex_t* temp_dst,
weight_t* temp_weights, const alteration_t* min_edge_color, const vertex_t v,
bool symmetrize_output) {
auto tid = get_1D_idx<vertex_t>();
if (tid < v) {
Expand All @@ -119,7 +120,7 @@ __global__ void min_edge_per_supervertex(
// find minimum edge is same as minimum edge of whole supervertex
// if yes, that is part of mst
if (edge_idx != std::numeric_limits<edge_t>::max()) {
weight_t vertex_weight = altered_weights[edge_idx];
alteration_t vertex_weight = altered_weights[edge_idx];

bool add_edge = false;
if (min_edge_color[vertex_color] == vertex_weight) {
Expand Down Expand Up @@ -281,31 +282,22 @@ __global__ void final_color_indices(const vertex_t v, const vertex_t* color,

// Alterate the weights, make all undirected edge weight unique while keeping Wuv == Wvu
// Consider using curand device API instead of precomputed random_values array
template <typename vertex_t, typename edge_t, typename weight_t>
template <typename vertex_t, typename edge_t, typename weight_t,
typename alteration_t>
__global__ void alteration_kernel(const vertex_t v, const edge_t e,
const edge_t* offsets,
const vertex_t* indices,
const weight_t* weights, double max,
weight_t* random_values,
weight_t* altered_weights, int alpha,
bool use_alpha) {
const weight_t* weights, alteration_t max,
alteration_t* random_values,
alteration_t* altered_weights) {
auto row = get_1D_idx<vertex_t>();
if (row < v) {
auto row_begin = offsets[row];
auto row_end = offsets[row + 1];
for (auto i = row_begin; i < row_end; i++) {
auto column = indices[i];
// doing the later step explicity in double for precision
if (use_alpha) {
altered_weights[i] =
alpha * weights[i] + alpha * max *
(static_cast<double>(random_values[row]) +
static_cast<double>(random_values[column]));
} else {
altered_weights[i] =
weights[i] + max * (static_cast<double>(random_values[row]) +
static_cast<double>(random_values[column]));
}
altered_weights[i] =
weights[i] + max * (random_values[row] + random_values[column]);
}
}
}
Expand Down
81 changes: 45 additions & 36 deletions cpp/include/raft/sparse/mst/detail/mst_solver_inl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
#include "mst_kernels.cuh"
#include "utils.cuh"


#include <raft/cudart_utils.h>
#include <rmm/device_buffer.hpp>
#include <rmm/exec_policy.hpp>
Expand Down Expand Up @@ -50,12 +49,13 @@ inline curandStatus_t curand_generate_uniformX(curandGenerator_t generator,
return curandGenerateUniformDouble(generator, outputPtr, n);
}

template <typename vertex_t, typename edge_t, typename weight_t>
MST_solver<vertex_t, edge_t, weight_t>::MST_solver(
template <typename vertex_t, typename edge_t, typename weight_t,
typename alteration_t>
MST_solver<vertex_t, edge_t, weight_t, alteration_t>::MST_solver(
const raft::handle_t& handle_, const edge_t* offsets_,
const vertex_t* indices_, const weight_t* weights_, const vertex_t v_,
const edge_t e_, vertex_t* color_, cudaStream_t stream_,
bool symmetrize_output_, bool initialize_colors_, int iterations_, int alpha_)
bool symmetrize_output_, bool initialize_colors_, int iterations_)
: handle(handle_),
offsets(offsets_),
indices(indices_),
Expand All @@ -77,8 +77,7 @@ MST_solver<vertex_t, edge_t, weight_t>::MST_solver(
stream(stream_),
symmetrize_output(symmetrize_output_),
initialize_colors(initialize_colors_),
iterations(iterations_),
alpha(alpha_) {
iterations(iterations_) {
max_blocks = handle_.get_device_properties().maxGridSize[0];
max_threads = handle_.get_device_properties().maxThreadsPerBlock;
sm_count = handle_.get_device_properties().multiProcessorCount;
Expand All @@ -94,9 +93,10 @@ MST_solver<vertex_t, edge_t, weight_t>::MST_solver(
thrust::sequence(policy, next_color.begin(), next_color.end(), 0);
}

template <typename vertex_t, typename edge_t, typename weight_t>
template <typename vertex_t, typename edge_t, typename weight_t,
typename alteration_t>
raft::Graph_COO<vertex_t, edge_t, weight_t>
MST_solver<vertex_t, edge_t, weight_t>::solve() {
MST_solver<vertex_t, edge_t, weight_t, alteration_t>::solve() {
RAFT_EXPECTS(v > 0, "0 vertices");
RAFT_EXPECTS(e > 0, "0 edges");
RAFT_EXPECTS(offsets != nullptr, "Null offsets.");
Expand Down Expand Up @@ -155,11 +155,12 @@ MST_solver<vertex_t, edge_t, weight_t>::solve() {
timer3 += duration_us(stop - start);
#endif

RAFT_EXPECTS(mst_edge_count[0] <= n_expected_edges,
auto curr_mst_edge_count = mst_edge_count[0];
RAFT_EXPECTS(curr_mst_edge_count <= n_expected_edges,
"Number of edges found by MST is invalid. This may be due to "
"loss in precision. Try increasing precision of weights.");

if (prev_mst_edge_count[0] == mst_edge_count[0]) {
if (curr_mst_edge_count == prev_mst_edge_count[0]) {
#ifdef MST_TIME
std::cout << "Iterations: " << i << std::endl;
std::cout << timer0 << "," << timer1 << "," << timer2 << "," << timer3
Expand Down Expand Up @@ -217,8 +218,10 @@ struct alteration_functor {
};

// Compute the uper bound for the alteration
template <typename vertex_t, typename edge_t, typename weight_t>
double MST_solver<vertex_t, edge_t, weight_t>::alteration_max() {
template <typename vertex_t, typename edge_t, typename weight_t,
typename alteration_t>
alteration_t
MST_solver<vertex_t, edge_t, weight_t, alteration_t>::alteration_max() {
auto policy = rmm::exec_policy(stream);
rmm::device_vector<weight_t> tmp(e);
thrust::device_ptr<const weight_t> weights_ptr(weights);
Expand All @@ -238,21 +241,22 @@ double MST_solver<vertex_t, edge_t, weight_t>::alteration_max() {
auto max =
thrust::transform_reduce(policy, begin, end, alteration_functor<weight_t>(),
init, thrust::minimum<weight_t>());
return max / static_cast<double>(2);
return max / static_cast<alteration_t>(2);
}

// Compute the alteration to make all undirected edge weight unique
// Preserves weights order
template <typename vertex_t, typename edge_t, typename weight_t>
void MST_solver<vertex_t, edge_t, weight_t>::alteration() {
template <typename vertex_t, typename edge_t, typename weight_t,
typename alteration_t>
void MST_solver<vertex_t, edge_t, weight_t, alteration_t>::alteration() {
auto nthreads = std::min(v, max_threads);
auto nblocks = std::min((v + nthreads - 1) / nthreads, max_blocks);

// maximum alteration that does not change realtive weights order
double max = alteration_max();
alteration_t max = alteration_max();

// pool of rand values
rmm::device_vector<weight_t> rand_values(v);
rmm::device_vector<alteration_t> rand_values(v);

// Random number generator
curandGenerator_t randGen;
Expand All @@ -267,18 +271,17 @@ void MST_solver<vertex_t, edge_t, weight_t>::alteration() {
RAFT_EXPECTS(curand_status == CURAND_STATUS_SUCCESS,
"MST: CURAND cleanup failed");

bool use_alpha = max < 1e-3 && sizeof(weight_t) == 4;

//Alterate the weights, make all undirected edge weight unique while keeping Wuv == Wvu
detail::alteration_kernel<<<nblocks, nthreads, 0, stream>>>(
v, e, offsets, indices, weights, max, rand_values.data().get(),
altered_weights.data().get(), alpha, use_alpha);
altered_weights.data().get());
}

// updates colors of vertices by propagating the lower color to the higher
template <typename vertex_t, typename edge_t, typename weight_t>
void MST_solver<vertex_t, edge_t, weight_t>::label_prop(vertex_t* mst_src,
vertex_t* mst_dst) {
template <typename vertex_t, typename edge_t, typename weight_t,
typename alteration_t>
void MST_solver<vertex_t, edge_t, weight_t, alteration_t>::label_prop(
vertex_t* mst_src, vertex_t* mst_dst) {
// update the colors of both ends its until there is no change in colors
thrust::host_vector<edge_t> curr_mst_edge_count = mst_edge_count;

Expand Down Expand Up @@ -315,11 +318,13 @@ void MST_solver<vertex_t, edge_t, weight_t>::label_prop(vertex_t* mst_src,
}

// Finds the minimum edge from each vertex to the lowest color
template <typename vertex_t, typename edge_t, typename weight_t>
void MST_solver<vertex_t, edge_t, weight_t>::min_edge_per_vertex() {
template <typename vertex_t, typename edge_t, typename weight_t,
typename alteration_t>
void MST_solver<vertex_t, edge_t, weight_t,
alteration_t>::min_edge_per_vertex() {
auto policy = rmm::exec_policy(stream);
thrust::fill(policy, min_edge_color.begin(), min_edge_color.end(),
std::numeric_limits<weight_t>::max());
std::numeric_limits<alteration_t>::max());
thrust::fill(policy, new_mst_edge.begin(), new_mst_edge.end(),
std::numeric_limits<weight_t>::max());

Expand All @@ -328,17 +333,19 @@ void MST_solver<vertex_t, edge_t, weight_t>::min_edge_per_vertex() {
vertex_t* color_ptr = color.data().get();
edge_t* new_mst_edge_ptr = new_mst_edge.data().get();
bool* mst_edge_ptr = mst_edge.data().get();
weight_t* min_edge_color_ptr = min_edge_color.data().get();
weight_t* altered_weights_ptr = altered_weights.data().get();
alteration_t* min_edge_color_ptr = min_edge_color.data().get();
alteration_t* altered_weights_ptr = altered_weights.data().get();

detail::kernel_min_edge_per_vertex<<<v, n_threads, 0, stream>>>(
offsets, indices, altered_weights_ptr, color_ptr, color_index,
new_mst_edge_ptr, mst_edge_ptr, min_edge_color_ptr, v);
}

// Finds the minimum edge from each supervertex to the lowest color
template <typename vertex_t, typename edge_t, typename weight_t>
void MST_solver<vertex_t, edge_t, weight_t>::min_edge_per_supervertex() {
template <typename vertex_t, typename edge_t, typename weight_t,
typename alteration_t>
void MST_solver<vertex_t, edge_t, weight_t,
alteration_t>::min_edge_per_supervertex() {
auto nthreads = std::min(v, max_threads);
auto nblocks = std::min((v + nthreads - 1) / nthreads, max_blocks);

Expand All @@ -349,8 +356,8 @@ void MST_solver<vertex_t, edge_t, weight_t>::min_edge_per_supervertex() {
vertex_t* color_ptr = color.data().get();
edge_t* new_mst_edge_ptr = new_mst_edge.data().get();
bool* mst_edge_ptr = mst_edge.data().get();
weight_t* min_edge_color_ptr = min_edge_color.data().get();
weight_t* altered_weights_ptr = altered_weights.data().get();
alteration_t* min_edge_color_ptr = min_edge_color.data().get();
alteration_t* altered_weights_ptr = altered_weights.data().get();
vertex_t* temp_src_ptr = temp_src.data().get();
vertex_t* temp_dst_ptr = temp_dst.data().get();
weight_t* temp_weights_ptr = temp_weights.data().get();
Expand All @@ -370,8 +377,9 @@ void MST_solver<vertex_t, edge_t, weight_t>::min_edge_per_supervertex() {
}
}

template <typename vertex_t, typename edge_t, typename weight_t>
void MST_solver<vertex_t, edge_t, weight_t>::check_termination() {
template <typename vertex_t, typename edge_t, typename weight_t,
typename alteration_t>
void MST_solver<vertex_t, edge_t, weight_t, alteration_t>::check_termination() {
vertex_t nthreads = std::min(2 * v, (vertex_t)max_threads);
vertex_t nblocks =
std::min((2 * v + nthreads - 1) / nthreads, (vertex_t)max_blocks);
Expand All @@ -394,8 +402,9 @@ struct new_edges_functor {
}
};

template <typename vertex_t, typename edge_t, typename weight_t>
void MST_solver<vertex_t, edge_t, weight_t>::append_src_dst_pair(
template <typename vertex_t, typename edge_t, typename weight_t,
typename alteration_t>
void MST_solver<vertex_t, edge_t, weight_t, alteration_t>::append_src_dst_pair(
vertex_t* mst_src, vertex_t* mst_dst, weight_t* mst_weights) {
auto policy = rmm::exec_policy(stream);

Expand Down
9 changes: 5 additions & 4 deletions cpp/include/raft/sparse/mst/mst.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,16 @@
namespace raft {
namespace mst {

template <typename vertex_t, typename edge_t, typename weight_t>
template <typename vertex_t, typename edge_t, typename weight_t,
typename alteration_t = weight_t>
raft::Graph_COO<vertex_t, edge_t, weight_t> mst(
const raft::handle_t& handle, edge_t const* offsets, vertex_t const* indices,
weight_t const* weights, vertex_t const v, edge_t const e, vertex_t* color,
cudaStream_t stream, bool symmetrize_output = true,
bool initialize_colors = true, int iterations = 0, int alpha = 1e6) {
MST_solver<vertex_t, edge_t, weight_t> mst_solver(
bool initialize_colors = true, int iterations = 0) {
MST_solver<vertex_t, edge_t, weight_t, alteration_t> mst_solver(
handle, offsets, indices, weights, v, e, color, stream, symmetrize_output,
initialize_colors, iterations, alpha);
initialize_colors, iterations);
return mst_solver.solve();
}

Expand Down
Loading