Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding fused_l2_nn_argmin wrapper to Pylibraft #924

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Adding fused l2 nn argmin primitive for pylibraft
  • Loading branch information
cjnolet committed Oct 18, 2022
commit 0e15a2d6d3bf1110cf69deb64e8e9818c8250a29
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ set_target_properties(raft_distance PROPERTIES EXPORT_NAME distance)
if(RAFT_COMPILE_DIST_LIBRARY)
add_library(raft_distance_lib
src/distance/pairwise_distance.cu
src/distance/fused_l2_min_arg.cu
src/distance/specializations/detail/canberra.cu
src/distance/specializations/detail/chebyshev.cu
src/distance/specializations/detail/correlation.cu
Expand Down
60 changes: 60 additions & 0 deletions cpp/include/raft_distance/fused_l2_min_arg.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <raft/core/handle.hpp>
#include <raft/distance/distance_types.hpp>

namespace raft::distance::runtime {
/**
* @brief Wrapper around fusedL2NN with minimum reduction operators.
*
* fusedL2NN cannot be compiled in the distance library due to the lambda
* operators, so this wrapper covers the most common case (minimum).
* This should be preferred to the more generic API when possible, in order to
* reduce compilation times for users of the shared library.
* @param[in] handle raft handle
* @param[out] min will contain the reduced output (Length = `m`)
* (on device)
* @param[in] x first matrix. Row major. Dim = `m x k`.
* (on device).
* @param[in] y second matrix. Row major. Dim = `n x k`.
* (on device).
* @param[in] m gemm m
* @param[in] n gemm n
* @param[in] k gemm k
*/
void fused_l2_nn_min_arg(
raft::handle_t const& handle,
int* min,
const float* x,
const float* y,
int m,
int n,
int k,
bool sqrt);

void fused_l2_nn_min_arg(
raft::handle_t const& handle,
int* min,
const double* x,
const double* y,
int m,
int n,
int k,
bool sqrt);


} // end namespace raft::distance::runtime
117 changes: 117 additions & 0 deletions cpp/src/distance/fused_l2_min_arg.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <raft/distance/fused_l2_nn.cuh>
#include <raft/distance/distance_types.hpp>
#include <raft/distance/specializations.cuh>
#include <raft/core/device_mdarray.hpp>
#include <thrust/for_each.h>
#include <thrust/tuple.h>
#include <raft/core/kvp.hpp>
#include <raft/core/handle.hpp>

namespace raft::distance::runtime {

template <typename IndexT, typename DataT>
struct KeyValueIndexOp {
__host__ __device__ __forceinline__ IndexT
operator()(const raft::KeyValuePair<IndexT, DataT>& a) const
{
printf("%d, %f\n", a.key, a.value);
return a.key;
}
};

template<typename value_t, typename idx_t>
void compute_fused_l2_nn_min_arg(
raft::handle_t const& handle,
idx_t* min,
const value_t* x,
const value_t* y,
idx_t m,
idx_t n,
idx_t k,
bool sqrt) {
rmm::device_uvector<int> workspace(m, handle.get_stream());
auto kvp = raft::make_device_vector<raft::KeyValuePair<idx_t, value_t>>(handle, m);

rmm::device_uvector<value_t> x_norms(m, handle.get_stream());
rmm::device_uvector<value_t> y_norms(n, handle.get_stream());
raft::linalg::rowNorm(x_norms.data(), x, k, m, raft::linalg::L2Norm, true, handle.get_stream());
raft::linalg::rowNorm(y_norms.data(), y, k, n, raft::linalg::L2Norm, true, handle.get_stream());

fusedL2NNMinReduce(kvp.data_handle(), x, y, x_norms.data(), y_norms.data(), m, n, k, (void*)workspace.data(), sqrt, true, handle.get_stream());

raft::print_device_vector("x", x, m*k, std::cout);
raft::print_device_vector("y", y, n*k, std::cout);

raft::print_device_vector("x_norms", x_norms.data(), m, std::cout);
raft::print_device_vector("y_norms", y_norms.data(), n, std::cout);

KeyValueIndexOp<idx_t, value_t> conversion_op;
thrust::transform(handle.get_thrust_policy(), kvp.data_handle(), kvp.data_handle()+m, min, conversion_op);
handle.sync_stream();
raft::print_device_vector("min", min, m, std::cout);
}

/**
* @brief Wrapper around fusedL2NN with minimum reduction operators.
*
* fusedL2NN cannot be compiled in the distance library due to the lambda
* operators, so this wrapper covers the most common case (minimum).
* This should be preferred to the more generic API when possible, in order to
* reduce compilation times for users of the shared library.
* @param[in] handle raft handle
* @param[out] min will contain the reduced output (Length = `m`)
* (on device)
* @param[in] x first matrix. Row major. Dim = `m x k`.
* (on device).
* @param[in] y second matrix. Row major. Dim = `n x k`.
* (on device).
* @param[in] xn L2 squared norm of `x`. Length = `m`. (on device).
* @param[in] yn L2 squared norm of `y`. Length = `n`. (on device)
* @param[in] m gemm m
* @param[in] n gemm n
* @param[in] k gemm k
*/
void fused_l2_nn_min_arg(
raft::handle_t const& handle,
int* min,
const float* x,
const float* y,
int m,
int n,
int k,
bool sqrt) {

compute_fused_l2_nn_min_arg<float, int>(handle, min, x, y, m, n, k, sqrt);
}

void fused_l2_nn_min_arg(
raft::handle_t const& handle,
int* min,
const double* x,
const double* y,
int m,
int n,
int k,
bool sqrt) {

compute_fused_l2_nn_min_arg<double, int>(handle, min, x, y, m, n, k, sqrt);
}


} // end namespace raft::distance::runtime
114 changes: 114 additions & 0 deletions cpp/test/util/fast_int_div.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "../test_utils.h"
#include <raft/util/fast_int_div.cuh>
#include <raft/util/cudart_utils.hpp>

#include <rmm/device_uvector.hpp>

#include <gtest/gtest.h>

namespace raft::util {

TEST(FastIntDiv, CpuTest)
{
for (int i = 0; i < 100; ++i) {
// get a positive divisor
int divisor;
do {
divisor = rand();
} while (divisor <= 0);
FastIntDiv fid(divisor);
// run it against a few random numbers and compare the outputs
for (int i = 0; i < 10000; ++i) {
auto num = rand();
auto correct = num / divisor;
auto computed = num / fid;
ASSERT_EQ(correct, computed) << " divisor=" << divisor << " num=" << num;
num = rand();
correct = num % divisor;
computed = num % fid;
ASSERT_EQ(correct, computed) << " divisor=" << divisor << " num=" << num;
num = -num;
correct = num / divisor;
computed = num / fid;
ASSERT_EQ(correct, computed) << " divisor=" << divisor << " num=" << num;
num = rand();
correct = num % divisor;
computed = num % fid;
ASSERT_EQ(correct, computed) << " divisor=" << divisor << " num=" << num;
}
}
}

__global__ void fastIntDivTestKernel(
int* computed, int* correct, const int* in, FastIntDiv fid, int divisor, int len)
{
auto tid = threadIdx.x + blockIdx.x * blockDim.x;
if (tid < len) {
computed[tid] = in[tid] % fid;
correct[tid] = in[tid] % divisor;
computed[len + tid] = -in[tid] % fid;
correct[len + tid] = -in[tid] % divisor;
}
}

TEST(FastIntDiv, GpuTest)
{
cudaStream_t stream = 0;
RAFT_CUDA_TRY(cudaStreamCreate(&stream));

static const int len = 100000;
static const int TPB = 128;
rmm::device_uvector<int> computed(len * 2, stream);
rmm::device_uvector<int> correct(len * 2, stream);
rmm::device_uvector<int> in(len, stream);
for (int i = 0; i < 100; ++i) {
// get a positive divisor
int divisor;
do {
divisor = rand();
} while (divisor <= 0);
FastIntDiv fid(divisor);
// run it against a few random numbers and compare the outputs
std::vector<int> h_in(len);
for (int i = 0; i < len; ++i) {
h_in[i] = rand();
}
raft::update_device(in.data(), h_in.data(), len, stream);
int nblks = raft::ceildiv(len, TPB);
fastIntDivTestKernel<<<nblks, TPB, 0, 0>>>(
computed.data(), correct.data(), in.data(), fid, divisor, len);
RAFT_CUDA_TRY(cudaStreamSynchronize(0));
ASSERT_TRUE(devArrMatch(correct.data(), computed.data(), len * 2, raft::Compare<int>()))
<< " divisor=" << divisor;
}
}

FastIntDiv dummyFunc(int num)
{
FastIntDiv fd(num);
return fd;
}

TEST(FastIntDiv, IncorrectUsage)
{
ASSERT_THROW(dummyFunc(-1), raft::exception);
ASSERT_THROW(dummyFunc(0), raft::exception);
}

} // namespace raft::util
3 changes: 2 additions & 1 deletion python/pylibraft/pylibraft/distance/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
# =============================================================================

# Set the list of Cython files to build
set(cython_sources pairwise_distance.pyx)
set(cython_sources pairwise_distance.pyx
fused_l2_nn.pyx)
set(linked_libraries raft::raft raft::distance)

# Build all of the Cython targets
Expand Down
1 change: 1 addition & 0 deletions python/pylibraft/pylibraft/distance/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@
# limitations under the License.
#

from .fused_l2_nn import fused_l2_nn_argmin
from .pairwise_distance import distance as pairwise_distance
Loading