Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Rust bindings for CAGRA #34

Merged
merged 41 commits into from
Mar 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
a4ec121
Rust bindings for cuvs
benfred Feb 6, 2024
cae7cd3
share common workspace metadata
benfred Feb 6, 2024
160b363
support for building cagra index
benfred Feb 7, 2024
4e277e5
working search unittest
benfred Feb 12, 2024
0027115
cudaFree
benfred Feb 12, 2024
e11e4e0
functioning unittest
benfred Feb 13, 2024
15aceda
add cagra example program
benfred Feb 20, 2024
2e4b477
Add resources to to_host/to_device functions
benfred Feb 20, 2024
83e202e
add From trait for converting ndarray to ManagedTensor
benfred Feb 21, 2024
1990d02
basic CI
benfred Feb 21, 2024
5c7aafa
.
benfred Feb 21, 2024
b44b81c
.
benfred Feb 26, 2024
13b1848
.
benfred Feb 26, 2024
bd8c432
.
benfred Feb 26, 2024
d411803
.
benfred Feb 26, 2024
a2e8009
remove export
benfred Feb 26, 2024
b4dc043
.
benfred Feb 26, 2024
af4b0ba
.
benfred Feb 26, 2024
e7a47de
.
benfred Feb 26, 2024
a757060
one more time
benfred Feb 27, 2024
6c54aa4
don't download python artifacts
benfred Feb 27, 2024
79560c3
one more time
benfred Feb 27, 2024
023795d
.
benfred Feb 27, 2024
d3ceacf
add libclang
benfred Feb 27, 2024
a60edc8
add clang
benfred Feb 27, 2024
db00e6f
set LIBCLANG_PATH
benfred Feb 27, 2024
82ccb25
correct libclang_path
benfred Feb 27, 2024
1ffdfd7
check in cuvs_bindings.rs
benfred Feb 27, 2024
c091196
Revert "check in cuvs_bindings.rs"
benfred Feb 28, 2024
0005294
try harder with libclang
benfred Feb 28, 2024
fc413fb
.
benfred Feb 28, 2024
8397651
.
benfred Feb 28, 2024
4c26cd7
:facepalm:
benfred Feb 29, 2024
eddab35
Merge branch 'branch-24.04' into rust_bindings
benfred Feb 29, 2024
0433e07
.
benfred Mar 4, 2024
b3a9624
Merge branch 'branch-24.04' into rust_bindings
benfred Mar 4, 2024
3eec2c3
update to latest 24.04 cagra api
benfred Mar 4, 2024
6cd44b1
Merge branch 'rust_bindings' of https://github.com/benfred/cuvs into …
benfred Mar 4, 2024
2dd4ea0
add example to README
benfred Mar 4, 2024
a781042
.
benfred Mar 4, 2024
e836117
update dependencies.yaml
benfred Mar 4, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,19 @@ jobs:
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
rust-build:
needs: cpp-build
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04
with:
build_type: ${{ inputs.build_type || 'branch' }}
branch: ${{ inputs.branch }}
arch: "amd64"
date: ${{ inputs.date }}
container_image: "rapidsai/ci-conda:latest"
node_type: "gpu-v100-latest-1"
run_script: "ci/build_rust.sh"
sha: ${{ inputs.sha }}
python-build:
needs: [cpp-build]
secrets: inherit
Expand Down
11 changes: 11 additions & 0 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ jobs:
- conda-python-build
- conda-python-tests
- docs-build
- rust-build
- wheel-build-cuvs
- wheel-tests-cuvs
- devcontainer
Expand Down Expand Up @@ -72,6 +73,16 @@ jobs:
arch: "amd64"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/build_docs.sh"
rust-build:
needs: conda-cpp-build
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.04
with:
build_type: pull-request
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/build_rust.sh"
wheel-build-cuvs:
needs: checks
secrets: inherit
Expand Down
61 changes: 61 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,67 @@ cuvsCagraIndexParamsDestroy(index_params);
cuvsResourcesDestroy(res);
```

### Rust API

```rust
use cuvs::cagra::{Index, IndexParams, SearchParams};
use cuvs::{ManagedTensor, Resources, Result};

use ndarray::s;
use ndarray_rand::rand_distr::Uniform;
use ndarray_rand::RandomExt;

/// Example showing how to index and search data with CAGRA.
///
/// Builds a CAGRA index over a randomly generated dataset, searches it using
/// the first few rows of that same dataset as queries, and prints the
/// resulting neighbors and distances.
fn cagra_example() -> Result<()> {
let res = Resources::new()?;

// Create a new random dataset to index
let n_datapoints = 65536;
let n_features = 512;
let dataset =
ndarray::Array::<f32, _>::random((n_datapoints, n_features), Uniform::new(0., 1.0));

// Build the CAGRA index
let build_params = IndexParams::new()?;
let index = Index::build(&res, &build_params, &dataset)?;
println!(
"Indexed {}x{} datapoints into cagra index",
n_datapoints, n_features
);

// Use the first 4 points from the dataset as queries: this tests that we get
// each of them back as its own nearest neighbor
let n_queries = 4;
let queries = dataset.slice(s![0..n_queries, ..]);

// Number of neighbors to return for each query
let k = 10;

// The CAGRA search API requires queries and outputs to be in device memory:
// copy the query data over, and allocate new device memory for the
// distances/neighbors outputs
let queries = ManagedTensor::from(&queries).to_device(&res)?;
let mut neighbors_host = ndarray::Array::<u32, _>::zeros((n_queries, k));
let neighbors = ManagedTensor::from(&neighbors_host).to_device(&res)?;

let mut distances_host = ndarray::Array::<f32, _>::zeros((n_queries, k));
let distances = ManagedTensor::from(&distances_host).to_device(&res)?;

let search_params = SearchParams::new()?;

index.search(&res, &search_params, &queries, &neighbors, &distances)?;

// Copy the results back to host memory
distances.to_host(&res, &mut distances_host)?;
neighbors.to_host(&res, &mut neighbors_host)?;

// The nearest neighbor of each query should be the query itself, since the
// queries were taken from the dataset
println!("Neighbors {:?}", neighbors_host);
println!("Distances {:?}", distances_host);
Ok(())
}
```


## Contributing

Expand Down
40 changes: 40 additions & 0 deletions ci/build_rust.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/bin/bash
# Copyright (c) 2024, NVIDIA CORPORATION.
#
# CI entry point for the rust bindings: creates a conda environment named
# `rust`, installs the libcuvs/libraft packages downloaded from the C++ build
# job, and then runs `cargo test` inside the `rust/` directory.

# abort on errors, on unset variables, and on failures inside pipelines
set -euo pipefail

rapids-logger "Create test conda environment"
. /opt/conda/etc/profile.d/conda.sh

# generate the conda environment file for the `rust` file key of
# dependencies.yaml; `${RAPIDS_CUDA_VERSION%.*}` drops the last
# dot-separated component of the CUDA version. The output is echoed to the
# log and written to env.yaml by `tee`.
rapids-dependency-file-generator \
--output conda \
--file_key rust \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml

rapids-mamba-retry env create --force -f env.yaml -n rust

# We are seeing failures on unbound variables when activating the environment
# here: apply the workaround from
# https://github.com/conda/conda/issues/8186#issuecomment-532874667
# (temporarily disable errexit/nounset around the activation)
set +eu
conda activate rust
set -eu

rapids-print-env

# we need to set up LIBCLANG_PATH to allow rust bindgen to work,
# grab it from the conda env (the directory of the first libclang.so found
# under /opt/conda)
export LIBCLANG_PATH=$(dirname $(find /opt/conda -name libclang.so | head -n 1))
echo "LIBCLANG_PATH=$LIBCLANG_PATH"

rapids-logger "Downloading artifacts from previous jobs"
CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)

# installing libcuvs/libraft will speed up the rust build substantially
rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
libcuvs \
libraft

# build and test the rust bindings
cd rust
cargo test
16 changes: 15 additions & 1 deletion dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,12 @@ files:
- cupy
- docs
- py_version
- test_py_cuvs
rust:
output: none
includes:
- build
- cuda
- rust
py_build_py_cuvs:
output: pyproject
pyproject_dir: python/cuvs
Expand Down Expand Up @@ -308,6 +313,15 @@ dependencies:
- recommonmark
- sphinx-copybutton
- sphinx-markdown-tables
rust:
common:
- output_types: [conda]
packages:
- make
- rust
# clang/libclang are only needed for bindgen support
- clang
- libclang
build_wheels:
common:
- output_types: [requirements, pyproject]
Expand Down
16 changes: 16 additions & 0 deletions rust/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
[workspace]
members = [
"cuvs",
"cuvs-sys",
]
resolver = "2"

[workspace.package]
version = "0.1.0"
edition = "2021"
repository = "https://github.com/rapidsai/cuvs"
homepage = "https://github.com/rapidsai/cuvs"
description = "RAPIDS vector search library"
authors = ["NVIDIA Corporation"]
license = "Apache-2.0"

16 changes: 16 additions & 0 deletions rust/cuvs-sys/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
[package]
name = "cuvs-sys"
description = "Low-level rust bindings to libcuvs"
links = "cuvs"
version.workspace = true
edition.workspace = true
repository.workspace = true
homepage.workspace = true
authors.workspace = true
license.workspace = true

[dependencies]

[build-dependencies]
cmake = ">=0.1"
bindgen = ">=0.69"
112 changes: 112 additions & 0 deletions rust/cuvs-sys/build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

use std::env;
use std::io::BufRead;
use std::path::PathBuf;

/*
    TODO:
    * It would be nice to use already-built versions of libcuvs_c / libcuvs
      if they exist, but this might not be possible here using cmake-rs
      (https://github.com/rust-lang/cmake-rs/issues/111)
    * Figure out how this works with rust packaging: does the C++ code
      need to be in a subdirectory? If so, would a symlink work here?
      Should we be using static linking?
*/
/// Build script for the `cuvs-sys` crate.
///
/// Performs three steps:
/// 1. Builds the cuvs C-API library from `../../cpp` with cmake and emits the
///    cargo directives needed to link against `cuvs_c` and `cudart`.
/// 2. Reads `CMAKE_CXX_FLAGS` and `CMAKE_EXE_LINKER_FLAGS` from the generated
///    `CMakeCache.txt`, forwarding the `-Wl,-rpath-link` linker flags to cargo
///    and exposing the full linker flags to dependent crates.
/// 3. Runs bindgen over `cuvs_c_wrapper.h` and writes the generated bindings
///    to `$OUT_DIR/cuvs_bindings.rs`.
///
/// # Panics
///
/// Panics if `OUT_DIR` is not set, if `CMakeCache.txt` cannot be opened or
/// read, if either cmake variable is missing from the cache, or if bindgen
/// fails to generate or write the bindings.
fn main() {
    // build the cuvs c-api library with cmake, and link it into this crate
    let cuvs_build = cmake::Config::new("../../cpp")
        .configure_arg("-DBUILD_TESTS:BOOL=OFF")
        .configure_arg("-DBUILD_C_LIBRARY:BOOL=ON")
        .build();

    println!(
        "cargo:rustc-link-search=native={}/lib",
        cuvs_build.display()
    );
    println!("cargo:rustc-link-lib=dylib=cuvs_c");
    println!("cargo:rustc-link-lib=dylib=cudart");

    // we need some extra flags both to link against cuvs, and also to run bindgen
    // specifically we need to:
    //  * -I flags to set the include path to pick up cuda_runtime.h during bindgen
    //  * -rpath-link settings to link to libraft/libcuvs.so etc during the link
    // Rather than redefine the logic to set all these things, lets pick up the values from
    // the cuvs cmake build in its CMakeCache.txt and set from there
    let out_path = PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR is not set"));

    let cmake_cache: Vec<String> = std::io::BufReader::new(
        std::fs::File::open(out_path.join("build").join("CMakeCache.txt"))
            .expect("Failed to open cuvs CMakeCache.txt"),
    )
    .lines()
    .map(|x| x.expect("Couldn't parse line from CMakeCache.txt"))
    .collect();

    let cmake_cxx_flags = cmake_cache
        .iter()
        .find_map(|x| x.strip_prefix("CMAKE_CXX_FLAGS:STRING="))
        .expect("failed to find CMAKE_CXX_FLAGS in CMakeCache.txt");

    let cmake_linker_flags = cmake_cache
        .iter()
        .find_map(|x| x.strip_prefix("CMAKE_EXE_LINKER_FLAGS:STRING="))
        .expect("failed to find CMAKE_EXE_LINKER_FLAGS in CMakeCache.txt");

    // need to propagate the rpath-link settings to dependent crates =(
    // (this will get added as DEP_CUVS_CMAKE_LINKER_FLAGS in dependent crates)
    println!("cargo:cmake_linker_flags={}", cmake_linker_flags);

    // add the required rpath-link flags to the cargo build.
    // split_whitespace (rather than split(' ')) never yields empty tokens, so an
    // empty value or repeated spaces in the cmake flags are handled cleanly
    for flag in cmake_linker_flags
        .split_whitespace()
        .filter(|flag| flag.starts_with("-Wl,-rpath-link"))
    {
        println!("cargo:rustc-link-arg={}", flag);
    }

    // run bindgen to automatically create rust bindings for the cuvs c-api
    bindgen::Builder::default()
        .header("cuvs_c_wrapper.h")
        .clang_arg("-I../../cpp/include")
        // needed to find cuda_runtime.h
        // (split_whitespace avoids handing empty arguments to clang)
        .clang_args(cmake_cxx_flags.split_whitespace())
        // include dlpack from the cmake build dependencies
        .clang_arg(format!(
            "-I{}/build/_deps/dlpack-src/include/",
            out_path.display()
        ))
        // add `must_use' declarations to functions returning cuvsError_t
        // (so that if you don't check the error code a compile warning is
        // generated)
        .must_use_type("cuvsError_t")
        // Only generate bindings for cuvs/cagra types and functions
        .allowlist_type("(cuvs|cagra|DL).*")
        .allowlist_function("(cuvs|cagra).*")
        .rustified_enum("(cuvs|cagra|DL).*")
        // also need some basic cuda mem functions
        // (TODO: should we be adding in RMM support instead here?)
        .allowlist_function("(cudaMalloc|cudaFree|cudaMemcpy)")
        .rustified_enum("cudaError")
        .generate()
        .expect("Unable to generate cagra_c bindings")
        .write_to_file(out_path.join("cuvs_bindings.rs"))
        .expect("Failed to write generated rust bindings");
}
20 changes: 20 additions & 0 deletions rust/cuvs-sys/cuvs_c_wrapper.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// Wrapper header that includes all of the C APIs we should automatically be
// creating rust bindings for
#include <cuvs/core/c_api.h>
#include <cuvs/neighbors/cagra.h>
Loading
Loading