Skip to content

Commit

Permalink
Remote IO: http support (rapidsai#464)
Browse files Browse the repository at this point in the history
Support reading directly from an HTTP server, for example:
```python
import kvikio
import cupy

with kvikio.RemoteFile.from_http_url("http://127.0.0.1:9000/myfile") as f:
    ary = cupy.empty(f.nbytes, dtype="uint8")
    f.read(ary)
```

This PR is the first step toward supporting S3 through libcurl instead of [aws-s3-sdk](rapidsai#426). This approach has some pros and cons:

* Pros 
    * The [global conda pinning issue](rapidsai#426 (comment)) is less of a problem.
    * We can support other protocols, such as HTTP, FTP, and Azure's storage, without much work.
    * We avoid the [free-after-main issue in aws-s3-sdk](https://github.com/rapidsai/kvikio/blob/000126516db430988ab9af5ee1576ca3fe6afe27/cpp/include/kvikio/remote_handle.hpp#L87-L94). This is huge since we would otherwise have to pass around an `S3Context` in libcudf and cudf to handle shutdown correctly. This is not a problem in libcurl; see https://curl.se/libcurl/c/libcurl.html under `Global constants`.
* Cons 
    * It is hard to support the AWS configuration file. We will require the user to specify the options either programmatically or through environment variables such as `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`.

Authors:
  - Mads R. B. Kristensen (https://github.com/madsbk)

Approvers:
  - Kyle Edwards (https://github.com/KyleFromNVIDIA)
  - Lawrence Mitchell (https://github.com/wence-)

URL: rapidsai#464
  • Loading branch information
madsbk authored Oct 8, 2024
1 parent e64c363 commit dc536af
Show file tree
Hide file tree
Showing 25 changed files with 1,305 additions and 2 deletions.
2 changes: 2 additions & 0 deletions conda/environments/all_cuda-118_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ dependencies:
- dask>=2022.05.2
- doxygen=1.9.1
- gcc_linux-aarch64=11.*
- libcurl>=7.87.0
- ninja
- numcodecs !=0.12.0
- numpy>=1.23,<3.0a0
Expand All @@ -28,6 +29,7 @@ dependencies:
- pytest
- pytest-cov
- python>=3.10,<3.13
- rangehttpserver
- rapids-build-backend>=0.3.0,<0.4.0.dev0
- scikit-build-core>=0.10.0
- sphinx
Expand Down
2 changes: 2 additions & 0 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ dependencies:
- gcc_linux-64=11.*
- libcufile-dev=1.4.0.31
- libcufile=1.4.0.31
- libcurl>=7.87.0
- ninja
- numcodecs !=0.12.0
- numpy>=1.23,<3.0a0
Expand All @@ -30,6 +31,7 @@ dependencies:
- pytest
- pytest-cov
- python>=3.10,<3.13
- rangehttpserver
- rapids-build-backend>=0.3.0,<0.4.0.dev0
- scikit-build-core>=0.10.0
- sphinx
Expand Down
2 changes: 2 additions & 0 deletions conda/environments/all_cuda-125_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ dependencies:
- doxygen=1.9.1
- gcc_linux-aarch64=11.*
- libcufile-dev
- libcurl>=7.87.0
- ninja
- numcodecs !=0.12.0
- numpy>=1.23,<3.0a0
Expand All @@ -28,6 +29,7 @@ dependencies:
- pytest
- pytest-cov
- python>=3.10,<3.13
- rangehttpserver
- rapids-build-backend>=0.3.0,<0.4.0.dev0
- scikit-build-core>=0.10.0
- sphinx
Expand Down
2 changes: 2 additions & 0 deletions conda/environments/all_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ dependencies:
- doxygen=1.9.1
- gcc_linux-64=11.*
- libcufile-dev
- libcurl>=7.87.0
- ninja
- numcodecs !=0.12.0
- numpy>=1.23,<3.0a0
Expand All @@ -28,6 +29,7 @@ dependencies:
- pytest
- pytest-cov
- python>=3.10,<3.13
- rangehttpserver
- rapids-build-backend>=0.3.0,<0.4.0.dev0
- scikit-build-core>=0.10.0
- sphinx
Expand Down
1 change: 1 addition & 0 deletions conda/recipes/kvikio/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ requirements:
- rapids-build-backend >=0.3.0,<0.4.0.dev0
- scikit-build-core >=0.10.0
- libkvikio ={{ version }}
- libcurl==7.87.0
run:
- python
- numpy >=1.23,<3.0a0
Expand Down
2 changes: 2 additions & 0 deletions conda/recipes/libkvikio/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ requirements:
{% else %}
- libcufile-dev # [linux]
{% endif %}
- libcurl==7.87.0

outputs:
- name: libkvikio
Expand All @@ -74,6 +75,7 @@ outputs:
- cmake {{ cmake_version }}
host:
- cuda-version ={{ cuda_version }}
- libcurl==7.87.0
run:
- {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
{% if cuda_major == "11" %}
Expand Down
15 changes: 15 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ rapids_cmake_write_version_file(include/kvikio/version_config.hpp)
rapids_cmake_build_type(Release)

# build options
option(KvikIO_REMOTE_SUPPORT "Configure CMake to build with remote IO support" ON)
option(KvikIO_BUILD_EXAMPLES "Configure CMake to build examples" ON)
option(KvikIO_BUILD_TESTS "Configure CMake to build tests" ON)

Expand All @@ -50,6 +51,10 @@ rapids_find_package(
INSTALL_EXPORT_SET kvikio-exports
)

# Only pull in libcurl (through cmake/thirdparty/get_libcurl.cmake) when the KvikIO_REMOTE_SUPPORT
# option is enabled.
if(KvikIO_REMOTE_SUPPORT)
include(cmake/thirdparty/get_libcurl.cmake)
endif()

rapids_find_package(
CUDAToolkit
BUILD_EXPORT_SET kvikio-exports
Expand Down Expand Up @@ -138,6 +143,10 @@ target_link_libraries(
kvikio INTERFACE Threads::Threads BS::thread_pool ${CMAKE_DL_LIBS}
$<TARGET_NAME_IF_EXISTS:nvtx3::nvtx3-cpp>
)
# When a CURL::libcurl target exists, link it into the kvikio interface target and define
# KVIKIO_LIBCURL_FOUND. Both are wrapped in $<BUILD_LOCAL_INTERFACE:...>, so they only apply to
# targets consuming kvikio from within this same buildsystem and are not exported.
if(TARGET CURL::libcurl)
target_link_libraries(kvikio INTERFACE $<BUILD_LOCAL_INTERFACE:CURL::libcurl>)
target_compile_definitions(kvikio INTERFACE $<BUILD_LOCAL_INTERFACE:KVIKIO_LIBCURL_FOUND>)
endif()
target_compile_features(kvikio INTERFACE cxx_std_17)

# optionally build examples
Expand Down Expand Up @@ -231,6 +240,12 @@ if(NOT already_set_kvikio)
target_compile_definitions(kvikio::kvikio INTERFACE KVIKIO_CUFILE_STREAM_API_FOUND)
endif()
endif()

if(TARGET CURL::libcurl)
target_link_libraries(kvikio::kvikio INTERFACE CURL::libcurl)
target_compile_definitions(kvikio::kvikio INTERFACE KVIKIO_LIBCURL_FOUND)
endif()

endif()
]=]
)
Expand Down
32 changes: 32 additions & 0 deletions cpp/cmake/thirdparty/get_libcurl.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# =============================================================================
# Copyright (c) 2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under
# the License.
# =============================================================================

# Locate libcurl through the rapids-cmake CPM wrapper and register it with the kvikio export sets.
#
# Takes no arguments. Requests CURL 7.87.0; the CPM_ARGS section names the upstream git
# repository and tag, plus the build options handed to curl's own CMake project.
function(find_and_configure_libcurl)
  include(${rapids-cmake-dir}/cpm/find.cmake)

  # Each entry is a single "NAME VALUE" argument; the list is expanded unquoted below so every
  # pair is forwarded as its own argument after OPTIONS.
  set(kvikio_curl_options
      "BUILD_CURL_EXE OFF"
      "BUILD_SHARED_LIBS OFF"
      "BUILD_TESTING OFF"
      "CURL_USE_LIBPSL OFF"
      "CURL_DISABLE_LDAP ON"
      "CMAKE_POSITION_INDEPENDENT_CODE ON"
  )

  rapids_cpm_find(
    CURL 7.87.0
    GLOBAL_TARGETS libcurl
    BUILD_EXPORT_SET kvikio-exports
    INSTALL_EXPORT_SET kvikio-exports
    CPM_ARGS
    GIT_REPOSITORY https://github.com/curl/curl
    GIT_TAG curl-7_87_0
    OPTIONS ${kvikio_curl_options}
  )
endfunction()

# Run the lookup as soon as this file is included.
find_and_configure_libcurl()
Loading

0 comments on commit dc536af

Please sign in to comment.