Skip to content

Commit

Permalink
Merge branch 'branch-25.02' into perf-batched-memcpy-orc-stats
Browse files: browse the repository at this point in the history
  • Loading branch information
vuule authored Dec 11, 2024
2 parents 06b8276 + 3801e74 commit 6fe45b8
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 10 deletions.
2 changes: 1 addition & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ rapids_cpm_init()
# Not using rapids-cmake since we never want to find, always download.
CPMAddPackage(
NAME rapids_logger GITHUB_REPOSITORY rapidsai/rapids-logger GIT_SHALLOW TRUE GIT_TAG
14bb233d2420f7187a690f0bb528ec0420c70d48
c510947ae9d3a67530cfe3e5eaccb5a3b8ea0e55 VERSION c510947ae9d3a67530cfe3e5eaccb5a3b8ea0e55
)
rapids_make_logger(cudf EXPORT_SET cudf-exports)

Expand Down
11 changes: 5 additions & 6 deletions cpp/include/cudf/detail/get_value.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -17,6 +17,7 @@
#pragma once

#include <cudf/column/column_view.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/traits.hpp>
#include <cudf/utilities/type_dispatcher.hpp>
Expand Down Expand Up @@ -48,11 +49,9 @@ T get_value(column_view const& col_view, size_type element_index, rmm::cuda_stre
CUDF_EXPECTS(data_type(type_to_id<T>()) == col_view.type(), "get_value data type mismatch");
CUDF_EXPECTS(element_index >= 0 && element_index < col_view.size(),
"invalid element_index value");
T result;
CUDF_CUDA_TRY(cudaMemcpyAsync(
&result, col_view.data<T>() + element_index, sizeof(T), cudaMemcpyDefault, stream.value()));
stream.synchronize();
return result;
return cudf::detail::make_host_vector_sync(
device_span<T const>{col_view.data<T>() + element_index, 1}, stream)
.front();
}

} // namespace detail
Expand Down
10 changes: 7 additions & 3 deletions cpp/include/cudf/table/table_device_view.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
#pragma once

#include <cudf/column/column_device_view.cuh>
#include <cudf/detail/utilities/cuda_memcpy.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/default_stream.hpp>
Expand Down Expand Up @@ -251,7 +253,7 @@ auto contiguous_copy_column_device_views(HostTableView source_view, rmm::cuda_st
// A buffer of CPU memory is allocated to hold the ColumnDeviceView
// objects. Once filled, the CPU memory is then copied to device memory
// and the pointer is set in the d_columns member.
std::vector<int8_t> h_buffer(padded_views_size_bytes);
auto h_buffer = cudf::detail::make_host_vector<int8_t>(padded_views_size_bytes, stream);
// Each ColumnDeviceView instance may have child objects which may
// require setting some internal device pointers before being copied
// from CPU to device.
Expand All @@ -266,8 +268,10 @@ auto contiguous_copy_column_device_views(HostTableView source_view, rmm::cuda_st
auto d_columns = detail::child_columns_to_device_array<ColumnDeviceView>(
source_view.begin(), source_view.end(), h_ptr, d_ptr);

CUDF_CUDA_TRY(cudaMemcpyAsync(d_ptr, h_ptr, views_size_bytes, cudaMemcpyDefault, stream.value()));
stream.synchronize();
auto const h_span = host_span<int8_t const>{h_buffer}.subspan(
static_cast<int8_t const*>(h_ptr) - h_buffer.data(), views_size_bytes);
auto const d_span = device_span<int8_t>{static_cast<int8_t*>(d_ptr), views_size_bytes};
cudf::detail::cuda_memcpy(d_span, h_span, stream);
return std::make_tuple(std::move(descendant_storage), d_columns);
}

Expand Down

0 comments on commit 6fe45b8

Please sign in to comment.