Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use registered strings for NVTX. Add more NVTX annotations. #518

Merged
merged 7 commits into from
Oct 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions cpp/include/kvikio/file_handle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ class FileHandle {
}
if (sync_default_stream) { CUDA_DRIVER_TRY(cudaAPI::instance().StreamSynchronize(nullptr)); }

KVIKIO_NVTX_FUNC_RANGE("cufileRead()", size);
KVIKIO_NVTX_SCOPED_RANGE("cufileRead()", size);
ssize_t ret = cuFileAPI::instance().Read(
_handle, devPtr_base, size, convert_size2off(file_offset), convert_size2off(devPtr_offset));
CUFILE_CHECK_BYTES_DONE(ret);
Expand Down Expand Up @@ -387,7 +387,7 @@ class FileHandle {
}
if (sync_default_stream) { CUDA_DRIVER_TRY(cudaAPI::instance().StreamSynchronize(nullptr)); }

KVIKIO_NVTX_FUNC_RANGE("cufileWrite()", size);
KVIKIO_NVTX_SCOPED_RANGE("cufileWrite()", size);
ssize_t ret = cuFileAPI::instance().Write(
_handle, devPtr_base, size, convert_size2off(file_offset), convert_size2off(devPtr_offset));
if (ret == -1) {
Expand Down Expand Up @@ -434,6 +434,7 @@ class FileHandle {
std::size_t gds_threshold = defaults::gds_threshold(),
bool sync_default_stream = true)
{
KVIKIO_NVTX_MARKER("FileHandle::pread()", size);
if (is_host_memory(buf)) {
auto op = [this](void* hostPtr_base,
std::size_t size,
Expand Down Expand Up @@ -510,6 +511,7 @@ class FileHandle {
std::size_t gds_threshold = defaults::gds_threshold(),
bool sync_default_stream = true)
{
KVIKIO_NVTX_MARKER("FileHandle::pwrite()", size);
if (is_host_memory(buf)) {
auto op = [this](const void* hostPtr_base,
std::size_t size,
Expand Down
8 changes: 4 additions & 4 deletions cpp/include/kvikio/posix_io.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ std::size_t posix_device_io(int fd,
// Host-memory read entry point.
//
// Forwards `fd`, `buf`, `size` and the converted `file_offset` to
// `detail::posix_host_io<IOOperationType::READ, PartialIOStatus>` and returns its result
// (presumably the number of bytes read -- confirm against `detail::posix_host_io`).
// The call is annotated with a single NVTX scoped range labelled "posix_host_read()" that carries
// `size` as its payload.
template <PartialIO PartialIOStatus>
std::size_t posix_host_read(int fd, void* buf, std::size_t size, std::size_t file_offset)
{
  // Only the scoped-range macro accepts (message, payload); KVIKIO_NVTX_FUNC_RANGE takes no
  // arguments, so the former two-argument FUNC_RANGE call must not be kept alongside this one.
  KVIKIO_NVTX_SCOPED_RANGE("posix_host_read()", size);
  return detail::posix_host_io<IOOperationType::READ, PartialIOStatus>(
    fd, buf, size, convert_size2off(file_offset));
}
Expand All @@ -233,7 +233,7 @@ std::size_t posix_host_read(int fd, void* buf, std::size_t size, std::size_t fil
// Host-memory write entry point.
//
// Forwards `fd`, `buf`, `size` and the converted `file_offset` to
// `detail::posix_host_io<IOOperationType::WRITE, PartialIOStatus>` and returns its result
// (presumably the number of bytes written -- confirm against `detail::posix_host_io`).
// The call is annotated with a single NVTX scoped range labelled "posix_host_write()" that carries
// `size` as its payload.
template <PartialIO PartialIOStatus>
std::size_t posix_host_write(int fd, const void* buf, std::size_t size, std::size_t file_offset)
{
  // Only the scoped-range macro accepts (message, payload); KVIKIO_NVTX_FUNC_RANGE takes no
  // arguments, so the former two-argument FUNC_RANGE call must not be kept alongside this one.
  KVIKIO_NVTX_SCOPED_RANGE("posix_host_write()", size);
  return detail::posix_host_io<IOOperationType::WRITE, PartialIOStatus>(
    fd, buf, size, convert_size2off(file_offset));
}
Expand All @@ -257,7 +257,7 @@ inline std::size_t posix_device_read(int fd,
std::size_t file_offset,
std::size_t devPtr_offset)
{
KVIKIO_NVTX_FUNC_RANGE("posix_device_read()", size);
KVIKIO_NVTX_SCOPED_RANGE("posix_device_read()", size);
return detail::posix_device_io<IOOperationType::READ>(
fd, devPtr_base, size, file_offset, devPtr_offset);
}
Expand All @@ -281,7 +281,7 @@ inline std::size_t posix_device_write(int fd,
std::size_t file_offset,
std::size_t devPtr_offset)
{
KVIKIO_NVTX_FUNC_RANGE("posix_device_write()", size);
KVIKIO_NVTX_SCOPED_RANGE("posix_device_write()", size);
return detail::posix_device_io<IOOperationType::WRITE>(
fd, devPtr_base, size, file_offset, devPtr_offset);
}
Expand Down
8 changes: 4 additions & 4 deletions cpp/include/kvikio/remote_handle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ inline std::size_t callback_host_memory(char* data,
ctx->overflow_error = true;
return CURL_WRITEFUNC_ERROR;
}
KVIKIO_NVTX_FUNC_RANGE("RemoteHandle - callback_host_memory()", nbytes);
KVIKIO_NVTX_SCOPED_RANGE("RemoteHandle - callback_host_memory()", nbytes);
std::memcpy(ctx->buf + ctx->offset, data, nbytes);
ctx->offset += nbytes;
return nbytes;
Expand All @@ -191,7 +191,7 @@ inline std::size_t callback_device_memory(char* data,
ctx->overflow_error = true;
return CURL_WRITEFUNC_ERROR;
}
KVIKIO_NVTX_FUNC_RANGE("RemoteHandle - callback_device_memory()", nbytes);
KVIKIO_NVTX_SCOPED_RANGE("RemoteHandle - callback_device_memory()", nbytes);

ctx->bounce_buffer->write(data, nbytes);
ctx->offset += nbytes;
Expand Down Expand Up @@ -515,7 +515,7 @@ class RemoteHandle {
*/
std::size_t read(void* buf, std::size_t size, std::size_t file_offset = 0)
{
KVIKIO_NVTX_FUNC_RANGE("RemoteHandle::read()", size);
KVIKIO_NVTX_SCOPED_RANGE("RemoteHandle::read()", size);

if (file_offset + size > _nbytes) {
std::stringstream ss;
Expand Down Expand Up @@ -578,7 +578,7 @@ class RemoteHandle {
std::size_t file_offset = 0,
std::size_t task_size = defaults::task_size())
{
KVIKIO_NVTX_FUNC_RANGE("RemoteHandle::pread()", size);
KVIKIO_NVTX_SCOPED_RANGE("RemoteHandle::pread()", size);
auto task = [this](void* devPtr_base,
std::size_t size,
std::size_t file_offset,
Expand Down
99 changes: 78 additions & 21 deletions cpp/include/kvikio/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,47 +287,104 @@ struct libkvikio_domain {
static constexpr char const* name{"libkvikio"};
};

// Token-pasting helpers. KVIKIO_CONCAT routes through KVIKIO_CONCAT_HELPER so that its arguments
// (for example __LINE__) are macro-expanded first and only then glued together by `##`.
#define KVIKIO_CONCAT_HELPER(lhs, rhs) lhs##rhs
#define KVIKIO_CONCAT(lhs, rhs)        KVIKIO_CONCAT_HELPER(lhs, rhs)

// Evaluates to a reference to a static `nvtx3::registered_string_in<libkvikio_domain>` built from
// `msg`. The static is declared inside an immediately-invoked lambda, so it never shares a name
// with another registered string declared in the same scope.
// A static is initialized only once, so `msg` should be the same on every evaluation of a given
// expansion (e.g. a string literal).
#define KVIKIO_REGISTER_STRING(msg)                                        \
  [](const char* text) -> auto& {                                          \
    static nvtx3::registered_string_in<libkvikio_domain> registered{text}; \
    return registered;                                                     \
  }(msg)

// Macro overloads of KVIKIO_NVTX_FUNC_RANGE
#define KVIKIO_NVTX_FUNC_RANGE_1() NVTX3_FUNC_RANGE_IN(libkvikio_domain)
#define KVIKIO_NVTX_FUNC_RANGE_2(msg, val) \
nvtx3::scoped_range_in<libkvikio_domain> _kvikio_nvtx_range \
{ \
nvtx3::event_attributes \
{ \
msg, nvtx3::payload { convert_to_64bit(val) } \
} \
#define KVIKIO_NVTX_FUNC_RANGE_IMPL() NVTX3_FUNC_RANGE_IN(libkvikio_domain)

// Declares a local `nvtx3::scoped_range_in<libkvikio_domain>` whose attributes are the registered
// string for `text` and `value` converted by `convert_to_64bit` as payload. The variable name has
// __LINE__ appended, so several ranges may be declared in one scope provided they sit on
// different source lines.
#define KVIKIO_NVTX_SCOPED_RANGE_IMPL(text, value)                                      \
  nvtx3::scoped_range_in<libkvikio_domain> KVIKIO_CONCAT(_kvikio_nvtx_range, __LINE__){ \
    nvtx3::event_attributes{KVIKIO_REGISTER_STRING(text),                               \
                            nvtx3::payload{convert_to_64bit(value)}}}
#define GET_KVIKIO_NVTX_FUNC_RANGE_MACRO(_1, _2, NAME, ...) NAME

// Emits an NVTX marker in the `libkvikio` domain via `nvtx3::mark_in`. The attributes are the
// registered string for `text` and `value` converted by `convert_to_64bit` as payload.
#define KVIKIO_NVTX_MARKER_IMPL(text, value)                                       \
  nvtx3::mark_in<libkvikio_domain>(nvtx3::event_attributes{                        \
    KVIKIO_REGISTER_STRING(text), nvtx3::payload{convert_to_64bit(value)}})

#endif

/**
* @brief Convenience macro for generating an NVTX range in the `libkvikio` domain
* from the lifetime of a function.
*
* Takes two arguments (message, payload) or no arguments, in which case the name
* of the immediately enclosing function returned by `__func__` is used.
* Takes no argument. The name of the immediately enclosing function returned by `__func__` is used
* as the message.
*
* Example:
* ```
* void some_function1(){
* KVIKIO_NVTX_FUNC_RANGE("my function", 42);
* ...
* }
* void some_function2(){
* KVIKIO_NVTX_FUNC_RANGE(); // The name `some_function2` is used
* void some_function(){
* KVIKIO_NVTX_FUNC_RANGE(); // The name `some_function` is used as the message
* ...
* }
* ```
*/
#ifdef KVIKIO_CUDA_FOUND
#define KVIKIO_NVTX_FUNC_RANGE(...) \
GET_KVIKIO_NVTX_FUNC_RANGE_MACRO( \
__VA_ARGS__, KVIKIO_NVTX_FUNC_RANGE_2, KVIKIO_NVTX_FUNC_RANGE_1) \
(__VA_ARGS__)
#define KVIKIO_NVTX_FUNC_RANGE() KVIKIO_NVTX_FUNC_RANGE_IMPL()
#else
#define KVIKIO_NVTX_FUNC_RANGE(...) \
do { \
} while (0)
#endif

/**
 * @brief Annotate a time duration with an NVTX scoped range in the `libkvikio` domain.
 *
 * Expects exactly two arguments: the message and the payload attached to the range.
 *
 * When `KVIKIO_CUDA_FOUND` is not defined, the macro expands to an empty statement and neither
 * argument is evaluated.
 *
 * Example:
 * ```
 * void some_function(){
 *    KVIKIO_NVTX_SCOPED_RANGE("my function", 42);
 *    ...
 * }
 * ```
 */
#ifdef KVIKIO_CUDA_FOUND
#define KVIKIO_NVTX_SCOPED_RANGE(message, payload) KVIKIO_NVTX_SCOPED_RANGE_IMPL(message, payload)
#else
#define KVIKIO_NVTX_SCOPED_RANGE(message, payload) \
  do {                                             \
  } while (0)
#endif

/**
 * @brief Convenience macro for generating an NVTX marker in the `libkvikio` domain to annotate a
 * certain time point.
 *
 * Takes two arguments (message, payload). Use this macro to annotate asynchronous I/O operations,
 * where the payload refers to the I/O size.
 *
 * When `KVIKIO_CUDA_FOUND` is not defined, the macro expands to an empty statement and neither
 * argument is evaluated.
 *
 * Example:
 * ```
 * std::future<void> some_function(){
 *     size_t io_size{2077};
 *     KVIKIO_NVTX_MARKER("I/O operation", io_size);
 *     perform_async_io_operation(io_size);
 *     ...
 * }
 * ```
 */
#ifdef KVIKIO_CUDA_FOUND
#define KVIKIO_NVTX_MARKER(message, payload) KVIKIO_NVTX_MARKER_IMPL(message, payload)
#else
#define KVIKIO_NVTX_MARKER(message, payload) \
  do {                                       \
  } while (0)
#endif

} // namespace kvikio