Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use registered strings for NVTX. Add more NVTX annotations. #518

Merged
merged 7 commits into from
Oct 31, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Further changes and cleanup
  • Loading branch information
kingcrimsontianyu committed Oct 31, 2024
commit 4d01c216c2c1969024c3c56fd840babe48455510
4 changes: 2 additions & 2 deletions cpp/include/kvikio/file_handle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ class FileHandle {
}
if (sync_default_stream) { CUDA_DRIVER_TRY(cudaAPI::instance().StreamSynchronize(nullptr)); }

KVIKIO_NVTX_FUNC_RANGE("cufileRead()", size);
KVIKIO_NVTX_SCOPED_RANGE("cufileRead()", size);
ssize_t ret = cuFileAPI::instance().Read(
_handle, devPtr_base, size, convert_size2off(file_offset), convert_size2off(devPtr_offset));
CUFILE_CHECK_BYTES_DONE(ret);
Expand Down Expand Up @@ -387,7 +387,7 @@ class FileHandle {
}
if (sync_default_stream) { CUDA_DRIVER_TRY(cudaAPI::instance().StreamSynchronize(nullptr)); }

KVIKIO_NVTX_FUNC_RANGE("cufileWrite()", size);
KVIKIO_NVTX_SCOPED_RANGE("cufileWrite()", size);
ssize_t ret = cuFileAPI::instance().Write(
_handle, devPtr_base, size, convert_size2off(file_offset), convert_size2off(devPtr_offset));
if (ret == -1) {
Expand Down
8 changes: 4 additions & 4 deletions cpp/include/kvikio/posix_io.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ std::size_t posix_device_io(int fd,
template <PartialIO PartialIOStatus>
std::size_t posix_host_read(int fd, void* buf, std::size_t size, std::size_t file_offset)
{
KVIKIO_NVTX_FUNC_RANGE("posix_host_read()", size);
KVIKIO_NVTX_SCOPED_RANGE("posix_host_read()", size);
return detail::posix_host_io<IOOperationType::READ, PartialIOStatus>(
fd, buf, size, convert_size2off(file_offset));
}
Expand All @@ -233,7 +233,7 @@ std::size_t posix_host_read(int fd, void* buf, std::size_t size, std::size_t fil
template <PartialIO PartialIOStatus>
std::size_t posix_host_write(int fd, const void* buf, std::size_t size, std::size_t file_offset)
{
KVIKIO_NVTX_FUNC_RANGE("posix_host_write()", size);
KVIKIO_NVTX_SCOPED_RANGE("posix_host_write()", size);
return detail::posix_host_io<IOOperationType::WRITE, PartialIOStatus>(
fd, buf, size, convert_size2off(file_offset));
}
Expand All @@ -257,7 +257,7 @@ inline std::size_t posix_device_read(int fd,
std::size_t file_offset,
std::size_t devPtr_offset)
{
KVIKIO_NVTX_FUNC_RANGE("posix_device_read()", size);
KVIKIO_NVTX_SCOPED_RANGE("posix_device_read()", size);
return detail::posix_device_io<IOOperationType::READ>(
fd, devPtr_base, size, file_offset, devPtr_offset);
}
Expand All @@ -281,7 +281,7 @@ inline std::size_t posix_device_write(int fd,
std::size_t file_offset,
std::size_t devPtr_offset)
{
KVIKIO_NVTX_FUNC_RANGE("posix_device_write()", size);
KVIKIO_NVTX_SCOPED_RANGE("posix_device_write()", size);
return detail::posix_device_io<IOOperationType::WRITE>(
fd, devPtr_base, size, file_offset, devPtr_offset);
}
Expand Down
8 changes: 4 additions & 4 deletions cpp/include/kvikio/remote_handle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ inline std::size_t callback_host_memory(char* data,
ctx->overflow_error = true;
return CURL_WRITEFUNC_ERROR;
}
KVIKIO_NVTX_FUNC_RANGE("RemoteHandle - callback_host_memory()", nbytes);
KVIKIO_NVTX_SCOPED_RANGE("RemoteHandle - callback_host_memory()", nbytes);
std::memcpy(ctx->buf + ctx->offset, data, nbytes);
ctx->offset += nbytes;
return nbytes;
Expand All @@ -96,7 +96,7 @@ inline std::size_t callback_device_memory(char* data,
ctx->overflow_error = true;
return CURL_WRITEFUNC_ERROR;
}
KVIKIO_NVTX_FUNC_RANGE("RemoteHandle - callback_device_memory()", nbytes);
KVIKIO_NVTX_SCOPED_RANGE("RemoteHandle - callback_device_memory()", nbytes);

CUstream stream = detail::StreamsByThread::get();
CUDA_DRIVER_TRY(cudaAPI::instance().MemcpyHtoDAsync(
Expand Down Expand Up @@ -421,7 +421,7 @@ class RemoteHandle {
*/
std::size_t read(void* buf, std::size_t size, std::size_t file_offset = 0)
{
KVIKIO_NVTX_FUNC_RANGE("RemoteHandle::read()", size);
KVIKIO_NVTX_SCOPED_RANGE("RemoteHandle::read()", size);

if (file_offset + size > _nbytes) {
std::stringstream ss;
Expand Down Expand Up @@ -480,7 +480,7 @@ class RemoteHandle {
std::size_t file_offset = 0,
std::size_t task_size = defaults::task_size())
{
KVIKIO_NVTX_FUNC_RANGE("RemoteHandle::pread()", size);
KVIKIO_NVTX_SCOPED_RANGE("RemoteHandle::pread()", size);
auto task = [this](void* devPtr_base,
std::size_t size,
std::size_t file_offset,
Expand Down
45 changes: 30 additions & 15 deletions cpp/include/kvikio/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -300,16 +300,16 @@ struct libkvikio_domain {
}(msg)

// Macro overloads of KVIKIO_NVTX_FUNC_RANGE
#define KVIKIO_NVTX_FUNC_RANGE_1() NVTX3_FUNC_RANGE_IN(libkvikio_domain)
#define KVIKIO_NVTX_FUNC_RANGE_2(msg, val) \
#define KVIKIO_NVTX_FUNC_RANGE_IMPL() NVTX3_FUNC_RANGE_IN(libkvikio_domain)

#define KVIKIO_NVTX_SCOPED_RANGE_IMPL(msg, val) \
nvtx3::scoped_range_in<libkvikio_domain> KVIKIO_CONCAT(_kvikio_nvtx_range, __LINE__) \
{ \
nvtx3::event_attributes \
{ \
KVIKIO_REGISTER_STRING(msg), nvtx3::payload { convert_to_64bit(val) } \
} \
}
#define GET_KVIKIO_NVTX_FUNC_RANGE_MACRO(_1, _2, NAME, ...) NAME

#define KVIKIO_NVTX_MARKER_IMPL(msg, val) \
nvtx3::mark_in<libkvikio_domain>( \
Expand All @@ -321,32 +321,47 @@ struct libkvikio_domain {
* @brief Convenience macro for generating an NVTX range in the `libkvikio` domain
* from the lifetime of a function.
*
* Takes two arguments (message, payload) or no arguments, in which case the name
* of the immediately enclosing function returned by `__func__` is used.
* Takes no argument. The name of the immediately enclosing function returned by `__func__` is used
* as the message.
*
* Example:
* ```
* void some_function1(){
* KVIKIO_NVTX_FUNC_RANGE("my function", 42);
* ...
* }
* void some_function2(){
* KVIKIO_NVTX_FUNC_RANGE(); // The name `some_function2` is used
* void some_function(){
* KVIKIO_NVTX_FUNC_RANGE(); // The name `some_function` is used as the message
* ...
* }
* ```
*/
#ifdef KVIKIO_CUDA_FOUND
#define KVIKIO_NVTX_FUNC_RANGE(...) \
GET_KVIKIO_NVTX_FUNC_RANGE_MACRO( \
__VA_ARGS__, KVIKIO_NVTX_FUNC_RANGE_2, KVIKIO_NVTX_FUNC_RANGE_1) \
(__VA_ARGS__)
#define KVIKIO_NVTX_FUNC_RANGE() KVIKIO_NVTX_FUNC_RANGE_IMPL()
#else
#define KVIKIO_NVTX_FUNC_RANGE(...) \
do { \
} while (0)
#endif

/**
* @brief Convenience macro for generating an NVTX scoped range in the `libkvikio` domain to
* annotate a time duration.
*
* Takes two arguments (message, payload).
*
* Example:
* ```
* void some_function(){
* KVIKIO_NVTX_SCOPED_RANGE("my function", 42);
* ...
* }
* ```
*/
#ifdef KVIKIO_CUDA_FOUND
#define KVIKIO_NVTX_SCOPED_RANGE(msg, val) KVIKIO_NVTX_SCOPED_RANGE_IMPL(msg, val)
#else
#define KVIKIO_NVTX_SCOPED_RANGE(msg, val) \
do { \
} while (0)
#endif

/**
* @brief Convenience macro for generating an NVTX marker in the `libkvikio` domain to annotate a
* certain time point.
Expand Down
Loading