Skip to content

Commit 9b12c6e

Browse files
authored
Add CUPTI Python API (#479)
Authors:
- Peter Andreas Entschev (https://github.com/pentschev)

Approvers:
- Mads R. B. Kristensen (https://github.com/madsbk)
- Kyle Edwards (https://github.com/KyleFromNVIDIA)

URL: #479
1 parent bc5f3a0 commit 9b12c6e

9 files changed

Lines changed: 1115 additions & 2 deletions

File tree

conda/recipes/rapidsmpf/recipe.yaml

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -58,10 +58,12 @@ requirements:
5858
- ${{ compiler("c") }}
5959
- ${{ compiler("cxx") }}
6060
- ${{ compiler("cuda") }}
61+
- cuda-cupti-dev
6162
- cuda-version =${{ cuda_version }}
6263
- ${{ stdlib("c") }}
6364
host:
6465
- cuda-cudart-dev
66+
- cuda-cupti-dev
6567
- cuda-version =${{ cuda_version }}
6668
- cython >=3.0.0
6769
- librapidsmpf =${{ version }}
@@ -74,10 +76,11 @@ requirements:
7476
- rmm =${{ minor_version }}
7577
- scikit-build-core >=0.10.0
7678
run:
77-
- ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }}
79+
- cuda-cupti
7880
- if: cuda_major == "12"
7981
then: cuda-python >=12.9.2,<13.0a0
8082
else: cuda-python >=13.0.1,<14.0a0
83+
- ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }}
8184
- cudf =${{ minor_version }}
8285
- cupy >=13.6.0
8386
- librapidsmpf =${{ version }}
@@ -98,6 +101,7 @@ requirements:
98101
- ${{ stdlib("c") }}
99102
by_name:
100103
- cuda-cudart
104+
- cuda-cupti
101105
- cuda-version
102106
- openmpi
103107
- mpi4py

cpp/examples/example_cupti_monitor.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ int main() {
5858
// Allocate device memory using rmm::device_buffer
5959
rmm::device_buffer buf(allocation_size, rmm::cuda_stream_default);
6060
device_buffers.push_back(std::move(buf));
61-
} catch (rmm::bad_alloc& e) {
61+
} catch (rmm::bad_alloc const& e) {
6262
std::cerr << "rmm::device_buffer allocation failed: " << e.what()
6363
<< std::endl;
6464
break;

python/rapidsmpf/rapidsmpf/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@ set(cython_modules config.pyx cuda_stream.pyx progress_thread.pyx rmm_resource_a
77
shuffler.pyx statistics.pyx
88
)
99

10+
# Add cupti module conditionally if CUPTI support is enabled
11+
if(RAPIDSMPF_HAVE_CUPTI)
12+
list(APPEND cython_modules cupti.pyx)
13+
endif()
14+
1015
rapids_cython_create_modules(
1116
CXX
1217
SOURCE_FILES "${cython_modules}"
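(File name missing from this capture; the content is a Cython declaration file, presumably python/rapidsmpf/rapidsmpf/cupti.pxd — confirm against the commit.)

Lines changed: 56 additions & 0 deletions
Original file line number | Diff line number | Diff line change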
@@ -0,0 +1,56 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
from libc.stddef cimport size_t
5+
from libcpp cimport bool as bool_t
6+
from libcpp.memory cimport unique_ptr
7+
from libcpp.string cimport string
8+
from libcpp.unordered_map cimport unordered_map
9+
from libcpp.vector cimport vector
10+
11+
12+
cdef extern from "<chrono>" namespace "std::chrono" nogil:
13+
cdef cppclass milliseconds:
14+
milliseconds(long long) except +
15+
16+
cdef extern from "<cupti.h>" nogil:
17+
ctypedef enum CUpti_CallbackId:
18+
pass
19+
20+
21+
cdef extern from "<rapidsmpf/cupti.hpp>" nogil:
22+
cdef struct cpp_MemoryDataPoint "rapidsmpf::MemoryDataPoint":
23+
double timestamp
24+
size_t free_memory
25+
size_t total_memory
26+
size_t used_memory
27+
28+
cdef cppclass cpp_CuptiMonitor "rapidsmpf::CuptiMonitor":
29+
cpp_CuptiMonitor(
30+
bool_t enable_periodic_sampling,
31+
milliseconds sampling_interval_ms
32+
) except +
33+
void start_monitoring() except +
34+
void stop_monitoring() except +
35+
bool_t is_monitoring() except +
36+
void capture_memory_sample() except +
37+
const vector[cpp_MemoryDataPoint]& get_memory_samples() except +
38+
void clear_samples() except +
39+
size_t get_sample_count() except +
40+
void write_csv(const string& filename) except +
41+
void set_debug_output(bool_t enabled, size_t threshold_mb) except +
42+
unordered_map[CUpti_CallbackId, size_t] get_callback_counters() except +
43+
void clear_callback_counters() except +
44+
size_t get_total_callback_count() except +
45+
string get_callback_summary() except +
46+
47+
48+
cdef class MemoryDataPoint:
49+
cdef cpp_MemoryDataPoint _data
50+
51+
@staticmethod
52+
cdef MemoryDataPoint from_cpp(cpp_MemoryDataPoint data)
53+
54+
55+
cdef class CuptiMonitor:
56+
cdef unique_ptr[cpp_CuptiMonitor] _handle
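(File name missing from this capture; the content is a Python type-stub file, presumably python/rapidsmpf/rapidsmpf/cupti.pyi — confirm against the commit.)

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change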
@@ -0,0 +1,33 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
from __future__ import annotations
5+
6+
class MemoryDataPoint:
7+
@property
8+
def timestamp(self) -> float: ...
9+
@property
10+
def free_memory(self) -> int: ...
11+
@property
12+
def total_memory(self) -> int: ...
13+
@property
14+
def used_memory(self) -> int: ...
15+
def __repr__(self) -> str: ...
16+
17+
class CuptiMonitor:
18+
def __init__(
19+
self, enable_periodic_sampling: bool = ..., sampling_interval_ms: int = ...
20+
) -> None: ...
21+
def start_monitoring(self) -> None: ...
22+
def stop_monitoring(self) -> None: ...
23+
def is_monitoring(self) -> bool: ...
24+
def capture_memory_sample(self) -> None: ...
25+
def get_memory_samples(self) -> list[MemoryDataPoint]: ...
26+
def clear_samples(self) -> None: ...
27+
def get_sample_count(self) -> int: ...
28+
def write_csv(self, filename: str) -> None: ...
29+
def set_debug_output(self, enabled: bool, threshold_mb: int = ...) -> None: ...
30+
def get_callback_counters(self) -> dict[int, int]: ...
31+
def clear_callback_counters(self) -> None: ...
32+
def get_total_callback_count(self) -> int: ...
33+
def get_callback_summary(self) -> str: ...

0 commit comments

Comments
 (0)