Merged
34 changes: 29 additions & 5 deletions docs/source/zero-code-change.rst
@@ -74,9 +74,12 @@ that will always be run with NVIDIA GPUs available, it may be worthwhile to
 write your code directly with cuML.
 
 Additionally, running code directly with cuML offers finer control over GPU
-memory usage. ``cuml.accel`` will automatically use `unified or managed memory <https://developer.nvidia.com/blog/unified-memory-cuda-beginners/>`_
-for allocations in order to reduce the risk of CUDA OOM errors. In
-contrast, cuML defaults to ordinary device memory, which can offer improved
+memory usage. ``cuml.accel`` will enable `unified or managed memory
+<https://developer.nvidia.com/blog/unified-memory-cuda-beginners/>`_ (provided
+the platform supports it and `rmm
+<https://docs.rapids.ai/api/rmm/stable/guide/>`_ hasn't already been configured).
+Using managed memory can help reduce the risk of CUDA out-of-memory errors.
+In contrast, cuML defaults to ordinary device memory, which can offer improved
 performance but requires slightly more care to avoid exhausting the GPU VRAM.
 If you experience unexpectedly slow performance with ``cuml.accel``, you can
 try disabling the use of unified memory with the ``--disable-uvm`` flag.
@@ -163,10 +166,31 @@ data before measuring runtime on a full-scale dataset.
 
 6. Will I run out of GPU memory if I use ``cuml.accel``?
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-``cuml.accel`` will use CUDA `managed memory <https://developer.nvidia.com/blog/unified-memory-cuda-beginners/>`_ for allocations on NVIDIA GPUs. This means that host memory can be used to augment GPU memory, and data will be migrated automatically as necessary. This does not mean that ``cuml.accel`` is entirely impervious to OOM errors, however. Very large datasets can exhaust the entirety of both host and device memory. Additionally, if device memory is heavily oversubscribed, it can lead to slow execution. ``cuml.accel`` is designed to minimize both possibilities, but if you observe OOM errors or slow execution on data that should fit in combined host plus device memory for your system, please `report it <https://github.com/rapidsai/cuml/issues/new?template=bug_report.md>`_, and the RAPIDS team will investigate.
+
+When possible, ``cuml.accel`` will enable `managed memory
+<https://developer.nvidia.com/blog/unified-memory-cuda-beginners/>`_ for
+allocations on NVIDIA GPUs. This means that host memory can be used to augment
+GPU memory, and data will be migrated automatically as necessary. This does not
+mean that ``cuml.accel`` is entirely impervious to OOM errors, however. Very
+large datasets can exhaust the entirety of both host and device memory.
+Additionally, if device memory is heavily oversubscribed, it can lead to slow
+execution. ``cuml.accel`` is designed to minimize both possibilities, but if
+you observe OOM errors or slow execution on data that should fit in combined
+host plus device memory for your system, please `report it
+<https://github.com/rapidsai/cuml/issues/new?template=bug_report.md>`_, and the
+RAPIDS team will investigate.
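
As a rough back-of-envelope check (illustrative only; the shape here is hypothetical, and real workloads also make intermediate allocations during fitting), you can estimate whether a dense dataset should fit in combined host plus device memory:

```python
import numpy as np

# Hypothetical dataset shape; adjust to your workload.
n_samples, n_features = 1_000_000, 100

# Footprint of a dense float64 feature matrix, ignoring copies and
# intermediates allocated while fitting an estimator.
bytes_needed = n_samples * n_features * np.dtype(np.float64).itemsize
gib_needed = bytes_needed / 2**30

print(f"~{gib_needed:.2f} GiB for the raw feature matrix")
```

If this estimate approaches the sum of your GPU VRAM and host RAM, expect OOM errors; if it merely exceeds VRAM, expect managed-memory migration and possibly slower execution.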
 
 .. note::
-   When running in Windows Subsystem for Linux 2 (WSL2), managed memory is not supported. Users may need to be more careful about memory management and consider using the ``--disable-uvm`` flag if experiencing memory-related issues.
+   Managed memory will not be enabled:
+
+   - When running in Windows Subsystem for Linux 2 (WSL2), where it's not
+     supported.
+   - When `rmm <https://docs.rapids.ai/api/rmm/stable/guide/>`_ is already
+     configured externally to ``cuml.accel``.
+
+   Users in these situations may need to be more cognizant about their GPU
+   memory usage to ensure they don't exceed the memory capacity of their GPU.
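
The fallback behavior described above can be sketched as a small, GPU-free decision function. All names here are hypothetical stand-ins for illustration; the real logic lives in ``cuml.accel``'s install step and operates on actual rmm memory resources:

```python
def choose_memory_resource(supports_managed, current_mr, disable_uvm=False):
    """Decide which memory resource cuml.accel should end up with.

    `current_mr` is one of "default" (plain device memory), "managed",
    or "custom" (rmm configured externally, e.g. by the user or by
    cudf.pandas before cuml.accel loaded).
    """
    if disable_uvm or not supports_managed:
        # --disable-uvm passed, or platform (e.g. WSL2) lacks support:
        # leave whatever is configured untouched.
        return current_mr
    if current_mr == "managed":
        # Already managed; nothing to do.
        return "managed"
    if current_mr != "default":
        # Respect an externally configured rmm resource.
        return current_mr
    # Plain device memory on a supported platform: enable managed memory.
    return "managed"
```

For example, ``choose_memory_resource(True, "custom")`` returns ``"custom"``, mirroring how ``cuml.accel`` leaves an rmm setup made by ``cudf.pandas`` alone.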

7. What is the relationship between ``cuml.accel`` and ``cudf.pandas``?
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Expand Down
20 changes: 17 additions & 3 deletions python/cuml/cuml/accel/core.py
@@ -86,10 +86,24 @@ def install(disable_uvm=False):
     if _is_concurrent_managed_access_supported():
         import rmm
 
-        logger.debug("cuML: Enabling managed memory...")
-        rmm.mr.set_current_device_resource(rmm.mr.ManagedMemoryResource())
+        mr = rmm.mr.get_current_device_resource()
+        if isinstance(mr, rmm.mr.ManagedMemoryResource):
+            # Nothing to do
+            pass
+        elif not isinstance(mr, rmm.mr.CudaMemoryResource):
+            logger.debug(
+                "cuML: A non-default memory resource is already configured, "
+                "skipping enabling managed memory."
+            )
+        else:
+            rmm.mr.set_current_device_resource(
+                rmm.mr.ManagedMemoryResource()
+            )
+            logger.debug("cuML: Enabled managed memory.")
     else:
-        logger.warn("cuML: Could not enable managed memory.")
+        logger.debug(
+            "cuML: Could not enable managed memory on this platform."
+        )
 
     ACCEL.install()
     set_global_output_type("numpy")
98 changes: 98 additions & 0 deletions python/cuml/cuml_accel_tests/test_magics.py
@@ -0,0 +1,98 @@
# Copyright (c) 2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import subprocess
import sys
from textwrap import dedent

import pytest

pytest.importorskip("IPython")


SCRIPT_HEADER = """
from IPython.core.interactiveshell import InteractiveShell
from traitlets.config import Config
c = Config()
c.HistoryManager.hist_file = ":memory:"
ip = InteractiveShell(config=c)
"""


def run_script(body):
script = SCRIPT_HEADER + dedent(body)

res = subprocess.run(
[sys.executable, "-c", script],
stderr=subprocess.STDOUT,
stdout=subprocess.PIPE,
text=True,
)
# Pull out attributes before assert for nicer error reporting on failure
returncode = res.returncode
stdout = res.stdout
assert returncode == 0, stdout
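
The pattern used by ``run_script`` above (run a snippet in a fresh interpreter, fold stderr into stdout, and surface the combined output in the assertion message) can be tried without IPython. ``run_snippet`` is a hypothetical stand-in, not part of the test suite:

```python
import subprocess
import sys
from textwrap import dedent

def run_snippet(body):
    """Run `body` in a fresh Python subprocess and return its output.

    Mirrors run_script: stderr is folded into stdout so a failing
    snippet's traceback shows up in the assertion message.
    """
    res = subprocess.run(
        [sys.executable, "-c", dedent(body)],
        stderr=subprocess.STDOUT,
        stdout=subprocess.PIPE,
        text=True,
    )
    assert res.returncode == 0, res.stdout
    return res.stdout

out = run_snippet("""
    print("hello from a subprocess")
""")
```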


def test_magic():
run_script(
"""
ip.run_line_magic("load_ext", "cuml.accel")

# cuml.accel proxies setup properly
ip.run_cell("from sklearn.linear_model import LinearRegression")
ip.run_cell("from cuml.accel import is_proxy")
ip.run_cell("assert is_proxy(LinearRegression)").raise_error()
"""
)


def test_magic_cudf_pandas_before():
run_script(
"""
ip.run_line_magic("load_ext", "cudf.pandas")
ip.run_cell("import rmm; mr = rmm.mr.get_current_device_resource();")

ip.run_line_magic("load_ext", "cuml.accel")
ip.run_cell("mr2 = rmm.mr.get_current_device_resource();")

# cuml doesn't change the mr setup by cudf.pandas
ip.run_cell("assert mr is mr2").raise_error()

# cuml.accel proxies setup properly
ip.run_cell("from sklearn.linear_model import LinearRegression")
ip.run_cell("from cuml.accel import is_proxy")
result = ip.run_cell("assert is_proxy(LinearRegression)").raise_error()
"""
)


def test_magic_cudf_pandas_after():
run_script(
"""
ip.run_line_magic("load_ext", "cuml.accel")
ip.run_cell("import rmm; mr = rmm.mr.get_current_device_resource();")

ip.run_line_magic("load_ext", "cudf.pandas")
ip.run_cell("mr2 = rmm.mr.get_current_device_resource();")

# cudf.pandas doesn't change the mr setup by cuml.accel
ip.run_cell("assert mr is mr2").raise_error()

# cuml.accel proxies setup properly
ip.run_cell("from sklearn.linear_model import LinearRegression")
ip.run_cell("from cuml.accel import is_proxy")
result = ip.run_cell("assert is_proxy(LinearRegression)").raise_error()
"""
)