Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions cpp/include/cuml/fil/forest_model.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -297,9 +297,12 @@ struct forest_model {
infer_kind predict_type = infer_kind::default_kind,
std::optional<index_type> specified_chunk_size = std::nullopt)
{
// TODO(wphicks): Make sure buffer lands on same device as model
auto out_buffer = raft_proto::buffer{output, num_rows * num_outputs(), out_mem_type};
auto in_buffer = raft_proto::buffer{input, num_rows * num_features(), in_mem_type};
int current_device_id;
raft_proto::cuda_check(cudaGetDevice(&current_device_id));
Comment thread
hcho3 marked this conversation as resolved.
auto out_buffer =
raft_proto::buffer{output, num_rows * num_outputs(), out_mem_type, current_device_id};
auto in_buffer =
raft_proto::buffer{input, num_rows * num_features(), in_mem_type, current_device_id};
predict(handle, out_buffer, in_buffer, predict_type, specified_chunk_size);
}

Expand Down
48 changes: 34 additions & 14 deletions python/cuml/cuml/fil/fil.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ from cuml.fil.postprocessing cimport element_op, row_op
from cuml.fil.tree_layout cimport tree_layout as fil_tree_layout
from cuml.internals.treelite cimport *

from cuda.bindings import runtime


cdef extern from "cuml/fil/forest_model.hpp" namespace "ML::fil" nogil:
cdef cppclass forest_model:
Expand Down Expand Up @@ -154,7 +156,7 @@ cdef class ForestInference_impl():
align_bytes=0,
use_double_precision=None,
mem_type=None,
device_id=0
device_id=None,
):
# Store reference to RAFT handle to control lifetime, since raft_proto
# handle keeps a pointer to it
Expand Down Expand Up @@ -197,6 +199,14 @@ cdef class ForestInference_impl():
else:
raise RuntimeError(f"Unrecognized tree layout {layout}")

# Use assertion here, since device_id being None would indicate
# a bug, not a user error. The outer ForestInference object
# should set an integer device_id before passing it to
# ForestInference_impl.
assert device_id is not None, (
"device_id should be set before building ForestInference_impl"
)

self.model = import_from_treelite_handle(
tl_handle,
tree_layout,
Expand Down Expand Up @@ -457,9 +467,10 @@ class ForestInference(Base, CMajorInputTagMixin):
only for models trained in double precision and when exact
conformance between results from FIL and the original training
framework is of paramount importance.
device_id : int, default=0
device_id : int or None, default=None
For GPU execution, the device on which to load and execute this
model. For CPU execution, this value is currently ignored.
model. If set to None, use the currently active device.
For CPU execution, this value is currently ignored.
"""

def _reload_model(self):
Expand Down Expand Up @@ -553,7 +564,7 @@ class ForestInference(Base, CMajorInputTagMixin):
try:
return self._device_id_
except AttributeError:
self._device_id_ = 0
self._device_id_ = None
return self._device_id_

@device_id.setter
Expand All @@ -562,14 +573,13 @@ class ForestInference(Base, CMajorInputTagMixin):
old_value = self.device_id
except AttributeError:
old_value = None
if value is not None:
self._device_id_ = value
if (
self.treelite_model is not None
and self.device_id != old_value
and hasattr(self, '_gpu_forest')
):
self._load_to_fil(device_id=self.device_id)
self._device_id_ = value
if (
self.treelite_model is not None
and self.device_id != old_value
and hasattr(self, '_gpu_forest')
):
self._load_to_fil(device_id=self.device_id)

@property
def treelite_model(self):
Expand Down Expand Up @@ -616,7 +626,7 @@ class ForestInference(Base, CMajorInputTagMixin):
default_chunk_size=None,
align_bytes=None,
precision='single',
device_id=0,
device_id=None,
):
super().__init__(
handle=handle, verbose=verbose, output_type=output_type
Expand All @@ -633,12 +643,22 @@ class ForestInference(Base, CMajorInputTagMixin):
self.treelite_model = treelite_model
self._load_to_fil(device_id=self.device_id)

def _load_to_fil(self, mem_type=None, device_id=0):
def _load_to_fil(self, mem_type=None, device_id=None):
if mem_type is None:
mem_type = GlobalSettings().fil_memory_type
else:
mem_type = MemoryType.from_str(mem_type)

if device_id is None:
# If no device ID is explicitly given, use the currently
# active device
status, current_device_id = runtime.cudaGetDevice()
if status != runtime.cudaError_t.cudaSuccess:
_, name = runtime.cudaGetErrorName(status)
_, msg = runtime.cudaGetErrorString(status)
raise RuntimeError(f"Failed to run cudaGetDevice(). {name}: {msg}")
device_id = current_device_id

if mem_type.is_device_accessible:
self.device_id = device_id

Expand Down
99 changes: 99 additions & 0 deletions python/cuml/cuml/tests/test_fil.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@
#

import os
from contextlib import nullcontext
from math import ceil

import cupy as cp
import numpy as np
import pandas as pd
import pytest
Expand All @@ -37,6 +39,9 @@
from sklearn.model_selection import train_test_split # noqa: E402

from cuml import ForestInference # noqa: E402
from cuml.ensemble import ( # noqa: E402
RandomForestClassifier as cumlRandomForestClassifier,
)
from cuml.fil import get_fil_device_type, set_fil_device_type # noqa: E402
from cuml.internals.device_type import DeviceType # noqa: E402
from cuml.internals.global_settings import GlobalSettings # noqa: E402
Expand Down Expand Up @@ -899,6 +904,100 @@ def test_missing_categorical(category_list):
np.testing.assert_equal(fil_preds.flatten(), gtil_preds.flatten())


@pytest.mark.parametrize("device_id", [None, 0, 1, 2])
@pytest.mark.parametrize("model_kind", ["sklearn", "xgboost", "cuml"])
def test_device_selection(device_id, model_kind, tmp_path):
    """FIL loads and runs on the requested GPU (or the active one when None).

    Verifies, for each supported model source, that:
    * the model can be loaded with an explicit or default ``device_id``,
    * loading and inference never clobber the caller's device context,
    * the resolved device is reflected in ``fm.device_id``,
    * inference with input on a *different* device raises a clear error.
    """
    current_device = cp.cuda.runtime.getDevice()

    if device_id is not None and device_id >= cp.cuda.runtime.getDeviceCount():
        pytest.skip(
            reason="device_id larger than the number of available GPU devices"
        )

    # Context manager selecting the target device; a no-op when device_id is
    # None (model should then land on the currently active device).
    # NOTE: test `is not None`, not truthiness — device 0 is a valid choice.
    def target_device_ctx():
        if device_id is not None:
            return cp.cuda.Device(device_id)
        return nullcontext()

    n_rows = 1000
    n_columns = 30
    n_classes = 3
    n_estimators = 10

    X, y = simulate_data(
        n_rows,
        n_columns,
        n_classes,
        random_state=0,
        classification=True,
    )

    # 1. Model can be loaded with device_id set
    if model_kind == "sklearn":
        skl_model = RandomForestClassifier(
            max_depth=3, random_state=0, n_estimators=n_estimators
        )
        skl_model.fit(X, y)
        fm = ForestInference.load_from_sklearn(
            skl_model,
            precision="native",
            is_classifier=True,
            device_id=device_id,
        )
    elif model_kind == "xgboost":
        xgb_model = xgb.XGBClassifier(
            max_depth=3, random_state=0, n_estimators=n_estimators
        )
        xgb_model.fit(X, y)
        model_path = os.path.join(tmp_path, "xgb_class.ubj")
        xgb_model.save_model(model_path)
        fm = ForestInference.load(
            model_path,
            model_type="xgboost_ubj",
            precision="native",
            is_classifier=True,
            device_id=device_id,
        )
    elif model_kind == "cuml":
        with target_device_ctx():
            # TODO(hcho3): Remove n_streams=1 argument once the bug
            # https://github.com/rapidsai/cuml/issues/5983 is resolved
            cuml_model = cumlRandomForestClassifier(
                max_depth=3,
                random_state=0,
                n_estimators=n_estimators,
                n_streams=1,
            )
            cuml_model.fit(cp.array(X), cp.array(y))
            fm = cuml_model.convert_to_fil_model()
    else:
        raise NotImplementedError(f"Unknown model_kind: {model_kind}")

    # 2. The section above didn't corrupt current device context
    assert cp.cuda.runtime.getDevice() == current_device

    # 3. Device selection is correctly saved to device_id property.
    # When device_id is None, the model falls back to the device that was
    # active at load time, not unconditionally to device 0.
    expected_device = device_id if device_id is not None else current_device
    assert fm.device_id == expected_device

    # 4. Inference can run on an input with the selected device
    with target_device_ctx():
        _ = fm.predict_proba(cp.array(X))

    # 5. The section above didn't corrupt current device context
    assert cp.cuda.runtime.getDevice() == current_device

    # 6. Attempting to run inference with an input from a different device
    # is an error (device 0 is guaranteed to differ from the model's device
    # here because we only enter this branch when device_id != 0)
    if device_id is not None and device_id != 0:
        with cp.cuda.Device(0), pytest.raises(
            RuntimeError, match=r".*I/O data on different device than model.*"
        ):
            _ = fm.predict_proba(cp.array(X))

    # 7. The section above didn't corrupt current device context
    assert cp.cuda.runtime.getDevice() == current_device

def test_wide_data():
n_rows = 50
n_features = 100000
Expand Down